Active_prgs/Redgrave/PrivilegeLogGenerator.py

"""

PrivilegeLogGenerator

Created by:
Emanuel Borges
05.21.2025

This program takes a DAT export from Relativity and turns it into an Excel spreadsheet privilege log.

"""

import csv
from win32com.client import Dispatch


class ConcordanceLoader:
    """Load a delimited DAT export and turn it into reports or an Excel sheet.

    The file is parsed with the ``csv`` module using ASCII 20 as the field
    delimiter and ASCII 254 as the quote character.  The first parsed row is
    treated as the header row; every following row is data.
    """

    def __init__(self, filePath):
        """Remember the input path; nothing is read until ``load()`` is called."""
        self.filePath = filePath
        self.delimiter = '\x14'  # ASCII 20
        self.quotechar = '\xfe'  # ASCII 254
        self.records = []

    def load(self):
        """Parse the whole file into ``self.records`` (a list of row lists)."""
        # 'utf-8-sig' decodes exactly like 'utf-8' but drops a leading BOM when
        # one is present.  With plain 'utf-8' the BOM stays glued to the first
        # header, so looking that column up by name fails.
        with open(self.filePath, 'r', encoding='utf-8-sig', newline='') as file:
            reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
            self.records = list(reader)

    def get_headers(self):
        """Return the header row, or an empty list when nothing is loaded."""
        return self.records[0] if self.records else []

    def get_data(self):
        """Return every row after the header (empty list when there is none)."""
        return self.records[1:]

    def get_all(self):
        """Return all loaded rows, header included."""
        return self.records

    @staticmethod
    def _text_problems(text):
        """Return the list of problem labels that apply to one field value."""
        problems = []

        if "  " in text:
            problems.append("double spaces")
        if ".." in text:
            problems.append("double periods")
        if ",," in text:
            problems.append("double commas")

        stripped = text.strip()
        if stripped != text:
            problems.append("leading/trailing whitespace")

        # The remaining checks only make sense when there is visible text.
        if stripped:
            if stripped[0].islower():
                # NOTE(review): label text is kept verbatim (spelling included)
                # because it is written to the report output.
                problems.append("sentance startes with lowercase letter")
            if stripped[-1] not in ('.', '?', '!'):
                problems.append("missing final punctuation")

        return problems

    def generate_grammar_check_report(self, report_on_field: str, field_to_check: str) -> list:
        """Run simple punctuation/whitespace checks over one column.

        Args:
            report_on_field: Header name of the column whose value identifies
                the row in the report (stored under ``'document'``).
            field_to_check: Header name of the column whose text is checked.

        Returns:
            One dict per row that has at least one problem, with the keys
            ``'document'``, ``'problems'`` (list of labels), ``'row'``
            (1-based row number counting the header as row 1) and ``'text'``.

        Raises:
            ValueError: If no data is loaded or a column name is not a header.
        """
        if not self.records:
            raise ValueError("No data loaded.  Run .load() first.")

        headers = self.get_headers()
        try:
            report_idx = headers.index(report_on_field)
            check_idx = headers.index(field_to_check)
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

        issues = []

        for row_num, row in enumerate(self.get_data(), start=2):  # Use Excel-style row numbers
            # Short rows are treated as having empty values for missing columns.
            report_val = row[report_idx] if report_idx < len(row) else ''
            check_val = row[check_idx] if check_idx < len(row) else ''

            row_issues = self._text_problems(check_val)
            if row_issues:
                issues.append({
                    'document': report_val,
                    'problems': row_issues,
                    'row': row_num,
                    'text': check_val
                })

        return issues

    def _resolve_highlighting(self, cell_highlighting):
        """Validate ``cell_highlighting`` and return (id_col, target_col, ids).

        Both column numbers are 1-based, ready for ``ws.Cells``.
        """
        if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
            raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")

        document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting

        headers = self.get_headers()
        try:
            doc_id_col_idx = headers.index(document_id_column_name) + 1  #  1-based for Excel
            highlight_col_idx = headers.index(highlight_column_name) + 1
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

        # Build a set once so the per-row membership test is not a list scan.
        # Only lists/tuples are converted; anything else keeps its own `in`.
        doc_ids = doc_ids_list
        if isinstance(doc_ids_list, (list, tuple)):
            try:
                doc_ids = frozenset(doc_ids_list)
            except TypeError:
                doc_ids = doc_ids_list  # unhashable entries: fall back to the sequence

        return doc_id_col_idx, highlight_col_idx, doc_ids

    def export_to_excel(self, outputPath, visible=False, cell_highlighting = False):
        """Write all loaded rows to a new workbook via Excel COM and save it.

        Args:
            outputPath: Path handed to ``Workbook.SaveAs``.
            visible: Value assigned to the Excel application's ``Visible``.
            cell_highlighting: Falsy for no highlighting, otherwise a 3-tuple
                ``(document_id_column_name, highlight_column_name,
                doc_ids_list)``.  For every data row whose document-id column
                value is in ``doc_ids_list``, the cell in the highlight column
                gets a fill colour.

        Raises:
            ValueError: If no data is loaded, ``cell_highlighting`` is not a
                3-tuple, or one of its column names is not a header.
        """
        if not self.records:
            raise ValueError("No data loaded.  Run .load() first.")

        # Validate before Excel is started so a bad argument cannot leave an
        # unsaved workbook open in a (possibly hidden) Excel instance.
        highlight = self._resolve_highlighting(cell_highlighting) if cell_highlighting else None

        xlApp = Dispatch('Excel.Application')
        xlApp.Visible = visible
        wb = xlApp.Workbooks.Add()
        try:
            ws = wb.Worksheets(1)

            ##  Write data
            for row_idx, row in enumerate(self.records, start=1):
                for col_idx, value in enumerate(row, start=1):
                    ws.Cells(row_idx, col_idx).Value = value

            ##  Optional cell highlighting
            if highlight is not None:
                doc_id_col_idx, highlight_col_idx, doc_ids = highlight
                for row_idx, row in enumerate(self.get_data(), start=2):  #  Skip header
                    if doc_id_col_idx <= len(row) and row[doc_id_col_idx - 1] in doc_ids:
                        # Other colours tried: 5296274 (light green), 15122175 (light purple)
                        ws.Cells(row_idx, highlight_col_idx).Interior.Color = 13148390  #  Medium purple

            wb.SaveAs(outputPath)
        finally:
            # Always release the workbook, including when writing or saving failed.
            wb.Close(SaveChanges = False)
        # NOTE(review): xlApp.Quit() is left disabled as in the original;
        # presumably so an Excel session the user already has open is not
        # closed - confirm.
        #xlApp.Quit()


if __name__ == '__main__':
    # Script entry point: load the DAT export, write a grammar report for the
    # listed docIDs, then export everything to Excel with those docIDs'
    # description cells highlighted.

    ##  Full path to the input file
    #inputFilePath = r"C:\Test_Dir\ATT\export_20250523_214343.dat"
    #inputFilePath = r"C:\Test_Dir\ATT\FCC_Manual_Log\export_20250701_223436.dat"
    inputFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430.dat"

    ##  Full path to the output xlsx file
    outputFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430.xlsx"

    ##  Grammar report path, if you use one.
    outputReportFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430-GrammarReport.txt"

    ##  Optional path to a file of docIDs (one per line); a specific cell is highlighted when a row's docID is on this list
    #highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"
    highlightFilePath = r"C:\Test_Dir\ATT\ShinyPrivLogHighlighter2.csv"

    ##  Name of the column that holds the docID and of the column that is checked / highlighted
    documentIdField = "REVIEWID"
    #descriptionField = "MA_PrivCustomValue::DOJ Privilege Description"
    descriptionField = "DOJ CID Final Privilege Description"

    # Read with the platform default encoding, as before.  Blank lines are
    # skipped so they cannot match rows whose docID is empty.
    with open(highlightFilePath) as highlightFile:
        highlighterDocumentList = [
            line.rstrip("\n") for line in highlightFile if line.rstrip("\n")
        ]
    # Set copy for the report filter below; the list itself is still what gets
    # passed to export_to_excel.
    highlighterDocumentSet = set(highlighterDocumentList)

    loader = ConcordanceLoader(inputFilePath)
    loader.load()

    print("Headers", loader.get_headers())
    #print("First row:", loader.get_data()[0])

    report = loader.generate_grammar_check_report(documentIdField, descriptionField)

    # One pipe-delimited line per flagged document that is also on the docID list.
    # The context manager closes the report even if writing fails part-way.
    with open(outputReportFilePath, 'w', encoding = 'utf-8') as reportOutputFile:
        for item in report:
            if item['document'] in highlighterDocumentSet:
                reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")
            ##  Alternate verbose layout, kept for reference:
            #reportOutputFile.write(f"\nRow {item['row']} (DocID: {item['document']}):\n")
            #reportOutputFile.write(f" - Issues: {','.join(item['problems'])}")
            #reportOutputFile.write(f" - Text: {item['text']}\n")

    loader.export_to_excel(outputFilePath, visible = False, cell_highlighting = (documentIdField, descriptionField, highlighterDocumentList))
    
Revision:	926
Committed:	Thu Aug 7 20:30:36 2025 UTC (7 months, 2 weeks ago) by nino.borges
Content type:	text/x-python
File size:	7098 byte(s)
Log Message:	general use changes
#	Content
1	"""
2
3	PrivilegeLogGenerator
4
5	Created by:
6	Emanuel Borges
7	05.21.2025
8
9	This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.
10
11	"""
12
13	import csv
14	from win32com.client import Dispatch
15
16
17
18	class ConcordanceLoader:
19	def __init__(self, filePath):
20	self.filePath = filePath
21	self.delimiter = '\x14' # ASCII 20
22	self.quotechar = '\xfe' # ASCII 254
23	self.records = []
24
25
26	def load(self):
27	with open(self.filePath, 'r', encoding='utf-8', newline='') as file:
28	reader = csv.reader(file, delimiter = self.delimiter, quotechar = self.quotechar)
29	self.records = [row for row in reader]
30
31
32	def get_headers(self):
33	return self.records[0] if self.records else []
34
35	def get_data(self):
36	return self.records[1:] if len(self.records) >1 else []
37
38	def get_all(self):
39	return self.records
40
41
42
43	def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
44	if not self.records:
45	raise ValueError("No data loaded. Run .load() first.")
46
47	headers = self.get_headers()
48	try:
49	report_idx = headers.index(report_on_field)
50	check_idx = headers.index(field_to_check)
51	except ValueError as e:
52	raise ValueError(f"Column not found: {e}")
53
54	issues = []
55
56	for row_num, row in enumerate(self.get_data(), start=2): # Use Excel-style row numbers
57	report_val = row[report_idx] if report_idx < len(row) else ''
58	check_val = row[check_idx] if check_idx < len(row) else ''
59
60	row_issues = []
61
62	if " " in check_val:
63	row_issues.append("double spaces")
64	if ".." in check_val:
65	row_issues.append("double periods")
66	if ",," in check_val:
67	row_issues.append("double commas")
68	if check_val.strip() != check_val:
69	row_issue.append("leading/trailing whitespace")
70
71	stripped = check_val.strip()
72
73	if stripped and stripped[0].islower():
74	row_issues.append("sentance startes with lowercase letter")
75
76	if stripped and stripped[-1] not in ('.','?','!'):
77	row_issues.append("missing final punctuation")
78
79	if row_issues:
80	issues.append({
81	'document':report_val,
82	'problems': row_issues,
83	'row': row_num,
84	'text': check_val
85	})
86
87	return issues
88
89
90	def export_to_excel(self, outputPath, visible=False, cell_highlighting = False):
91	if not self.records:
92	raise ValueError("No data loaded. Run .load() first.")
93
94	xlApp = Dispatch('Excel.Application')
95	xlApp.Visible = visible
96	wb = xlApp.Workbooks.Add()
97	ws = wb.Worksheets(1)
98
99	## Write data
100	for row_idx, row in enumerate(self.records, start=1):
101	for col_idx, value in enumerate(row, start=1):
102	ws.Cells(row_idx, col_idx).Value = value
103
104
105	## Optional cell highlighting
106	if cell_highlighting:
107	if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
108	raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")
109
110	document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting
111
112	headers = self.get_headers()
113	try:
114	doc_id_col_idx = headers.index(document_id_column_name) + 1 # 1-based for Excel
115	highlight_col_idx = headers.index(highlight_column_name) +1
116	except ValueError as e:
117	raise ValueError(f"Colun not found: {e}")
118
119	for row_idx, row in enumerate(self.get_data(), start=2): # Skip header
120	if doc_id_col_idx <= len(row):
121	doc_id_value = row[doc_id_col_idx - 1]
122	if doc_id_value in doc_ids_list:
123	cell = ws.Cells(row_idx, highlight_col_idx)
124	#cell.Interior.Color = 5296274 # Light green
125	#cell.Interior.Color = 15122175 # Light purple
126	cell.Interior.Color = 13148390 # Medium purple
127
128
129	wb.SaveAs(outputPath)
130	wb.Close(SaveChanges = False)
131	#xlApp.Quit()
132
133
134
135	if __name__ == '__main__':
136	## Full path to the input file
137	#inputFilePath = r"C:\Test_Dir\ATT\export_20250523_214343.dat"
138	#inputFilePath = r"C:\Test_Dir\ATT\FCC_Manual_Log\export_20250701_223436.dat"
139	inputFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430.dat"
140
141	## Full path to the output xlsx file
142	outputFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430.xlsx"
143
144	## Grammar report path, if you use one.
145	outputReportFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430-GrammarReport.txt"
146
147	## Optional file path to file of docIDs where it will highlight a specific cell if a specfiic docID is found on this list of docIDs
148	#highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"
149	highlightFilePath = r"C:\Test_Dir\ATT\ShinyPrivLogHighlighter2.csv"
150
151	highlighterDocumentList = []
152	contents = open(highlightFilePath).readlines()
153	for line in contents:
154	line = line.replace("\n","")
155	highlighterDocumentList.append(line)
156
157
158	loader = ConcordanceLoader(inputFilePath)
159	loader.load()
160
161	print("Headers", loader.get_headers())
162	#print("First row:", loader.get_data()[0])
163
164	#report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")
165	report = loader.generate_grammar_check_report("REVIEWID", "DOJ CID Final Privilege Description")
166	reportOutputFile = open(outputReportFilePath,'w', encoding = 'utf-8')
167	for item in report:
168	if item['document'] in highlighterDocumentList:
169	reportOutputFile.write(f"{item['document']}\|{','.join(item['problems'])}\|{item['text']}\n")
170	#reportOutputFile.write(f"\nRow {item['row']} (DocID: {item['document']}):\n")
171	#reportOutputFile.write(f" - Issues: {','.join(item['problems'])}")
172	#reportOutputFile.write(f" - Text: {item['text']}\n")
173	#print(f"Row {item['row']} (DocID: {item['document']}):")
174	#print(f" - Issues: {','.join(item['problems'])}")
175	#print(f" - Text: {item['text']}\n")
176	reportOutputFile.close()
177
178	#loader.export_to_excel(outputFilePath, visible = False, cell_highlighting = ("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
179	loader.export_to_excel(outputFilePath, visible = False, cell_highlighting = ("REVIEWID", "DOJ CID Final Privilege Description", highlighterDocumentList))
180