Active_prgs/Redgrave/PrivilegeLogGenerator.py

"""

PrivilegeLogGenerator

Created by:
Emanuel Borges
05.21.2025

This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.

"""

import csv
from win32com.client import Dispatch


class ConcordanceLoader:
    """Read a delimited text export into memory, grammar-check a field, and export to Excel.

    Rows are parsed with ``csv.reader`` using ASCII 20 as the field delimiter
    and ASCII 254 as the quote character.  The first parsed row is treated as
    the header row; every later row is a data row.
    """

    # Excel ``Interior.Color`` value applied to highlighted cells.
    # Values previously tried here: 5296274 (light green), 15122175 (light purple).
    HIGHLIGHT_COLOR = 13148390  # Medium purple

    def __init__(self, filePath):
        """Store the input path; nothing is read until :meth:`load` is called.

        Args:
            filePath: Path of the delimited export file to read.
        """
        self.filePath = filePath
        self.delimiter = '\x14'  # ASCII 20
        self.quotechar = '\xfe'  # ASCII 254
        self.records = []

    def load(self):
        """Parse the whole file into ``self.records`` (a list of rows, each a list of str)."""
        # 'utf-8-sig' decodes exactly like 'utf-8' except that a leading
        # byte-order mark is dropped, so it cannot end up inside the first
        # header name and break the column lookups below.
        with open(self.filePath, 'r', encoding='utf-8-sig', newline='') as file:
            reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
            self.records = list(reader)

    def get_headers(self):
        """Return the first loaded row, or ``[]`` when nothing is loaded."""
        return self.records[0] if self.records else []

    def get_data(self):
        """Return every loaded row after the header row (``[]`` when there is none)."""
        return self.records[1:]

    def get_all(self):
        """Return all loaded rows, header row included."""
        return self.records

    def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
        """Run simple punctuation/spacing checks on one column of every data row.

        Args:
            report_on_field: Header name of the column whose value identifies
                the row in the report.
            field_to_check: Header name of the column whose text is checked.

        Returns:
            A list with one dict per row that has at least one problem.  Keys:
            ``'document'`` (value of ``report_on_field``), ``'problems'``
            (list of issue labels), ``'row'`` (1-based row number counting the
            header as row 1) and ``'text'`` (the unmodified checked value).

        Raises:
            ValueError: If nothing is loaded or either column name is not in
                the header row.
        """
        if not self.records:
            raise ValueError("No data loaded.  Run .load() first.")

        headers = self.get_headers()
        try:
            report_idx = headers.index(report_on_field)
            check_idx = headers.index(field_to_check)
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

        issues = []

        # start=2: the header occupies row 1, so the first data row is row 2.
        for row_num, row in enumerate(self.get_data(), start=2):
            # Rows shorter than the header are treated as having empty values.
            report_val = row[report_idx] if report_idx < len(row) else ''
            check_val = row[check_idx] if check_idx < len(row) else ''
            stripped = check_val.strip()

            # NOTE(review): the labels below are emitted verbatim into reports,
            # so their text (spelling included) is deliberately left unchanged.
            row_issues = []
            if "  " in check_val:
                row_issues.append("double spaces")
            if ".." in check_val:
                row_issues.append("double periods")
            if ",," in check_val:
                row_issues.append("double commas")
            if stripped != check_val:
                row_issues.append("leading/trailing whitespace")
            # The remaining checks look at the text without surrounding
            # whitespace and are skipped for empty values.
            if stripped and stripped[0].islower():
                row_issues.append("sentance startes with lowercase letter")
            if stripped and stripped[-1] not in ('.', '?', '!'):
                row_issues.append("missing final punctuation")

            if row_issues:
                issues.append({
                    'document': report_val,
                    'problems': row_issues,
                    'row': row_num,
                    'text': check_val
                })

        return issues

    def _resolve_highlighting(self, cell_highlighting):
        """Validate ``cell_highlighting`` and translate its column names to positions.

        Returns:
            ``None`` when ``cell_highlighting`` is falsy; otherwise a tuple
            ``(doc_id_idx, highlight_col, doc_ids_list)`` where ``doc_id_idx``
            is a 0-based index into each row and ``highlight_col`` is a
            1-based worksheet column number.

        Raises:
            ValueError: If the argument is not a 3-tuple or a column name is
                not in the header row.
        """
        if not cell_highlighting:
            return None
        if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
            raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")

        document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting

        headers = self.get_headers()
        try:
            doc_id_idx = headers.index(document_id_column_name)
            highlight_col = headers.index(highlight_column_name) + 1  # 1-based for Excel
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e
        return doc_id_idx, highlight_col, doc_ids_list

    def export_to_excel(self, outputPath, visible=False, cell_highlighting=False):
        """Write all loaded rows to a new Excel workbook and save it.

        Excel is driven through ``Dispatch('Excel.Application')``.  Every
        loaded row, header included, is written cell by cell to the first
        worksheet starting at row 1, column 1.

        Args:
            outputPath: Path passed to the workbook's ``SaveAs``.
            visible: Value assigned to the Excel application's ``Visible``
                property.
            cell_highlighting: Falsy for no highlighting, or a 3-tuple
                ``(document_id_column_name, highlight_column_name,
                doc_ids_list)``.  For every data row whose document-ID column
                value is in ``doc_ids_list``, the cell in the highlight column
                is filled with ``HIGHLIGHT_COLOR``.

        Raises:
            ValueError: If nothing is loaded, ``cell_highlighting`` is not a
                3-tuple, or one of its column names is not in the header row.
        """
        if not self.records:
            raise ValueError("No data loaded.  Run .load() first.")

        # Validate before Excel is started so a bad argument fails fast and
        # cannot leave an Excel process behind.
        highlight_plan = self._resolve_highlighting(cell_highlighting)

        xlApp = Dispatch('Excel.Application')
        try:
            xlApp.Visible = visible
            wb = xlApp.Workbooks.Add()
            try:
                ws = wb.Worksheets(1)

                ##  Write data
                for row_idx, row in enumerate(self.records, start=1):
                    for col_idx, value in enumerate(row, start=1):
                        ws.Cells(row_idx, col_idx).Value = value

                ##  Optional cell highlighting
                if highlight_plan is not None:
                    doc_id_idx, highlight_col, doc_ids_list = highlight_plan
                    # start=2 skips the header row on the worksheet.
                    for row_idx, row in enumerate(self.get_data(), start=2):
                        # Short rows have no document ID and are never highlighted.
                        if doc_id_idx < len(row) and row[doc_id_idx] in doc_ids_list:
                            ws.Cells(row_idx, highlight_col).Interior.Color = self.HIGHLIGHT_COLOR

                wb.SaveAs(outputPath)
            finally:
                # Always release the workbook, even when writing or saving failed.
                wb.Close(SaveChanges=False)
        finally:
            # Always shut Excel down so a failure does not orphan the process.
            xlApp.Quit()


if __name__ == '__main__':
    ##  Full path to the input file
    inputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.dat"

    ##  Full path to the output xlsx file
    outputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.xlsx"

    ##  Grammar report path, if you use one.
    outputReportFilePath = r"C:\Test_Dir\ATT\export_20250522_021450-GrammarReport.txt"

    ##  Optional path to a file of docIDs, one per line.  Rows whose docID is on this
    ##  list get a specific cell highlighted and are the only rows written to the report.
    highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"

    ##  Read the docID list; the context manager closes the file even on error.
    ##  Blank lines are skipped so an empty string is never treated as a docID.
    with open(highlightFilePath) as highlightFile:
        highlighterDocumentList = [
            docId for docId in (line.rstrip("\n") for line in highlightFile) if docId
        ]
    ##  Set copy for constant-time membership tests while filtering the report.
    highlighterDocumentSet = set(highlighterDocumentList)

    loader = ConcordanceLoader(inputFilePath)
    loader.load()

    print("Headers", loader.get_headers())

    ##  Report format: one line per flagged document, pipe-delimited as
    ##  docID|comma-separated problems|checked text
    report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")
    with open(outputReportFilePath, 'w', encoding='utf-8') as reportOutputFile:
        for item in report:
            if item['document'] in highlighterDocumentSet:
                reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")

    loader.export_to_excel(outputFilePath, visible=False, cell_highlighting=("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
    
Revision:	945
Committed:	Wed Nov 5 18:06:34 2025 UTC (4 months, 3 weeks ago) by nino.borges
Content type:	text/x-python
File size:	6641 byte(s)
Log Message:	cleaned up some conflicts.
#	Content
1	"""
2
3	PrivilegeLogGenerator
4
5	Created by:
6	Emanuel Borges
7	05.21.2025
8
9	This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.
10
11	"""
12
13	import csv
14	from win32com.client import Dispatch
15
16
17
class ConcordanceLoader:
    """Read a delimited text export into memory, grammar-check a field, and export to Excel.

    Rows are parsed with ``csv.reader`` using ASCII 20 as the field delimiter
    and ASCII 254 as the quote character.  The first parsed row is treated as
    the header row; every later row is a data row.
    """

    # Excel ``Interior.Color`` value applied to highlighted cells.
    # Values previously tried here: 5296274 (light green), 15122175 (light purple).
    HIGHLIGHT_COLOR = 13148390  # Medium purple

    def __init__(self, filePath):
        """Store the input path; nothing is read until :meth:`load` is called.

        Args:
            filePath: Path of the delimited export file to read.
        """
        self.filePath = filePath
        self.delimiter = '\x14'  # ASCII 20
        self.quotechar = '\xfe'  # ASCII 254
        self.records = []

    def load(self):
        """Parse the whole file into ``self.records`` (a list of rows, each a list of str)."""
        # 'utf-8-sig' decodes exactly like 'utf-8' except that a leading
        # byte-order mark is dropped, so it cannot end up inside the first
        # header name and break the column lookups below.
        with open(self.filePath, 'r', encoding='utf-8-sig', newline='') as file:
            reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
            self.records = list(reader)

    def get_headers(self):
        """Return the first loaded row, or ``[]`` when nothing is loaded."""
        return self.records[0] if self.records else []

    def get_data(self):
        """Return every loaded row after the header row (``[]`` when there is none)."""
        return self.records[1:]

    def get_all(self):
        """Return all loaded rows, header row included."""
        return self.records

    def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
        """Run simple punctuation/spacing checks on one column of every data row.

        Args:
            report_on_field: Header name of the column whose value identifies
                the row in the report.
            field_to_check: Header name of the column whose text is checked.

        Returns:
            A list with one dict per row that has at least one problem.  Keys:
            ``'document'`` (value of ``report_on_field``), ``'problems'``
            (list of issue labels), ``'row'`` (1-based row number counting the
            header as row 1) and ``'text'`` (the unmodified checked value).

        Raises:
            ValueError: If nothing is loaded or either column name is not in
                the header row.
        """
        if not self.records:
            raise ValueError("No data loaded. Run .load() first.")

        headers = self.get_headers()
        try:
            report_idx = headers.index(report_on_field)
            check_idx = headers.index(field_to_check)
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

        issues = []

        # start=2: the header occupies row 1, so the first data row is row 2.
        for row_num, row in enumerate(self.get_data(), start=2):
            # Rows shorter than the header are treated as having empty values.
            report_val = row[report_idx] if report_idx < len(row) else ''
            check_val = row[check_idx] if check_idx < len(row) else ''
            stripped = check_val.strip()

            # NOTE(review): the labels below are emitted verbatim into reports,
            # so their text (spelling included) is deliberately left unchanged.
            row_issues = []
            # Two consecutive spaces; a single-space test would flag almost
            # every value and contradict the "double spaces" label.
            if "  " in check_val:
                row_issues.append("double spaces")
            if ".." in check_val:
                row_issues.append("double periods")
            if ",," in check_val:
                row_issues.append("double commas")
            if stripped != check_val:
                row_issues.append("leading/trailing whitespace")
            # The remaining checks look at the text without surrounding
            # whitespace and are skipped for empty values.
            if stripped and stripped[0].islower():
                row_issues.append("sentance startes with lowercase letter")
            if stripped and stripped[-1] not in ('.', '?', '!'):
                row_issues.append("missing final punctuation")

            if row_issues:
                issues.append({
                    'document': report_val,
                    'problems': row_issues,
                    'row': row_num,
                    'text': check_val
                })

        return issues

    def _resolve_highlighting(self, cell_highlighting):
        """Validate ``cell_highlighting`` and translate its column names to positions.

        Returns:
            ``None`` when ``cell_highlighting`` is falsy; otherwise a tuple
            ``(doc_id_idx, highlight_col, doc_ids_list)`` where ``doc_id_idx``
            is a 0-based index into each row and ``highlight_col`` is a
            1-based worksheet column number.

        Raises:
            ValueError: If the argument is not a 3-tuple or a column name is
                not in the header row.
        """
        if not cell_highlighting:
            return None
        if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
            raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")

        document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting

        headers = self.get_headers()
        try:
            doc_id_idx = headers.index(document_id_column_name)
            highlight_col = headers.index(highlight_column_name) + 1  # 1-based for Excel
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e
        return doc_id_idx, highlight_col, doc_ids_list

    def export_to_excel(self, outputPath, visible=False, cell_highlighting=False):
        """Write all loaded rows to a new Excel workbook and save it.

        Excel is driven through ``Dispatch('Excel.Application')``.  Every
        loaded row, header included, is written cell by cell to the first
        worksheet starting at row 1, column 1.

        Args:
            outputPath: Path passed to the workbook's ``SaveAs``.
            visible: Value assigned to the Excel application's ``Visible``
                property.
            cell_highlighting: Falsy for no highlighting, or a 3-tuple
                ``(document_id_column_name, highlight_column_name,
                doc_ids_list)``.  For every data row whose document-ID column
                value is in ``doc_ids_list``, the cell in the highlight column
                is filled with ``HIGHLIGHT_COLOR``.

        Raises:
            ValueError: If nothing is loaded, ``cell_highlighting`` is not a
                3-tuple, or one of its column names is not in the header row.
        """
        if not self.records:
            raise ValueError("No data loaded. Run .load() first.")

        # Validate before Excel is started so a bad argument fails fast and
        # cannot leave an Excel process behind.
        highlight_plan = self._resolve_highlighting(cell_highlighting)

        xlApp = Dispatch('Excel.Application')
        try:
            xlApp.Visible = visible
            wb = xlApp.Workbooks.Add()
            try:
                ws = wb.Worksheets(1)

                ## Write data
                for row_idx, row in enumerate(self.records, start=1):
                    for col_idx, value in enumerate(row, start=1):
                        ws.Cells(row_idx, col_idx).Value = value

                ## Optional cell highlighting
                if highlight_plan is not None:
                    doc_id_idx, highlight_col, doc_ids_list = highlight_plan
                    # start=2 skips the header row on the worksheet.
                    for row_idx, row in enumerate(self.get_data(), start=2):
                        # Short rows have no document ID and are never highlighted.
                        if doc_id_idx < len(row) and row[doc_id_idx] in doc_ids_list:
                            ws.Cells(row_idx, highlight_col).Interior.Color = self.HIGHLIGHT_COLOR

                wb.SaveAs(outputPath)
            finally:
                # Always release the workbook, even when writing or saving failed.
                wb.Close(SaveChanges=False)
        finally:
            # Always shut Excel down so a failure does not orphan the process.
            xlApp.Quit()
133
134
135
if __name__ == '__main__':
    ## Full path to the input file
    inputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.dat"

    ## Full path to the output xlsx file
    outputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.xlsx"

    ## Grammar report path, if you use one.
    outputReportFilePath = r"C:\Test_Dir\ATT\export_20250522_021450-GrammarReport.txt"

    ## Optional path to a file of docIDs, one per line.  Rows whose docID is on this
    ## list get a specific cell highlighted and are the only rows written to the report.
    highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"

    ## Read the docID list; the context manager closes the file even on error.
    ## Blank lines are skipped so an empty string is never treated as a docID.
    with open(highlightFilePath) as highlightFile:
        highlighterDocumentList = [
            docId for docId in (line.rstrip("\n") for line in highlightFile) if docId
        ]
    ## Set copy for constant-time membership tests while filtering the report.
    highlighterDocumentSet = set(highlighterDocumentList)

    loader = ConcordanceLoader(inputFilePath)
    loader.load()

    print("Headers", loader.get_headers())

    ## Report format: one line per flagged document, pipe-delimited as
    ## docID|comma-separated problems|checked text
    report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")
    with open(outputReportFilePath, 'w', encoding='utf-8') as reportOutputFile:
        for item in report:
            if item['document'] in highlighterDocumentSet:
                # Plain '|' separators: a backslash before the pipe is an
                # invalid escape and would be written into the report.
                reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")

    loader.export_to_excel(outputFilePath, visible=False, cell_highlighting=("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
176