Active_prgs/Redgrave/PrivilegeLogGenerator.py

"""

PrivilegeLogGenerator

Created by:
Emanuel Borges
05.21.2025

This program takes a DAT export from Relativity and turns it into an Excel spreadsheet privilege log.

"""

import csv
from win32com.client import Dispatch


class ConcordanceLoader:
    """Load a delimited DAT export and report on / export its rows.

    The file is parsed with the csv module using ASCII 20 as the field
    delimiter and ASCII 254 as the quote character.  The first record is
    treated as the header row; every following record is a data row.
    """

    # Fill color applied to highlighted cells (passed to Excel's Interior.Color).
    # Alternatives that were used previously: 5296274 (light green),
    # 15122175 (light purple).
    HIGHLIGHT_COLOR = 13148390  # Medium purple

    def __init__(self, filePath):
        """Remember the path to read; nothing is opened until load() is called."""
        self.filePath = filePath
        self.delimiter = '\x14'  # ASCII 20
        self.quotechar = '\xfe'  # ASCII 254
        self.records = []

    def load(self):
        """Read the whole file into self.records (header row first)."""
        # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops a
        # leading byte-order mark.  With a BOM left in place the first quote
        # character is no longer at the start of the field, so the csv reader
        # would keep the BOM and the quote characters inside the first header.
        with open(self.filePath, 'r', encoding='utf-8-sig', newline='') as file:
            reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
            self.records = list(reader)

    def get_headers(self):
        """Return the header row, or an empty list when nothing is loaded."""
        return self.records[0] if self.records else []

    def get_data(self):
        """Return every row after the header, or an empty list when there are none."""
        return self.records[1:] if len(self.records) > 1 else []

    def get_all(self):
        """Return all loaded records, header row included."""
        return self.records

    def _column_indexes(self, *column_names):
        """Return the 0-based header position of each name in column_names.

        Raises:
            ValueError: if any name is not present in the header row.
        """
        headers = self.get_headers()
        try:
            return [headers.index(name) for name in column_names]
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

    def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
        """Run simple text checks on one column and list the rows that fail.

        Args:
            report_on_field: header name of the column whose value identifies
                the row in the report (stored under 'document').
            field_to_check: header name of the column whose text is checked.

        Returns:
            A list of dicts, one per row with at least one problem, with the
            keys 'document', 'problems' (list of problem labels), 'row'
            (1-based row number counting the header as row 1) and 'text'
            (the unmodified checked value).

        Raises:
            ValueError: if no records are loaded or a column name is unknown.
        """
        if not self.records:
            raise ValueError("No data loaded.  Run .load() first.")

        report_idx, check_idx = self._column_indexes(report_on_field, field_to_check)

        issues = []

        for row_num, row in enumerate(self.get_data(), start=2):  # Use Excel-style row numbers
            # Short rows are treated as having an empty value for missing columns.
            report_val = row[report_idx] if report_idx < len(row) else ''
            check_val = row[check_idx] if check_idx < len(row) else ''

            row_issues = []

            if "  " in check_val:
                row_issues.append("double spaces")
            if ".." in check_val:
                row_issues.append("double periods")
            if ",," in check_val:
                row_issues.append("double commas")

            stripped = check_val.strip()
            if stripped != check_val:
                row_issues.append("leading/trailing whitespace")

            # The first/last character checks only apply to non-blank text.
            if stripped and stripped[0].islower():
                row_issues.append("sentence starts with lowercase letter")

            if stripped and stripped[-1] not in ('.', '?', '!'):
                row_issues.append("missing final punctuation")

            if row_issues:
                issues.append({
                    'document': report_val,
                    'problems': row_issues,
                    'row': row_num,
                    'text': check_val
                })

        return issues

    def export_to_excel(self, outputPath, visible=False, cell_highlighting=False):
        """Write all loaded records to a new Excel workbook and save it.

        Args:
            outputPath: path handed to the workbook's SaveAs.
            visible: whether the Excel application window is shown while working.
            cell_highlighting: falsy for no highlighting, otherwise a 3-tuple
                (document_id_column_name, highlight_column_name, doc_ids_list).
                For every data row whose document-id column value is in
                doc_ids_list, the cell in the highlight column is filled with
                HIGHLIGHT_COLOR.

        Raises:
            ValueError: if no records are loaded, if cell_highlighting is truthy
                but not a 3-tuple, or if one of its column names is unknown.
        """
        if not self.records:
            raise ValueError("No data loaded.  Run .load() first.")

        # Validate the highlighting request before starting Excel so that a bad
        # argument cannot leave an Excel process running in the background.
        highlight_plan = None
        if cell_highlighting:
            if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
                raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")

            document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting
            doc_id_idx, highlight_idx = self._column_indexes(document_id_column_name, highlight_column_name)
            # A set is built once so each row's membership test is a hash lookup.
            highlight_plan = (doc_id_idx, highlight_idx + 1, set(doc_ids_list))  # Excel columns are 1-based

        xlApp = Dispatch('Excel.Application')
        try:
            xlApp.Visible = visible
            wb = xlApp.Workbooks.Add()
            try:
                ws = wb.Worksheets(1)

                ##  Write data
                for row_idx, row in enumerate(self.records, start=1):
                    for col_idx, value in enumerate(row, start=1):
                        ws.Cells(row_idx, col_idx).Value = value

                ##  Optional cell highlighting
                if highlight_plan is not None:
                    doc_id_idx, highlight_col, doc_ids = highlight_plan
                    for row_idx, row in enumerate(self.get_data(), start=2):  #  Skip header
                        if doc_id_idx < len(row) and row[doc_id_idx] in doc_ids:
                            ws.Cells(row_idx, highlight_col).Interior.Color = self.HIGHLIGHT_COLOR

                wb.SaveAs(outputPath)
            finally:
                # The workbook was either saved above or is being abandoned.
                wb.Close(SaveChanges=False)
        finally:
            xlApp.Quit()


if __name__ == '__main__':
    # Script entry point: load the DAT export, write a grammar report for the
    # documents named in the highlight list, then export everything to Excel
    # with the checked column highlighted for those same documents.

    ##  Full path to the input file
    inputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.dat"

    ##  Full path to the output xlsx file
    outputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.xlsx"

    ##  Grammar report path, if you use one.
    outputReportFilePath = r"C:\Test_Dir\ATT\export_20250522_021450-GrammarReport.txt"

    ##  Optional path to a file of docIDs; a specific cell is highlighted when a row's docID is on this list
    highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"

    # One docID per line; the context manager closes the file once it is read.
    with open(highlightFilePath) as highlightFile:
        highlighterDocumentList = [line.replace("\n", "") for line in highlightFile]

    loader = ConcordanceLoader(inputFilePath)
    loader.load()

    print("Headers", loader.get_headers())

    report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")

    # Built once so filtering the report does not rescan the list for every item.
    highlighterDocumentSet = set(highlighterDocumentList)

    # Report lines are pipe-delimited: docID|comma-joined problems|checked text.
    # The context manager closes the report even if a write fails.
    with open(outputReportFilePath, 'w', encoding='utf-8') as reportOutputFile:
        for item in report:
            if item['document'] in highlighterDocumentSet:
                reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")

    loader.export_to_excel(outputFilePath, visible = False, cell_highlighting = ("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
    
Revision:	945
Committed:	Wed Nov 5 18:06:34 2025 UTC (4 months, 3 weeks ago) by nino.borges
Content type:	text/x-python
File size:	6641 byte(s)
Log Message:	cleaned up some conflicts.
#	User	Rev	Content
1	nino.borges	884	"""
2
3			PrivilegeLogGenerator
4
5			Created by:
6			Emanuel Borges
7			05.21.2025
8
9			This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.
10
11			"""
12
13			import csv
14			from win32com.client import Dispatch
15
16
17
18			class ConcordanceLoader:
19			def __init__(self, filePath):
20			self.filePath = filePath
21			self.delimiter = '\x14' # ASCII 20
22			self.quotechar = '\xfe' # ASCII 254
23			self.records = []
24
25
26			def load(self):
27			with open(self.filePath, 'r', encoding='utf-8', newline='') as file:
28			reader = csv.reader(file, delimiter = self.delimiter, quotechar = self.quotechar)
29			self.records = [row for row in reader]
30
31
32			def get_headers(self):
33			return self.records[0] if self.records else []
34
35			def get_data(self):
36			return self.records[1:] if len(self.records) >1 else []
37
38			def get_all(self):
39			return self.records
40
41
42
43			def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
44			if not self.records:
45			raise ValueError("No data loaded. Run .load() first.")
46
47			headers = self.get_headers()
48			try:
49			report_idx = headers.index(report_on_field)
50			check_idx = headers.index(field_to_check)
51			except ValueError as e:
52			raise ValueError(f"Column not found: {e}")
53
54			issues = []
55
56			for row_num, row in enumerate(self.get_data(), start=2): # Use Excel-style row numbers
57			report_val = row[report_idx] if report_idx < len(row) else ''
58			check_val = row[check_idx] if check_idx < len(row) else ''
59
60			row_issues = []
61
62			if " " in check_val:
63			row_issues.append("double spaces")
64			if ".." in check_val:
65			row_issues.append("double periods")
66			if ",," in check_val:
67			row_issues.append("double commas")
68			if check_val.strip() != check_val:
69			row_issue.append("leading/trailing whitespace")
70
71			stripped = check_val.strip()
72
73			if stripped and stripped[0].islower():
74			row_issues.append("sentance startes with lowercase letter")
75
76			if stripped and stripped[-1] not in ('.','?','!'):
77	nino.borges	945	#for_issues.append("missing final punctuation")
78	nino.borges	926	row_issues.append("missing final punctuation")
79	nino.borges	884
80			if row_issues:
81			issues.append({
82			'document':report_val,
83			'problems': row_issues,
84			'row': row_num,
85			'text': check_val
86			})
87
88			return issues
89
90
91			def export_to_excel(self, outputPath, visible=False, cell_highlighting = False):
92			if not self.records:
93			raise ValueError("No data loaded. Run .load() first.")
94
95			xlApp = Dispatch('Excel.Application')
96			xlApp.Visible = visible
97			wb = xlApp.Workbooks.Add()
98			ws = wb.Worksheets(1)
99
100			## Write data
101			for row_idx, row in enumerate(self.records, start=1):
102			for col_idx, value in enumerate(row, start=1):
103			ws.Cells(row_idx, col_idx).Value = value
104
105
106			## Optional cell highlighting
107			if cell_highlighting:
108			if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
109			raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")
110
111			document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting
112
113			headers = self.get_headers()
114			try:
115			doc_id_col_idx = headers.index(document_id_column_name) + 1 # 1-based for Excel
116			highlight_col_idx = headers.index(highlight_column_name) +1
117			except ValueError as e:
118			raise ValueError(f"Colun not found: {e}")
119
120			for row_idx, row in enumerate(self.get_data(), start=2): # Skip header
121			if doc_id_col_idx <= len(row):
122			doc_id_value = row[doc_id_col_idx - 1]
123			if doc_id_value in doc_ids_list:
124			cell = ws.Cells(row_idx, highlight_col_idx)
125			#cell.Interior.Color = 5296274 # Light green
126			#cell.Interior.Color = 15122175 # Light purple
127			cell.Interior.Color = 13148390 # Medium purple
128
129
130			wb.SaveAs(outputPath)
131			wb.Close(SaveChanges = False)
132	nino.borges	945	xlApp.Quit()
133	nino.borges	884
134
135
136			if __name__ == '__main__':
137			## Full path to the input file
138	nino.borges	945	inputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.dat"
139	nino.borges	884
140			## Full path to the output xlsx file
141	nino.borges	945	outputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.xlsx"
142	nino.borges	884
143			## Grammar report path, if you use one.
144	nino.borges	945	outputReportFilePath = r"C:\Test_Dir\ATT\export_20250522_021450-GrammarReport.txt"
145	nino.borges	884
146			## Optional file path to file of docIDs where it will highlight a specific cell if a specfiic docID is found on this list of docIDs
147	nino.borges	945	highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"
148	nino.borges	884
149			highlighterDocumentList = []
150			contents = open(highlightFilePath).readlines()
151			for line in contents:
152			line = line.replace("\n","")
153			highlighterDocumentList.append(line)
154
155
156			loader = ConcordanceLoader(inputFilePath)
157			loader.load()
158
159			print("Headers", loader.get_headers())
160			#print("First row:", loader.get_data()[0])
161
162	nino.borges	945	report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")
163	nino.borges	884	reportOutputFile = open(outputReportFilePath,'w', encoding = 'utf-8')
164			for item in report:
165			if item['document'] in highlighterDocumentList:
166			reportOutputFile.write(f"{item['document']}\|{','.join(item['problems'])}\|{item['text']}\n")
167			#reportOutputFile.write(f"\nRow {item['row']} (DocID: {item['document']}):\n")
168			#reportOutputFile.write(f" - Issues: {','.join(item['problems'])}")
169			#reportOutputFile.write(f" - Text: {item['text']}\n")
170			#print(f"Row {item['row']} (DocID: {item['document']}):")
171			#print(f" - Issues: {','.join(item['problems'])}")
172			#print(f" - Text: {item['text']}\n")
173			reportOutputFile.close()
174
175	nino.borges	945	loader.export_to_excel(outputFilePath, visible = False, cell_highlighting = ("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
176	nino.borges	884