Active_prgs/Redgrave/PrivilegeLogGenerator.py

"""

PrivilegeLogGenerator

Created by:
Emanuel Borges
05.21.2025

This program takes a DAT export from Relativity and turns it into an Excel spreadsheet privilege log.

"""

import csv
from win32com.client import Dispatch


class ConcordanceLoader:
    """Load a delimited DAT export and report on / export its rows.

    The file is parsed with the ``csv`` module using ASCII 20 as the field
    delimiter and ASCII 254 as the quote character.  The first parsed row is
    treated as the header row; every following row is treated as data.
    """

    # Fill colour applied to highlighted cells (medium purple).  Values tried
    # previously: 5296274 (light green), 15122175 (light purple).
    HIGHLIGHT_COLOR = 13148390

    def __init__(self, filePath):
        """Store *filePath* and the parsing defaults.  No file I/O happens here."""
        self.filePath = filePath
        self.delimiter = '\x14'  # ASCII 20
        self.quotechar = '\xfe'  # ASCII 254
        self.records = []

    def load(self):
        """Parse the file at ``self.filePath`` into ``self.records``.

        The file is decoded as ``utf-8-sig`` so that a leading UTF-8
        byte-order mark, when present, is dropped instead of becoming part of
        the first header name (which would make that column impossible to
        look up by name).  Files without a BOM decode exactly as plain UTF-8.
        """
        # newline='' lets the csv module do its own line-ending handling.
        with open(self.filePath, 'r', encoding='utf-8-sig', newline='') as file:
            reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
            self.records = list(reader)

    def get_headers(self):
        """Return the first loaded row, or ``[]`` when nothing is loaded."""
        return self.records[0] if self.records else []

    def get_data(self):
        """Return every loaded row after the header row (``[]`` if there are none)."""
        return self.records[1:]

    def get_all(self):
        """Return all loaded rows, header row included."""
        return self.records

    @staticmethod
    def _find_text_issues(text):
        """Return the issue labels that apply to *text*, in a fixed order."""
        found = []

        if "  " in text:
            found.append("double spaces")
        if ".." in text:
            found.append("double periods")
        if ",," in text:
            found.append("double commas")

        stripped = text.strip()
        if stripped != text:
            found.append("leading/trailing whitespace")

        # The remaining checks look at the first/last visible character, so
        # they only make sense when something is left after stripping.
        if stripped:
            if stripped[0].islower():
                # NOTE(review): the spelling of this label is kept byte-for-byte
                # because it is written into report output.
                found.append("sentance startes with lowercase letter")
            if stripped[-1] not in ('.', '?', '!'):
                found.append("missing final punctuation")

        return found

    def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
        """Check one text column of every data row for simple punctuation/spacing issues.

        Args:
            report_on_field: Header name of the column whose value identifies
                the row in the report (stored under ``'document'``).
            field_to_check: Header name of the column whose text is checked.

        Returns:
            A list with one dict per row that has at least one issue.  Each
            dict has the keys ``'document'``, ``'problems'`` (list of issue
            labels), ``'row'`` (1-based row number counting the header as row
            1) and ``'text'`` (the unmodified checked value).

        Raises:
            ValueError: If no data is loaded or either column name is not in
                the header row.
        """
        if not self.records:
            raise ValueError("No data loaded.  Run .load() first.")

        headers = self.get_headers()
        try:
            report_idx = headers.index(report_on_field)
            check_idx = headers.index(field_to_check)
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

        issues = []

        for row_num, row in enumerate(self.get_data(), start=2):  # Use Excel-style row numbers
            # Rows shorter than the header are treated as having empty values.
            report_val = row[report_idx] if report_idx < len(row) else ''
            check_val = row[check_idx] if check_idx < len(row) else ''

            row_issues = self._find_text_issues(check_val)
            if row_issues:
                issues.append({
                    'document': report_val,
                    'problems': row_issues,
                    'row': row_num,
                    'text': check_val,
                })

        return issues

    def _resolve_highlighting(self, cell_highlighting):
        """Validate *cell_highlighting* and turn it into lookup-ready values.

        Returns ``None`` when *cell_highlighting* is falsy, otherwise a tuple
        ``(doc_id_index, highlight_column, doc_ids)`` where ``doc_id_index``
        is the 0-based position of the document-id value inside a row,
        ``highlight_column`` is the 1-based worksheet column to colour and
        ``doc_ids`` is a set built from the supplied id list.

        Raises:
            ValueError: If the argument is not a 3-tuple or a column name is
                not in the header row.
        """
        if not cell_highlighting:
            return None

        if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
            raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")

        document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting

        headers = self.get_headers()
        try:
            doc_id_idx = headers.index(document_id_column_name)
            highlight_col = headers.index(highlight_column_name) + 1  # 1-based for Excel
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

        # A set gives constant-time membership tests in the per-row loop.
        return doc_id_idx, highlight_col, set(doc_ids_list)

    def export_to_excel(self, outputPath, visible=False, cell_highlighting=False):
        """Write every loaded row into a new Excel workbook and save it.

        Args:
            outputPath: Path handed to the workbook's ``SaveAs``.
            visible: Value assigned to the Excel application's ``Visible``
                property.
            cell_highlighting: ``False`` (default) for no highlighting, or a
                3-tuple ``(document_id_column_name, highlight_column_name,
                doc_ids_list)``.  For every data row whose document-id column
                value is in ``doc_ids_list``, the cell in the highlight column
                gets ``HIGHLIGHT_COLOR`` as its interior colour.

        Raises:
            ValueError: If no data is loaded, ``cell_highlighting`` is not a
                3-tuple, or one of its column names is not in the header row.
        """
        if not self.records:
            raise ValueError("No data loaded.  Run .load() first.")

        # Validate before starting Excel so a bad argument fails fast and
        # cannot leave an Excel process behind.
        highlight = self._resolve_highlighting(cell_highlighting)

        xlApp = Dispatch('Excel.Application')
        wb = None
        try:
            xlApp.Visible = visible
            wb = xlApp.Workbooks.Add()
            ws = wb.Worksheets(1)

            ##  Write data
            for row_idx, row in enumerate(self.records, start=1):
                for col_idx, value in enumerate(row, start=1):
                    ws.Cells(row_idx, col_idx).Value = value

            ##  Optional cell highlighting
            if highlight is not None:
                doc_id_idx, highlight_col, doc_ids = highlight
                for row_idx, row in enumerate(self.get_data(), start=2):  # Skip header
                    if doc_id_idx < len(row) and row[doc_id_idx] in doc_ids:
                        ws.Cells(row_idx, highlight_col).Interior.Color = self.HIGHLIGHT_COLOR

            wb.SaveAs(outputPath)
        finally:
            # Always release the workbook and the Excel instance, even when
            # writing or saving failed part-way through.
            try:
                if wb is not None:
                    wb.Close(SaveChanges=False)
            finally:
                xlApp.Quit()


if __name__ == '__main__':
    ##  Script entry point: load the DAT export, write a pipe-delimited grammar
    ##  report for the docIDs on the highlight list, then export everything to
    ##  Excel with the checked column highlighted for those same docIDs.

    ##  Full path to the input file
    inputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.dat"

    ##  Full path to the output xlsx file
    outputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.xlsx"

    ##  Grammar report path, if you use one.
    outputReportFilePath = r"C:\Test_Dir\ATT\export_20250522_021450-GrammarReport.txt"

    ##  Optional path to a file of docIDs (one per line).  When a row's docID is on this list, a specific cell in that row is highlighted.
    highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"

    ##  Read the docIDs with a context manager so the file handle is always closed.
    with open(highlightFilePath) as highlightFile:
        highlighterDocumentList = [line.replace("\n", "") for line in highlightFile]

    ##  Set copy of the list: constant-time membership tests in the report loop below.
    highlighterDocumentSet = set(highlighterDocumentList)

    loader = ConcordanceLoader(inputFilePath)
    loader.load()

    print("Headers", loader.get_headers())

    report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")

    ##  One line per flagged document that is also on the highlight list: docID|issues|text
    with open(outputReportFilePath, 'w', encoding='utf-8') as reportOutputFile:
        for item in report:
            if item['document'] in highlighterDocumentSet:
                reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")

    loader.export_to_excel(outputFilePath, visible=False, cell_highlighting=("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
    
Revision:	884
Committed:	Thu May 22 19:56:48 2025 UTC (10 months ago) by nino.borges
Content type:	text/x-python
File size:	6576 byte(s)
Log Message:	This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.
#	User	Rev	Content
1	nino.borges	884	"""
2
3			PrivilegeLogGenerator
4
5			Created by:
6			Emanuel Borges
7			05.21.2025
8
9			This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.
10
11			"""
12
13			import csv
14			from win32com.client import Dispatch
15
16
17
18			class ConcordanceLoader:
19			def __init__(self, filePath):
20			self.filePath = filePath
21			self.delimiter = '\x14' # ASCII 20
22			self.quotechar = '\xfe' # ASCII 254
23			self.records = []
24
25
26			def load(self):
27			with open(self.filePath, 'r', encoding='utf-8', newline='') as file:
28			reader = csv.reader(file, delimiter = self.delimiter, quotechar = self.quotechar)
29			self.records = [row for row in reader]
30
31
32			def get_headers(self):
33			return self.records[0] if self.records else []
34
35			def get_data(self):
36			return self.records[1:] if len(self.records) >1 else []
37
38			def get_all(self):
39			return self.records
40
41
42
43			def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
44			if not self.records:
45			raise ValueError("No data loaded. Run .load() first.")
46
47			headers = self.get_headers()
48			try:
49			report_idx = headers.index(report_on_field)
50			check_idx = headers.index(field_to_check)
51			except ValueError as e:
52			raise ValueError(f"Column not found: {e}")
53
54			issues = []
55
56			for row_num, row in enumerate(self.get_data(), start=2): # Use Excel-style row numbers
57			report_val = row[report_idx] if report_idx < len(row) else ''
58			check_val = row[check_idx] if check_idx < len(row) else ''
59
60			row_issues = []
61
62			if " " in check_val:
63			row_issues.append("double spaces")
64			if ".." in check_val:
65			row_issues.append("double periods")
66			if ",," in check_val:
67			row_issues.append("double commas")
68			if check_val.strip() != check_val:
69			row_issue.append("leading/trailing whitespace")
70
71			stripped = check_val.strip()
72
73			if stripped and stripped[0].islower():
74			row_issues.append("sentance startes with lowercase letter")
75
76			if stripped and stripped[-1] not in ('.','?','!'):
77			for_issues.append("missing final punctuation")
78
79			if row_issues:
80			issues.append({
81			'document':report_val,
82			'problems': row_issues,
83			'row': row_num,
84			'text': check_val
85			})
86
87			return issues
88
89
90			def export_to_excel(self, outputPath, visible=False, cell_highlighting = False):
91			if not self.records:
92			raise ValueError("No data loaded. Run .load() first.")
93
94			xlApp = Dispatch('Excel.Application')
95			xlApp.Visible = visible
96			wb = xlApp.Workbooks.Add()
97			ws = wb.Worksheets(1)
98
99			## Write data
100			for row_idx, row in enumerate(self.records, start=1):
101			for col_idx, value in enumerate(row, start=1):
102			ws.Cells(row_idx, col_idx).Value = value
103
104
105			## Optional cell highlighting
106			if cell_highlighting:
107			if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
108			raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")
109
110			document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting
111
112			headers = self.get_headers()
113			try:
114			doc_id_col_idx = headers.index(document_id_column_name) + 1 # 1-based for Excel
115			highlight_col_idx = headers.index(highlight_column_name) +1
116			except ValueError as e:
117			raise ValueError(f"Colun not found: {e}")
118
119			for row_idx, row in enumerate(self.get_data(), start=2): # Skip header
120			if doc_id_col_idx <= len(row):
121			doc_id_value = row[doc_id_col_idx - 1]
122			if doc_id_value in doc_ids_list:
123			cell = ws.Cells(row_idx, highlight_col_idx)
124			#cell.Interior.Color = 5296274 # Light green
125			#cell.Interior.Color = 15122175 # Light purple
126			cell.Interior.Color = 13148390 # Medium purple
127
128
129			wb.SaveAs(outputPath)
130			wb.Close(SaveChanges = False)
131			xlApp.Quit()
132
133
134
135			if __name__ == '__main__':
136			## Full path to the input file
137			inputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.dat"
138
139			## Full path to the output xlsx file
140			outputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.xlsx"
141
142			## Grammar report path, if you use one.
143			outputReportFilePath = r"C:\Test_Dir\ATT\export_20250522_021450-GrammarReport.txt"
144
145			## Optional file path to file of docIDs where it will highlight a specific cell if a specfiic docID is found on this list of docIDs
146			highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"
147
148			highlighterDocumentList = []
149			contents = open(highlightFilePath).readlines()
150			for line in contents:
151			line = line.replace("\n","")
152			highlighterDocumentList.append(line)
153
154
155			loader = ConcordanceLoader(inputFilePath)
156			loader.load()
157
158			print("Headers", loader.get_headers())
159			#print("First row:", loader.get_data()[0])
160
161			report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")
162			reportOutputFile = open(outputReportFilePath,'w', encoding = 'utf-8')
163			for item in report:
164			if item['document'] in highlighterDocumentList:
165			reportOutputFile.write(f"{item['document']}\|{','.join(item['problems'])}\|{item['text']}\n")
166			#reportOutputFile.write(f"\nRow {item['row']} (DocID: {item['document']}):\n")
167			#reportOutputFile.write(f" - Issues: {','.join(item['problems'])}")
168			#reportOutputFile.write(f" - Text: {item['text']}\n")
169			#print(f"Row {item['row']} (DocID: {item['document']}):")
170			#print(f" - Issues: {','.join(item['problems'])}")
171			#print(f" - Text: {item['text']}\n")
172			reportOutputFile.close()
173
174			loader.export_to_excel(outputFilePath, visible = False, cell_highlighting = ("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
175