ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/PrivilegeLogGenerator.py
Revision: 945
Committed: Wed Nov 5 18:06:34 2025 UTC (4 months, 3 weeks ago) by nino.borges
Content type: text/x-python
File size: 6641 byte(s)
Log Message:
cleaned up some conflicts.

File Contents

# User Rev Content
1 nino.borges 884 """
2    
3     PrivilegeLogGenerator
4    
5     Created by:
6     Emanuel Borges
7     05.21.2025
8    
9     This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.
10    
11     """
12    
13     import csv
14     from win32com.client import Dispatch
15    
16    
17    
class ConcordanceLoader:
    """Load a delimited DAT export and turn it into reports / an Excel sheet.

    The file is parsed with the ``csv`` module using ASCII 20 as the field
    delimiter and ASCII 254 as the quote character.  The first parsed row is
    treated as the header row; every following row is data.
    """

    def __init__(self, filePath):
        """Remember the path of the DAT file; nothing is read until ``load()``."""
        self.filePath = filePath
        self.delimiter = '\x14'  # ASCII 20
        self.quotechar = '\xfe'  # ASCII 254
        self.records = []

    def load(self):
        """Read the whole file into ``self.records`` (a list of row lists)."""
        # 'utf-8-sig' decodes plain UTF-8 identically but also drops a leading
        # BOM, which would otherwise sit in front of the first quote character
        # and corrupt the first header name.
        with open(self.filePath, 'r', encoding='utf-8-sig', newline='') as file:
            reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
            self.records = list(reader)

    def get_headers(self):
        """Return the header row, or an empty list when nothing is loaded."""
        return self.records[0] if self.records else []

    def get_data(self):
        """Return every row after the header (empty list when there is none)."""
        return self.records[1:]

    def get_all(self):
        """Return all loaded rows, header included."""
        return self.records

    def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
        """Run simple text checks on one column and list the rows that fail.

        Args:
            report_on_field: Header name of the column whose value identifies
                the row in the report (stored under ``'document'``).
            field_to_check: Header name of the column whose text is checked.

        Returns:
            A list of dicts with the keys ``'document'``, ``'problems'``
            (list of issue labels), ``'row'`` (1-based row number counting the
            header as row 1) and ``'text'`` (the checked value).  Rows without
            problems are omitted.

        Raises:
            ValueError: If no data is loaded or either column name is missing
                from the header row.
        """
        if not self.records:
            raise ValueError("No data loaded. Run .load() first.")

        headers = self.get_headers()
        try:
            report_idx = headers.index(report_on_field)
            check_idx = headers.index(field_to_check)
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

        # Substring -> label for the "doubled character" checks, in the order
        # the labels should appear in the report.
        doubled_checks = (
            ("  ", "double spaces"),
            ("..", "double periods"),
            (",,", "double commas"),
        )

        issues = []

        for row_num, row in enumerate(self.get_data(), start=2):  # Use Excel-style row numbers
            # Short rows are treated as having empty values for missing columns.
            report_val = row[report_idx] if report_idx < len(row) else ''
            check_val = row[check_idx] if check_idx < len(row) else ''

            row_issues = [label for needle, label in doubled_checks if needle in check_val]

            stripped = check_val.strip()
            if stripped != check_val:
                row_issues.append("leading/trailing whitespace")

            if stripped:
                if stripped[0].islower():
                    # Label text kept verbatim (including its spelling) so the
                    # emitted report lines stay comparable with earlier runs.
                    row_issues.append("sentance startes with lowercase letter")
                if stripped[-1] not in ('.', '?', '!'):
                    row_issues.append("missing final punctuation")

            if row_issues:
                issues.append({
                    'document': report_val,
                    'problems': row_issues,
                    'row': row_num,
                    'text': check_val,
                })

        return issues

    def export_to_excel(self, outputPath, visible=False, cell_highlighting=False):
        """Write all loaded rows to a new Excel workbook via COM automation.

        Args:
            outputPath: Path passed to the workbook's ``SaveAs``.
            visible: Whether the Excel application window is shown.
            cell_highlighting: Either a falsy value (no highlighting) or a
                3-tuple ``(document_id_column_name, highlight_column_name,
                doc_ids_list)``.  For every data row whose document-id column
                value is in ``doc_ids_list``, the cell in the highlight column
                gets a background color.

        Raises:
            ValueError: If no data is loaded, ``cell_highlighting`` is not a
                3-tuple, or one of its column names is not in the header row.
        """
        if not self.records:
            raise ValueError("No data loaded. Run .load() first.")

        # Validate the highlighting request BEFORE starting Excel so bad input
        # fails fast instead of leaving a half-written workbook open.
        highlight_plan = None
        if cell_highlighting:
            if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
                raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")

            document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting

            headers = self.get_headers()
            try:
                doc_id_idx = headers.index(document_id_column_name)  # 0-based row index
                highlight_col_idx = headers.index(highlight_column_name) + 1  # 1-based for Excel
            except ValueError as e:
                raise ValueError(f"Column not found: {e}") from e

            # A set makes the per-row membership test O(1).  Only plain
            # lists/tuples are converted so other containers keep their own
            # ``in`` semantics; unhashable items fall back to the original.
            doc_ids = doc_ids_list
            if isinstance(doc_ids_list, (list, tuple)):
                try:
                    doc_ids = set(doc_ids_list)
                except TypeError:
                    doc_ids = doc_ids_list

            highlight_plan = (doc_id_idx, highlight_col_idx, doc_ids)

        highlight_color = 13148390  # Medium purple

        xlApp = Dispatch('Excel.Application')
        wb = None
        try:
            xlApp.Visible = visible
            wb = xlApp.Workbooks.Add()
            ws = wb.Worksheets(1)

            ## Write data
            for row_idx, row in enumerate(self.records, start=1):
                for col_idx, value in enumerate(row, start=1):
                    ws.Cells(row_idx, col_idx).Value = value

            ## Optional cell highlighting
            if highlight_plan is not None:
                doc_id_idx, highlight_col_idx, doc_ids = highlight_plan
                for row_idx, row in enumerate(self.get_data(), start=2):  # Skip header
                    # Rows too short to contain the document-id column are skipped.
                    if doc_id_idx < len(row) and row[doc_id_idx] in doc_ids:
                        ws.Cells(row_idx, highlight_col_idx).Interior.Color = highlight_color

            wb.SaveAs(outputPath)
        finally:
            # Always release the workbook and the Excel process, even when a
            # write or the save fails; otherwise Excel keeps running.
            if wb is not None:
                wb.Close(SaveChanges=False)
            xlApp.Quit()
133 nino.borges 884
134    
135    
if __name__ == '__main__':
    # Script entry point: load the DAT export, write a grammar report for the
    # documents named in the highlight list, then export everything to Excel
    # with those documents' description cells highlighted.

    ## Full path to the input file
    inputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.dat"

    ## Full path to the output xlsx file
    outputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.xlsx"

    ## Grammar report path, if you use one.
    outputReportFilePath = r"C:\Test_Dir\ATT\export_20250522_021450-GrammarReport.txt"

    ## Optional file path to a file of docIDs; a specific cell is highlighted when a row's docID is found on this list
    highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"

    # One docID per line; only the newline is removed so the values otherwise
    # match the file content exactly.  The context manager closes the handle.
    with open(highlightFilePath) as highlightFile:
        highlighterDocumentList = [line.replace("\n", "") for line in highlightFile]

    # Set copy for fast membership tests while filtering the report; the list
    # itself is still what gets handed to export_to_excel.
    highlighterDocumentSet = set(highlighterDocumentList)

    loader = ConcordanceLoader(inputFilePath)
    loader.load()

    print("Headers", loader.get_headers())

    report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")

    # Pipe-delimited report: docID | comma-joined issue labels | checked text.
    # Only documents on the highlight list are written.
    with open(outputReportFilePath, 'w', encoding='utf-8') as reportOutputFile:
        for item in report:
            if item['document'] in highlighterDocumentSet:
                reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")

    loader.export_to_excel(outputFilePath, visible=False, cell_highlighting=("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
176 nino.borges 884