ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/PrivilegeLogGenerator.py
Revision: 926
Committed: Thu Aug 7 20:30:36 2025 UTC (7 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 7098 byte(s)
Log Message:
general use changes

File Contents

# Content
1 """
2
3 PrivilegeLogGenerator
4
5 Created by:
6 Emanuel Borges
7 05.21.2025
8
9 This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.
10
11 """
12
13 import csv
14 from win32com.client import Dispatch
15
16
17
18 class ConcordanceLoader:
19 def __init__(self, filePath):
20 self.filePath = filePath
21 self.delimiter = '\x14' # ASCII 20
22 self.quotechar = '\xfe' # ASCII 254
23 self.records = []
24
25
26 def load(self):
27 with open(self.filePath, 'r', encoding='utf-8', newline='') as file:
28 reader = csv.reader(file, delimiter = self.delimiter, quotechar = self.quotechar)
29 self.records = [row for row in reader]
30
31
32 def get_headers(self):
33 return self.records[0] if self.records else []
34
35 def get_data(self):
36 return self.records[1:] if len(self.records) >1 else []
37
38 def get_all(self):
39 return self.records
40
41
42
43 def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
44 if not self.records:
45 raise ValueError("No data loaded. Run .load() first.")
46
47 headers = self.get_headers()
48 try:
49 report_idx = headers.index(report_on_field)
50 check_idx = headers.index(field_to_check)
51 except ValueError as e:
52 raise ValueError(f"Column not found: {e}")
53
54 issues = []
55
56 for row_num, row in enumerate(self.get_data(), start=2): # Use Excel-style row numbers
57 report_val = row[report_idx] if report_idx < len(row) else ''
58 check_val = row[check_idx] if check_idx < len(row) else ''
59
60 row_issues = []
61
62 if " " in check_val:
63 row_issues.append("double spaces")
64 if ".." in check_val:
65 row_issues.append("double periods")
66 if ",," in check_val:
67 row_issues.append("double commas")
68 if check_val.strip() != check_val:
69 row_issue.append("leading/trailing whitespace")
70
71 stripped = check_val.strip()
72
73 if stripped and stripped[0].islower():
74 row_issues.append("sentance startes with lowercase letter")
75
76 if stripped and stripped[-1] not in ('.','?','!'):
77 row_issues.append("missing final punctuation")
78
79 if row_issues:
80 issues.append({
81 'document':report_val,
82 'problems': row_issues,
83 'row': row_num,
84 'text': check_val
85 })
86
87 return issues
88
89
90 def export_to_excel(self, outputPath, visible=False, cell_highlighting = False):
91 if not self.records:
92 raise ValueError("No data loaded. Run .load() first.")
93
94 xlApp = Dispatch('Excel.Application')
95 xlApp.Visible = visible
96 wb = xlApp.Workbooks.Add()
97 ws = wb.Worksheets(1)
98
99 ## Write data
100 for row_idx, row in enumerate(self.records, start=1):
101 for col_idx, value in enumerate(row, start=1):
102 ws.Cells(row_idx, col_idx).Value = value
103
104
105 ## Optional cell highlighting
106 if cell_highlighting:
107 if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
108 raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")
109
110 document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting
111
112 headers = self.get_headers()
113 try:
114 doc_id_col_idx = headers.index(document_id_column_name) + 1 # 1-based for Excel
115 highlight_col_idx = headers.index(highlight_column_name) +1
116 except ValueError as e:
117 raise ValueError(f"Colun not found: {e}")
118
119 for row_idx, row in enumerate(self.get_data(), start=2): # Skip header
120 if doc_id_col_idx <= len(row):
121 doc_id_value = row[doc_id_col_idx - 1]
122 if doc_id_value in doc_ids_list:
123 cell = ws.Cells(row_idx, highlight_col_idx)
124 #cell.Interior.Color = 5296274 # Light green
125 #cell.Interior.Color = 15122175 # Light purple
126 cell.Interior.Color = 13148390 # Medium purple
127
128
129 wb.SaveAs(outputPath)
130 wb.Close(SaveChanges = False)
131 #xlApp.Quit()
132
133
134
135 if __name__ == '__main__':
136 ## Full path to the input file
137 #inputFilePath = r"C:\Test_Dir\ATT\export_20250523_214343.dat"
138 #inputFilePath = r"C:\Test_Dir\ATT\FCC_Manual_Log\export_20250701_223436.dat"
139 inputFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430.dat"
140
141 ## Full path to the output xlsx file
142 outputFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430.xlsx"
143
144 ## Grammar report path, if you use one.
145 outputReportFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430-GrammarReport.txt"
146
147 ## Optional file path to file of docIDs where it will highlight a specific cell if a specfiic docID is found on this list of docIDs
148 #highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"
149 highlightFilePath = r"C:\Test_Dir\ATT\ShinyPrivLogHighlighter2.csv"
150
151 highlighterDocumentList = []
152 contents = open(highlightFilePath).readlines()
153 for line in contents:
154 line = line.replace("\n","")
155 highlighterDocumentList.append(line)
156
157
158 loader = ConcordanceLoader(inputFilePath)
159 loader.load()
160
161 print("Headers", loader.get_headers())
162 #print("First row:", loader.get_data()[0])
163
164 #report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")
165 report = loader.generate_grammar_check_report("REVIEWID", "DOJ CID Final Privilege Description")
166 reportOutputFile = open(outputReportFilePath,'w', encoding = 'utf-8')
167 for item in report:
168 if item['document'] in highlighterDocumentList:
169 reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")
170 #reportOutputFile.write(f"\nRow {item['row']} (DocID: {item['document']}):\n")
171 #reportOutputFile.write(f" - Issues: {','.join(item['problems'])}")
172 #reportOutputFile.write(f" - Text: {item['text']}\n")
173 #print(f"Row {item['row']} (DocID: {item['document']}):")
174 #print(f" - Issues: {','.join(item['problems'])}")
175 #print(f" - Text: {item['text']}\n")
176 reportOutputFile.close()
177
178 #loader.export_to_excel(outputFilePath, visible = False, cell_highlighting = ("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
179 loader.export_to_excel(outputFilePath, visible = False, cell_highlighting = ("REVIEWID", "DOJ CID Final Privilege Description", highlighterDocumentList))
180