ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/PrivilegeLogGenerator.py
Revision: 945
Committed: Wed Nov 5 18:06:34 2025 UTC (4 months, 3 weeks ago) by nino.borges
Content type: text/x-python
File size: 6641 byte(s)
Log Message:
cleaned up some conflicts.

File Contents

# Content
1 """
2
3 PrivilegeLogGenerator
4
5 Created by:
6 Emanuel Borges
7 05.21.2025
8
9 This program takes a DAT export from Relativity and converts it into an Excel spreadsheet privilege log.
10
11 """
12
13 import csv
14 from win32com.client import Dispatch
15
16
17
class ConcordanceLoader:
    """Load a Concordance-style delimited DAT file and report on / export it.

    The file is parsed with the ``csv`` module using ASCII 20 as the field
    delimiter and ASCII 254 as the quote character.  The first parsed row is
    treated as the header row; every following row is data.
    """

    def __init__(self, filePath):
        """Remember the path of the DAT file; nothing is read until load()."""
        self.filePath = filePath
        self.delimiter = '\x14'  # ASCII 20
        self.quotechar = '\xfe'  # ASCII 254
        self.records = []

    def load(self):
        """Parse the DAT file into ``self.records`` (a list of row lists).

        The file is decoded as ``utf-8-sig`` so that a leading byte-order
        mark, when present, is dropped.  Otherwise the BOM would sit in front
        of the first quote character, the csv reader would not recognise the
        field as quoted, and the first header name would be corrupted.
        Files without a BOM are read exactly as plain UTF-8.
        """
        with open(self.filePath, 'r', encoding='utf-8-sig', newline='') as file:
            reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
            self.records = list(reader)

    def get_headers(self):
        """Return the header row, or an empty list when nothing is loaded."""
        return self.records[0] if self.records else []

    def get_data(self):
        """Return every row after the header row (possibly an empty list)."""
        return self.records[1:] if len(self.records) > 1 else []

    def get_all(self):
        """Return all parsed rows, header row included."""
        return self.records

    def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
        """Run simple text checks on one column and list the rows that fail.

        Args:
            report_on_field: Header name of the column whose value identifies
                the row in the report (stored under ``'document'``).
            field_to_check: Header name of the column whose text is checked.

        Returns:
            A list of dicts, one per row with at least one problem, with the
            keys ``'document'``, ``'problems'`` (list of str), ``'row'``
            (1-based row number counting the header as row 1) and ``'text'``.

        Raises:
            ValueError: If no data is loaded or either column is missing.
        """
        if not self.records:
            raise ValueError("No data loaded. Run .load() first.")

        headers = self.get_headers()
        try:
            report_idx = headers.index(report_on_field)
            check_idx = headers.index(field_to_check)
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

        issues = []

        # start=2: the header occupies row 1, so data begins on row 2.
        for row_num, row in enumerate(self.get_data(), start=2):
            # Short rows are treated as having empty trailing fields.
            report_val = row[report_idx] if report_idx < len(row) else ''
            check_val = row[check_idx] if check_idx < len(row) else ''

            row_issues = []

            # Two consecutive spaces; a single space is normal text.
            if "  " in check_val:
                row_issues.append("double spaces")
            if ".." in check_val:
                row_issues.append("double periods")
            if ",," in check_val:
                row_issues.append("double commas")

            stripped = check_val.strip()
            if stripped != check_val:
                row_issues.append("leading/trailing whitespace")

            # The remaining checks only make sense for non-empty text.
            if stripped and stripped[0].islower():
                row_issues.append("sentence starts with lowercase letter")

            if stripped and stripped[-1] not in ('.', '?', '!'):
                row_issues.append("missing final punctuation")

            if row_issues:
                issues.append({
                    'document': report_val,
                    'problems': row_issues,
                    'row': row_num,
                    'text': check_val,
                })

        return issues

    def export_to_excel(self, outputPath, visible=False, cell_highlighting=False):
        """Write all loaded rows to a new Excel workbook and save it.

        Args:
            outputPath: Path passed to the workbook's ``SaveAs``.
            visible: Whether the Excel application window is shown.
            cell_highlighting: Either a falsy value (no highlighting) or a
                3-tuple ``(document_id_column_name, highlight_column_name,
                doc_ids_list)``.  For every data row whose document-id column
                value is in ``doc_ids_list``, the cell in the highlight column
                gets a fill colour.

        Raises:
            ValueError: If no data is loaded, ``cell_highlighting`` is not a
                3-tuple, or one of its column names is not a header.
        """
        if not self.records:
            raise ValueError("No data loaded. Run .load() first.")

        # Validate the highlighting request before Excel is started, so a bad
        # argument fails immediately instead of after every cell was written.
        highlight_plan = None
        if cell_highlighting:
            if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
                raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")

            document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting

            headers = self.get_headers()
            try:
                doc_id_col_idx = headers.index(document_id_column_name) + 1  # 1-based for Excel
                highlight_col_idx = headers.index(highlight_column_name) + 1
            except ValueError as e:
                raise ValueError(f"Column not found: {e}") from e

            # Build the lookup once; membership is then O(1) per row.
            highlight_plan = (doc_id_col_idx, highlight_col_idx, set(doc_ids_list))

        xlApp = Dispatch('Excel.Application')
        wb = None
        try:
            xlApp.Visible = visible
            wb = xlApp.Workbooks.Add()
            ws = wb.Worksheets(1)

            ## Write data
            for row_idx, row in enumerate(self.records, start=1):
                for col_idx, value in enumerate(row, start=1):
                    ws.Cells(row_idx, col_idx).Value = value

            ## Optional cell highlighting
            if highlight_plan is not None:
                doc_id_col_idx, highlight_col_idx, doc_ids = highlight_plan
                for row_idx, row in enumerate(self.get_data(), start=2):  # Skip header
                    if doc_id_col_idx <= len(row) and row[doc_id_col_idx - 1] in doc_ids:
                        cell = ws.Cells(row_idx, highlight_col_idx)
                        # Alternatives used previously:
                        # 5296274 (light green), 15122175 (light purple).
                        cell.Interior.Color = 13148390  # Medium purple

            wb.SaveAs(outputPath)
        finally:
            # Always release the workbook and the Excel process, even when
            # writing or saving failed, so no orphaned Excel is left running.
            try:
                if wb is not None:
                    wb.Close(SaveChanges=False)
            finally:
                xlApp.Quit()
133
134
135
if __name__ == '__main__':
    # Script entry point: load the DAT export, write a grammar report for the
    # documents on the highlight list, then export everything to Excel with
    # the checked column highlighted for those same documents.

    ## Full path to the input file
    inputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.dat"

    ## Full path to the output xlsx file
    outputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.xlsx"

    ## Grammar report path, if you use one.
    outputReportFilePath = r"C:\Test_Dir\ATT\export_20250522_021450-GrammarReport.txt"

    ## Optional path to a file of docIDs, one per line; a specific cell is
    ## highlighted when a record's docID is found on this list.
    highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"

    # Read the docID list with a context manager so the handle is closed.
    # Blank lines are skipped: an empty docID would otherwise match every
    # record whose ID field is empty.
    with open(highlightFilePath) as highlightFile:
        highlighterDocumentList = [
            line.rstrip("\n") for line in highlightFile if line.rstrip("\n")
        ]
    # Set copy for O(1) membership tests while filtering the report.
    highlighterDocumentSet = set(highlighterDocumentList)

    loader = ConcordanceLoader(inputFilePath)
    loader.load()

    print("Headers", loader.get_headers())

    report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")

    # One pipe-delimited line per flagged document: docID|problems|text
    with open(outputReportFilePath, 'w', encoding='utf-8') as reportOutputFile:
        for item in report:
            if item['document'] in highlighterDocumentSet:
                reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")

    loader.export_to_excel(outputFilePath, visible=False, cell_highlighting=("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
176