ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/PrivilegeLogGenerator.py
Revision: 884
Committed: Thu May 22 19:56:48 2025 UTC (10 months ago) by nino.borges
Content type: text/x-python
File size: 6576 byte(s)
Log Message:
This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.

File Contents

# Content
1 """
2
3 PrivilegeLogGenerator
4
5 Created by:
6 Emanuel Borges
7 05.21.2025
8
9 This program takes a DAT export from Relativity and converts it into an Excel spreadsheet privilege log.
10
11 """
12
13 import csv
14 from win32com.client import Dispatch
15
16
17
class ConcordanceLoader:
    """Load a delimited DAT export and produce reports / an Excel workbook.

    The file is parsed with the ``csv`` module using ASCII 20 as the field
    delimiter and ASCII 254 as the quote character. The first parsed row is
    treated as the header row; every following row is data.
    """

    # Interior colour applied to highlighted cells (medium purple).
    HIGHLIGHT_COLOR = 13148390

    def __init__(self, filePath):
        """Remember the path of the DAT file; nothing is read until load()."""
        self.filePath = filePath
        self.delimiter = '\x14'  # ASCII 20
        self.quotechar = '\xfe'  # ASCII 254
        self.records = []

    def load(self):
        """Read the whole DAT file into ``self.records`` (list of row lists)."""
        # 'utf-8-sig' decodes exactly like 'utf-8' but drops a leading BOM if
        # one is present, so the first header name is not polluted by it.
        with open(self.filePath, 'r', encoding='utf-8-sig', newline='') as file:
            reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
            self.records = list(reader)

    def get_headers(self):
        """Return the header row, or an empty list when nothing is loaded."""
        return self.records[0] if self.records else []

    def get_data(self):
        """Return every row after the header, or an empty list if there is none."""
        return self.records[1:] if len(self.records) > 1 else []

    def get_all(self):
        """Return all loaded rows, header included."""
        return self.records

    @staticmethod
    def _grammar_problems(text):
        """Return the list of grammar problem labels found in *text*."""
        problems = []

        if "  " in text:
            problems.append("double spaces")
        if ".." in text:
            problems.append("double periods")
        if ",," in text:
            problems.append("double commas")

        stripped = text.strip()
        if stripped != text:
            problems.append("leading/trailing whitespace")

        # The remaining checks only make sense when there is visible text.
        if stripped and stripped[0].islower():
            problems.append("sentence starts with lowercase letter")
        if stripped and stripped[-1] not in ('.', '?', '!'):
            problems.append("missing final punctuation")

        return problems

    def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
        """Check one column of every data row for simple grammar problems.

        Args:
            report_on_field: Header name of the column used to identify a row
                in the report (stored under the 'document' key).
            field_to_check: Header name of the column whose text is checked.

        Returns:
            A list of dicts, one per row that has at least one problem, with
            the keys 'document', 'problems', 'row' and 'text'. 'row' is the
            1-based row number counting the header as row 1.

        Raises:
            ValueError: If no data is loaded or either column name is missing
                from the header row.
        """
        if not self.records:
            raise ValueError("No data loaded. Run .load() first.")

        headers = self.get_headers()
        try:
            report_idx = headers.index(report_on_field)
            check_idx = headers.index(field_to_check)
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

        issues = []

        for row_num, row in enumerate(self.get_data(), start=2):  # Use Excel-style row numbers
            # Short rows are treated as having empty values for missing columns.
            report_val = row[report_idx] if report_idx < len(row) else ''
            check_val = row[check_idx] if check_idx < len(row) else ''

            row_issues = self._grammar_problems(check_val)
            if row_issues:
                issues.append({
                    'document': report_val,
                    'problems': row_issues,
                    'row': row_num,
                    'text': check_val
                })

        return issues

    def export_to_excel(self, outputPath, visible=False, cell_highlighting=False):
        """Write all loaded rows to a new Excel workbook via COM automation.

        Args:
            outputPath: Path the workbook is saved to.
            visible: Whether the Excel application window is shown.
            cell_highlighting: Falsy to disable, otherwise a 3-tuple
                ``(document_id_column_name, highlight_column_name,
                doc_ids_list)``. For each data row whose document-id column
                value is in ``doc_ids_list`` (expected to hold doc-ID
                strings), the cell in the highlight column gets a coloured
                background.

        Raises:
            ValueError: If no data is loaded, ``cell_highlighting`` is not a
                3-tuple, or one of its column names is not in the header row.
        """
        if not self.records:
            raise ValueError("No data loaded. Run .load() first.")

        # Validate the highlighting request before Excel is started so that a
        # bad argument cannot leave an Excel process running.
        highlight_plan = None
        if cell_highlighting:
            if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
                raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")

            document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting

            headers = self.get_headers()
            try:
                doc_id_col_idx = headers.index(document_id_column_name) + 1  # 1-based for Excel
                highlight_col_idx = headers.index(highlight_column_name) + 1
            except ValueError as e:
                raise ValueError(f"Column not found: {e}") from e

            # Build the lookup once instead of scanning the list per row.
            highlight_plan = (doc_id_col_idx, highlight_col_idx, set(doc_ids_list))

        xlApp = Dispatch('Excel.Application')
        wb = None
        try:
            xlApp.Visible = visible
            wb = xlApp.Workbooks.Add()
            ws = wb.Worksheets(1)

            ## Write data
            for row_idx, row in enumerate(self.records, start=1):
                for col_idx, value in enumerate(row, start=1):
                    ws.Cells(row_idx, col_idx).Value = value

            ## Optional cell highlighting
            if highlight_plan is not None:
                doc_id_col_idx, highlight_col_idx, doc_ids = highlight_plan
                for row_idx, row in enumerate(self.get_data(), start=2):  # Skip header
                    if doc_id_col_idx > len(row):
                        continue  # row too short to hold a document id
                    if row[doc_id_col_idx - 1] in doc_ids:
                        ws.Cells(row_idx, highlight_col_idx).Interior.Color = self.HIGHLIGHT_COLOR

            wb.SaveAs(outputPath)
        finally:
            # Always release the workbook and the Excel process, even on error.
            if wb is not None:
                wb.Close(SaveChanges=False)
            xlApp.Quit()
132
133
134
if __name__ == '__main__':
    # Script entry point: load the DAT export, write a grammar report for the
    # documents named in the highlight list, then export everything to Excel
    # with the checked cell highlighted for those same documents.

    ## Full path to the input file
    inputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.dat"

    ## Full path to the output xlsx file
    outputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.xlsx"

    ## Grammar report path, if you use one.
    outputReportFilePath = r"C:\Test_Dir\ATT\export_20250522_021450-GrammarReport.txt"

    ## Optional path to a file of docIDs; a specific cell is highlighted when a row's docID is found in this list
    highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"

    # One docID per line; the context manager closes the file promptly.
    with open(highlightFilePath) as highlightFile:
        highlighterDocumentList = [line.replace("\n", "") for line in highlightFile]
    # Set copy for fast membership tests while filtering the report.
    highlighterDocumentSet = set(highlighterDocumentList)

    loader = ConcordanceLoader(inputFilePath)
    loader.load()

    print("Headers", loader.get_headers())

    report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")

    # Pipe-delimited report: docID | comma-joined problems | original text.
    with open(outputReportFilePath, 'w', encoding='utf-8') as reportOutputFile:
        for item in report:
            if item['document'] in highlighterDocumentSet:
                reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")

    loader.export_to_excel(outputFilePath, visible=False, cell_highlighting=("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
175