ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/PrivilegeLogGenerator.py
Revision: 884
Committed: Thu May 22 19:56:48 2025 UTC (10 months ago) by nino.borges
Content type: text/x-python
File size: 6576 byte(s)
Log Message:
This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.

File Contents

# User Rev Content
1 nino.borges 884 """
2    
3     PrivilegeLogGenerator
4    
5     Created by:
6     Emanuel Borges
7     05.21.2025
8    
9     This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.
10    
11     """
12    
13     import csv
14     from win32com.client import Dispatch
15    
16    
17    
class ConcordanceLoader:
    """Load a Concordance-style DAT export and turn it into an Excel log.

    The file is parsed with the ``csv`` module using ASCII 20 as the field
    delimiter and ASCII 254 as the quote character.  The first parsed row is
    treated as the header row; every following row is a data row.
    """

    # Fill colour applied to highlighted cells ("medium purple" per the
    # original author).  Alternatives previously tried: 5296274 (light green),
    # 15122175 (light purple).
    HIGHLIGHT_COLOR = 13148390

    def __init__(self, filePath):
        """Remember *filePath*; nothing is read until :meth:`load` is called."""
        self.filePath = filePath
        self.delimiter = '\x14'  # ASCII 20
        self.quotechar = '\xfe'  # ASCII 254
        self.records = []

    def load(self):
        """Parse the DAT file into ``self.records`` (a list of row lists).

        ``utf-8-sig`` is used so that a leading byte-order mark, if present,
        is dropped instead of ending up inside the first header name; files
        without a BOM are read exactly as plain UTF-8.
        """
        with open(self.filePath, 'r', encoding='utf-8-sig', newline='') as file:
            reader = csv.reader(file, delimiter=self.delimiter, quotechar=self.quotechar)
            self.records = list(reader)

    def get_headers(self):
        """Return the header row, or an empty list when nothing is loaded."""
        return self.records[0] if self.records else []

    def get_data(self):
        """Return every row after the header (empty list if there are none)."""
        return self.records[1:]

    def get_all(self):
        """Return all parsed rows, header included."""
        return self.records

    def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
        """Run simple text checks on one column and report the rows that fail.

        Args:
            report_on_field: Header name of the column whose value identifies
                the row in the report (stored under ``'document'``).
            field_to_check: Header name of the column whose text is checked.

        Returns:
            A list of dicts, one per row with at least one problem, with the
            keys ``'document'``, ``'problems'`` (list of problem labels),
            ``'row'`` (1-based row number, header being row 1) and ``'text'``
            (the unmodified checked value).

        Raises:
            ValueError: If no data is loaded or either column is missing.
        """
        if not self.records:
            raise ValueError("No data loaded. Run .load() first.")

        headers = self.get_headers()
        try:
            report_idx = headers.index(report_on_field)
            check_idx = headers.index(field_to_check)
        except ValueError as e:
            raise ValueError(f"Column not found: {e}") from e

        issues = []

        # Data starts on row 2 so the numbers line up with the exported sheet.
        for row_num, row in enumerate(self.get_data(), start=2):
            # Short rows are tolerated: a missing cell is treated as empty.
            report_val = row[report_idx] if report_idx < len(row) else ''
            check_val = row[check_idx] if check_idx < len(row) else ''

            row_issues = []

            if "  " in check_val:  # two consecutive spaces
                row_issues.append("double spaces")
            if ".." in check_val:
                row_issues.append("double periods")
            if ",," in check_val:
                row_issues.append("double commas")

            stripped = check_val.strip()
            if stripped != check_val:
                row_issues.append("leading/trailing whitespace")

            # The remaining checks only make sense for non-blank text.
            if stripped and stripped[0].islower():
                row_issues.append("sentence starts with lowercase letter")
            if stripped and stripped[-1] not in ('.', '?', '!'):
                row_issues.append("missing final punctuation")

            if row_issues:
                issues.append({
                    'document': report_val,
                    'problems': row_issues,
                    'row': row_num,
                    'text': check_val,
                })

        return issues

    def export_to_excel(self, outputPath, visible=False, cell_highlighting=False):
        """Write all loaded rows to a new Excel workbook via COM automation.

        Args:
            outputPath: Path passed to the workbook's ``SaveAs``.
            visible: Whether the Excel application window is shown.
            cell_highlighting: Falsy to disable, otherwise a 3-tuple
                ``(document_id_column_name, highlight_column_name,
                doc_ids_list)``.  For every data row whose document-id column
                value is in ``doc_ids_list``, the cell in the highlight column
                is filled with ``HIGHLIGHT_COLOR``.

        Raises:
            ValueError: If no data is loaded, ``cell_highlighting`` is not a
                3-tuple, or one of its column names is not a header.
        """
        if not self.records:
            raise ValueError("No data loaded. Run .load() first.")

        # Validate the highlighting request before Excel is started so a bad
        # argument cannot leave an orphaned Excel process behind.
        highlight = None
        if cell_highlighting:
            if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
                raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")

            document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting

            headers = self.get_headers()
            try:
                doc_id_idx = headers.index(document_id_column_name)  # 0-based, indexes the row list
                highlight_col_idx = headers.index(highlight_column_name) + 1  # 1-based for Excel
            except ValueError as e:
                raise ValueError(f"Column not found: {e}") from e

            # Build the lookup once; fall back to the caller's container if
            # the ids cannot be hashed.
            try:
                doc_ids = set(doc_ids_list)
            except TypeError:
                doc_ids = doc_ids_list
            highlight = (doc_id_idx, highlight_col_idx, doc_ids)

        xlApp = Dispatch('Excel.Application')
        wb = None
        try:
            xlApp.Visible = visible
            wb = xlApp.Workbooks.Add()
            ws = wb.Worksheets(1)

            ## Write data
            for row_idx, row in enumerate(self.records, start=1):
                for col_idx, value in enumerate(row, start=1):
                    ws.Cells(row_idx, col_idx).Value = value

            ## Optional cell highlighting
            if highlight is not None:
                doc_id_idx, highlight_col_idx, doc_ids = highlight
                for row_idx, row in enumerate(self.get_data(), start=2):  # Skip header
                    if doc_id_idx < len(row) and row[doc_id_idx] in doc_ids:
                        ws.Cells(row_idx, highlight_col_idx).Interior.Color = self.HIGHLIGHT_COLOR

            wb.SaveAs(outputPath)
        finally:
            # Always release the workbook and the Excel instance, even when
            # writing or saving failed part-way through.
            if wb is not None:
                wb.Close(SaveChanges=False)
            xlApp.Quit()
132    
133    
134    
if __name__ == '__main__':
    ## Full path to the input file
    inputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.dat"

    ## Full path to the output xlsx file
    outputFilePath = r"C:\Test_Dir\ATT\export_20250522_021450.xlsx"

    ## Grammar report path, if you use one.
    outputReportFilePath = r"C:\Test_Dir\ATT\export_20250522_021450-GrammarReport.txt"

    ## Optional path to a file of docIDs (one per line). When a row's docID is on
    ## this list, a specific cell in that row is highlighted in the workbook.
    highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"

    # Read the docIDs with a context manager so the handle is always closed.
    with open(highlightFilePath) as highlightFile:
        highlighterDocumentList = [line.replace("\n", "") for line in highlightFile]

    # Set copy used only for the per-item membership test while writing the report.
    highlighterDocumentSet = set(highlighterDocumentList)

    loader = ConcordanceLoader(inputFilePath)
    loader.load()

    print("Headers", loader.get_headers())

    report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")

    # Only documents on the highlight list are written, one pipe-delimited line
    # each: docID|comma-joined problems|checked text.
    with open(outputReportFilePath, 'w', encoding='utf-8') as reportOutputFile:
        for item in report:
            if item['document'] in highlighterDocumentSet:
                reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")

    loader.export_to_excel(outputFilePath, visible=False, cell_highlighting=("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
175