| 1 |
"""
|
| 2 |
|
| 3 |
PrivilegeLogGenerator
|
| 4 |
|
| 5 |
Created by:
|
| 6 |
Emanuel Borges
|
| 7 |
05.21.2025
|
| 8 |
|
| 9 |
This program will take a DAT export from Relativity and turn this into an Excel Spreadsheet Privilege Log.
|
| 10 |
|
| 11 |
"""
|
| 12 |
|
| 13 |
import csv
|
| 14 |
from win32com.client import Dispatch
|
| 15 |
|
| 16 |
|
| 17 |
|
| 18 |
class ConcordanceLoader:
|
| 19 |
def __init__(self, filePath):
|
| 20 |
self.filePath = filePath
|
| 21 |
self.delimiter = '\x14' # ASCII 20
|
| 22 |
self.quotechar = '\xfe' # ASCII 254
|
| 23 |
self.records = []
|
| 24 |
|
| 25 |
|
| 26 |
def load(self):
|
| 27 |
with open(self.filePath, 'r', encoding='utf-8', newline='') as file:
|
| 28 |
reader = csv.reader(file, delimiter = self.delimiter, quotechar = self.quotechar)
|
| 29 |
self.records = [row for row in reader]
|
| 30 |
|
| 31 |
|
| 32 |
def get_headers(self):
|
| 33 |
return self.records[0] if self.records else []
|
| 34 |
|
| 35 |
def get_data(self):
|
| 36 |
return self.records[1:] if len(self.records) >1 else []
|
| 37 |
|
| 38 |
def get_all(self):
|
| 39 |
return self.records
|
| 40 |
|
| 41 |
|
| 42 |
|
| 43 |
def generate_grammar_check_report(self, report_on_field: str, field_to_check: str):
|
| 44 |
if not self.records:
|
| 45 |
raise ValueError("No data loaded. Run .load() first.")
|
| 46 |
|
| 47 |
headers = self.get_headers()
|
| 48 |
try:
|
| 49 |
report_idx = headers.index(report_on_field)
|
| 50 |
check_idx = headers.index(field_to_check)
|
| 51 |
except ValueError as e:
|
| 52 |
raise ValueError(f"Column not found: {e}")
|
| 53 |
|
| 54 |
issues = []
|
| 55 |
|
| 56 |
for row_num, row in enumerate(self.get_data(), start=2): # Use Excel-style row numbers
|
| 57 |
report_val = row[report_idx] if report_idx < len(row) else ''
|
| 58 |
check_val = row[check_idx] if check_idx < len(row) else ''
|
| 59 |
|
| 60 |
row_issues = []
|
| 61 |
|
| 62 |
if " " in check_val:
|
| 63 |
row_issues.append("double spaces")
|
| 64 |
if ".." in check_val:
|
| 65 |
row_issues.append("double periods")
|
| 66 |
if ",," in check_val:
|
| 67 |
row_issues.append("double commas")
|
| 68 |
if check_val.strip() != check_val:
|
| 69 |
row_issue.append("leading/trailing whitespace")
|
| 70 |
|
| 71 |
stripped = check_val.strip()
|
| 72 |
|
| 73 |
if stripped and stripped[0].islower():
|
| 74 |
row_issues.append("sentance startes with lowercase letter")
|
| 75 |
|
| 76 |
if stripped and stripped[-1] not in ('.','?','!'):
|
| 77 |
row_issues.append("missing final punctuation")
|
| 78 |
|
| 79 |
if row_issues:
|
| 80 |
issues.append({
|
| 81 |
'document':report_val,
|
| 82 |
'problems': row_issues,
|
| 83 |
'row': row_num,
|
| 84 |
'text': check_val
|
| 85 |
})
|
| 86 |
|
| 87 |
return issues
|
| 88 |
|
| 89 |
|
| 90 |
def export_to_excel(self, outputPath, visible=False, cell_highlighting = False):
|
| 91 |
if not self.records:
|
| 92 |
raise ValueError("No data loaded. Run .load() first.")
|
| 93 |
|
| 94 |
xlApp = Dispatch('Excel.Application')
|
| 95 |
xlApp.Visible = visible
|
| 96 |
wb = xlApp.Workbooks.Add()
|
| 97 |
ws = wb.Worksheets(1)
|
| 98 |
|
| 99 |
## Write data
|
| 100 |
for row_idx, row in enumerate(self.records, start=1):
|
| 101 |
for col_idx, value in enumerate(row, start=1):
|
| 102 |
ws.Cells(row_idx, col_idx).Value = value
|
| 103 |
|
| 104 |
|
| 105 |
## Optional cell highlighting
|
| 106 |
if cell_highlighting:
|
| 107 |
if not isinstance(cell_highlighting, tuple) or len(cell_highlighting) != 3:
|
| 108 |
raise ValueError("cell_highlighting must be a 3-tuple: (document_id_column_name, highlight_column_name, doc_ids_list)")
|
| 109 |
|
| 110 |
document_id_column_name, highlight_column_name, doc_ids_list = cell_highlighting
|
| 111 |
|
| 112 |
headers = self.get_headers()
|
| 113 |
try:
|
| 114 |
doc_id_col_idx = headers.index(document_id_column_name) + 1 # 1-based for Excel
|
| 115 |
highlight_col_idx = headers.index(highlight_column_name) +1
|
| 116 |
except ValueError as e:
|
| 117 |
raise ValueError(f"Colun not found: {e}")
|
| 118 |
|
| 119 |
for row_idx, row in enumerate(self.get_data(), start=2): # Skip header
|
| 120 |
if doc_id_col_idx <= len(row):
|
| 121 |
doc_id_value = row[doc_id_col_idx - 1]
|
| 122 |
if doc_id_value in doc_ids_list:
|
| 123 |
cell = ws.Cells(row_idx, highlight_col_idx)
|
| 124 |
#cell.Interior.Color = 5296274 # Light green
|
| 125 |
#cell.Interior.Color = 15122175 # Light purple
|
| 126 |
cell.Interior.Color = 13148390 # Medium purple
|
| 127 |
|
| 128 |
|
| 129 |
wb.SaveAs(outputPath)
|
| 130 |
wb.Close(SaveChanges = False)
|
| 131 |
#xlApp.Quit()
|
| 132 |
|
| 133 |
|
| 134 |
|
| 135 |
if __name__ == '__main__':
|
| 136 |
## Full path to the input file
|
| 137 |
#inputFilePath = r"C:\Test_Dir\ATT\export_20250523_214343.dat"
|
| 138 |
#inputFilePath = r"C:\Test_Dir\ATT\FCC_Manual_Log\export_20250701_223436.dat"
|
| 139 |
inputFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430.dat"
|
| 140 |
|
| 141 |
## Full path to the output xlsx file
|
| 142 |
outputFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430.xlsx"
|
| 143 |
|
| 144 |
## Grammar report path, if you use one.
|
| 145 |
outputReportFilePath = r"C:\Users\eborges\Downloads\export_20250703_230430-GrammarReport.txt"
|
| 146 |
|
| 147 |
## Optional file path to file of docIDs where it will highlight a specific cell if a specfiic docID is found on this list of docIDs
|
| 148 |
#highlightFilePath = r"C:\Test_Dir\ATT\VermontPrivLogHighlighter.csv"
|
| 149 |
highlightFilePath = r"C:\Test_Dir\ATT\ShinyPrivLogHighlighter2.csv"
|
| 150 |
|
| 151 |
highlighterDocumentList = []
|
| 152 |
contents = open(highlightFilePath).readlines()
|
| 153 |
for line in contents:
|
| 154 |
line = line.replace("\n","")
|
| 155 |
highlighterDocumentList.append(line)
|
| 156 |
|
| 157 |
|
| 158 |
loader = ConcordanceLoader(inputFilePath)
|
| 159 |
loader.load()
|
| 160 |
|
| 161 |
print("Headers", loader.get_headers())
|
| 162 |
#print("First row:", loader.get_data()[0])
|
| 163 |
|
| 164 |
#report = loader.generate_grammar_check_report("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description")
|
| 165 |
report = loader.generate_grammar_check_report("REVIEWID", "DOJ CID Final Privilege Description")
|
| 166 |
reportOutputFile = open(outputReportFilePath,'w', encoding = 'utf-8')
|
| 167 |
for item in report:
|
| 168 |
if item['document'] in highlighterDocumentList:
|
| 169 |
reportOutputFile.write(f"{item['document']}|{','.join(item['problems'])}|{item['text']}\n")
|
| 170 |
#reportOutputFile.write(f"\nRow {item['row']} (DocID: {item['document']}):\n")
|
| 171 |
#reportOutputFile.write(f" - Issues: {','.join(item['problems'])}")
|
| 172 |
#reportOutputFile.write(f" - Text: {item['text']}\n")
|
| 173 |
#print(f"Row {item['row']} (DocID: {item['document']}):")
|
| 174 |
#print(f" - Issues: {','.join(item['problems'])}")
|
| 175 |
#print(f" - Text: {item['text']}\n")
|
| 176 |
reportOutputFile.close()
|
| 177 |
|
| 178 |
#loader.export_to_excel(outputFilePath, visible = False, cell_highlighting = ("REVIEWID", "MA_PrivCustomValue::DOJ Privilege Description", highlighterDocumentList))
|
| 179 |
loader.export_to_excel(outputFilePath, visible = False, cell_highlighting = ("REVIEWID", "DOJ CID Final Privilege Description", highlighterDocumentList))
|
| 180 |
|