ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NS-CategoricalGroupedLogCreator.py
Revision: 972
Committed: Wed Feb 11 22:45:06 2026 UTC (6 weeks, 2 days ago) by nino.borges
Content type: text/x-python
File size: 13045 byte(s)
Log Message:
Fixing some bugs with the newly added Document Type support.

File Contents

# User Rev Content
1 nino.borges 969 """
2     NS-CategoricalGroupedLogCreator
3    
4     Created by:
5     Emanuel Borges
6     02.09.2026
7    
8 nino.borges 972 Creates a grouped/concatenated categorical privilege log where there is one output row
9     per unique Privilege Description.
10 nino.borges 969
11 nino.borges 972 Notes:
12 nino.borges 970 - This implementation uses win32com / pywin32 and requires Windows + Microsoft Excel installed.
13 nino.borges 972 - Also note that I'm using DispatchEx instead of just normal Dispatch to force a fresh Excel instance. This is due to some crashing weirdness that is happening on this work laptop.
14     - I import pythoncom for some belt-and-suspenders. Mostly not needed.
15 nino.borges 969 """
16    
17     from __future__ import annotations
18    
19     from dataclasses import dataclass, field
20 nino.borges 970 from datetime import date, datetime
21 nino.borges 972 from typing import Any, Dict, Iterable, List, Optional, Set
22 nino.borges 969
23 nino.borges 971 import pythoncom
24     from win32com.client import DispatchEx
25 nino.borges 969
26 nino.borges 972 version = "0.4"
27 nino.borges 969
28 nino.borges 970
29     # ----------------------------
30     # My Helpers
31     # ----------------------------
32    
33     def col_letter_to_index(col: str) -> int:
34 nino.borges 972 """Convert an Excel column letter (e.g., 'A', 'Z', 'AA') to a 1-based index."""
35 nino.borges 970 col = col.strip().upper()
36     if not col.isalpha():
37     raise ValueError(f"Invalid column letter: {col!r}")
38    
39     idx = 0
40     for ch in col:
41     idx = idx * 26 + (ord(ch) - ord("A") + 1)
42     return idx
43    
44    
45 nino.borges 972 def normalize_key(v: Any) -> str:
46     """Normalize dictionary key input (Privilege Description)."""
47     if v is None:
48 nino.borges 970 return ""
49 nino.borges 972 return str(v).strip()
50 nino.borges 970
51    
52 nino.borges 972 def normalize_cell_text(v: Any) -> str:
53     """Normalize generic cell values to a trimmed string."""
54     if v in (None, ""):
55     return ""
56     return str(v).strip()
57    
58    
59 nino.borges 970 def excel_value_to_date(v: Any) -> Optional[date]:
60     """
61 nino.borges 972 Convert a win32com Excel cell value into a Python date, discarding time.
62    
63     Handles:
64     - datetime (common via pywin32)
65 nino.borges 970 - date
66 nino.borges 972 - strings like '2025-01-31', '2/21/2023', '02/21/2023 15:49'
67 nino.borges 970 """
68     if v in (None, ""):
69     return None
70    
71     if isinstance(v, datetime):
72     return v.date()
73     if isinstance(v, date):
74     return v
75    
76     if isinstance(v, str):
77     s = v.strip()
78     if not s:
79     return None
80 nino.borges 972
81     for fmt in (
82     "%Y-%m-%d",
83     "%m/%d/%Y",
84     "%m/%d/%y",
85     "%m/%d/%Y %H:%M",
86     "%m/%d/%Y %H:%M:%S",
87     "%m/%d/%y %H:%M",
88     "%m/%d/%y %H:%M:%S",
89     ):
90 nino.borges 970 try:
91     return datetime.strptime(s, fmt).date()
92     except ValueError:
93     pass
94 nino.borges 972
95 nino.borges 970 return None
96    
97     return None
98    
99    
100 nino.borges 972 def split_delimited(cell_value: Any, delimiter: str = ";") -> List[str]:
101     """Split a delimited cell value into trimmed non-empty parts."""
102 nino.borges 970 if cell_value in (None, ""):
103     return []
104     s = str(cell_value).strip()
105     if not s:
106     return []
107     return [p.strip() for p in s.split(delimiter) if p.strip()]
108    
109    
110     # ----------------------------
111 nino.borges 972 # The data model
112 nino.borges 970 # ----------------------------
113    
114 nino.borges 969 @dataclass
115     class DocValues:
116     BegDate_val: Optional[date] = None
117     EndDate_val: Optional[date] = None
118     Participants_val: Set[str] = field(default_factory=set)
119     PrivilegeTypes_val: Set[str] = field(default_factory=set)
120 nino.borges 971 DocumentTypes_val: Set[str] = field(default_factory=set)
121 nino.borges 969 DocCount_val: int = 0
122    
123 nino.borges 970 def update(
124     self,
125     doc_date: Optional[date],
126     participants: Iterable[str],
127     privilege_types: Iterable[str],
128 nino.borges 972 document_type: str,
129 nino.borges 970 ) -> None:
130 nino.borges 969 ## Count
131 nino.borges 970 self.DocCount_val += 1
132 nino.borges 969
133 nino.borges 972 # Dates
134 nino.borges 969 if doc_date is not None:
135     if self.BegDate_val is None or doc_date < self.BegDate_val:
136     self.BegDate_val = doc_date
137 nino.borges 970 if self.EndDate_val is None or doc_date > self.EndDate_val:
138 nino.borges 969 self.EndDate_val = doc_date
139    
140     ## Participants
141     for p in participants:
142 nino.borges 970 p = str(p).strip()
143 nino.borges 969 if p:
144     self.Participants_val.add(p)
145    
146 nino.borges 972 ## Privilege Types (multi-value, dedup)
147 nino.borges 969 for t in privilege_types:
148 nino.borges 970 t = str(t).strip()
149 nino.borges 969 if t:
150     self.PrivilegeTypes_val.add(t)
151    
152 nino.borges 972 ## Document Type (single value per record, but can be multiple across grouped records)
153     dt = (document_type or "").strip()
154     if dt:
155     self.DocumentTypes_val.add(dt)
156 nino.borges 969
157 nino.borges 971
158 nino.borges 970 # ----------------------------
159 nino.borges 972 # The main class
160 nino.borges 970 # ----------------------------
161 nino.borges 969
162 nino.borges 970 class CreateGroupedCategoricalLog:
163     def __init__(
164     self,
165     pathToExcelSpreadsheet: str,
166     privDescriptionColumnLetter: str,
167     docDatesColumnLettersList: List[str],
168     participantsColumnLettersList: List[str],
169     privilegeTypeColumnLetter: str,
170 nino.borges 971 documentTypeColumnLetter: str,
171 nino.borges 970 *,
172     sheet_name: Optional[str] = None,
173     header_row: int = 1,
174     data_start_row: int = 2,
175     totalNumberOfRows: Optional[int] = None,
176     participants_delimiter: str = ";",
177     privilege_types_delimiter: str = ";",
178     ) -> None:
179     """
180 nino.borges 972 privDescriptionColumnLetter, privilegeTypeColumnLetter, documentTypeColumnLetter:
181     single column letters.
182 nino.borges 969
183 nino.borges 970 docDatesColumnLettersList:
184 nino.borges 972 list of date column letters; first non-empty wins (per row).
185 nino.borges 969
186 nino.borges 970 participantsColumnLettersList:
187 nino.borges 972 list of participant column letters; aggregated/deduped across the group.
188 nino.borges 969
189 nino.borges 970 totalNumberOfRows:
190 nino.borges 972 if None, uses UsedRange row count.
191 nino.borges 969
192 nino.borges 970 sheet_name:
193 nino.borges 972 if None, uses worksheet 1.
194 nino.borges 970 """
195     self.pathToExcelSpreadsheet = pathToExcelSpreadsheet
196     self.sheet_name = sheet_name
197     self.header_row = header_row
198     self.data_start_row = data_start_row
199     self.totalNumberOfRows = totalNumberOfRows
200 nino.borges 969
201 nino.borges 970 self.privDescriptionColumnLetter = privDescriptionColumnLetter
202     self.docDatesColumnLettersList = docDatesColumnLettersList
203     self.participantsColumnLettersList = participantsColumnLettersList
204     self.privilegeTypeColumnLetter = privilegeTypeColumnLetter
205 nino.borges 971 self.documentTypeColumnLetter = documentTypeColumnLetter
206 nino.borges 970
207     self.participants_delimiter = participants_delimiter
208     self.privilege_types_delimiter = privilege_types_delimiter
209    
210 nino.borges 972 ## Keyed here by Privilege Description
211 nino.borges 970 self.catLogMatrix: Dict[str, DocValues] = {}
212    
213     ## ---------- Excel COM plumbing ----------
214    
215     def _open_excel(self):
216 nino.borges 971 pythoncom.CoInitialize()
217     xlApp = DispatchEx("Excel.Application")
218 nino.borges 970 xlApp.Visible = False
219     xlApp.DisplayAlerts = False
220     return xlApp
221    
222     def ingest_spreadsheet(self) -> None:
223 nino.borges 972 """Read the input privilege log and populate self.catLogMatrix."""
224 nino.borges 970 xlApp = self._open_excel()
225     wb = None
226    
227     try:
228     wb = xlApp.Workbooks.Open(self.pathToExcelSpreadsheet)
229    
230 nino.borges 972 ws = wb.Worksheets(self.sheet_name) if self.sheet_name else wb.Worksheets(1)
231 nino.borges 970
232     ## Determine last row
233     if self.totalNumberOfRows is not None:
234     last_row = self.totalNumberOfRows
235     else:
236     last_row = ws.UsedRange.Rows.Count
237    
238     priv_col = col_letter_to_index(self.privDescriptionColumnLetter)
239     priv_type_col = col_letter_to_index(self.privilegeTypeColumnLetter)
240 nino.borges 971 doc_type_col = col_letter_to_index(self.documentTypeColumnLetter)
241 nino.borges 972
242 nino.borges 970 date_cols = [col_letter_to_index(c) for c in self.docDatesColumnLettersList]
243     part_cols = [col_letter_to_index(c) for c in self.participantsColumnLettersList]
244    
245     for row in range(self.data_start_row, last_row + 1):
246 nino.borges 972 priv_desc = normalize_key(ws.Cells(row, priv_col).Value)
247 nino.borges 970 if not priv_desc:
248     continue
249    
250 nino.borges 972 ## Date: first non-empty among configured columns
251 nino.borges 970 doc_dt: Optional[date] = None
252     for dc in date_cols:
253 nino.borges 972 doc_dt = excel_value_to_date(ws.Cells(row, dc).Value)
254 nino.borges 970 if doc_dt is not None:
255     break
256    
257 nino.borges 972 ## Participants: across configured columns, delimited
258 nino.borges 970 participants: List[str] = []
259     for pc in part_cols:
260 nino.borges 972 participants.extend(split_delimited(ws.Cells(row, pc).Value, delimiter=self.participants_delimiter))
261 nino.borges 970
262 nino.borges 972 ## Privilege types: delimited
263     priv_types = split_delimited(ws.Cells(row, priv_type_col).Value, delimiter=self.privilege_types_delimiter)
264 nino.borges 970
265 nino.borges 972 ## Document type: single value (not delimited)
266     doc_type = normalize_cell_text(ws.Cells(row, doc_type_col).Value)
267    
268 nino.borges 970 dv = self.catLogMatrix.setdefault(priv_desc, DocValues())
269 nino.borges 972 dv.update(
270     doc_date=doc_dt,
271     participants=participants,
272     privilege_types=priv_types,
273     document_type=doc_type,
274     )
275 nino.borges 970
276     finally:
277 nino.borges 972 try:
278     if wb is not None:
279     wb.Close(SaveChanges=False)
280     except Exception:
281     pass
282     try:
283     xlApp.Quit()
284     except Exception:
285     pass
286 nino.borges 970
287     def export_grouped_log(self, output_path: str) -> None:
288 nino.borges 972 """Export the grouped results to a new Excel spreadsheet."""
289 nino.borges 970 xlApp = self._open_excel()
290     wb_out = None
291    
292     try:
293     wb_out = xlApp.Workbooks.Add()
294 nino.borges 971 wb_out.Activate()
295 nino.borges 972
296 nino.borges 970 ws_out = wb_out.Worksheets(1)
297 nino.borges 971 ws_out.Activate()
298 nino.borges 970 ws_out.Name = "Grouped Log"
299    
300     headers = [
301     "Privilege Description",
302     "BegDate",
303     "EndDate",
304     "Participants (Deduped)",
305     "Privilege Types (Deduped)",
306 nino.borges 971 "Document Types (Deduped)",
307 nino.borges 970 "DocCount",
308     ]
309    
310     ## Write headers
311     for c, h in enumerate(headers, start=1):
312 nino.borges 972 cell = ws_out.Cells(1, c)
313     cell.Value = str(h)
314     cell.Font.Bold = True
315 nino.borges 970
316     ## Write rows
317     row_out = 2
318     for priv_desc in sorted(self.catLogMatrix.keys()):
319     dv = self.catLogMatrix[priv_desc]
320    
321     participants_sorted = sorted(dv.Participants_val)
322     priv_types_sorted = sorted(dv.PrivilegeTypes_val)
323 nino.borges 972 doc_types_sorted = sorted(dv.DocumentTypes_val)
324 nino.borges 970
325     ws_out.Cells(row_out, 1).Value = priv_desc
326     ws_out.Cells(row_out, 2).Value = dv.BegDate_val.isoformat() if dv.BegDate_val else ""
327     ws_out.Cells(row_out, 3).Value = dv.EndDate_val.isoformat() if dv.EndDate_val else ""
328     ws_out.Cells(row_out, 4).Value = "; ".join(participants_sorted)
329     ws_out.Cells(row_out, 5).Value = "; ".join(priv_types_sorted)
330 nino.borges 972 ws_out.Cells(row_out, 6).Value = "; ".join(doc_types_sorted)
331     ws_out.Cells(row_out, 7).Value = dv.DocCount_val
332 nino.borges 970
333 nino.borges 972 ## Wrap long text columns
334 nino.borges 970 ws_out.Cells(row_out, 1).WrapText = True
335     ws_out.Cells(row_out, 4).WrapText = True
336     ws_out.Cells(row_out, 5).WrapText = True
337 nino.borges 972 ws_out.Cells(row_out, 6).WrapText = True
338 nino.borges 970
339     row_out += 1
340    
341     ws_out.Columns.AutoFit()
342    
343     wb_out.SaveAs(output_path)
344    
345     finally:
346 nino.borges 972 try:
347     if wb_out is not None:
348     wb_out.Close(SaveChanges=False)
349     except Exception:
350     pass
351     try:
352     xlApp.Quit()
353     except Exception:
354     pass
355 nino.borges 970
356    
357     if __name__ == "__main__":
358 nino.borges 972 inputSpreadsheetLog = r""
359     outputSpreadsheetLog = r""
360 nino.borges 970
361 nino.borges 972
362     privDescriptionColumnLetter = "F"
363     docDatesColumnLettersList = ["B"] # first non-empty wins!
364     participantsColumnLettersList = ["H", "I", "J", "K"] # e.g., Author/From/To/CC/BCC
365     privilegeTypeColumnLetter = "G"
366 nino.borges 971 documentTypeColumnLetter = "E"
367 nino.borges 970
368     grouper = CreateGroupedCategoricalLog(
369     pathToExcelSpreadsheet=inputSpreadsheetLog,
370     privDescriptionColumnLetter=privDescriptionColumnLetter,
371     docDatesColumnLettersList=docDatesColumnLettersList,
372     participantsColumnLettersList=participantsColumnLettersList,
373     privilegeTypeColumnLetter=privilegeTypeColumnLetter,
374 nino.borges 971 documentTypeColumnLetter=documentTypeColumnLetter,
375 nino.borges 972 sheet_name=None,
376 nino.borges 970 header_row=1,
377     data_start_row=2,
378 nino.borges 972 totalNumberOfRows=None,
379 nino.borges 970 participants_delimiter=";",
380     privilege_types_delimiter=";",
381     )
382    
383     grouper.ingest_spreadsheet()
384     grouper.export_grouped_log(outputSpreadsheetLog)
385     print(f"Done. Grouped rows: {len(grouper.catLogMatrix)}")