ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NS-CategoricalGroupedLogCreator.py
Revision: 972
Committed: Wed Feb 11 22:45:06 2026 UTC (6 weeks, 2 days ago) by nino.borges
Content type: text/x-python
File size: 13045 byte(s)
Log Message:
Fixing some bugs with the newly added Document Type support.

File Contents

# Content
1 """
2 NS-CategoricalGroupedLogCreator
3
4 Created by:
5 Emanuel Borges
6 02.09.2026
7
8 Creates a grouped/concatenated categorical privilege log where there is one output row
9 per unique Privilege Description.
10
11 Notes:
12 - This implementation uses win32com / pywin32 and requires Windows + Microsoft Excel installed.
13 - Also note that I'm using DispatchEx instead of just normal Dispatch to force a fresh Excel instance. This is due to some crashing weirdness that is happening on this work laptop.
14 - I import pythoncom for some belt-and-suspenders. Mostly not needed.
15 """
16
17 from __future__ import annotations
18
19 from dataclasses import dataclass, field
20 from datetime import date, datetime
21 from typing import Any, Dict, Iterable, List, Optional, Set
22
23 import pythoncom
24 from win32com.client import DispatchEx
25
26 version = "0.4"
27
28
29 # ----------------------------
30 # My Helpers
31 # ----------------------------
32
33 def col_letter_to_index(col: str) -> int:
34 """Convert an Excel column letter (e.g., 'A', 'Z', 'AA') to a 1-based index."""
35 col = col.strip().upper()
36 if not col.isalpha():
37 raise ValueError(f"Invalid column letter: {col!r}")
38
39 idx = 0
40 for ch in col:
41 idx = idx * 26 + (ord(ch) - ord("A") + 1)
42 return idx
43
44
45 def normalize_key(v: Any) -> str:
46 """Normalize dictionary key input (Privilege Description)."""
47 if v is None:
48 return ""
49 return str(v).strip()
50
51
52 def normalize_cell_text(v: Any) -> str:
53 """Normalize generic cell values to a trimmed string."""
54 if v in (None, ""):
55 return ""
56 return str(v).strip()
57
58
59 def excel_value_to_date(v: Any) -> Optional[date]:
60 """
61 Convert a win32com Excel cell value into a Python date, discarding time.
62
63 Handles:
64 - datetime (common via pywin32)
65 - date
66 - strings like '2025-01-31', '2/21/2023', '02/21/2023 15:49'
67 """
68 if v in (None, ""):
69 return None
70
71 if isinstance(v, datetime):
72 return v.date()
73 if isinstance(v, date):
74 return v
75
76 if isinstance(v, str):
77 s = v.strip()
78 if not s:
79 return None
80
81 for fmt in (
82 "%Y-%m-%d",
83 "%m/%d/%Y",
84 "%m/%d/%y",
85 "%m/%d/%Y %H:%M",
86 "%m/%d/%Y %H:%M:%S",
87 "%m/%d/%y %H:%M",
88 "%m/%d/%y %H:%M:%S",
89 ):
90 try:
91 return datetime.strptime(s, fmt).date()
92 except ValueError:
93 pass
94
95 return None
96
97 return None
98
99
100 def split_delimited(cell_value: Any, delimiter: str = ";") -> List[str]:
101 """Split a delimited cell value into trimmed non-empty parts."""
102 if cell_value in (None, ""):
103 return []
104 s = str(cell_value).strip()
105 if not s:
106 return []
107 return [p.strip() for p in s.split(delimiter) if p.strip()]
108
109
110 # ----------------------------
111 # The data model
112 # ----------------------------
113
114 @dataclass
115 class DocValues:
116 BegDate_val: Optional[date] = None
117 EndDate_val: Optional[date] = None
118 Participants_val: Set[str] = field(default_factory=set)
119 PrivilegeTypes_val: Set[str] = field(default_factory=set)
120 DocumentTypes_val: Set[str] = field(default_factory=set)
121 DocCount_val: int = 0
122
123 def update(
124 self,
125 doc_date: Optional[date],
126 participants: Iterable[str],
127 privilege_types: Iterable[str],
128 document_type: str,
129 ) -> None:
130 ## Count
131 self.DocCount_val += 1
132
133 # Dates
134 if doc_date is not None:
135 if self.BegDate_val is None or doc_date < self.BegDate_val:
136 self.BegDate_val = doc_date
137 if self.EndDate_val is None or doc_date > self.EndDate_val:
138 self.EndDate_val = doc_date
139
140 ## Participants
141 for p in participants:
142 p = str(p).strip()
143 if p:
144 self.Participants_val.add(p)
145
146 ## Privilege Types (multi-value, dedup)
147 for t in privilege_types:
148 t = str(t).strip()
149 if t:
150 self.PrivilegeTypes_val.add(t)
151
152 ## Document Type (single value per record, but can be multiple across grouped records)
153 dt = (document_type or "").strip()
154 if dt:
155 self.DocumentTypes_val.add(dt)
156
157
158 # ----------------------------
159 # The main class
160 # ----------------------------
161
162 class CreateGroupedCategoricalLog:
163 def __init__(
164 self,
165 pathToExcelSpreadsheet: str,
166 privDescriptionColumnLetter: str,
167 docDatesColumnLettersList: List[str],
168 participantsColumnLettersList: List[str],
169 privilegeTypeColumnLetter: str,
170 documentTypeColumnLetter: str,
171 *,
172 sheet_name: Optional[str] = None,
173 header_row: int = 1,
174 data_start_row: int = 2,
175 totalNumberOfRows: Optional[int] = None,
176 participants_delimiter: str = ";",
177 privilege_types_delimiter: str = ";",
178 ) -> None:
179 """
180 privDescriptionColumnLetter, privilegeTypeColumnLetter, documentTypeColumnLetter:
181 single column letters.
182
183 docDatesColumnLettersList:
184 list of date column letters; first non-empty wins (per row).
185
186 participantsColumnLettersList:
187 list of participant column letters; aggregated/deduped across the group.
188
189 totalNumberOfRows:
190 if None, uses UsedRange row count.
191
192 sheet_name:
193 if None, uses worksheet 1.
194 """
195 self.pathToExcelSpreadsheet = pathToExcelSpreadsheet
196 self.sheet_name = sheet_name
197 self.header_row = header_row
198 self.data_start_row = data_start_row
199 self.totalNumberOfRows = totalNumberOfRows
200
201 self.privDescriptionColumnLetter = privDescriptionColumnLetter
202 self.docDatesColumnLettersList = docDatesColumnLettersList
203 self.participantsColumnLettersList = participantsColumnLettersList
204 self.privilegeTypeColumnLetter = privilegeTypeColumnLetter
205 self.documentTypeColumnLetter = documentTypeColumnLetter
206
207 self.participants_delimiter = participants_delimiter
208 self.privilege_types_delimiter = privilege_types_delimiter
209
210 ## Keyed here by Privilege Description
211 self.catLogMatrix: Dict[str, DocValues] = {}
212
213 ## ---------- Excel COM plumbing ----------
214
215 def _open_excel(self):
216 pythoncom.CoInitialize()
217 xlApp = DispatchEx("Excel.Application")
218 xlApp.Visible = False
219 xlApp.DisplayAlerts = False
220 return xlApp
221
222 def ingest_spreadsheet(self) -> None:
223 """Read the input privilege log and populate self.catLogMatrix."""
224 xlApp = self._open_excel()
225 wb = None
226
227 try:
228 wb = xlApp.Workbooks.Open(self.pathToExcelSpreadsheet)
229
230 ws = wb.Worksheets(self.sheet_name) if self.sheet_name else wb.Worksheets(1)
231
232 ## Determine last row
233 if self.totalNumberOfRows is not None:
234 last_row = self.totalNumberOfRows
235 else:
236 last_row = ws.UsedRange.Rows.Count
237
238 priv_col = col_letter_to_index(self.privDescriptionColumnLetter)
239 priv_type_col = col_letter_to_index(self.privilegeTypeColumnLetter)
240 doc_type_col = col_letter_to_index(self.documentTypeColumnLetter)
241
242 date_cols = [col_letter_to_index(c) for c in self.docDatesColumnLettersList]
243 part_cols = [col_letter_to_index(c) for c in self.participantsColumnLettersList]
244
245 for row in range(self.data_start_row, last_row + 1):
246 priv_desc = normalize_key(ws.Cells(row, priv_col).Value)
247 if not priv_desc:
248 continue
249
250 ## Date: first non-empty among configured columns
251 doc_dt: Optional[date] = None
252 for dc in date_cols:
253 doc_dt = excel_value_to_date(ws.Cells(row, dc).Value)
254 if doc_dt is not None:
255 break
256
257 ## Participants: across configured columns, delimited
258 participants: List[str] = []
259 for pc in part_cols:
260 participants.extend(split_delimited(ws.Cells(row, pc).Value, delimiter=self.participants_delimiter))
261
262 ## Privilege types: delimited
263 priv_types = split_delimited(ws.Cells(row, priv_type_col).Value, delimiter=self.privilege_types_delimiter)
264
265 ## Document type: single value (not delimited)
266 doc_type = normalize_cell_text(ws.Cells(row, doc_type_col).Value)
267
268 dv = self.catLogMatrix.setdefault(priv_desc, DocValues())
269 dv.update(
270 doc_date=doc_dt,
271 participants=participants,
272 privilege_types=priv_types,
273 document_type=doc_type,
274 )
275
276 finally:
277 try:
278 if wb is not None:
279 wb.Close(SaveChanges=False)
280 except Exception:
281 pass
282 try:
283 xlApp.Quit()
284 except Exception:
285 pass
286
287 def export_grouped_log(self, output_path: str) -> None:
288 """Export the grouped results to a new Excel spreadsheet."""
289 xlApp = self._open_excel()
290 wb_out = None
291
292 try:
293 wb_out = xlApp.Workbooks.Add()
294 wb_out.Activate()
295
296 ws_out = wb_out.Worksheets(1)
297 ws_out.Activate()
298 ws_out.Name = "Grouped Log"
299
300 headers = [
301 "Privilege Description",
302 "BegDate",
303 "EndDate",
304 "Participants (Deduped)",
305 "Privilege Types (Deduped)",
306 "Document Types (Deduped)",
307 "DocCount",
308 ]
309
310 ## Write headers
311 for c, h in enumerate(headers, start=1):
312 cell = ws_out.Cells(1, c)
313 cell.Value = str(h)
314 cell.Font.Bold = True
315
316 ## Write rows
317 row_out = 2
318 for priv_desc in sorted(self.catLogMatrix.keys()):
319 dv = self.catLogMatrix[priv_desc]
320
321 participants_sorted = sorted(dv.Participants_val)
322 priv_types_sorted = sorted(dv.PrivilegeTypes_val)
323 doc_types_sorted = sorted(dv.DocumentTypes_val)
324
325 ws_out.Cells(row_out, 1).Value = priv_desc
326 ws_out.Cells(row_out, 2).Value = dv.BegDate_val.isoformat() if dv.BegDate_val else ""
327 ws_out.Cells(row_out, 3).Value = dv.EndDate_val.isoformat() if dv.EndDate_val else ""
328 ws_out.Cells(row_out, 4).Value = "; ".join(participants_sorted)
329 ws_out.Cells(row_out, 5).Value = "; ".join(priv_types_sorted)
330 ws_out.Cells(row_out, 6).Value = "; ".join(doc_types_sorted)
331 ws_out.Cells(row_out, 7).Value = dv.DocCount_val
332
333 ## Wrap long text columns
334 ws_out.Cells(row_out, 1).WrapText = True
335 ws_out.Cells(row_out, 4).WrapText = True
336 ws_out.Cells(row_out, 5).WrapText = True
337 ws_out.Cells(row_out, 6).WrapText = True
338
339 row_out += 1
340
341 ws_out.Columns.AutoFit()
342
343 wb_out.SaveAs(output_path)
344
345 finally:
346 try:
347 if wb_out is not None:
348 wb_out.Close(SaveChanges=False)
349 except Exception:
350 pass
351 try:
352 xlApp.Quit()
353 except Exception:
354 pass
355
356
357 if __name__ == "__main__":
358 inputSpreadsheetLog = r""
359 outputSpreadsheetLog = r""
360
361
362 privDescriptionColumnLetter = "F"
363 docDatesColumnLettersList = ["B"] # first non-empty wins!
364 participantsColumnLettersList = ["H", "I", "J", "K"] # e.g., Author/From/To/CC/BCC
365 privilegeTypeColumnLetter = "G"
366 documentTypeColumnLetter = "E"
367
368 grouper = CreateGroupedCategoricalLog(
369 pathToExcelSpreadsheet=inputSpreadsheetLog,
370 privDescriptionColumnLetter=privDescriptionColumnLetter,
371 docDatesColumnLettersList=docDatesColumnLettersList,
372 participantsColumnLettersList=participantsColumnLettersList,
373 privilegeTypeColumnLetter=privilegeTypeColumnLetter,
374 documentTypeColumnLetter=documentTypeColumnLetter,
375 sheet_name=None,
376 header_row=1,
377 data_start_row=2,
378 totalNumberOfRows=None,
379 participants_delimiter=";",
380 privilege_types_delimiter=";",
381 )
382
383 grouper.ingest_spreadsheet()
384 grouper.export_grouped_log(outputSpreadsheetLog)
385 print(f"Done. Grouped rows: {len(grouper.catLogMatrix)}")