ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NS-CategoricalGroupedLogCreator.py
Revision: 971
Committed: Wed Feb 11 22:20:19 2026 UTC (6 weeks, 2 days ago) by nino.borges
Content type: text/x-python
File size: 13323 byte(s)
Log Message:
version .3 adds support for document type to the grouped log.

File Contents

# User Rev Content
1 nino.borges 969 """
2     NS-CategoricalGroupedLogCreator
3    
4     Created by:
5     Emanuel Borges
6     02.09.2026
7    
8 nino.borges 970 This program will create the grouped concatenated categorical log where there is a row only for each unique
9     priv description.
10 nino.borges 969
11 nino.borges 970 NOTE:
12     - This implementation uses win32com / pywin32 and requires Windows + Microsoft Excel installed.
13 nino.borges 971 - Also note that I'm using DispatchEx instead of just normal Dispatch for win32.com. This is due to some crashing weirdness that is happening on this worklaptop...
14     - I import pythoncom for some belt-and-suspenders. Mostly not needed.
15 nino.borges 969 """
16    
17     from __future__ import annotations
18    
19     from dataclasses import dataclass, field
20 nino.borges 970 from datetime import date, datetime
21     from typing import Dict, Iterable, List, Optional, Set, Tuple, Any
22 nino.borges 969
23 nino.borges 971 import pythoncom
24     from win32com.client import DispatchEx
25 nino.borges 969
26    
27 nino.borges 971 version = "0.3"
28 nino.borges 970
29    
30     # ----------------------------
31     # My Helpers
32     # ----------------------------
33    
34     def col_letter_to_index(col: str) -> int:
35     """
36     Convert an Excel column letter (e.g., 'A', 'Z', 'AA') to a 1-based index.
37     """
38     col = col.strip().upper()
39     if not col.isalpha():
40     raise ValueError(f"Invalid column letter: {col!r}")
41    
42     idx = 0
43     for ch in col:
44     idx = idx * 26 + (ord(ch) - ord("A") + 1)
45     return idx
46    
47    
48     def normalize_key(s: Any) -> str:
49     """
50     Normalize dictionary key input (Privilege Description).
51     Adjust this if you later want casefolding, whitespace collapsing, etc.
52     """
53     if s is None:
54     return ""
55     return str(s).strip()
56    
57    
58     def excel_value_to_date(v: Any) -> Optional[date]:
59     """
60     Attempt to convert a win32com Excel cell value into a Python date.
61     Common cases:
62     - datetime (pywin32 often gives datetime)
63     - date
64     - string like '2025-01-31' or '1/31/2025'
65     - empty / None
66     """
67     if v in (None, ""):
68     return None
69    
70     if isinstance(v, datetime):
71     return v.date()
72     if isinstance(v, date):
73     return v
74    
75     ## If it comes back as a string, try a couple common formats.
76     if isinstance(v, str):
77     s = v.strip()
78     if not s:
79     return None
80     for fmt in ("%Y-%m-%d", "%m/%d/%Y", "%m/%d/%y"):
81     try:
82     return datetime.strptime(s, fmt).date()
83     except ValueError:
84     pass
85     ## If we can't parse, just skip (or raise if you prefer strict behavior)
86     return None
87    
88     ## Otherwise, unknown type - skip.
89     return None
90    
91    
92     def split_people(cell_value: Any, delimiter: str = ";") -> List[str]:
93     """
94     Split a participant cell into individual participants.
95     """
96     if cell_value in (None, ""):
97     return []
98     s = str(cell_value).strip()
99     if not s:
100     return []
101     return [p.strip() for p in s.split(delimiter) if p.strip()]
102    
103    
104     def split_priv_types(cell_value: Any, delimiter: str = ";") -> List[str]:
105     """
106     Split a privilege type cell into individual privilege types (if multi-valued).
107     """
108     if cell_value in (None, ""):
109     return []
110     s = str(cell_value).strip()
111     if not s:
112     return []
113     return [t.strip() for t in s.split(delimiter) if t.strip()]
114    
115    
116     # ----------------------------
117     # The Data model
118     # ----------------------------
119    
120 nino.borges 969 @dataclass
121     class DocValues:
122     BegDate_val: Optional[date] = None
123     EndDate_val: Optional[date] = None
124     Participants_val: Set[str] = field(default_factory=set)
125     PrivilegeTypes_val: Set[str] = field(default_factory=set)
126 nino.borges 971 DocumentTypes_val: Set[str] = field(default_factory=set)
127 nino.borges 969 DocCount_val: int = 0
128    
129 nino.borges 970 def update(
130     self,
131     doc_date: Optional[date],
132     participants: Iterable[str],
133     privilege_types: Iterable[str],
134 nino.borges 971 document_types: Iterable[str],
135 nino.borges 970 ) -> None:
136 nino.borges 969 ## Count
137 nino.borges 970 self.DocCount_val += 1
138 nino.borges 969
139 nino.borges 970 ## Dates
140 nino.borges 969 if doc_date is not None:
141     if self.BegDate_val is None or doc_date < self.BegDate_val:
142     self.BegDate_val = doc_date
143 nino.borges 970 if self.EndDate_val is None or doc_date > self.EndDate_val:
144 nino.borges 969 self.EndDate_val = doc_date
145    
146     ## Participants
147     for p in participants:
148 nino.borges 970 p = str(p).strip()
149 nino.borges 969 if p:
150     self.Participants_val.add(p)
151    
152     ## Privilege Types
153     for t in privilege_types:
154 nino.borges 970 t = str(t).strip()
155 nino.borges 969 if t:
156     self.PrivilegeTypes_val.add(t)
157    
158    
159 nino.borges 971 ## Document Types
160     for d in document_types:
161     d = str(d).strip()
162     if d:
163     self.DocumentTypes_val.add(d)
164    
165    
166 nino.borges 970 # ----------------------------
167     # The Main class
168     # ----------------------------
169 nino.borges 969
170 nino.borges 970 class CreateGroupedCategoricalLog:
171     def __init__(
172     self,
173     pathToExcelSpreadsheet: str,
174     privDescriptionColumnLetter: str,
175     docDatesColumnLettersList: List[str],
176     participantsColumnLettersList: List[str],
177     privilegeTypeColumnLetter: str,
178 nino.borges 971 documentTypeColumnLetter: str,
179 nino.borges 970 *,
180     sheet_name: Optional[str] = None,
181     header_row: int = 1,
182     data_start_row: int = 2,
183     totalNumberOfRows: Optional[int] = None,
184     participants_delimiter: str = ";",
185     privilege_types_delimiter: str = ";",
186     ) -> None:
187     """
188 nino.borges 971 privDescriptionColumnLetter, documentTypeColumnLetter, and privilegeTypeColumnLetter should each be a single column letter value.
189 nino.borges 969
190 nino.borges 970 docDatesColumnLettersList:
191     list of column letters; program uses the FIRST non-empty date cell among them.
192 nino.borges 969
193 nino.borges 970 participantsColumnLettersList:
194     list of all column letters that contain participants; participants are aggregated into a dedup set.
195 nino.borges 969
196 nino.borges 970 totalNumberOfRows:
197     optional override; if None, program uses Excel UsedRange to determine last row.
198 nino.borges 969
199 nino.borges 970 sheet_name:
200     optional; if None, uses the first worksheet.
201     """
202     self.pathToExcelSpreadsheet = pathToExcelSpreadsheet
203     self.sheet_name = sheet_name
204     self.header_row = header_row
205     self.data_start_row = data_start_row
206     self.totalNumberOfRows = totalNumberOfRows
207 nino.borges 969
208 nino.borges 970 self.privDescriptionColumnLetter = privDescriptionColumnLetter
209     self.docDatesColumnLettersList = docDatesColumnLettersList
210     self.participantsColumnLettersList = participantsColumnLettersList
211     self.privilegeTypeColumnLetter = privilegeTypeColumnLetter
212 nino.borges 971 self.documentTypeColumnLetter = documentTypeColumnLetter
213 nino.borges 970
214     self.participants_delimiter = participants_delimiter
215     self.privilege_types_delimiter = privilege_types_delimiter
216    
217     ## The matrix, keyed by Privilege Description
218     self.catLogMatrix: Dict[str, DocValues] = {}
219    
220     ## ---------- Excel COM plumbing ----------
221    
222     def _open_excel(self):
223 nino.borges 971 pythoncom.CoInitialize()
224     xlApp = DispatchEx("Excel.Application")
225 nino.borges 970 xlApp.Visible = False
226     xlApp.DisplayAlerts = False
227     return xlApp
228    
229     def ingest_spreadsheet(self) -> None:
230     """
231     Read the input privilege log spreadsheet and populate self.catLogMatrix.
232     """
233     xlApp = self._open_excel()
234     wb = None
235    
236     try:
237     wb = xlApp.Workbooks.Open(self.pathToExcelSpreadsheet)
238    
239     if self.sheet_name:
240     ws = wb.Worksheets(self.sheet_name)
241     else:
242     ws = wb.Worksheets(1)
243    
244     ## Determine last row
245     if self.totalNumberOfRows is not None:
246     last_row = self.totalNumberOfRows
247     else:
248     ## UsedRange can be imperfect if sheet has formatting far down,
249     ## but is generally fine for privilege logs.
250     last_row = ws.UsedRange.Rows.Count
251    
252     priv_col = col_letter_to_index(self.privDescriptionColumnLetter)
253     priv_type_col = col_letter_to_index(self.privilegeTypeColumnLetter)
254 nino.borges 971 doc_type_col = col_letter_to_index(self.documentTypeColumnLetter)
255 nino.borges 970 date_cols = [col_letter_to_index(c) for c in self.docDatesColumnLettersList]
256     part_cols = [col_letter_to_index(c) for c in self.participantsColumnLettersList]
257    
258     ## Loop rows
259     for row in range(self.data_start_row, last_row + 1):
260     priv_desc_raw = ws.Cells(row, priv_col).Value
261     priv_desc = normalize_key(priv_desc_raw)
262    
263     if not priv_desc:
264     ## Skip empty rows
265     continue
266    
267     ## Find first non-empty doc date among the configured columns
268     doc_dt: Optional[date] = None
269     for dc in date_cols:
270     v = ws.Cells(row, dc).Value
271     doc_dt = excel_value_to_date(v)
272     if doc_dt is not None:
273     break
274    
275     ## Participants across multiple columns
276     participants: List[str] = []
277     for pc in part_cols:
278     participants.extend(
279     split_people(ws.Cells(row, pc).Value, delimiter=self.participants_delimiter)
280     )
281    
282     ## Privilege type(s)
283     priv_types = split_priv_types(
284     ws.Cells(row, priv_type_col).Value,
285     delimiter=self.privilege_types_delimiter,
286     )
287    
288     ## Update matrix
289     dv = self.catLogMatrix.setdefault(priv_desc, DocValues())
290     dv.update(doc_date=doc_dt, participants=participants, privilege_types=priv_types)
291    
292     finally:
293     if wb is not None:
294     wb.Close(SaveChanges=False)
295     xlApp.Quit()
296    
297     def export_grouped_log(self, output_path: str) -> None:
298     """
299     Export the grouped results to a new Excel spreadsheet.
300     """
301     xlApp = self._open_excel()
302     wb_out = None
303    
304     try:
305     wb_out = xlApp.Workbooks.Add()
306 nino.borges 971 wb_out.Activate()
307 nino.borges 970 ws_out = wb_out.Worksheets(1)
308 nino.borges 971 ws_out.Activate()
309 nino.borges 970 ws_out.Name = "Grouped Log"
310    
311     headers = [
312     "Privilege Description",
313     "BegDate",
314     "EndDate",
315     "Participants (Deduped)",
316     "Privilege Types (Deduped)",
317 nino.borges 971 "Document Types (Deduped)",
318 nino.borges 970 "DocCount",
319     ]
320    
321     ## Write headers
322     for c, h in enumerate(headers, start=1):
323     ws_out.Cells(1, c).Value = h
324     ws_out.Cells(1, c).Font.Bold = True
325    
326     ## Write rows
327     row_out = 2
328     for priv_desc in sorted(self.catLogMatrix.keys()):
329     dv = self.catLogMatrix[priv_desc]
330    
331     participants_sorted = sorted(dv.Participants_val)
332     priv_types_sorted = sorted(dv.PrivilegeTypes_val)
333    
334     ws_out.Cells(row_out, 1).Value = priv_desc
335     ws_out.Cells(row_out, 2).Value = dv.BegDate_val.isoformat() if dv.BegDate_val else ""
336     ws_out.Cells(row_out, 3).Value = dv.EndDate_val.isoformat() if dv.EndDate_val else ""
337     ws_out.Cells(row_out, 4).Value = "; ".join(participants_sorted)
338     ws_out.Cells(row_out, 5).Value = "; ".join(priv_types_sorted)
339     ws_out.Cells(row_out, 6).Value = dv.DocCount_val
340    
341     ## Wrap the long text columns
342     ws_out.Cells(row_out, 1).WrapText = True
343     ws_out.Cells(row_out, 4).WrapText = True
344     ws_out.Cells(row_out, 5).WrapText = True
345    
346     row_out += 1
347    
348     ## AutoFit
349     ws_out.Columns.AutoFit()
350    
351     ## Save
352     wb_out.SaveAs(output_path)
353    
354     finally:
355     if wb_out is not None:
356     wb_out.Close(SaveChanges=True)
357     xlApp.Quit()
358    
359    
360     if __name__ == "__main__":
361 nino.borges 971 inputSpreadsheetLog = r"C:\Users\eborges\foo.xlsx"
362     outputSpreadsheetLog = r"C:\Users\eborges\bar.xlsx"
363 nino.borges 970
364 nino.borges 971 privDescriptionColumnLetter = "O"
365     docDatesColumnLettersList = ["C", "D"] # first non-empty wins!
366     participantsColumnLettersList = ["G", "H", "I", "J", "K", "L"] # Author/From/To/CC/BCC
367     privilegeTypeColumnLetter = "N"
368     documentTypeColumnLetter = "E"
369 nino.borges 970
370     grouper = CreateGroupedCategoricalLog(
371     pathToExcelSpreadsheet=inputSpreadsheetLog,
372     privDescriptionColumnLetter=privDescriptionColumnLetter,
373     docDatesColumnLettersList=docDatesColumnLettersList,
374     participantsColumnLettersList=participantsColumnLettersList,
375     privilegeTypeColumnLetter=privilegeTypeColumnLetter,
376 nino.borges 971 documentTypeColumnLetter=documentTypeColumnLetter,
377 nino.borges 970 sheet_name=None, ## or "Sheet1"
378     header_row=1,
379     data_start_row=2,
380     totalNumberOfRows=None, ## or set an int if you want to override UsedRange
381     participants_delimiter=";",
382     privilege_types_delimiter=";",
383     )
384    
385     grouper.ingest_spreadsheet()
386     grouper.export_grouped_log(outputSpreadsheetLog)
387     print(f"Done. Grouped rows: {len(grouper.catLogMatrix)}")