ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Tool_Box/ExcelSpreadsheetToDict.py
Revision: 942
Committed: Tue Sep 9 17:03:07 2025 UTC (6 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 3770 byte(s)
Log Message:
Originally _normalize_multi would deduplicate the values in the cell using a set.  I changed my mind and changed this to allow duplicates.  If the original cell had dups, I want the dictionary cell to have dups.

File Contents

# User Rev Content
1 nino.borges 941 """
2    
3     ExcelSpreadsheetToDict
4    
5     Created by:
6     Emanuel Borges
7     09.09.2025
8    
9     A library for manipulating Excel Spreadsheets. Created to compare privilege logs.
10    
11     """
12    
13    
14     from typing import Dict, Iterable, Optional, Tuple
15     from collections import namedtuple
16     from win32com import client as win32
17    
18 nino.borges 942 version = "2.0"
19 nino.borges 941
20     class ExcelToDictIngestor:
21     def __init__(self):
22     self._excel_app = None ## Will be ceated on demand
23    
24    
25     @staticmethod
26     def _normalize_col_letter(col: str) -> str:
27     if not isinstance(col, str) or not col.isalpha():
28     raise ValueError(f"Invalid Excel column letter: {col!r}")
29     return col.upper()
30    
31    
32     @staticmethod
33     def _cel_to_string(val) -> str:
34     if val is None:
35     return ""
36     return str(val)
37    
38     @staticmethod
39     def _normalize_single(val) -> str:
40     return ExcelToDictIngestor._cel_to_string(val).strip().upper()
41    
42     @staticmethod
43     def _normalize_multi(val,delimiter: str) -> list:
44     raw = ExcelToDictIngestor._cel_to_string(val)
45 nino.borges 942 parts = [p.strip().upper() for p in raw.split(delimiter) if p]
46     ## These next two lines deduplicate the values in the multi value cells but i'm reconsidering this. If the original had dups, I want the dict to have dups.
47     #uniq = sorted({p for p in parts if p})
48     #return uniq
49     return sorted(parts)
50 nino.borges 941
51     def _ensure_app(self, visible: bool):
52     if self._excel_app is None:
53     self._excel_app = win32.Dispatch("Excel.Application")
54     self._excel_app.Visible = bool(visible)
55    
56    
57     def _close_app(self):
58     if self._excel_app is not None:
59     try:
60     self._excel_app.Quit()
61     finally:
62     self._excel_app = None
63    
64    
65    
66     def load(self, filepath: str, doc_id_col: str, columns: Iterable[Tuple[str, bool]], start_row: int = 2, end_row: Optional[int] = None, sheet_name: Optional[str] = None, delimiter: str = ";", visible: bool = False,) -> Dict[str, tuple]:
67     doc_id_col = self._normalize_col_letter(doc_id_col)
68     col_specs = [(self._normalize_col_letter(c), bool(is_multi)) for c, is_multi in columns]
69     field_names = [c for c, _ in col_specs]
70     RowTuple = namedtuple("Row", field_names)
71    
72     data: Dict[str, RowTuple] = {}
73    
74     self._ensure_app(visible=visible)
75     wb = None
76     try:
77     wb = self._excel_app.Workbooks.Open(filepath)
78     ws = wb.Worksheets(sheet_name) if sheet_name else wb.ActiveSheet
79    
80     if end_row is None:
81     used = ws.UsedRange
82     end_row = used.Row + used.Rows.Count -1
83    
84     for row in range(start_row, end_row +1):
85     doc_id_val = ws.Range(f"{doc_id_col}{row}").Value
86     doc_id = self._normalize_single(doc_id_val)
87     if not doc_id:
88     continue
89    
90     row_values = []
91     for col_letter, is_multi in col_specs:
92     cell_val = ws.Range(f"{col_letter}{row}").Value
93     if is_multi:
94     row_values.append(self._normalize_multi(cell_val, delimiter))
95     else:
96     row_values.append(self._normalize_single(cell_val))
97    
98     data[doc_id] = RowTuple(*row_values)
99    
100     wb.Close(SaveChanges = False)
101     wb = None
102     finally:
103     if wb is not None:
104     try:
105     wb.Close(SaveChanges = False)
106     except Exception:
107     pass
108     if not visible:
109     self._close_app()
110    
111     return data