ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Tool_Box/ExcelSpreadsheetToDict.py
Revision: 941
Committed: Tue Sep 9 17:01:07 2025 UTC (6 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 3567 byte(s)
Log Message:
A library for manipulating Excel Spreadsheets.  Created this to compare two privilege log so that I could see what is different between them.

File Contents

# User Rev Content
1 nino.borges 941 """
2    
3     ExcelSpreadsheetToDict
4    
5     Created by:
6     Emanuel Borges
7     09.09.2025
8    
9     A library for manipulating Excel Spreadsheets. Created to compare privilege logs.
10    
11     """
12    
13    
14     from typing import Dict, Iterable, Optional, Tuple
15     from collections import namedtuple
16     from win32com import client as win32
17    
18     version = "1.0"
19    
20     class ExcelToDictIngestor:
21     def __init__(self):
22     self._excel_app = None ## Will be ceated on demand
23    
24    
25     @staticmethod
26     def _normalize_col_letter(col: str) -> str:
27     if not isinstance(col, str) or not col.isalpha():
28     raise ValueError(f"Invalid Excel column letter: {col!r}")
29     return col.upper()
30    
31    
32     @staticmethod
33     def _cel_to_string(val) -> str:
34     if val is None:
35     return ""
36     return str(val)
37    
38     @staticmethod
39     def _normalize_single(val) -> str:
40     return ExcelToDictIngestor._cel_to_string(val).strip().upper()
41    
42     @staticmethod
43     def _normalize_multi(val,delimiter: str) -> list:
44     raw = ExcelToDictIngestor._cel_to_string(val)
45     parts = [p.strip().upper() for p in raw.split(delimiter)]
46     uniq = sorted({p for p in parts if p})
47     return uniq
48    
49     def _ensure_app(self, visible: bool):
50     if self._excel_app is None:
51     self._excel_app = win32.Dispatch("Excel.Application")
52     self._excel_app.Visible = bool(visible)
53    
54    
55     def _close_app(self):
56     if self._excel_app is not None:
57     try:
58     self._excel_app.Quit()
59     finally:
60     self._excel_app = None
61    
62    
63    
64     def load(self, filepath: str, doc_id_col: str, columns: Iterable[Tuple[str, bool]], start_row: int = 2, end_row: Optional[int] = None, sheet_name: Optional[str] = None, delimiter: str = ";", visible: bool = False,) -> Dict[str, tuple]:
65     doc_id_col = self._normalize_col_letter(doc_id_col)
66     col_specs = [(self._normalize_col_letter(c), bool(is_multi)) for c, is_multi in columns]
67     field_names = [c for c, _ in col_specs]
68     RowTuple = namedtuple("Row", field_names)
69    
70     data: Dict[str, RowTuple] = {}
71    
72     self._ensure_app(visible=visible)
73     wb = None
74     try:
75     wb = self._excel_app.Workbooks.Open(filepath)
76     ws = wb.Worksheets(sheet_name) if sheet_name else wb.ActiveSheet
77    
78     if end_row is None:
79     used = ws.UsedRange
80     end_row = used.Row + used.Rows.Count -1
81    
82     for row in range(start_row, end_row +1):
83     doc_id_val = ws.Range(f"{doc_id_col}{row}").Value
84     doc_id = self._normalize_single(doc_id_val)
85     if not doc_id:
86     continue
87    
88     row_values = []
89     for col_letter, is_multi in col_specs:
90     cell_val = ws.Range(f"{col_letter}{row}").Value
91     if is_multi:
92     row_values.append(self._normalize_multi(cell_val, delimiter))
93     else:
94     row_values.append(self._normalize_single(cell_val))
95    
96     data[doc_id] = RowTuple(*row_values)
97    
98     wb.Close(SaveChanges = False)
99     wb = None
100     finally:
101     if wb is not None:
102     try:
103     wb.Close(SaveChanges = False)
104     except Exception:
105     pass
106     if not visible:
107     self._close_app()
108    
109     return data