ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Tool_Box/ExcelSpreadsheetToDict.py
Revision: 941
Committed: Tue Sep 9 17:01:07 2025 UTC (6 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 3567 byte(s)
Log Message:
A library for manipulating Excel Spreadsheets.  Created this to compare two privilege log so that I could see what is different between them.

File Contents

# Content
1 """
2
3 ExcelSpreadsheetToDict
4
5 Created by:
6 Emanuel Borges
7 09.09.2025
8
9 A library for manipulating Excel Spreadsheets. Created to compare privilege logs.
10
11 """
12
13
14 from typing import Dict, Iterable, Optional, Tuple
15 from collections import namedtuple
16 from win32com import client as win32
17
18 version = "1.0"
19
20 class ExcelToDictIngestor:
21 def __init__(self):
22 self._excel_app = None ## Will be ceated on demand
23
24
25 @staticmethod
26 def _normalize_col_letter(col: str) -> str:
27 if not isinstance(col, str) or not col.isalpha():
28 raise ValueError(f"Invalid Excel column letter: {col!r}")
29 return col.upper()
30
31
32 @staticmethod
33 def _cel_to_string(val) -> str:
34 if val is None:
35 return ""
36 return str(val)
37
38 @staticmethod
39 def _normalize_single(val) -> str:
40 return ExcelToDictIngestor._cel_to_string(val).strip().upper()
41
42 @staticmethod
43 def _normalize_multi(val,delimiter: str) -> list:
44 raw = ExcelToDictIngestor._cel_to_string(val)
45 parts = [p.strip().upper() for p in raw.split(delimiter)]
46 uniq = sorted({p for p in parts if p})
47 return uniq
48
49 def _ensure_app(self, visible: bool):
50 if self._excel_app is None:
51 self._excel_app = win32.Dispatch("Excel.Application")
52 self._excel_app.Visible = bool(visible)
53
54
55 def _close_app(self):
56 if self._excel_app is not None:
57 try:
58 self._excel_app.Quit()
59 finally:
60 self._excel_app = None
61
62
63
64 def load(self, filepath: str, doc_id_col: str, columns: Iterable[Tuple[str, bool]], start_row: int = 2, end_row: Optional[int] = None, sheet_name: Optional[str] = None, delimiter: str = ";", visible: bool = False,) -> Dict[str, tuple]:
65 doc_id_col = self._normalize_col_letter(doc_id_col)
66 col_specs = [(self._normalize_col_letter(c), bool(is_multi)) for c, is_multi in columns]
67 field_names = [c for c, _ in col_specs]
68 RowTuple = namedtuple("Row", field_names)
69
70 data: Dict[str, RowTuple] = {}
71
72 self._ensure_app(visible=visible)
73 wb = None
74 try:
75 wb = self._excel_app.Workbooks.Open(filepath)
76 ws = wb.Worksheets(sheet_name) if sheet_name else wb.ActiveSheet
77
78 if end_row is None:
79 used = ws.UsedRange
80 end_row = used.Row + used.Rows.Count -1
81
82 for row in range(start_row, end_row +1):
83 doc_id_val = ws.Range(f"{doc_id_col}{row}").Value
84 doc_id = self._normalize_single(doc_id_val)
85 if not doc_id:
86 continue
87
88 row_values = []
89 for col_letter, is_multi in col_specs:
90 cell_val = ws.Range(f"{col_letter}{row}").Value
91 if is_multi:
92 row_values.append(self._normalize_multi(cell_val, delimiter))
93 else:
94 row_values.append(self._normalize_single(cell_val))
95
96 data[doc_id] = RowTuple(*row_values)
97
98 wb.Close(SaveChanges = False)
99 wb = None
100 finally:
101 if wb is not None:
102 try:
103 wb.Close(SaveChanges = False)
104 except Exception:
105 pass
106 if not visible:
107 self._close_app()
108
109 return data