ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Tool_Box/ExcelSpreadsheetToDict.py
Revision: 942
Committed: Tue Sep 9 17:03:07 2025 UTC (6 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 3770 byte(s)
Log Message:
Originally _normalize_multi would deduplicate the values in the cell using a set.  I changed my mind and changed this to allow duplicates.  If the original cell had dups, I want the dictionary cell to have dups.

File Contents

# Content
1 """
2
3 ExcelSpreadsheetToDict
4
5 Created by:
6 Emanuel Borges
7 09.09.2025
8
9 A library for manipulating Excel Spreadsheets. Created to compare privilege logs.
10
11 """
12
13
14 from typing import Dict, Iterable, Optional, Tuple
15 from collections import namedtuple
16 from win32com import client as win32
17
18 version = "2.0"
19
20 class ExcelToDictIngestor:
21 def __init__(self):
22 self._excel_app = None ## Will be ceated on demand
23
24
25 @staticmethod
26 def _normalize_col_letter(col: str) -> str:
27 if not isinstance(col, str) or not col.isalpha():
28 raise ValueError(f"Invalid Excel column letter: {col!r}")
29 return col.upper()
30
31
32 @staticmethod
33 def _cel_to_string(val) -> str:
34 if val is None:
35 return ""
36 return str(val)
37
38 @staticmethod
39 def _normalize_single(val) -> str:
40 return ExcelToDictIngestor._cel_to_string(val).strip().upper()
41
42 @staticmethod
43 def _normalize_multi(val,delimiter: str) -> list:
44 raw = ExcelToDictIngestor._cel_to_string(val)
45 parts = [p.strip().upper() for p in raw.split(delimiter) if p]
46 ## These next two lines deduplicate the values in the multi value cells but i'm reconsidering this. If the original had dups, I want the dict to have dups.
47 #uniq = sorted({p for p in parts if p})
48 #return uniq
49 return sorted(parts)
50
51 def _ensure_app(self, visible: bool):
52 if self._excel_app is None:
53 self._excel_app = win32.Dispatch("Excel.Application")
54 self._excel_app.Visible = bool(visible)
55
56
57 def _close_app(self):
58 if self._excel_app is not None:
59 try:
60 self._excel_app.Quit()
61 finally:
62 self._excel_app = None
63
64
65
66 def load(self, filepath: str, doc_id_col: str, columns: Iterable[Tuple[str, bool]], start_row: int = 2, end_row: Optional[int] = None, sheet_name: Optional[str] = None, delimiter: str = ";", visible: bool = False,) -> Dict[str, tuple]:
67 doc_id_col = self._normalize_col_letter(doc_id_col)
68 col_specs = [(self._normalize_col_letter(c), bool(is_multi)) for c, is_multi in columns]
69 field_names = [c for c, _ in col_specs]
70 RowTuple = namedtuple("Row", field_names)
71
72 data: Dict[str, RowTuple] = {}
73
74 self._ensure_app(visible=visible)
75 wb = None
76 try:
77 wb = self._excel_app.Workbooks.Open(filepath)
78 ws = wb.Worksheets(sheet_name) if sheet_name else wb.ActiveSheet
79
80 if end_row is None:
81 used = ws.UsedRange
82 end_row = used.Row + used.Rows.Count -1
83
84 for row in range(start_row, end_row +1):
85 doc_id_val = ws.Range(f"{doc_id_col}{row}").Value
86 doc_id = self._normalize_single(doc_id_val)
87 if not doc_id:
88 continue
89
90 row_values = []
91 for col_letter, is_multi in col_specs:
92 cell_val = ws.Range(f"{col_letter}{row}").Value
93 if is_multi:
94 row_values.append(self._normalize_multi(cell_val, delimiter))
95 else:
96 row_values.append(self._normalize_single(cell_val))
97
98 data[doc_id] = RowTuple(*row_values)
99
100 wb.Close(SaveChanges = False)
101 wb = None
102 finally:
103 if wb is not None:
104 try:
105 wb.Close(SaveChanges = False)
106 except Exception:
107 pass
108 if not visible:
109 self._close_app()
110
111 return data