NinoCode/Tool_Box/ExcelSpreadsheetToDict.py

"""

ExcelSpreadsheetToDict

Created by:
Emanuel Borges
09.09.2025

A library for manipulating Excel Spreadsheets.  Created to compare privilege logs.

"""


from typing import Dict, Iterable, Optional, Tuple
from collections import namedtuple
from win32com import client as win32

version = "1.0"

class ExcelToDictIngestor:
    def __init__(self):
        self._excel_app = None  ##  Will be ceated on demand


    @staticmethod
    def _normalize_col_letter(col: str) -> str:
        if not isinstance(col, str) or not col.isalpha():
            raise ValueError(f"Invalid Excel column letter: {col!r}")
        return col.upper()


    @staticmethod
    def _cel_to_string(val) -> str:
        if val is None:
            return ""
        return str(val)

    @staticmethod
    def _normalize_single(val) -> str:
        return ExcelToDictIngestor._cel_to_string(val).strip().upper()

    @staticmethod
    def _normalize_multi(val,delimiter: str) -> list:
        raw = ExcelToDictIngestor._cel_to_string(val)
        parts = [p.strip().upper() for p in raw.split(delimiter)]
        uniq = sorted({p for p in parts if p})
        return uniq 

    def _ensure_app(self, visible: bool):
        if self._excel_app is None:
            self._excel_app = win32.Dispatch("Excel.Application")
        self._excel_app.Visible = bool(visible)


    def _close_app(self):
        if self._excel_app is not None:
            try:
                self._excel_app.Quit()
            finally:
                self._excel_app = None


    def load(self, filepath: str, doc_id_col: str, columns: Iterable[Tuple[str, bool]], start_row: int = 2, end_row: Optional[int] = None, sheet_name: Optional[str] = None, delimiter: str = ";", visible: bool = False,) -> Dict[str, tuple]:
        doc_id_col = self._normalize_col_letter(doc_id_col)
        col_specs = [(self._normalize_col_letter(c), bool(is_multi)) for c, is_multi in columns]
        field_names = [c for c, _ in col_specs]
        RowTuple = namedtuple("Row", field_names)

        data: Dict[str, RowTuple] = {}

        self._ensure_app(visible=visible)
        wb = None
        try:
            wb = self._excel_app.Workbooks.Open(filepath)
            ws = wb.Worksheets(sheet_name) if sheet_name else wb.ActiveSheet

            if end_row is None:
                used = ws.UsedRange
                end_row = used.Row + used.Rows.Count -1

            for row in range(start_row, end_row +1):
                doc_id_val = ws.Range(f"{doc_id_col}{row}").Value
                doc_id = self._normalize_single(doc_id_val)
                if not doc_id:
                    continue
                
                row_values = []
                for col_letter, is_multi in col_specs:
                    cell_val = ws.Range(f"{col_letter}{row}").Value
                    if is_multi:
                        row_values.append(self._normalize_multi(cell_val, delimiter))
                    else:
                        row_values.append(self._normalize_single(cell_val))
                
                data[doc_id] = RowTuple(*row_values)
                
            wb.Close(SaveChanges = False)
            wb = None
        finally:
            if wb is not None:
                try:
                    wb.Close(SaveChanges = False)
                except Exception:
                    pass
            if not visible:
                self._close_app()
                
        return data
Revision:	941
Committed:	Tue Sep 9 17:01:07 2025 UTC (6 months, 2 weeks ago) by nino.borges
Content type:	text/x-python
File size:	3567 byte(s)
Log Message:	A library for manipulating Excel Spreadsheets. Created this to compare two privilege log so that I could see what is different between them.
#	Content
1	"""
2
3	ExcelSpreadsheetToDict
4
5	Created by:
6	Emanuel Borges
7	09.09.2025
8
9	A library for manipulating Excel Spreadsheets. Created to compare privilege logs.
10
11	"""
12
13
14	from typing import Dict, Iterable, Optional, Tuple
15	from collections import namedtuple
16	from win32com import client as win32
17
18	version = "1.0"
19
20	class ExcelToDictIngestor:
21	def __init__(self):
22	self._excel_app = None ## Will be ceated on demand
23
24
25	@staticmethod
26	def _normalize_col_letter(col: str) -> str:
27	if not isinstance(col, str) or not col.isalpha():
28	raise ValueError(f"Invalid Excel column letter: {col!r}")
29	return col.upper()
30
31
32	@staticmethod
33	def _cel_to_string(val) -> str:
34	if val is None:
35	return ""
36	return str(val)
37
38	@staticmethod
39	def _normalize_single(val) -> str:
40	return ExcelToDictIngestor._cel_to_string(val).strip().upper()
41
42	@staticmethod
43	def _normalize_multi(val,delimiter: str) -> list:
44	raw = ExcelToDictIngestor._cel_to_string(val)
45	parts = [p.strip().upper() for p in raw.split(delimiter)]
46	uniq = sorted({p for p in parts if p})
47	return uniq
48
49	def _ensure_app(self, visible: bool):
50	if self._excel_app is None:
51	self._excel_app = win32.Dispatch("Excel.Application")
52	self._excel_app.Visible = bool(visible)
53
54
55	def _close_app(self):
56	if self._excel_app is not None:
57	try:
58	self._excel_app.Quit()
59	finally:
60	self._excel_app = None
61
62
63
64	def load(self, filepath: str, doc_id_col: str, columns: Iterable[Tuple[str, bool]], start_row: int = 2, end_row: Optional[int] = None, sheet_name: Optional[str] = None, delimiter: str = ";", visible: bool = False,) -> Dict[str, tuple]:
65	doc_id_col = self._normalize_col_letter(doc_id_col)
66	col_specs = [(self._normalize_col_letter(c), bool(is_multi)) for c, is_multi in columns]
67	field_names = [c for c, _ in col_specs]
68	RowTuple = namedtuple("Row", field_names)
69
70	data: Dict[str, RowTuple] = {}
71
72	self._ensure_app(visible=visible)
73	wb = None
74	try:
75	wb = self._excel_app.Workbooks.Open(filepath)
76	ws = wb.Worksheets(sheet_name) if sheet_name else wb.ActiveSheet
77
78	if end_row is None:
79	used = ws.UsedRange
80	end_row = used.Row + used.Rows.Count -1
81
82	for row in range(start_row, end_row +1):
83	doc_id_val = ws.Range(f"{doc_id_col}{row}").Value
84	doc_id = self._normalize_single(doc_id_val)
85	if not doc_id:
86	continue
87
88	row_values = []
89	for col_letter, is_multi in col_specs:
90	cell_val = ws.Range(f"{col_letter}{row}").Value
91	if is_multi:
92	row_values.append(self._normalize_multi(cell_val, delimiter))
93	else:
94	row_values.append(self._normalize_single(cell_val))
95
96	data[doc_id] = RowTuple(*row_values)
97
98	wb.Close(SaveChanges = False)
99	wb = None
100	finally:
101	if wb is not None:
102	try:
103	wb.Close(SaveChanges = False)
104	except Exception:
105	pass
106	if not visible:
107	self._close_app()
108
109	return data