Active_prgs/Redgrave/RelativityAuditFlattenerXml.py

"""

RelativityAuditFlattenerXml

Created by
Emanuel Borges
12.08.2025

Using my XML inspector, this program will attempt to take a raw Relativity audit history report and generate a flatened spreadsheet report.

"""

import csv
import xml.etree.ElementTree as ET
from typing import Any, Dict, List, Optional


def load_choice_map(
    choice_csv_path: str,
    artifact_id_col: str = "Artifact ID",
    name_col: str = "Name",
) -> Dict[str, str]:
    """
    Load a CSV containing workspace choices into a dict:
        { artifact_id (str) -> choice_name (str) }

    By default expects columns named:
        ArtifactID, Name

    If your file uses different headers, adjust artifact_id_col / name_col.
    """
    choice_map: Dict[str, str] = {}

    with open(choice_csv_path, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        for row in reader:
            artifact_id = (row.get(artifact_id_col) or "").strip()
            name = (row.get(name_col) or "").strip()
            if artifact_id and name:
                choice_map[artifact_id] = name

    return choice_map


class RelativityAuditFlattenerXml:
    """
    Reads a Relativity audit CSV and flattens XML 'Details' so that
    each <field> modification becomes its own row, with the original
    columns (Name through Request Origination) repeated.

    For choice fields, setChoice/unSetChoice/unsetChoice values are
    artifactIDs; we map them to choice names using a choice_map dict.
    """

    # The base columns we want to carry through from the CSV
    BASE_COLUMNS = [
        "Name",
        "Action",
        "User Name",
        "Timestamp",
        "ArtifactID",
        "Execution Time",
        "ID",
        "Object Type",
        "Request Origination",
    ]

    # New columns derived from the XML
    DETAIL_COLUMNS = [
        "Field Name",   # from <field name="...">
        "Field Type",   # from <field type="...">
        "unSetChoice",  # choice names removed
        "setChoice",    # choice names added
        "oldValue",     # free-text oldValue
        "newValue",     # free-text newValue
    ]

    def __init__(self, csv_path: str, choice_map: Optional[Dict[str, str]] = None) -> None:
        self.csv_path = csv_path
        self.rows: List[Dict[str, Any]] = []
        self.choice_map: Dict[str, str] = choice_map or {}


    def load_and_flatten(self) -> None:
        """
        Read the CSV, parse Details XML, and populate self.rows with
        one row per <field> modification.
        """
        with open(self.csv_path, newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)

            for src_row in reader:
                details_str = (src_row.get("Details") or "").strip()
                field_change_entries = self._parse_details_xml(details_str)

                # If we couldn't parse any field-level changes, still output
                # a single row so the event isn't lost.
                if not field_change_entries:
                    out_row = self._copy_base_columns(src_row)
                    out_row.update({
                        "Field Name": "",
                        "Field Type": "",
                        "unSetChoice": "",
                        "setChoice": "",
                        "oldValue": "",
                        "newValue": "",
                    })
                    self.rows.append(out_row)
                    continue

                # One row per <field> node
                for fc in field_change_entries:
                    out_row = self._copy_base_columns(src_row)
                    out_row.update({
                        "Field Name": fc.get("field_name", ""),
                        "Field Type": fc.get("field_type", ""),
                        "unSetChoice": fc.get("unset_choice", ""),
                        "setChoice": fc.get("set_choice", ""),
                        "oldValue": fc.get("old_value", ""),
                        "newValue": fc.get("new_value", ""),
                    })
                    self.rows.append(out_row)

    def total_field_changes(self) -> int:
        """Total number of field-level rows."""
        return len(self.rows)

    def export_to_excel(self, excel_path: str) -> None:
        """
        Export flattened rows to Excel using win32com.
        """
        from win32com.client import Dispatch

        xlApp = Dispatch("Excel.Application")
        xlApp.Visible = False

        wb = xlApp.Workbooks.Add()
        ws = wb.Worksheets(1)
        ws.Name = "Audit Changes"

        headers = self.BASE_COLUMNS + self.DETAIL_COLUMNS

        # Header row
        for col_idx, header in enumerate(headers, start=1):
            ws.Cells(1, col_idx).Value = header

        # Data rows
        for row_idx, row in enumerate(self.rows, start=2):
            for col_idx, header in enumerate(headers, start=1):
                ws.Cells(row_idx, col_idx).Value = row.get(header, "")

        # Autofit and (optionally) make a table
        last_row = len(self.rows) + 1
        last_col = len(headers)
        data_range = ws.Range(ws.Cells(1, 1), ws.Cells(last_row, last_col))
        data_range.EntireColumn.AutoFit()

        try:
            table = ws.ListObjects.Add(
                SourceType=0,           # xlSrcRange
                Source=data_range,
                XlListObjectHasHeaders=1,  # xlYes
            )
            table.Name = "AuditChangesTable"
            table.TableStyle = "TableStyleLight9"
        except Exception:
            # If ListObjects/TableStyle fails, just ignore it
            pass

        wb.SaveAs(excel_path)
        wb.Close(SaveChanges=True)
        xlApp.Quit()


    def _copy_base_columns(self, src_row: Dict[str, Any]) -> Dict[str, Any]:
        """
        Copy only the base columns from the source CSV row.
        """
        out: Dict[str, Any] = {}
        for col in self.BASE_COLUMNS:
            out[col] = src_row.get(col, "")
        return out

    def _map_choice_id_to_name(self, choice_id: str) -> str:
        """
        Convert a choice artifactID to its choice name using self.choice_map.
        If not found, fall back to the original ID string.
        """
        choice_id = (choice_id or "").strip()
        if not choice_id:
            return ""
        return self.choice_map.get(choice_id, choice_id)

    def _parse_details_xml(self, xml_str: str) -> List[Dict[str, str]]:
        """
        Parse the XML in Details and return a list of dicts with keys:
          - field_name
          - field_type
          - unset_choice  (names, combined)
          - set_choice    (names, combined)
          - old_value
          - new_value
        """
        xml_str = (xml_str or "").strip()
        if not xml_str:
            return []

        try:
            root = ET.fromstring(xml_str)
        except ET.ParseError:
            return []

        results: List[Dict[str, str]] = []

        # Find all <field> elements anywhere under the root
        for field_elem in root.findall(".//field"):
            attrib = field_elem.attrib

            field_name = attrib.get("name", "")
            field_type = attrib.get("type", "")

            # Collect choice changes as IDs
            set_choice_ids = [
                (child.text or "").strip()
                for child in field_elem.findall("setChoice")
                if (child.text or "").strip()
            ]

            unset_choice_ids = [
                (child.text or "").strip()
                for child in field_elem.findall("unSetChoice")
                if (child.text or "").strip()
            ]
            unset_choice_ids += [
                (child.text or "").strip()
                for child in field_elem.findall("unsetChoice")
                if (child.text or "").strip()
            ]

            # Map IDs -> names
            set_choice_names = [self._map_choice_id_to_name(cid) for cid in set_choice_ids]
            unset_choice_names = [self._map_choice_id_to_name(cid) for cid in unset_choice_ids]

            set_choice_str = "; ".join(name for name in set_choice_names if name)
            unset_choice_str = "; ".join(name for name in unset_choice_names if name)

            # Gather old/new value tags (for non-choice cases)
            old_val_node = field_elem.find("oldValue")
            new_val_node = field_elem.find("newValue")
            old_val_text = (old_val_node.text or "").strip() if (old_val_node is not None and old_val_node.text) else ""
            new_val_text = (new_val_node.text or "").strip() if (new_val_node is not None and new_val_node.text) else ""

            results.append(
                {
                    "field_name": field_name,
                    "field_type": field_type,
                    "unset_choice": unset_choice_str,
                    "set_choice": set_choice_str,
                    "old_value": old_val_text,
                    "new_value": new_val_text,
                }
            )

        return results


if __name__ == "__main__":
    audit_csv_path = r"C:\Test_Dir\NS\RelExport-DocHistory-DateFiltered(MED).csv"
    choice_csv_path = r"C:\Test_Dir\NS\20251208 - NS-AllChoicesLookupList.csv"
    output_excel = r"C:\Test_Dir\NS\RelativityAuditReport_Flattened_XML.xlsx"

    ## 1) Load the choice map (artifactID -> choice name)
    choice_map = load_choice_map(choice_csv_path)
    print(f"Loaded {len(choice_map)} choices from {choice_csv_path}")

    ## 2) Flatten the audit CSV, using the choice map
    flattener = RelativityAuditFlattenerXml(audit_csv_path, choice_map=choice_map)
    flattener.load_and_flatten()
    print(f"Total field-level changes: {flattener.total_field_changes()}")

    ## 3) Export to Excel
    flattener.export_to_excel(output_excel)
    print(f"Excel report written to: {output_excel}")
Revision:	963
Committed:	Tue Dec 9 18:40:50 2025 UTC (3 months, 2 weeks ago) by nino.borges
Content type:	text/x-python
File size:	9875 byte(s)
Log Message:	Minor cleanup
#	User	Rev	Content
1	nino.borges	960	"""
2
3			RelativityAuditFlattenerXml
4
5			Created by
6			Emanuel Borges
7			12.08.2025
8
9			Using my XML inspector, this program will attempt to take a raw Relativity audit history report and generate a flatened spreadsheet report.
10
11			"""
12
13			import csv
14			import xml.etree.ElementTree as ET
15	nino.borges	962	from typing import Any, Dict, List, Optional
16	nino.borges	960
17
18	nino.borges	962
19	nino.borges	963
20	nino.borges	962	def load_choice_map(
21			choice_csv_path: str,
22	nino.borges	963	artifact_id_col: str = "Artifact ID",
23	nino.borges	962	name_col: str = "Name",
24			) -> Dict[str, str]:
25			"""
26			Load a CSV containing workspace choices into a dict:
27			{ artifact_id (str) -> choice_name (str) }
28
29			By default expects columns named:
30			ArtifactID, Name
31
32			If your file uses different headers, adjust artifact_id_col / name_col.
33			"""
34			choice_map: Dict[str, str] = {}
35
36			with open(choice_csv_path, newline="", encoding="utf-8-sig") as f:
37			reader = csv.DictReader(f)
38			for row in reader:
39			artifact_id = (row.get(artifact_id_col) or "").strip()
40			name = (row.get(name_col) or "").strip()
41			if artifact_id and name:
42			choice_map[artifact_id] = name
43
44			return choice_map
45
46
47
48	nino.borges	960	class RelativityAuditFlattenerXml:
49			"""
50			Reads a Relativity audit CSV and flattens XML 'Details' so that
51			each <field> modification becomes its own row, with the original
52			columns (Name through Request Origination) repeated.
53	nino.borges	961
54	nino.borges	962	For choice fields, setChoice/unSetChoice/unsetChoice values are
55			artifactIDs; we map them to choice names using a choice_map dict.
56	nino.borges	960	"""
57
58			# The base columns we want to carry through from the CSV
59			BASE_COLUMNS = [
60			"Name",
61			"Action",
62			"User Name",
63			"Timestamp",
64			"ArtifactID",
65			"Execution Time",
66			"ID",
67			"Object Type",
68			"Request Origination",
69			]
70
71	nino.borges	961	# New columns derived from the XML
72	nino.borges	960	DETAIL_COLUMNS = [
73	nino.borges	961	"Field Name", # from <field name="...">
74			"Field Type", # from <field type="...">
75	nino.borges	962	"unSetChoice", # choice names removed
76			"setChoice", # choice names added
77			"oldValue", # free-text oldValue
78			"newValue", # free-text newValue
79	nino.borges	960	]
80
81	nino.borges	962	def __init__(self, csv_path: str, choice_map: Optional[Dict[str, str]] = None) -> None:
82	nino.borges	960	self.csv_path = csv_path
83			self.rows: List[Dict[str, Any]] = []
84	nino.borges	962	self.choice_map: Dict[str, str] = choice_map or {}
85	nino.borges	960
86
87			def load_and_flatten(self) -> None:
88			"""
89			Read the CSV, parse Details XML, and populate self.rows with
90			one row per <field> modification.
91			"""
92			with open(self.csv_path, newline="", encoding="utf-8-sig") as f:
93			reader = csv.DictReader(f)
94
95			for src_row in reader:
96			details_str = (src_row.get("Details") or "").strip()
97			field_change_entries = self._parse_details_xml(details_str)
98
99			# If we couldn't parse any field-level changes, still output
100			# a single row so the event isn't lost.
101			if not field_change_entries:
102			out_row = self._copy_base_columns(src_row)
103			out_row.update({
104	nino.borges	961	"Field Name": "",
105			"Field Type": "",
106			"unSetChoice": "",
107			"setChoice": "",
108			"oldValue": "",
109			"newValue": "",
110	nino.borges	960	})
111			self.rows.append(out_row)
112			continue
113
114			# One row per <field> node
115			for fc in field_change_entries:
116			out_row = self._copy_base_columns(src_row)
117			out_row.update({
118			"Field Name": fc.get("field_name", ""),
119	nino.borges	961	"Field Type": fc.get("field_type", ""),
120			"unSetChoice": fc.get("unset_choice", ""),
121			"setChoice": fc.get("set_choice", ""),
122			"oldValue": fc.get("old_value", ""),
123			"newValue": fc.get("new_value", ""),
124	nino.borges	960	})
125			self.rows.append(out_row)
126
127			def total_field_changes(self) -> int:
128			"""Total number of field-level rows."""
129			return len(self.rows)
130
131			def export_to_excel(self, excel_path: str) -> None:
132			"""
133	nino.borges	963	Export flattened rows to Excel using win32com.
134	nino.borges	960	"""
135			from win32com.client import Dispatch
136
137			xlApp = Dispatch("Excel.Application")
138	nino.borges	963	xlApp.Visible = False
139	nino.borges	960
140			wb = xlApp.Workbooks.Add()
141			ws = wb.Worksheets(1)
142			ws.Name = "Audit Changes"
143
144			headers = self.BASE_COLUMNS + self.DETAIL_COLUMNS
145
146			# Header row
147			for col_idx, header in enumerate(headers, start=1):
148			ws.Cells(1, col_idx).Value = header
149
150			# Data rows
151			for row_idx, row in enumerate(self.rows, start=2):
152			for col_idx, header in enumerate(headers, start=1):
153			ws.Cells(row_idx, col_idx).Value = row.get(header, "")
154
155			# Autofit and (optionally) make a table
156			last_row = len(self.rows) + 1
157			last_col = len(headers)
158			data_range = ws.Range(ws.Cells(1, 1), ws.Cells(last_row, last_col))
159			data_range.EntireColumn.AutoFit()
160
161			try:
162			table = ws.ListObjects.Add(
163			SourceType=0, # xlSrcRange
164			Source=data_range,
165			XlListObjectHasHeaders=1, # xlYes
166			)
167			table.Name = "AuditChangesTable"
168			table.TableStyle = "TableStyleLight9"
169			except Exception:
170			# If ListObjects/TableStyle fails, just ignore it
171			pass
172
173			wb.SaveAs(excel_path)
174			wb.Close(SaveChanges=True)
175			xlApp.Quit()
176
177
178			def _copy_base_columns(self, src_row: Dict[str, Any]) -> Dict[str, Any]:
179			"""
180			Copy only the base columns from the source CSV row.
181			"""
182			out: Dict[str, Any] = {}
183			for col in self.BASE_COLUMNS:
184			out[col] = src_row.get(col, "")
185			return out
186
187	nino.borges	962	def _map_choice_id_to_name(self, choice_id: str) -> str:
188			"""
189			Convert a choice artifactID to its choice name using self.choice_map.
190			If not found, fall back to the original ID string.
191			"""
192			choice_id = (choice_id or "").strip()
193			if not choice_id:
194			return ""
195			return self.choice_map.get(choice_id, choice_id)
196
197	nino.borges	960	def _parse_details_xml(self, xml_str: str) -> List[Dict[str, str]]:
198			"""
199	nino.borges	961	Parse the XML in Details and return a list of dicts with keys:
200			- field_name
201			- field_type
202	nino.borges	962	- unset_choice (names, combined)
203			- set_choice (names, combined)
204	nino.borges	961	- old_value
205			- new_value
206	nino.borges	960	"""
207			xml_str = (xml_str or "").strip()
208			if not xml_str:
209			return []
210
211			try:
212			root = ET.fromstring(xml_str)
213			except ET.ParseError:
214			return []
215
216			results: List[Dict[str, str]] = []
217
218			# Find all <field> elements anywhere under the root
219			for field_elem in root.findall(".//field"):
220			attrib = field_elem.attrib
221
222			field_name = attrib.get("name", "")
223	nino.borges	961	field_type = attrib.get("type", "")
224	nino.borges	960
225	nino.borges	962	# Collect choice changes as IDs
226			set_choice_ids = [
227	nino.borges	960	(child.text or "").strip()
228			for child in field_elem.findall("setChoice")
229			if (child.text or "").strip()
230			]
231
232	nino.borges	962	unset_choice_ids = [
233	nino.borges	960	(child.text or "").strip()
234			for child in field_elem.findall("unSetChoice")
235			if (child.text or "").strip()
236			]
237	nino.borges	962	unset_choice_ids += [
238	nino.borges	960	(child.text or "").strip()
239			for child in field_elem.findall("unsetChoice")
240			if (child.text or "").strip()
241			]
242
243	nino.borges	962	# Map IDs -> names
244			set_choice_names = [self._map_choice_id_to_name(cid) for cid in set_choice_ids]
245			unset_choice_names = [self._map_choice_id_to_name(cid) for cid in unset_choice_ids]
246	nino.borges	961
247	nino.borges	962	set_choice_str = "; ".join(name for name in set_choice_names if name)
248			unset_choice_str = "; ".join(name for name in unset_choice_names if name)
249
250	nino.borges	961	# Gather old/new value tags (for non-choice cases)
251	nino.borges	960	old_val_node = field_elem.find("oldValue")
252			new_val_node = field_elem.find("newValue")
253	nino.borges	961	old_val_text = (old_val_node.text or "").strip() if (old_val_node is not None and old_val_node.text) else ""
254			new_val_text = (new_val_node.text or "").strip() if (new_val_node is not None and new_val_node.text) else ""
255	nino.borges	960
256			results.append(
257			{
258			"field_name": field_name,
259	nino.borges	961	"field_type": field_type,
260			"unset_choice": unset_choice_str,
261			"set_choice": set_choice_str,
262			"old_value": old_val_text,
263			"new_value": new_val_text,
264	nino.borges	960	}
265			)
266
267			return results
268
269
270
271			if __name__ == "__main__":
272	nino.borges	963	audit_csv_path = r"C:\Test_Dir\NS\RelExport-DocHistory-DateFiltered(MED).csv"
273			choice_csv_path = r"C:\Test_Dir\NS\20251208 - NS-AllChoicesLookupList.csv"
274			output_excel = r"C:\Test_Dir\NS\RelativityAuditReport_Flattened_XML.xlsx"
275	nino.borges	960
276	nino.borges	963	## 1) Load the choice map (artifactID -> choice name)
277	nino.borges	962	choice_map = load_choice_map(choice_csv_path)
278			print(f"Loaded {len(choice_map)} choices from {choice_csv_path}")
279
280	nino.borges	963	## 2) Flatten the audit CSV, using the choice map
281	nino.borges	962	flattener = RelativityAuditFlattenerXml(audit_csv_path, choice_map=choice_map)
282	nino.borges	960	flattener.load_and_flatten()
283			print(f"Total field-level changes: {flattener.total_field_changes()}")
284
285	nino.borges	963	## 3) Export to Excel
286	nino.borges	960	flattener.export_to_excel(output_excel)
287			print(f"Excel report written to: {output_excel}")