Active_prgs/Redgrave/RelativityAuditFlattenerXml.py

"""

RelativityAuditFlattenerXml

Created by
Emanuel Borges
12.08.2025

Using my XML inspector, this program will attempt to take a raw Relativity audit history report and generate a flatened spreadsheet report.

"""

import csv
import xml.etree.ElementTree as ET
from typing import Any, Dict, List, Optional


# ------------------------------------------------------------
# Helper: load choice map (artifactID -> choice name)
# ------------------------------------------------------------

def load_choice_map(
    choice_csv_path: str,
    artifact_id_col: str = "ArtifactID",
    name_col: str = "Name",
) -> Dict[str, str]:
    """
    Load a CSV containing workspace choices into a dict:
        { artifact_id (str) -> choice_name (str) }

    By default expects columns named:
        ArtifactID, Name

    If your file uses different headers, adjust artifact_id_col / name_col.
    """
    choice_map: Dict[str, str] = {}

    with open(choice_csv_path, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        for row in reader:
            artifact_id = (row.get(artifact_id_col) or "").strip()
            name = (row.get(name_col) or "").strip()
            if artifact_id and name:
                choice_map[artifact_id] = name

    return choice_map


# ------------------------------------------------------------
# Main flattener
# ------------------------------------------------------------

class RelativityAuditFlattenerXml:
    """
    Reads a Relativity audit CSV and flattens XML 'Details' so that
    each <field> modification becomes its own row, with the original
    columns (Name through Request Origination) repeated.

    For choice fields, setChoice/unSetChoice/unsetChoice values are
    artifactIDs; we map them to choice names using a choice_map dict.
    """

    # The base columns we want to carry through from the CSV
    BASE_COLUMNS = [
        "Name",
        "Action",
        "User Name",
        "Timestamp",
        "ArtifactID",
        "Execution Time",
        "ID",
        "Object Type",
        "Request Origination",
    ]

    # New columns derived from the XML
    DETAIL_COLUMNS = [
        "Field Name",   # from <field name="...">
        "Field Type",   # from <field type="...">
        "unSetChoice",  # choice names removed
        "setChoice",    # choice names added
        "oldValue",     # free-text oldValue
        "newValue",     # free-text newValue
    ]

    def __init__(self, csv_path: str, choice_map: Optional[Dict[str, str]] = None) -> None:
        self.csv_path = csv_path
        self.rows: List[Dict[str, Any]] = []
        self.choice_map: Dict[str, str] = choice_map or {}

    # --------------------------------------------------------
    # Public API
    # --------------------------------------------------------

    def load_and_flatten(self) -> None:
        """
        Read the CSV, parse Details XML, and populate self.rows with
        one row per <field> modification.
        """
        with open(self.csv_path, newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)

            for src_row in reader:
                details_str = (src_row.get("Details") or "").strip()
                field_change_entries = self._parse_details_xml(details_str)

                # If we couldn't parse any field-level changes, still output
                # a single row so the event isn't lost.
                if not field_change_entries:
                    out_row = self._copy_base_columns(src_row)
                    out_row.update({
                        "Field Name": "",
                        "Field Type": "",
                        "unSetChoice": "",
                        "setChoice": "",
                        "oldValue": "",
                        "newValue": "",
                    })
                    self.rows.append(out_row)
                    continue

                # One row per <field> node
                for fc in field_change_entries:
                    out_row = self._copy_base_columns(src_row)
                    out_row.update({
                        "Field Name": fc.get("field_name", ""),
                        "Field Type": fc.get("field_type", ""),
                        "unSetChoice": fc.get("unset_choice", ""),
                        "setChoice": fc.get("set_choice", ""),
                        "oldValue": fc.get("old_value", ""),
                        "newValue": fc.get("new_value", ""),
                    })
                    self.rows.append(out_row)

    def total_field_changes(self) -> int:
        """Total number of field-level rows."""
        return len(self.rows)

    def export_to_excel(self, excel_path: str) -> None:
        """
        Export flattened rows to Excel using win32com (your preferred approach).
        """
        from win32com.client import Dispatch

        xlApp = Dispatch("Excel.Application")
        xlApp.Visible = False   # set True if you want to watch it build

        wb = xlApp.Workbooks.Add()
        ws = wb.Worksheets(1)
        ws.Name = "Audit Changes"

        headers = self.BASE_COLUMNS + self.DETAIL_COLUMNS

        # Header row
        for col_idx, header in enumerate(headers, start=1):
            ws.Cells(1, col_idx).Value = header

        # Data rows
        for row_idx, row in enumerate(self.rows, start=2):
            for col_idx, header in enumerate(headers, start=1):
                ws.Cells(row_idx, col_idx).Value = row.get(header, "")

        # Autofit and (optionally) make a table
        last_row = len(self.rows) + 1
        last_col = len(headers)
        data_range = ws.Range(ws.Cells(1, 1), ws.Cells(last_row, last_col))
        data_range.EntireColumn.AutoFit()

        try:
            table = ws.ListObjects.Add(
                SourceType=0,           # xlSrcRange
                Source=data_range,
                XlListObjectHasHeaders=1,  # xlYes
            )
            table.Name = "AuditChangesTable"
            table.TableStyle = "TableStyleLight9"
        except Exception:
            # If ListObjects/TableStyle fails, just ignore it
            pass

        wb.SaveAs(excel_path)
        wb.Close(SaveChanges=True)
        xlApp.Quit()

    # --------------------------------------------------------
    # Internal helpers
    # --------------------------------------------------------

    def _copy_base_columns(self, src_row: Dict[str, Any]) -> Dict[str, Any]:
        """
        Copy only the base columns from the source CSV row.
        """
        out: Dict[str, Any] = {}
        for col in self.BASE_COLUMNS:
            out[col] = src_row.get(col, "")
        return out

    def _map_choice_id_to_name(self, choice_id: str) -> str:
        """
        Convert a choice artifactID to its choice name using self.choice_map.
        If not found, fall back to the original ID string.
        """
        choice_id = (choice_id or "").strip()
        if not choice_id:
            return ""
        return self.choice_map.get(choice_id, choice_id)

    def _parse_details_xml(self, xml_str: str) -> List[Dict[str, str]]:
        """
        Parse the XML in Details and return a list of dicts with keys:
          - field_name
          - field_type
          - unset_choice  (names, combined)
          - set_choice    (names, combined)
          - old_value
          - new_value
        """
        xml_str = (xml_str or "").strip()
        if not xml_str:
            return []

        try:
            root = ET.fromstring(xml_str)
        except ET.ParseError:
            return []

        results: List[Dict[str, str]] = []

        # Find all <field> elements anywhere under the root
        for field_elem in root.findall(".//field"):
            attrib = field_elem.attrib

            field_name = attrib.get("name", "")
            field_type = attrib.get("type", "")

            # Collect choice changes as IDs
            set_choice_ids = [
                (child.text or "").strip()
                for child in field_elem.findall("setChoice")
                if (child.text or "").strip()
            ]

            unset_choice_ids = [
                (child.text or "").strip()
                for child in field_elem.findall("unSetChoice")
                if (child.text or "").strip()
            ]
            unset_choice_ids += [
                (child.text or "").strip()
                for child in field_elem.findall("unsetChoice")
                if (child.text or "").strip()
            ]

            # Map IDs -> names
            set_choice_names = [self._map_choice_id_to_name(cid) for cid in set_choice_ids]
            unset_choice_names = [self._map_choice_id_to_name(cid) for cid in unset_choice_ids]

            set_choice_str = "; ".join(name for name in set_choice_names if name)
            unset_choice_str = "; ".join(name for name in unset_choice_names if name)

            # Gather old/new value tags (for non-choice cases)
            old_val_node = field_elem.find("oldValue")
            new_val_node = field_elem.find("newValue")
            old_val_text = (old_val_node.text or "").strip() if (old_val_node is not None and old_val_node.text) else ""
            new_val_text = (new_val_node.text or "").strip() if (new_val_node is not None and new_val_node.text) else ""

            results.append(
                {
                    "field_name": field_name,
                    "field_type": field_type,
                    "unset_choice": unset_choice_str,
                    "set_choice": set_choice_str,
                    "old_value": old_val_text,
                    "new_value": new_val_text,
                }
            )

        return results


# ------------------------------------------------------------
# Script entrypoint
# ------------------------------------------------------------

if __name__ == "__main__":
    # TODO: update these three paths for your environment
    audit_csv_path = r"C:\path\to\RelativityAuditReport.csv"
    choice_csv_path = r"C:\path\to\AllWorkspaceChoices.csv"
    output_excel = r"C:\path\to\RelativityAuditReport_Flattened_XML.xlsx"

    # 1) Load the choice map (artifactID -> choice name)
    choice_map = load_choice_map(choice_csv_path)
    print(f"Loaded {len(choice_map)} choices from {choice_csv_path}")

    # 2) Flatten the audit CSV, using the choice map
    flattener = RelativityAuditFlattenerXml(audit_csv_path, choice_map=choice_map)
    flattener.load_and_flatten()
    print(f"Total field-level changes: {flattener.total_field_changes()}")

    # 3) Export to Excel
    flattener.export_to_excel(output_excel)
    print(f"Excel report written to: {output_excel}")
Revision:	962
Committed:	Mon Dec 8 23:06:33 2025 UTC (3 months, 2 weeks ago) by nino.borges
Content type:	text/x-python
File size:	10714 byte(s)
Log Message:	Updated to support a lookup list of choices since it appears that they only included artifact IDs for the choice values.
#	User	Rev	Content
1	nino.borges	960	"""
2
3			RelativityAuditFlattenerXml
4
5			Created by
6			Emanuel Borges
7			12.08.2025
8
9			Using my XML inspector, this program will attempt to take a raw Relativity audit history report and generate a flatened spreadsheet report.
10
11			"""
12
13			import csv
14			import xml.etree.ElementTree as ET
15	nino.borges	962	from typing import Any, Dict, List, Optional
16	nino.borges	960
17
18	nino.borges	962	# ------------------------------------------------------------
19			# Helper: load choice map (artifactID -> choice name)
20			# ------------------------------------------------------------
21
22			def load_choice_map(
23			choice_csv_path: str,
24			artifact_id_col: str = "ArtifactID",
25			name_col: str = "Name",
26			) -> Dict[str, str]:
27			"""
28			Load a CSV containing workspace choices into a dict:
29			{ artifact_id (str) -> choice_name (str) }
30
31			By default expects columns named:
32			ArtifactID, Name
33
34			If your file uses different headers, adjust artifact_id_col / name_col.
35			"""
36			choice_map: Dict[str, str] = {}
37
38			with open(choice_csv_path, newline="", encoding="utf-8-sig") as f:
39			reader = csv.DictReader(f)
40			for row in reader:
41			artifact_id = (row.get(artifact_id_col) or "").strip()
42			name = (row.get(name_col) or "").strip()
43			if artifact_id and name:
44			choice_map[artifact_id] = name
45
46			return choice_map
47
48
49			# ------------------------------------------------------------
50			# Main flattener
51			# ------------------------------------------------------------
52
53	nino.borges	960	class RelativityAuditFlattenerXml:
54			"""
55			Reads a Relativity audit CSV and flattens XML 'Details' so that
56			each <field> modification becomes its own row, with the original
57			columns (Name through Request Origination) repeated.
58	nino.borges	961
59	nino.borges	962	For choice fields, setChoice/unSetChoice/unsetChoice values are
60			artifactIDs; we map them to choice names using a choice_map dict.
61	nino.borges	960	"""
62
63			# The base columns we want to carry through from the CSV
64			BASE_COLUMNS = [
65			"Name",
66			"Action",
67			"User Name",
68			"Timestamp",
69			"ArtifactID",
70			"Execution Time",
71			"ID",
72			"Object Type",
73			"Request Origination",
74			]
75
76	nino.borges	961	# New columns derived from the XML
77	nino.borges	960	DETAIL_COLUMNS = [
78	nino.borges	961	"Field Name", # from <field name="...">
79			"Field Type", # from <field type="...">
80	nino.borges	962	"unSetChoice", # choice names removed
81			"setChoice", # choice names added
82			"oldValue", # free-text oldValue
83			"newValue", # free-text newValue
84	nino.borges	960	]
85
86	nino.borges	962	def __init__(self, csv_path: str, choice_map: Optional[Dict[str, str]] = None) -> None:
87	nino.borges	960	self.csv_path = csv_path
88			self.rows: List[Dict[str, Any]] = []
89	nino.borges	962	self.choice_map: Dict[str, str] = choice_map or {}
90	nino.borges	960
91	nino.borges	962	# --------------------------------------------------------
92	nino.borges	960	# Public API
93	nino.borges	962	# --------------------------------------------------------
94	nino.borges	960
95			def load_and_flatten(self) -> None:
96			"""
97			Read the CSV, parse Details XML, and populate self.rows with
98			one row per <field> modification.
99			"""
100			with open(self.csv_path, newline="", encoding="utf-8-sig") as f:
101			reader = csv.DictReader(f)
102
103			for src_row in reader:
104			details_str = (src_row.get("Details") or "").strip()
105			field_change_entries = self._parse_details_xml(details_str)
106
107			# If we couldn't parse any field-level changes, still output
108			# a single row so the event isn't lost.
109			if not field_change_entries:
110			out_row = self._copy_base_columns(src_row)
111			out_row.update({
112	nino.borges	961	"Field Name": "",
113			"Field Type": "",
114			"unSetChoice": "",
115			"setChoice": "",
116			"oldValue": "",
117			"newValue": "",
118	nino.borges	960	})
119			self.rows.append(out_row)
120			continue
121
122			# One row per <field> node
123			for fc in field_change_entries:
124			out_row = self._copy_base_columns(src_row)
125			out_row.update({
126			"Field Name": fc.get("field_name", ""),
127	nino.borges	961	"Field Type": fc.get("field_type", ""),
128			"unSetChoice": fc.get("unset_choice", ""),
129			"setChoice": fc.get("set_choice", ""),
130			"oldValue": fc.get("old_value", ""),
131			"newValue": fc.get("new_value", ""),
132	nino.borges	960	})
133			self.rows.append(out_row)
134
135			def total_field_changes(self) -> int:
136			"""Total number of field-level rows."""
137			return len(self.rows)
138
139			def export_to_excel(self, excel_path: str) -> None:
140			"""
141	nino.borges	961	Export flattened rows to Excel using win32com (your preferred approach).
142	nino.borges	960	"""
143			from win32com.client import Dispatch
144
145			xlApp = Dispatch("Excel.Application")
146			xlApp.Visible = False # set True if you want to watch it build
147
148			wb = xlApp.Workbooks.Add()
149			ws = wb.Worksheets(1)
150			ws.Name = "Audit Changes"
151
152			headers = self.BASE_COLUMNS + self.DETAIL_COLUMNS
153
154			# Header row
155			for col_idx, header in enumerate(headers, start=1):
156			ws.Cells(1, col_idx).Value = header
157
158			# Data rows
159			for row_idx, row in enumerate(self.rows, start=2):
160			for col_idx, header in enumerate(headers, start=1):
161			ws.Cells(row_idx, col_idx).Value = row.get(header, "")
162
163			# Autofit and (optionally) make a table
164			last_row = len(self.rows) + 1
165			last_col = len(headers)
166			data_range = ws.Range(ws.Cells(1, 1), ws.Cells(last_row, last_col))
167			data_range.EntireColumn.AutoFit()
168
169			try:
170			table = ws.ListObjects.Add(
171			SourceType=0, # xlSrcRange
172			Source=data_range,
173			XlListObjectHasHeaders=1, # xlYes
174			)
175			table.Name = "AuditChangesTable"
176			table.TableStyle = "TableStyleLight9"
177			except Exception:
178			# If ListObjects/TableStyle fails, just ignore it
179			pass
180
181			wb.SaveAs(excel_path)
182			wb.Close(SaveChanges=True)
183			xlApp.Quit()
184
185	nino.borges	962	# --------------------------------------------------------
186	nino.borges	960	# Internal helpers
187	nino.borges	962	# --------------------------------------------------------
188	nino.borges	960
189			def _copy_base_columns(self, src_row: Dict[str, Any]) -> Dict[str, Any]:
190			"""
191			Copy only the base columns from the source CSV row.
192			"""
193			out: Dict[str, Any] = {}
194			for col in self.BASE_COLUMNS:
195			out[col] = src_row.get(col, "")
196			return out
197
198	nino.borges	962	def _map_choice_id_to_name(self, choice_id: str) -> str:
199			"""
200			Convert a choice artifactID to its choice name using self.choice_map.
201			If not found, fall back to the original ID string.
202			"""
203			choice_id = (choice_id or "").strip()
204			if not choice_id:
205			return ""
206			return self.choice_map.get(choice_id, choice_id)
207
208	nino.borges	960	def _parse_details_xml(self, xml_str: str) -> List[Dict[str, str]]:
209			"""
210	nino.borges	961	Parse the XML in Details and return a list of dicts with keys:
211			- field_name
212			- field_type
213	nino.borges	962	- unset_choice (names, combined)
214			- set_choice (names, combined)
215	nino.borges	961	- old_value
216			- new_value
217	nino.borges	960	"""
218			xml_str = (xml_str or "").strip()
219			if not xml_str:
220			return []
221
222			try:
223			root = ET.fromstring(xml_str)
224			except ET.ParseError:
225			return []
226
227			results: List[Dict[str, str]] = []
228
229			# Find all <field> elements anywhere under the root
230			for field_elem in root.findall(".//field"):
231			attrib = field_elem.attrib
232
233			field_name = attrib.get("name", "")
234	nino.borges	961	field_type = attrib.get("type", "")
235	nino.borges	960
236	nino.borges	962	# Collect choice changes as IDs
237			set_choice_ids = [
238	nino.borges	960	(child.text or "").strip()
239			for child in field_elem.findall("setChoice")
240			if (child.text or "").strip()
241			]
242
243	nino.borges	962	unset_choice_ids = [
244	nino.borges	960	(child.text or "").strip()
245			for child in field_elem.findall("unSetChoice")
246			if (child.text or "").strip()
247			]
248	nino.borges	962	unset_choice_ids += [
249	nino.borges	960	(child.text or "").strip()
250			for child in field_elem.findall("unsetChoice")
251			if (child.text or "").strip()
252			]
253
254	nino.borges	962	# Map IDs -> names
255			set_choice_names = [self._map_choice_id_to_name(cid) for cid in set_choice_ids]
256			unset_choice_names = [self._map_choice_id_to_name(cid) for cid in unset_choice_ids]
257	nino.borges	961
258	nino.borges	962	set_choice_str = "; ".join(name for name in set_choice_names if name)
259			unset_choice_str = "; ".join(name for name in unset_choice_names if name)
260
261	nino.borges	961	# Gather old/new value tags (for non-choice cases)
262	nino.borges	960	old_val_node = field_elem.find("oldValue")
263			new_val_node = field_elem.find("newValue")
264	nino.borges	961	old_val_text = (old_val_node.text or "").strip() if (old_val_node is not None and old_val_node.text) else ""
265			new_val_text = (new_val_node.text or "").strip() if (new_val_node is not None and new_val_node.text) else ""
266	nino.borges	960
267			results.append(
268			{
269			"field_name": field_name,
270	nino.borges	961	"field_type": field_type,
271			"unset_choice": unset_choice_str,
272			"set_choice": set_choice_str,
273			"old_value": old_val_text,
274			"new_value": new_val_text,
275	nino.borges	960	}
276			)
277
278			return results
279
280
281			# ------------------------------------------------------------
282	nino.borges	962	# Script entrypoint
283	nino.borges	960	# ------------------------------------------------------------
284
285			if __name__ == "__main__":
286	nino.borges	962	# TODO: update these three paths for your environment
287			audit_csv_path = r"C:\path\to\RelativityAuditReport.csv"
288			choice_csv_path = r"C:\path\to\AllWorkspaceChoices.csv"
289	nino.borges	960	output_excel = r"C:\path\to\RelativityAuditReport_Flattened_XML.xlsx"
290
291	nino.borges	962	# 1) Load the choice map (artifactID -> choice name)
292			choice_map = load_choice_map(choice_csv_path)
293			print(f"Loaded {len(choice_map)} choices from {choice_csv_path}")
294
295			# 2) Flatten the audit CSV, using the choice map
296			flattener = RelativityAuditFlattenerXml(audit_csv_path, choice_map=choice_map)
297	nino.borges	960	flattener.load_and_flatten()
298			print(f"Total field-level changes: {flattener.total_field_changes()}")
299
300	nino.borges	962	# 3) Export to Excel
301	nino.borges	960	flattener.export_to_excel(output_excel)
302			print(f"Excel report written to: {output_excel}")