Active_prgs/Redgrave/RelativityAuditFlattenerXml.py

"""

RelativityAuditFlattenerXml

Created by
Emanuel Borges
12.08.2025

Using my XML inspector, this program will attempt to take a raw Relativity audit history report and generate a flatened spreadsheet report.

"""


import csv
import xml.etree.ElementTree as ET
from typing import Any, Dict, List


class RelativityAuditFlattenerXml:
    """
    Reads a Relativity audit CSV and flattens XML 'Details' so that
    each <field> modification becomes its own row, with the original
    columns (Name through Request Origination) repeated.

    XML schema (from your inspector):
      - Root: <auditElement>
      - Repeated per-field nodes: <field id="..." type="..." name="..." formatstring="...">
      - Inside <field>:
          <setChoice>...</setChoice>
          <unSetChoice>...</unSetChoice>
          <unsetChoice>...</unsetChoice>
          <oldValue>...</oldValue>
          <newValue>...</newValue>
    """

    # The base columns we want to carry through from the CSV
    BASE_COLUMNS = [
        "Name",
        "Action",
        "User Name",
        "Timestamp",
        "ArtifactID",
        "Execution Time",
        "ID",
        "Object Type",
        "Request Origination",
    ]

    # New columns derived from the XML
    DETAIL_COLUMNS = [
        "Field Name",   # from <field name="...">
        "Field Type",   # from <field type="...">
        "unSetChoice",  # combined <unSetChoice> and <unsetChoice>
        "setChoice",    # <setChoice>
        "oldValue",     # <oldValue>
        "newValue",     # <newValue>
    ]

    def __init__(self, csv_path: str) -> None:
        self.csv_path = csv_path
        self.rows: List[Dict[str, Any]] = []

    # ------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------

    def load_and_flatten(self) -> None:
        """
        Read the CSV, parse Details XML, and populate self.rows with
        one row per <field> modification.
        """
        with open(self.csv_path, newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)

            for src_row in reader:
                details_str = (src_row.get("Details") or "").strip()
                field_change_entries = self._parse_details_xml(details_str)

                # If we couldn't parse any field-level changes, still output
                # a single row so the event isn't lost.
                if not field_change_entries:
                    out_row = self._copy_base_columns(src_row)
                    out_row.update({
                        "Field Name": "",
                        "Field Type": "",
                        "unSetChoice": "",
                        "setChoice": "",
                        "oldValue": "",
                        "newValue": "",
                    })
                    self.rows.append(out_row)
                    continue

                # One row per <field> node
                for fc in field_change_entries:
                    out_row = self._copy_base_columns(src_row)
                    out_row.update({
                        "Field Name": fc.get("field_name", ""),
                        "Field Type": fc.get("field_type", ""),
                        "unSetChoice": fc.get("unset_choice", ""),
                        "setChoice": fc.get("set_choice", ""),
                        "oldValue": fc.get("old_value", ""),
                        "newValue": fc.get("new_value", ""),
                    })
                    self.rows.append(out_row)

    def total_field_changes(self) -> int:
        """Total number of field-level rows."""
        return len(self.rows)

    def export_to_excel(self, excel_path: str) -> None:
        """
        Export flattened rows to Excel using win32com (your preferred approach).
        """
        from win32com.client import Dispatch

        xlApp = Dispatch("Excel.Application")
        xlApp.Visible = False   # set True if you want to watch it build

        wb = xlApp.Workbooks.Add()
        ws = wb.Worksheets(1)
        ws.Name = "Audit Changes"

        headers = self.BASE_COLUMNS + self.DETAIL_COLUMNS

        # Header row
        for col_idx, header in enumerate(headers, start=1):
            ws.Cells(1, col_idx).Value = header

        # Data rows
        for row_idx, row in enumerate(self.rows, start=2):
            for col_idx, header in enumerate(headers, start=1):
                ws.Cells(row_idx, col_idx).Value = row.get(header, "")

        # Autofit and (optionally) make a table
        last_row = len(self.rows) + 1
        last_col = len(headers)
        data_range = ws.Range(ws.Cells(1, 1), ws.Cells(last_row, last_col))
        data_range.EntireColumn.AutoFit()

        try:
            table = ws.ListObjects.Add(
                SourceType=0,           # xlSrcRange
                Source=data_range,
                XlListObjectHasHeaders=1,  # xlYes
            )
            table.Name = "AuditChangesTable"
            table.TableStyle = "TableStyleLight9"
        except Exception:
            # If ListObjects/TableStyle fails, just ignore it
            pass

        wb.SaveAs(excel_path)
        wb.Close(SaveChanges=True)
        xlApp.Quit()

    # ------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------

    def _copy_base_columns(self, src_row: Dict[str, Any]) -> Dict[str, Any]:
        """
        Copy only the base columns from the source CSV row.
        """
        out: Dict[str, Any] = {}
        for col in self.BASE_COLUMNS:
            out[col] = src_row.get(col, "")
        return out

    def _parse_details_xml(self, xml_str: str) -> List[Dict[str, str]]:
        """
        Parse the XML in Details and return a list of dicts with keys:
          - field_name
          - field_type
          - unset_choice  (combined unSetChoice/unsetChoice)
          - set_choice
          - old_value
          - new_value
        """
        xml_str = (xml_str or "").strip()
        if not xml_str:
            return []

        try:
            root = ET.fromstring(xml_str)
        except ET.ParseError:
            return []

        results: List[Dict[str, str]] = []

        # Find all <field> elements anywhere under the root
        for field_elem in root.findall(".//field"):
            attrib = field_elem.attrib

            field_name = attrib.get("name", "")
            field_type = attrib.get("type", "")

            # Collect choice changes
            set_choices = [
                (child.text or "").strip()
                for child in field_elem.findall("setChoice")
                if (child.text or "").strip()
            ]

            # Note: both <unSetChoice> and <unsetChoice> appear in your data
            unset_choices = [
                (child.text or "").strip()
                for child in field_elem.findall("unSetChoice")
                if (child.text or "").strip()
            ]
            unset_choices += [
                (child.text or "").strip()
                for child in field_elem.findall("unsetChoice")
                if (child.text or "").strip()
            ]

            unset_choice_str = "; ".join(unset_choices) if unset_choices else ""
            set_choice_str = "; ".join(set_choices) if set_choices else ""

            # Gather old/new value tags (for non-choice cases)
            old_val_node = field_elem.find("oldValue")
            new_val_node = field_elem.find("newValue")
            old_val_text = (old_val_node.text or "").strip() if (old_val_node is not None and old_val_node.text) else ""
            new_val_text = (new_val_node.text or "").strip() if (new_val_node is not None and new_val_node.text) else ""

            results.append(
                {
                    "field_name": field_name,
                    "field_type": field_type,
                    "unset_choice": unset_choice_str,
                    "set_choice": set_choice_str,
                    "old_value": old_val_text,
                    "new_value": new_val_text,
                }
            )

        return results


# ------------------------------------------------------------
# Example usage
# ------------------------------------------------------------

if __name__ == "__main__":
    input_csv = r"C:\path\to\RelativityAuditReport.csv"
    output_excel = r"C:\path\to\RelativityAuditReport_Flattened_XML.xlsx"

    flattener = RelativityAuditFlattenerXml(input_csv)
    flattener.load_and_flatten()

    print(f"Total field-level changes: {flattener.total_field_changes()}")

    flattener.export_to_excel(output_excel)
    print(f"Excel report written to: {output_excel}")

Revision:	961
Committed:	Mon Dec 8 22:26:25 2025 UTC (3 months, 2 weeks ago) by nino.borges
Content type:	text/x-python
File size:	8764 byte(s)
Log Message:	Updated to write the xml data, only the bits I want, to sep columns.
#	Content
1	"""
2
3	RelativityAuditFlattenerXml
4
5	Created by
6	Emanuel Borges
7	12.08.2025
8
9	Using my XML inspector, this program will attempt to take a raw Relativity audit history report and generate a flatened spreadsheet report.
10
11	"""
12
13
14	import csv
15	import xml.etree.ElementTree as ET
16	from typing import Any, Dict, List
17
18
19	class RelativityAuditFlattenerXml:
20	"""
21	Reads a Relativity audit CSV and flattens XML 'Details' so that
22	each <field> modification becomes its own row, with the original
23	columns (Name through Request Origination) repeated.
24
25	XML schema (from your inspector):
26	- Root: <auditElement>
27	- Repeated per-field nodes: <field id="..." type="..." name="..." formatstring="...">
28	- Inside <field>:
29	<setChoice>...</setChoice>
30	<unSetChoice>...</unSetChoice>
31	<unsetChoice>...</unsetChoice>
32	<oldValue>...</oldValue>
33	<newValue>...</newValue>
34	"""
35
36	# The base columns we want to carry through from the CSV
37	BASE_COLUMNS = [
38	"Name",
39	"Action",
40	"User Name",
41	"Timestamp",
42	"ArtifactID",
43	"Execution Time",
44	"ID",
45	"Object Type",
46	"Request Origination",
47	]
48
49	# New columns derived from the XML
50	DETAIL_COLUMNS = [
51	"Field Name", # from <field name="...">
52	"Field Type", # from <field type="...">
53	"unSetChoice", # combined <unSetChoice> and <unsetChoice>
54	"setChoice", # <setChoice>
55	"oldValue", # <oldValue>
56	"newValue", # <newValue>
57	]
58
59	def __init__(self, csv_path: str) -> None:
60	self.csv_path = csv_path
61	self.rows: List[Dict[str, Any]] = []
62
63	# ------------------------------------------------------------
64	# Public API
65	# ------------------------------------------------------------
66
67	def load_and_flatten(self) -> None:
68	"""
69	Read the CSV, parse Details XML, and populate self.rows with
70	one row per <field> modification.
71	"""
72	with open(self.csv_path, newline="", encoding="utf-8-sig") as f:
73	reader = csv.DictReader(f)
74
75	for src_row in reader:
76	details_str = (src_row.get("Details") or "").strip()
77	field_change_entries = self._parse_details_xml(details_str)
78
79	# If we couldn't parse any field-level changes, still output
80	# a single row so the event isn't lost.
81	if not field_change_entries:
82	out_row = self._copy_base_columns(src_row)
83	out_row.update({
84	"Field Name": "",
85	"Field Type": "",
86	"unSetChoice": "",
87	"setChoice": "",
88	"oldValue": "",
89	"newValue": "",
90	})
91	self.rows.append(out_row)
92	continue
93
94	# One row per <field> node
95	for fc in field_change_entries:
96	out_row = self._copy_base_columns(src_row)
97	out_row.update({
98	"Field Name": fc.get("field_name", ""),
99	"Field Type": fc.get("field_type", ""),
100	"unSetChoice": fc.get("unset_choice", ""),
101	"setChoice": fc.get("set_choice", ""),
102	"oldValue": fc.get("old_value", ""),
103	"newValue": fc.get("new_value", ""),
104	})
105	self.rows.append(out_row)
106
107	def total_field_changes(self) -> int:
108	"""Total number of field-level rows."""
109	return len(self.rows)
110
111	def export_to_excel(self, excel_path: str) -> None:
112	"""
113	Export flattened rows to Excel using win32com (your preferred approach).
114	"""
115	from win32com.client import Dispatch
116
117	xlApp = Dispatch("Excel.Application")
118	xlApp.Visible = False # set True if you want to watch it build
119
120	wb = xlApp.Workbooks.Add()
121	ws = wb.Worksheets(1)
122	ws.Name = "Audit Changes"
123
124	headers = self.BASE_COLUMNS + self.DETAIL_COLUMNS
125
126	# Header row
127	for col_idx, header in enumerate(headers, start=1):
128	ws.Cells(1, col_idx).Value = header
129
130	# Data rows
131	for row_idx, row in enumerate(self.rows, start=2):
132	for col_idx, header in enumerate(headers, start=1):
133	ws.Cells(row_idx, col_idx).Value = row.get(header, "")
134
135	# Autofit and (optionally) make a table
136	last_row = len(self.rows) + 1
137	last_col = len(headers)
138	data_range = ws.Range(ws.Cells(1, 1), ws.Cells(last_row, last_col))
139	data_range.EntireColumn.AutoFit()
140
141	try:
142	table = ws.ListObjects.Add(
143	SourceType=0, # xlSrcRange
144	Source=data_range,
145	XlListObjectHasHeaders=1, # xlYes
146	)
147	table.Name = "AuditChangesTable"
148	table.TableStyle = "TableStyleLight9"
149	except Exception:
150	# If ListObjects/TableStyle fails, just ignore it
151	pass
152
153	wb.SaveAs(excel_path)
154	wb.Close(SaveChanges=True)
155	xlApp.Quit()
156
157	# ------------------------------------------------------------
158	# Internal helpers
159	# ------------------------------------------------------------
160
161	def _copy_base_columns(self, src_row: Dict[str, Any]) -> Dict[str, Any]:
162	"""
163	Copy only the base columns from the source CSV row.
164	"""
165	out: Dict[str, Any] = {}
166	for col in self.BASE_COLUMNS:
167	out[col] = src_row.get(col, "")
168	return out
169
170	def _parse_details_xml(self, xml_str: str) -> List[Dict[str, str]]:
171	"""
172	Parse the XML in Details and return a list of dicts with keys:
173	- field_name
174	- field_type
175	- unset_choice (combined unSetChoice/unsetChoice)
176	- set_choice
177	- old_value
178	- new_value
179	"""
180	xml_str = (xml_str or "").strip()
181	if not xml_str:
182	return []
183
184	try:
185	root = ET.fromstring(xml_str)
186	except ET.ParseError:
187	return []
188
189	results: List[Dict[str, str]] = []
190
191	# Find all <field> elements anywhere under the root
192	for field_elem in root.findall(".//field"):
193	attrib = field_elem.attrib
194
195	field_name = attrib.get("name", "")
196	field_type = attrib.get("type", "")
197
198	# Collect choice changes
199	set_choices = [
200	(child.text or "").strip()
201	for child in field_elem.findall("setChoice")
202	if (child.text or "").strip()
203	]
204
205	# Note: both <unSetChoice> and <unsetChoice> appear in your data
206	unset_choices = [
207	(child.text or "").strip()
208	for child in field_elem.findall("unSetChoice")
209	if (child.text or "").strip()
210	]
211	unset_choices += [
212	(child.text or "").strip()
213	for child in field_elem.findall("unsetChoice")
214	if (child.text or "").strip()
215	]
216
217	unset_choice_str = "; ".join(unset_choices) if unset_choices else ""
218	set_choice_str = "; ".join(set_choices) if set_choices else ""
219
220	# Gather old/new value tags (for non-choice cases)
221	old_val_node = field_elem.find("oldValue")
222	new_val_node = field_elem.find("newValue")
223	old_val_text = (old_val_node.text or "").strip() if (old_val_node is not None and old_val_node.text) else ""
224	new_val_text = (new_val_node.text or "").strip() if (new_val_node is not None and new_val_node.text) else ""
225
226	results.append(
227	{
228	"field_name": field_name,
229	"field_type": field_type,
230	"unset_choice": unset_choice_str,
231	"set_choice": set_choice_str,
232	"old_value": old_val_text,
233	"new_value": new_val_text,
234	}
235	)
236
237	return results
238
239
240	# ------------------------------------------------------------
241	# Example usage
242	# ------------------------------------------------------------
243
244	if __name__ == "__main__":
245	input_csv = r"C:\path\to\RelativityAuditReport.csv"
246	output_excel = r"C:\path\to\RelativityAuditReport_Flattened_XML.xlsx"
247
248	flattener = RelativityAuditFlattenerXml(input_csv)
249	flattener.load_and_flatten()
250
251	print(f"Total field-level changes: {flattener.total_field_changes()}")
252
253	flattener.export_to_excel(output_excel)
254	print(f"Excel report written to: {output_excel}")
255