ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Tool_Box/XmlSchemaInspector.py
Revision: 959
Committed: Mon Dec 8 21:52:35 2025 UTC (3 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 2220 byte(s)
Log Message:
A library that I continue to refine to gather a schema from Relativity XML data.

File Contents

# Content
1 """
2
3 XmlSchemaInspector
4
5 Created by:
6 Emanuel Borges
7 11.01.2024
8
9 A library that I've been refining to inspect XML data from Relativity, specifically with audit history reports.
10 This will give me some insight so that I can write a true report generator from these exports.
11
12 """
13
14 import csv
15 import xml.etree.ElementTree as ET
16 from collections import Counter, defaultdict
17 from typing import Optional
18
19
class XmlSchemaInspector:
    """Accumulate tag and attribute frequencies across XML fragments.

    Feed XML strings in with :meth:`add_xml_str`; every element of each
    successfully parsed fragment contributes to two tallies:

    * ``tag_counts``  -- tag -> number of elements seen with that tag
    * ``attr_counts`` -- tag -> attribute name -> number of elements of that
      tag carrying the attribute

    :meth:`print_summary` writes both tallies to standard output.
    """

    def __init__(self) -> None:
        self.tag_counts = Counter()
        self.attr_counts = defaultdict(Counter)  # tag -> attr_name -> count

    def add_xml_str(self, s: str) -> None:
        """Parse *s* as XML and fold its elements into the tallies.

        ``None``, empty, and whitespace-only input is skipped, as is any
        text that ``ElementTree`` cannot parse; neither case raises.
        """
        s = (s or "").strip()
        if not s:
            return
        try:
            root = ET.fromstring(s)
        except ET.ParseError:
            # Best effort by design: a fragment that is not well-formed XML
            # is ignored rather than aborting the whole inspection run.
            return
        self._visit(root)

    def _visit(self, elem: ET.Element) -> None:
        """Count *elem* and every descendant element.

        ``Element.iter()`` walks the subtree in document order, so the
        tallies are filled in the same order as a parent-first recursive
        walk, but without one Python stack frame per nesting level.
        """
        for node in elem.iter():
            self.tag_counts[node.tag] += 1
            for attr_name in node.attrib:
                self.attr_counts[node.tag][attr_name] += 1

    def print_summary(self) -> None:
        """Print every tag, most frequent first, with its attribute counts."""
        print("Tag / attribute summary:")
        for tag, count in self.tag_counts.most_common():
            print(f" <{tag}>: {count} occurrences")
            # .get() rather than [] so that reporting never inserts empty
            # Counter entries into the defaultdict for attribute-less tags.
            attrs = self.attr_counts.get(tag)
            if attrs:
                print(" attributes:")
                for attr_name, acount in attrs.most_common():
                    print(f" {attr_name}: present in {acount} elements")
51
52
def inspect_details_xml_from_csv(csv_path: str, limit: Optional[int] = None) -> None:
    """Summarise the XML found in the ``Details`` column of a CSV file.

    The file is opened as ``utf-8-sig`` and read with ``csv.DictReader``.
    Each row's ``Details`` value is handed to an ``XmlSchemaInspector``;
    a missing or empty value is passed as an empty string. After reading,
    the number of rows processed and the inspector's summary are printed.

    Args:
        csv_path: Path of the CSV file to read.
        limit: Maximum number of rows to process. ``None`` processes every
            row; zero or a negative value processes none.
    """
    inspector = XmlSchemaInspector()
    count = 0

    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        for row in reader:
            # Check the limit before touching the row so that limit=0
            # really means "no rows" instead of processing one first.
            if limit is not None and count >= limit:
                break
            inspector.add_xml_str(row.get("Details") or "")
            count += 1

    print(f"Processed {count} CSV rows (for XML).")
    inspector.print_summary()
68
69
if __name__ == "__main__":
    import sys

    # Take the CSV path from the first command-line argument when one is
    # supplied; otherwise fall back to the original placeholder path.
    if len(sys.argv) > 1:
        csv_path = sys.argv[1]
    else:
        csv_path = r"C:\path\to\RelativityAuditReport.csv"
    inspect_details_xml_from_csv(csv_path, limit=500)
73