ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Tool_Box/XmlSchemaInspector.py
Revision: 959
Committed: Mon Dec 8 21:52:35 2025 UTC (3 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 2220 byte(s)
Log Message:
A library that I continue to refine to gather a schema from Relativity XML data.

File Contents

# User Rev Content
1 nino.borges 959 """
2    
3     XmlSchemaInspector
4    
5     Created by:
6     Emanuel Borges
7     11.01.2024
8    
9     A library that I've been refining to inspect XML data from Relativity, specifically with audit history reports.
10     This will give me some insight so that I can write a true report generator from these exports.
11    
12     """
13    
14     import csv
15     import xml.etree.ElementTree as ET
16     from collections import Counter, defaultdict
17     from typing import Optional
18    
19    
class XmlSchemaInspector:
    """Accumulate tag and attribute frequencies across many XML fragments.

    Feed documents one at a time with :meth:`add_xml_str`, then read the
    public counters directly or call :meth:`print_summary`.

    Public state:
        tag_counts: element tag -> number of elements seen with that tag.
        attr_counts: element tag -> attribute name -> number of elements of
            that tag carrying the attribute. Only tags that had at least one
            attribute get an entry.
        parse_errors: number of non-empty inputs that were skipped because
            they were not well-formed XML.
    """

    def __init__(self) -> None:
        self.tag_counts = Counter()
        self.attr_counts = defaultdict(Counter)  # tag -> attr_name -> count
        self.parse_errors = 0

    def add_xml_str(self, s: str) -> None:
        """Parse *s* as one XML document and fold it into the running counts.

        ``None``, empty and whitespace-only input is ignored. Input that is
        not well-formed XML is skipped without raising; every such skip is
        tallied in ``parse_errors`` so callers can tell how much was dropped.
        """
        s = (s or "").strip()
        if not s:
            return
        try:
            root = ET.fromstring(s)
        except ET.ParseError:
            # Best-effort by design: keep going, but leave a trace.
            self.parse_errors += 1
            return
        self._visit(root)

    def _visit(self, elem: ET.Element) -> None:
        """Count *elem* and every element below it.

        Uses ``Element.iter()`` (document order) instead of one Python call
        per nesting level, so very deep documents cannot hit the
        interpreter's recursion limit.
        """
        for node in elem.iter():
            self.tag_counts[node.tag] += 1
            for attr_name in node.attrib:
                self.attr_counts[node.tag][attr_name] += 1

    def print_summary(self) -> None:
        """Print every tag, most frequent first, with its attribute counts.

        Purely a report: the counters are not modified.
        """
        print("Tag / attribute summary:")
        for tag, count in self.tag_counts.most_common():
            print(f" <{tag}>: {count} occurrences")
            # .get() rather than indexing: indexing a defaultdict would
            # insert an empty Counter for every attribute-less tag.
            attrs = self.attr_counts.get(tag)
            if attrs:
                print(" attributes:")
                for attr_name, acount in attrs.most_common():
                    print(f" {attr_name}: present in {acount} elements")
51    
52    
def inspect_details_xml_from_csv(csv_path: str, limit: Optional[int] = None) -> None:
    """Summarise the XML found in the ``Details`` column of a CSV file.

    The file is opened as ``utf-8-sig`` and read with ``csv.DictReader``.
    Each row's ``Details`` value (missing or empty values are treated as an
    empty string) is handed to an ``XmlSchemaInspector``. When done, the
    number of rows read and the inspector's summary are printed.

    Args:
        csv_path: Path of the CSV file to read.
        limit: Maximum number of rows to process. ``None`` means all rows;
            zero or a negative value processes no rows.
    """
    from itertools import islice

    inspector = XmlSchemaInspector()
    count = 0

    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        # Apply the cap before any row is consumed, so limit=0 really means
        # "no rows" instead of processing one row and then stopping.
        rows = reader if limit is None else islice(reader, max(limit, 0))
        for row in rows:
            inspector.add_xml_str(row.get("Details") or "")
            count += 1

    print(f"Processed {count} CSV rows (for XML).")
    inspector.print_summary()
68    
69    
if __name__ == "__main__":
    import sys

    # Usage: XmlSchemaInspector.py [csv_path [limit]]
    # With no arguments this behaves as before: placeholder path, 500 rows.
    cli_args = sys.argv[1:]
    csv_path = cli_args[0] if cli_args else r"C:\path\to\RelativityAuditReport.csv"
    row_limit = int(cli_args[1]) if len(cli_args) > 1 else 500
    inspect_details_xml_from_csv(csv_path, limit=row_limit)
73