ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NS_MasterAttorneyList.py
Revision: 954
Committed: Mon Nov 17 18:46:09 2025 UTC (4 months, 1 week ago) by nino.borges
Content type: text/x-python
File size: 25211 byte(s)
Log Message:
First working version of the NS master attorney list.

File Contents

# User Rev Content
1 nino.borges 954 """
2    
3     NS_MasterAttorneyList
4    
5     Created by:
6     Emanuel Borges
7     11.01.2025
8    
9     A library for the creation and management of the Norfolk Southern MAL.
10    
11     """
12    
13     import os, uuid, pickle, re
14     from dataclasses import dataclass, field, fields
15     from typing import List, Tuple, Optional
16     from collections import namedtuple
17     from win32com.client import Dispatch
18    
19    
# Module-level version string.
# NOTE(review): the NS_MasterAttorneyList class in this file defines its own
# `version` attribute ('0.01.0'); confirm which of the two is authoritative.
version = "1.0"
21    
@dataclass
class Person:
    """A single entry (attorney or non-attorney) on the master attorney list.

    Every attribute defaults to None except ``_id``, which receives a fresh
    ``uuid.uuid4()`` per instance.  After construction, most text attributes
    are stripped of surrounding whitespace and upper-cased (see
    ``__post_init__`` for the exact list).
    """

    first_name: Optional[str] = None
    last_name: Optional[str] = None
    alt_first_names: Optional[str] = None
    alt_surnames: Optional[str] = None
    # TODO: Make these email addresses lists instead of strings
    work_email_addresses: Optional[str] = None
    raw_email_addresses: Optional[str] = None
    # Generated identifier; unique per instance.
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    middle_initial: Optional[str] = None
    company: Optional[str] = None
    category: Optional[str] = None
    job_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    vendor_normalized_name: Optional[str] = None
    user_id: Optional[str] = None
    # Not touched by __post_init__; stored exactly as supplied.
    full_name_overide: Optional[str] = None
    # Only gather unique_attorney_row_number from the attorney and split role
    # attorney tabs. NEVER from Non-Attorneys.  Not touched by __post_init__.
    unique_attorney_row_number: Optional[str] = None
    # Intended to hold (startdate, enddate) pairs.  None is allowed for now;
    # this may later become an empty list created via a default factory.
    dates_as_counsel: Optional[List[Tuple[str, str]]] = None

    def __post_init__(self):
        """Strip and upper-case the text attributes listed below.

        Falsy values (None, empty string) are left as they are.  The
        attributes are processed in the order given.
        """
        normalised_attributes = (
            "first_name",
            "alt_first_names",
            "alt_surnames",
            "last_name",
            "work_email_addresses",
            "raw_email_addresses",
            "is_attorney",
            "split_role_date_range",
            "middle_initial",
            "company",
            "category",
            "job_title",
            "full_name_preferred",
            "vendor_normalized_name",
            "user_id",
        )
        for attribute_name in normalised_attributes:
            current = getattr(self, attribute_name)
            if current:
                setattr(self, attribute_name, current.strip().upper())
80    
81    
@dataclass
class PeopleList:
    """A collection of Person records with lookup helpers.

    Several Person attributes (email addresses, user ids, alternate first
    names, preferred full names) hold multiple values in one string, with
    entries separated by a semicolon followed by a newline.  The helpers here
    split on that delimiter and strip each entry before comparing.

    Comparisons are exact and case-sensitive unless a method says otherwise.
    Person upper-cases most of its text attributes, so callers normally need
    to pass upper-case search values.
    """

    people: List["Person"] = field(default_factory=list)

    @staticmethod
    def _split_values(rawValue):
        """Split a multi-value attribute string into a list of stripped entries."""
        return [entry.strip() for entry in rawValue.split(";\n")]

    def add_person(self, person: "Person"):
        """Append person to the collection."""
        self.people.append(person)

    def search_by_email(self, emailAddress: str) -> Optional["Person"]:
        """Return the first person whose work_email_addresses contains emailAddress, else None.

        Assumes email addresses are unique across the list.
        """
        for person in self.people:
            if person.work_email_addresses:
                if emailAddress in self._split_values(person.work_email_addresses):
                    return person
        return None

    def search_by_login_id(self, loginID: str) -> Optional["Person"]:
        """Return the first person whose user_id contains loginID, else None.

        Assumes login user ids are unique across the list.
        """
        for person in self.people:
            if person.user_id:
                if loginID in self._split_values(person.user_id):
                    return person
        return None

    def search_by_unique_attorney_row_number(self, uniqueAttorneyRowNumber: str) -> Optional["Person"]:
        """Return the first person whose unique_attorney_row_number equals the given value, else None.

        Assumes the row numbers are unique across the list.
        """
        for person in self.people:
            if person.unique_attorney_row_number == uniqueAttorneyRowNumber:
                return person
        return None

    def search_by_id(self, idNumber):
        """Return the first person whose _id equals idNumber, else None.

        idNumber must be a UUID object, e.g.
        UUID('7414f78c-8289-4c9f-bd49-a5aaac35545f').
        """
        for person in self.people:
            if person._id == idNumber:
                return person
        return None

    def return_list_of_matching_values(self, fieldName, value: str):
        """Return every person whose multi-value attribute fieldName contains value.

        fieldName must name a string attribute; people for whom the attribute
        is empty or None are skipped.
        """
        matchingPeopleList = []
        for person in self.people:
            personVals = getattr(person, fieldName)
            if personVals and value in self._split_values(personVals):
                matchingPeopleList.append(person)
        return matchingPeopleList

    def list_people(self):
        """Print every person in the collection."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress: str, fullNameOverideValue) -> None:
        """Set full_name_overide on every person owning emailAddress.

        The address is stripped, upper-cased and compared against each
        individual entry of work_email_addresses, so people with several
        addresses are matched too.  The override is stored stripped and
        upper-cased.  A warning is printed when the override's last name
        differs from the person's last_name, and when no person matched.
        """
        targetAddress = emailAddress.strip().upper()
        overrideText = fullNameOverideValue.strip()

        # Work out the last name once: "LAST, FIRST" or "FIRST ... LAST".
        if "," in overrideText:
            lastName = overrideText.split(",")[0].strip()
        else:
            nameParts = overrideText.split()
            lastName = nameParts[-1] if nameParts else ""

        valueUpdated = False
        for person in self.people:
            if not (targetAddress and person.work_email_addresses):
                continue
            if targetAddress not in self._split_values(person.work_email_addresses):
                continue
            person.full_name_overide = overrideText.upper()
            valueUpdated = True
            ## Give a quick warning as the override is stored if the last name differs.
            if lastName.upper() != person.last_name:
                print(f"WARNING: Overide last name value {lastName.upper()} does not match {person.last_name}.")
        if not valueUpdated:
            print(f"WARNING: No email address match for {emailAddress} found.")

    def return_person_all_name_variations(self, person):
        """Return a de-duplicated list of full-name variations for person.

        Combines the first name and each alternate first name with the last
        name ("FIRST LAST", "LAST FIRST", "LAST, FIRST"), plus middle-initial
        forms with and without a trailing period when a middle initial exists.
        Also includes each preferred full name and the vendor normalized name,
        both as-is and with "(ESQ.)" removed.

        Name combinations are only produced when both a first and a last name
        are available, so no entry ever contains the text "None".  Returns an
        empty list when person is None.  The order of the result is
        unspecified.
        """
        ## TODO: Fix this to support multiple surnames (alt_surnames is not used yet).
        if person is None:
            return []

        last = person.last_name.strip() if person.last_name else None

        firsts = []
        if person.first_name and person.first_name.strip():
            firsts.append(person.first_name.strip())
        if person.alt_first_names:
            firsts.extend(alt for alt in self._split_values(person.alt_first_names) if alt)

        middle = person.middle_initial.replace(".", "").strip() if person.middle_initial else None

        combos = set()  ## Using a set here to avoid dupes.

        if last:
            for first in firsts:
                ## Some basic combinations
                combos.add(f"{first} {last}")
                combos.add(f"{last} {first}")
                combos.add(f"{last}, {first}")

                ## Include middle initial variations if it exists
                if middle:
                    combos.add(f"{first} {middle} {last}")
                    combos.add(f"{last} {first} {middle}")
                    combos.add(f"{last}, {first} {middle}")
                    combos.add(f"{first} {middle}. {last}")
                    combos.add(f"{last} {first} {middle}.")
                    combos.add(f"{last}, {first} {middle}.")

        if person.full_name_preferred:
            combos.update(name for name in self._split_values(person.full_name_preferred) if name)
        if person.vendor_normalized_name:
            combos.add(person.vendor_normalized_name.strip())
            ## Want to add the vendor version of the name without the ESQ here.
            combos.add(person.vendor_normalized_name.upper().replace("(ESQ.)", "").strip())
        return list(combos)
201    
class NS_MasterAttorneyList(object):
    """A class for building and performing functions against the NS Master Attorney List."""
    version = '0.01.0'

    def __init__(self, masterAttorneyListFileName, fullNameOveridesFileName = False, forceNewPklFile = False, Encoding = 'UTF8'):
        """Load the MAL into self.malPeopleList.

        The MAL is assumed to be an Excel spreadsheet (for now).  The parsed
        list is cached in a pickle file next to the spreadsheet (same name,
        ".pkl" extension) for performance reasons.

        masterAttorneyListFileName -- path of the MAL spreadsheet.
        fullNameOveridesFileName   -- optional path of a "email|full name"
                                      override file, applied only when the
                                      list is rebuilt from the spreadsheet.
        forceNewPklFile            -- rebuild from the spreadsheet even if a
                                      pickle file already exists.
        Encoding                   -- currently unused.
                                      NOTE(review): presumably meant for
                                      reading the override file; confirm.
        """
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
            return

        if not forceNewPklFile:
            print("Pickle file doesnt exist.")
        self.__BuildAndCacheMal(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)

    def __BuildAndCacheMal(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Pseudo-private method which rebuilds the people list from the spreadsheet, applies overrides and writes the pickle cache."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        if fullNameOveridesFileName:
            print("Loading full name overide values...")
            self.__LoadFullNameOverideValues(fullNameOveridesFileName)
            print("Full name overide values loaded.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")

    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass.

        Column positions are discovered from the header text in row 1 of each
        tab.  A KeyError is raised if an expected header is missing.  The
        workbook is always closed, without saving, even when ingestion fails.
        """
        ## There doesnt seem to be a consistent value in the "row" column in the MAL, so setting these parameters here to avoid gap issues.
        ## excelTabParametersList should always be an ordered list because now order matters.
        ## TODO: Refactor the excelTabParametersList later. Columns are not consistent across tabs.
        excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":176, "beginColNumber":1, "endColNumber":20},
                                  {"tabName":"Non-Attorneys", "beginRowNumber":2, "endRowNumber":12, "beginColNumber":1, "endColNumber":20},
                                  {"tabName":"Split Role Attorneys", "beginRowNumber":2, "endRowNumber":21, "beginColNumber":1, "endColNumber":10}]

        ## Person keyword argument -> spreadsheet header text. Shared by the
        ## Attorneys and Non-Attorneys tabs, which are read identically.
        ## NOTE(review): comments elsewhere in this file say the unique attorney
        ## row number must NOT be taken from the Non-Attorneys tab, yet the
        ## 'MAL ID' column has always been read for both tabs. Behaviour is
        ## kept as-is here; confirm which is intended.
        personFieldToHeader = {
            "is_attorney": 'Is Attorney',
            "company": 'Company',
            "category": 'Category',
            "last_name": 'Last Name',
            "first_name": 'First Name',
            "alt_first_names": 'Alt First Names',
            "alt_surnames": 'Alt Surnames',
            "middle_initial": 'Middle Initial',
            "work_email_addresses": 'Email Addresses',
            "raw_email_addresses": 'EmailAddress RAW',
            "job_title": 'Title',
            "full_name_preferred": 'Full Name (Preferred)',
            "vendor_normalized_name": 'Vendor Normalized Name',
            "user_id": 'UserID',
            "unique_attorney_row_number": 'MAL ID',
        }
        peopleTabNames = ('Attorneys', 'Non-Attorneys')

        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        try:
            for excelTab in excelTabParametersList:
                tabName = excelTab['tabName']
                sht = xlBook.Worksheets(tabName)
                print(f"Ingesting sheet {tabName}.")

                ## Header text (row 1) -> column number for this tab.
                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    excelFieldPositionMatrix[sht.Cells(1, col).Value] = col

                if tabName in peopleTabNames:
                    for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1):
                        personValues = {}
                        for fieldName, headerText in personFieldToHeader.items():
                            personValues[fieldName] = sht.Cells(row, excelFieldPositionMatrix[headerText]).Value
                        self.malPeopleList.add_person(Person(**personValues))
                elif tabName == 'Split Role Attorneys':
                    ## Skip this tab for now.
                    ## TODO: planned handling -- look up the person via the
                    ## 'Attorney Row' column (search_by_unique_attorney_row_number),
                    ## take the first line of 'Dates as Counsel', split it on ";"
                    ## into ranges and each range on "-" into an upper-cased,
                    ## stripped (startdate, enddate) tuple, and store the list
                    ## on the person's dates_as_counsel.
                    continue
                else:
                    print(f"ERROR UNKNOWN TAB! {tabName} HAVE NEEDED TAB NAMES CHANGED?")
        finally:
            ## Nothing is written to the workbook, so close without saving;
            ## this also avoids a "save changes?" prompt blocking the run.
            xlBook.Close(False)

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName, 'wb') as outputFile:
            pickle.dump(self.malPeopleList, outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons."""
        ## SECURITY NOTE: unpickling can execute arbitrary code. Only load pkl
        ## files that this class wrote itself; never one from an untrusted source.
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which will update the MAL people list object with the full name overide values.

        Each non-blank line must look like "emailAddress|fullNameOverideValue".
        Blank lines are ignored; a line without "|" is reported and skipped.
        Only the first "|" on a line separates the two values.
        """
        with open(fullNameOveridesFileName) as overrideFile:
            for line in overrideFile:
                line = line.rstrip("\n")
                if not line.strip():
                    continue
                emailAddress, separator, fullNameOverideValue = line.partition("|")
                if not separator:
                    print(f"WARNING: Skipping malformed overide line (no '|' found): {line}")
                    continue
                self.malPeopleList.update_full_Name_overide(emailAddress, fullNameOverideValue)

    def SmartDedupeSet(self, currentSet):
        """A method that attempts to do some additional deduplication of the values in a set by lowering all values and deduplicating. Returns a lowered deduplicated set."""
        return {val.lower() for val in currentSet}

    def RunMalEmailAddressIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing and looking for duplicate email addresses.

        Every entry of every person's work_email_addresses is upper-cased and
        stripped; an "ISSUE" line is printed each time an address is seen
        again.  Empty entries (e.g. from a trailing delimiter) are ignored.
        """
        seenAddresses = set()
        print("Performing MAL email address integrity check...")
        for person in self.malPeopleList.people:
            ## Right now work addresses are stored as a string that you need to parse to use.
            workAddrs = person.work_email_addresses
            if workAddrs is None:
                continue
            for workAddr in workAddrs.split(";\n"):
                workAddr = workAddr.upper().strip()
                if not workAddr:
                    continue
                if workAddr in seenAddresses:
                    print(f"ISSUE:{workAddr} is a dupe!")
                else:
                    seenAddresses.add(workAddr)
        print("\nEmail address integrity check complete.\n\n")

    def RunMalLoginIdIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing and looking for duplicate User Login ID values.

        Every entry of every person's user_id is upper-cased and stripped; an
        "ISSUE" line is printed each time an id is seen again.  Empty entries
        are ignored.
        """
        seenLoginIds = set()
        print("Performing MAL user login ID integrity check...")
        for person in self.malPeopleList.people:
            loginIds = person.user_id
            if loginIds is None:
                continue
            for loginId in loginIds.split(";\n"):
                loginId = loginId.upper().strip()
                if not loginId:
                    continue
                if loginId in seenLoginIds:
                    print(f"ISSUE:{loginId} is a dupe!")
                else:
                    seenLoginIds.add(loginId)
        print("\nUser Login ID integrity check complete.\n\n")
416    
if __name__ == '__main__':
    # Manual smoke test against a local copy of the MAL spreadsheet.
    masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Norfolk Southern\_NS_Current_MAL\RG - NS Cross-Matter Master Attorney List 20251113 (20251117 - 1203).xlsx"
    nsMal = NS_MasterAttorneyList(masterAttorneyListFileName)

    # Exercise the name-variation builder on one known address. The lookup
    # returns None when the address is not on the list, so guard before use.
    test = nsMal.malPeopleList.search_by_email('Catharine.Fletcher@nscorp.com'.upper())
    if test is not None:
        nsMal.malPeopleList.return_person_all_name_variations(test)

    # Other lookups available on nsMal.malPeopleList:
    #   return_list_of_matching_values('work_email_addresses', <EMAIL>)
    #   search_by_login_id(<LOGIN ID>)
    test = nsMal.malPeopleList.search_by_email('mcarr@dmclaw.com'.upper())
    print(test)
    if test is not None:
        print(nsMal.malPeopleList.return_person_all_name_variations(test))

    nsMal.RunMalEmailAddressIntegrityCheck()
    nsMal.RunMalLoginIdIntegrityCheck()