ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/ATT_MasterAttorneyList.py
Revision: 985
Committed: Wed Feb 25 22:07:03 2026 UTC (4 weeks, 3 days ago) by nino.borges
Content type: text/x-python
File size: 37769 byte(s)
Log Message:
This version adds the build_similar_names method and the includeSimilarNamesExclusion option to the return_str_string method.

File Contents

# User Rev Content
1 nino.borges 913 """
2    
3     ATT_MasterAttorneyList
4    
5     Created by:
6     Emanuel Borges
7     07.14.2025
8    
9     A library for the creation and management of the ATT MAL.
10    
11     """
12    
13     import os, uuid, pickle, re
14 nino.borges 951 #import dill as pickle
15 nino.borges 913 from dataclasses import dataclass, field, fields
16 nino.borges 985 from typing import List, Tuple, Optional, Set, Dict
17     from collections import namedtuple, defaultdict
18 nino.borges 913 from win32com.client import Dispatch
19    
20    
21 nino.borges 985 version = "1.5"
22 nino.borges 913
@dataclass
class Person:
    """One row of the MAL: a person plus the identifiers used to match them.

    Text fields listed in ``__post_init__`` are stripped and uppercased on
    construction; all other fields are stored exactly as supplied.
    """

    first_name: Optional[str] = None
    last_name: Optional[str] = None
    alt_first_name: Optional[str] = None
    ## TODO: Make these email addresses lists instead of strings
    work_email_addresses: Optional[str] = None
    raw_email_addresses: Optional[str] = None
    ## Generated per instance; used as the person's identity key.
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    middle_initial: Optional[str] = None
    company: Optional[str] = None
    category: Optional[str] = None
    job_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    lh_normalized_name: Optional[str] = None
    user_id: Optional[str] = None
    full_name_overide: Optional[str] = None
    ## Only gather unique_attorney_row_number from the attorney and split role attorney tabs. NEVER from Non-Attorneys.
    unique_attorney_row_number:Optional[str] = None
    ## Stored as a list of (startdate, enddate) tuples. None is allowed for now; this may
    ## later be forced to an empty list.
    dates_as_counsel:Optional[List[Tuple[str,str]]] = None
    ## Kept out of repr() because the set can be large.
    name_variants: Set[str] = field(default_factory=set, repr=False)
    ## Ids of other people that share at least one name variant with this person.
    similar_names: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Strip surrounding whitespace from, and uppercase, each populated text field."""
        normalized_fields = (
            "first_name",
            "alt_first_name",
            "last_name",
            "work_email_addresses",
            "raw_email_addresses",
            "is_attorney",
            "split_role_date_range",
            "middle_initial",
            "company",
            "category",
            "job_title",
            "full_name_preferred",
            "lh_normalized_name",
            "user_id",
        )
        for attr_name in normalized_fields:
            current = getattr(self, attr_name)
            ## Empty / None values are left exactly as they were given.
            if current:
                setattr(self, attr_name, current.strip().upper())
83    
84    
@dataclass
class PeopleList:
    """An in-memory collection of Person records with lookup and search-string helpers.

    Several Person fields (email addresses, user ids, preferred names) hold
    multiple values in one string separated by a semicolon followed by a
    newline; the helpers here split on that separator before comparing.
    """

    people: List["Person"] = field(default_factory=list)

    @staticmethod
    def _split_multi_value(raw) -> List[str]:
        """Split a semicolon-newline separated string into stripped, non-empty parts."""
        if not raw:
            return []
        return [part.strip() for part in raw.split(";\n") if part.strip()]

    def add_person(self, person: "Person"):
        """Append person to the collection."""
        self.people.append(person)

    def search_by_email(self, emailAddress: str) -> Optional["Person"]:
        """Return the first person owning emailAddress, or None.

        The comparison is case-insensitive (stored addresses are uppercase).
        Assumes email addresses are unique across people.
        """
        target = emailAddress.strip().upper() if emailAddress else emailAddress
        for person in self.people:
            if target in self._split_multi_value(person.work_email_addresses):
                return person
        return None

    def search_by_login_id(self, loginID: str) -> Optional["Person"]:
        """Return the first person owning loginID, or None.

        The comparison is case-insensitive (stored ids are uppercase).
        Assumes login user ids are unique across people.
        """
        target = loginID.strip().upper() if loginID else loginID
        for person in self.people:
            if target in self._split_multi_value(person.user_id):
                return person
        return None

    def search_by_unique_attorney_row_number(self, uniqueAttorneyRowNumber: str) -> Optional["Person"]:
        """Return the first person whose unique_attorney_row_number equals the given value, or None."""
        for person in self.people:
            if person.unique_attorney_row_number == uniqueAttorneyRowNumber:
                return person
        return None

    def search_by_id(self, idNumber):
        """Return the first person whose _id equals idNumber, or None.

        idNumber must be a uuid.UUID, e.g. UUID('7414f78c-8289-4c9f-bd49-a5aaac35545f').
        """
        for person in self.people:
            if person._id == idNumber:
                return person
        return None

    def return_list_of_matching_values(self, fieldName, value: str):
        """Return every person for which value is one of the entries stored in fieldName.

        String fields are split on the semicolon-newline separator; list-like
        fields are searched directly; any other value is compared as a single entry.
        """
        matchingPeopleList = []
        for person in self.people:
            personVals = getattr(person, fieldName)
            if not personVals:
                continue
            if isinstance(personVals, str):
                candidates = self._split_multi_value(personVals)
            elif isinstance(personVals, (list, tuple, set, frozenset)):
                candidates = personVals
            else:
                ## e.g. a numeric cell value: there is nothing to split.
                candidates = [personVals]
            if value in candidates:
                matchingPeopleList.append(person)
        return matchingPeopleList

    def list_people(self):
        """Print every person in the collection."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress: str, fullNameOverideValue) -> None:
        """Set full_name_overide on every person that owns emailAddress.

        Prints a warning when the last name parsed from the override differs
        from the person's last_name, and when no person matches the address.
        """
        target = emailAddress.strip().upper()
        overideValue = fullNameOverideValue.strip().upper()
        ## "LAST, FIRST" when a comma is present, otherwise "FIRST ... LAST".
        if "," in overideValue:
            lastName = overideValue.split(",")[0].strip()
        else:
            lastName = overideValue.split(" ")[-1].strip()

        valueUpdated = False
        for person in self.people:
            if target not in self._split_multi_value(person.work_email_addresses):
                continue
            person.full_name_overide = overideValue
            valueUpdated = True
            ## Give a quick warning as the override goes in if the last name differs.
            if lastName != person.last_name:
                print(f"WARNING: Overide last name value {lastName} does not match {person.last_name}.")
        if not valueUpdated:
            print(f"WARNING: No email address match for {emailAddress} found.")

    def return_person_all_name_variations(self, person):
        """Return a de-duplicated list of the full-name spellings known for person.

        First/last (and middle initial) combinations are only generated when
        both a first and a last name exist, so a missing part never shows up
        as the literal text "None" in a variation.
        """
        last = person.last_name.strip() if person.last_name else None
        firsts = [name.strip() for name in (person.first_name, person.alt_first_name) if name and name.strip()]
        middle = person.middle_initial.replace(".", "").strip() if person.middle_initial else None

        combos = set()  ## A set avoids dupes.

        if last:
            for first in firsts:
                ## Some basic combinations
                combos.add(f"{first} {last}")
                combos.add(f"{last} {first}")
                combos.add(f"{last}, {first}")

                ## Include middle initial variations if it exists
                if middle:
                    combos.add(f"{first} {middle} {last}")
                    combos.add(f"{last} {first} {middle}")
                    combos.add(f"{last}, {first} {middle}")
                    combos.add(f"{first} {middle}. {last}")
                    combos.add(f"{last} {first} {middle}.")
                    combos.add(f"{last}, {first} {middle}.")

        combos.update(self._split_multi_value(person.full_name_preferred))

        if person.lh_normalized_name:
            lhName = person.lh_normalized_name.strip()
            ## Also keep the LH version of the name without the ESQ marker.
            lhNameNoEsq = person.lh_normalized_name.upper().replace("(ESQ.)", "").strip()
            combos.update(name for name in (lhName, lhNameNoEsq) if name)
        return list(combos)

    def return_str_string(self, person, includeSimilarNamesExclusion = True):
        """Return the STR search string for person.

        The string is "<last names> W/n <first names>", OR-ed with the
        person's email addresses and (when longer than 4 characters and not
        already part of the names) user ids. "NONE" is returned when the
        person has neither names nor email addresses.

        When includeSimilarNamesExclusion is true and person.similar_names is
        populated, a trailing NOT W/2 clause lists the email addresses of
        those similar people. person itself is never modified.
        """
        firstNamesSet = set()
        lastNamesSet = set()

        if person.last_name:
            lastNamesSet.add(person.last_name)
        if person.first_name:
            firstNamesSet.add(person.first_name)
        if person.alt_first_name:
            firstNamesSet.add(person.alt_first_name)
        if person.full_name_preferred:
            ## Work on a local copy so building a search string does not rewrite the record.
            preferred = person.full_name_preferred.replace('(SHE, HER)', "(SHE HER)")
            preferred = preferred.replace(",(LEGAL),", " (LEGAL),")
            if "," in preferred:
                parsedLast, parsedFirst = preferred.split(",", 1)
            else:
                *firstParts, parsedLast = preferred.split(" ")
                parsedFirst = " ".join(firstParts)
            lastNamesSet.add(parsedLast.strip())
            firstNamesSet.add(parsedFirst.strip())

        ## Replace any open or close parenthesis with a space, and drop names that end up blank
        ## (e.g. a one-word preferred name has no first-name part).
        firstNamesSet = {fn.replace("(", " ").replace(")", " ") for fn in firstNamesSet}
        lastNamesSet = {ln.replace("(", " ").replace(")", " ") for ln in lastNamesSet}
        firstNamesSet = {fn for fn in firstNamesSet if fn.strip()}
        lastNamesSet = {ln for ln in lastNamesSet if ln.strip()}

        ## Sorted so the generated string is the same on every run.
        namesList = []
        for nameSet in (lastNamesSet, firstNamesSet):
            if len(nameSet) > 1:
                namesList.append(f'({" OR ".join(sorted(nameSet))})')
            elif len(nameSet) == 1:
                namesList.append(next(iter(nameSet)))

        ## The proximity window is the number of space-separated tokens in the printed sets.
        withinWordCount = len(str(firstNamesSet).split(" ")) + len(str(lastNamesSet).split(" "))
        withinPhrase = f' W/{str(withinWordCount)} '

        ## One search term per address, de-duplicated but in stored order.
        emailAddrList = list(dict.fromkeys(self._split_multi_value(person.work_email_addresses)))

        if namesList:
            if emailAddrList:
                if len(emailAddrList) > 1:
                    strText = f"({withinPhrase.join(namesList)}) OR ({' OR '.join(emailAddrList)})"
                else:
                    strText = f"({withinPhrase.join(namesList)}) OR {' OR '.join(emailAddrList)}"
            else:
                strText = f"{withinPhrase.join(namesList)}"
        else:
            if emailAddrList:
                strText = f"{' OR '.join(emailAddrList)}"
            else:
                strText = "NONE"

        ## Only consider a login if it's longer than 4 characters. We may adjust this cutoff in the future.
        for userId in self._split_multi_value(person.user_id):
            if len(userId) > 4 and userId not in str(namesList):
                strText = strText + f" OR {userId.lower()}"

        if includeSimilarNamesExclusion and person.similar_names:
            ## Collect the addresses of every similar person, then add a single NOT clause.
            similarNamesEmailList = []
            for sID in person.similar_names:
                sPerson = self.search_by_id(sID)
                if sPerson is None:
                    ## The id is no longer in this list; nothing to exclude for it.
                    continue
                similarNamesEmailList.extend(
                    addr.upper() for addr in self._split_multi_value(sPerson.work_email_addresses))
            similarNamesEmailList = list(dict.fromkeys(similarNamesEmailList))
            if similarNamesEmailList:
                strText = f"({strText}) NOT W/2 ({' OR '.join(similarNamesEmailList)})"

        return strText

    def build_similar_names(self, people: List["Person"], keep_details: bool = False) -> Optional[Dict[uuid.UUID, Dict[uuid.UUID, Set[str]]]]:
        """Link people that share at least one name variation.

        For every person in people this stores the set of name variations in
        name_variants and the sorted ids of the other people sharing any of
        them in similar_names.

        Returns None unless keep_details is true, in which case it returns
        {person_id: {other_person_id: {shared variations}}}.
        """
        for p in people:
            p.name_variants = set(self.return_person_all_name_variations(p))

        ## variation -> ids of everyone that has it
        index: Dict[str, Set[uuid.UUID]] = defaultdict(set)
        for p in people:
            for v in p.name_variants:
                index[v].add(p._id)

        details: Optional[Dict[uuid.UUID, Dict[uuid.UUID, Set[str]]]] = {} if keep_details else None

        for p in people:
            collisions: Set[uuid.UUID] = set()
            sharedByOther = details.setdefault(p._id, {}) if keep_details else None
            for v in p.name_variants:
                for other_id in index[v]:
                    if other_id == p._id:
                        continue
                    collisions.add(other_id)
                    if keep_details:
                        sharedByOther.setdefault(other_id, set()).add(v)
            p.similar_names = sorted(collisions)

        return details
336    
class ATT_MasterAttorneyList(object):
    """Builds the ATT Master Attorney List (MAL) and runs integrity checks against it.

    The people are held in self.malPeopleList (a PeopleList). They are read
    from an Excel workbook and cached next to it in a pickle file with the
    same base name.
    """
    version = '0.01.0'

    ## There doesn't seem to be a consistent value in the "row" column in the MAL, so the row and
    ## column ranges are fixed here to avoid gap issues. This list is ordered because order matters.
    _EXCEL_TAB_PARAMETERS = [
        {"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":2535, "beginColNumber":1, "endColNumber":19},
        {"tabName":"Non-Attorneys", "beginRowNumber":2, "endRowNumber":3435, "beginColNumber":1, "endColNumber":19},
        {"tabName":"Split Role Attorneys", "beginRowNumber":2, "endRowNumber":21, "beginColNumber":1, "endColNumber":10},
    ]

    ## Person field -> spreadsheet column header. The Attorneys and Non-Attorneys tabs use the
    ## same headers, and the 'MAL ID' value is read from both.
    _PERSON_COLUMN_HEADERS = {
        "is_attorney": 'Is Attorney',
        "company": 'Company',
        "category": 'Category',
        "last_name": 'Last Name',
        "first_name": 'First Name',
        "alt_first_name": 'Alt First Name',
        "middle_initial": 'Middle Initial',
        "work_email_addresses": 'Email Addresses',
        "raw_email_addresses": 'EmailAddress RAW',
        "job_title": 'Title',
        "full_name_preferred": 'Full Name (Preferred)',
        "lh_normalized_name": 'Lighthouse Normalized Name',
        "user_id": 'UserID',
        "unique_attorney_row_number": 'MAL ID',
    }

    def __init__(self, masterAttorneyListFileName,fullNameOveridesFileName = False, forceNewPklFile = False, Encoding = 'UTF8'):
        """Load the MAL from its pickle cache, or build it from the spreadsheet.

        masterAttorneyListFileName: path of the MAL workbook; the pickle path
            is the same path with a .pkl extension.
        fullNameOveridesFileName: optional path of a pipe-delimited
            "email|full name" file; only applied when the MAL is (re)built.
        forceNewPklFile: rebuild from the spreadsheet even if a pickle exists.
        Encoding: accepted for compatibility; not currently used.
        """
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
            return

        if not forceNewPklFile:
            print("Pickle file doesnt exist.")
        self._BuildMalFromSpreadsheet(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)

    def _BuildMalFromSpreadsheet(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Ingest the workbook, apply overrides, link similar names, then write the pickle cache."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        if fullNameOveridesFileName:
            print("Loading full name overide values...")
            self.__LoadFullNameOverideValues(fullNameOveridesFileName)
            print("Full name overide values loaded.")
        print("Analyzing for similar name links...")
        self.malPeopleList.build_similar_names(self.malPeopleList.people)
        print("Similar names links added.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")

    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which opens the Excel workbook and ingests its rows into the people list."""
        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        try:
            for excelTab in self._EXCEL_TAB_PARAMETERS:
                tabName = excelTab['tabName']
                sht = xlBook.Worksheets(tabName)
                print(f"Ingesting sheet {tabName}.")

                ## Row 1 holds the headers: map header text -> column number.
                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    excelFieldPositionMatrix[sht.Cells(1, col).Value] = col

                if tabName == 'Split Role Attorneys':
                    ## Skip this tab for now.
                    ## TODO: look each row's 'Attorney Row' up with search_by_unique_attorney_row_number
                    ## and store its 'Dates as Counsel' ranges as (start, end) tuples in dates_as_counsel.
                    continue
                if tabName not in ('Attorneys', 'Non-Attorneys'):
                    print(f"ERROR UNKNOWN TAB! {tabName} HAVE NEEDED TAB NAMES CHANGED?")
                    continue

                ## TODO: Refactor the tab parameters later. Columns are not consistent between tabs.
                for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1):
                    values = {fieldName: sht.Cells(row, excelFieldPositionMatrix[header]).Value
                              for fieldName, header in self._PERSON_COLUMN_HEADERS.items()}
                    self.malPeopleList.add_person(Person(**values))
        finally:
            ## Always release the workbook, even when a header or sheet is missing.
            xlBook.Close()

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which saves the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName, 'wb') as outputFile:
            pickle.dump(self.malPeopleList, outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which loads a MAL people list object from a pkl file, for performance reasons."""
        ## NOTE: unpickling can run arbitrary code. Only load .pkl files this program wrote itself.
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which updates the MAL people list with the full name overide values.

        Each non-blank line of the file must be "emailAddress|fullNameOverideValue".
        """
        with open(fullNameOveridesFileName) as contents:
            for line in contents:
                line = line.rstrip("\n")
                if not line.strip():
                    ## Tolerate blank lines, e.g. a trailing newline at the end of the file.
                    continue
                emailAddress, fullNameOverideValue = line.split("|")
                self.malPeopleList.update_full_Name_overide(emailAddress, fullNameOverideValue)

    def SmartDedupeSet(self, currentSet):
        """Return a new set holding the lowercased values of currentSet, which merges values that differ only by case."""
        return {val.lower() for val in currentSet}

    @staticmethod
    def _ReportDuplicateValues(rawValues):
        """Print an ISSUE line each time a value from the given multi-value strings repeats.

        Each item of rawValues is either None or a string of values separated
        by a semicolon followed by a newline. Values are compared uppercased
        and stripped.
        """
        seen = set()
        for raw in rawValues:
            if raw is None:
                continue
            for value in raw.split(";\n"):
                value = value.upper().strip()
                if value in seen:
                    print(f"ISSUE:{value} is a dupe!")
                else:
                    seen.add(value)

    def RunMalEmailAddressIntegrityCheck(self):
        """This method performs an integrity check on the MAL by looking for duplicate email addresses."""
        print("Performing MAL email address integrity check...")
        ## Right now work addresses are stored as one string that has to be parsed to use.
        self._ReportDuplicateValues(person.work_email_addresses for person in self.malPeopleList.people)
        print("\nEmail address integrity check complete.\n\n")

    def RunMalLoginIdIntegrityCheck(self):
        """This method performs an integrity check on the MAL by looking for duplicate User Login ID values."""
        print("Performing MAL user login ID integrity check...")
        self._ReportDuplicateValues(person.user_id for person in self.malPeopleList.people)
        print("\nUser Login ID integrity check complete.\n\n")

    @staticmethod
    def _AnalyzeNumberList(numbers):
        """Return (gaps, duplicates) for a list of ints: the missing values between min and max, and the repeats."""
        if not numbers:
            return [], []
        sorted_nums = sorted(numbers)
        duplicates = []
        gaps = []
        previous = sorted_nums[0]
        for current in sorted_nums[1:]:
            if current == previous:
                duplicates.append(current)
            elif current > previous + 1:
                gaps.extend(range(previous + 1, current))
            previous = current
        return gaps, duplicates

    @staticmethod
    def _PrintNumberCheckReport(title, gaps, duplicates):
        """Print the gap/duplicate report section under the given title."""
        print(f"\n{title}")
        print("-" * 40)
        if gaps:
            print(f"Gaps found({len(gaps)}): {gaps}")
        else:
            print("No gaps found.")
        if duplicates:
            print(f"Duplicates Found({len(duplicates)}): {duplicates}")
        else:
            print("No duplicates found.")

    def RunUniqueAttorneyRowNumberCheck(self):
        """Performs an integrity check on the unique_attorney_row_number values, both for attorney and non-attorney.

        Prints the min-max range of each group (when the group has values),
        followed by any gaps and duplicates within it.
        """
        attorneyUniqueNumberList = []
        non_attorneyUniqueNumberList = []
        print("Performing MAL Unique Attorney Row Number integrity check...")
        for person in self.malPeopleList.people:
            unique_id = person.unique_attorney_row_number
            if not unique_id:
                continue
            if person.is_attorney == "NO":
                non_attorneyUniqueNumberList.append(int(unique_id))
            else:
                attorneyUniqueNumberList.append(int(unique_id))
        non_attorneyUniqueNumberList.sort()
        attorneyUniqueNumberList.sort()

        ## A group can legitimately be empty; only print a range when there is one.
        for numbers in (non_attorneyUniqueNumberList, attorneyUniqueNumberList):
            if numbers:
                print(f"{numbers[0]}-{numbers[-1]}")

        ## Check the attorney values first
        attorney_gaps, attorney_duplicates = self._AnalyzeNumberList(attorneyUniqueNumberList)
        self._PrintNumberCheckReport("Attorney Unique Number Check", attorney_gaps, attorney_duplicates)

        ## Check non-attorney values
        non_attorney_gaps, non_attorney_duplicates = self._AnalyzeNumberList(non_attorneyUniqueNumberList)
        self._PrintNumberCheckReport("Non-Attorney Unique Number Check", non_attorney_gaps, non_attorney_duplicates)
615    
if __name__ == '__main__':
    ## Ad-hoc driver: load the current MAL, look one person up, then run the integrity checks.
    masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\AT&T\Cybersecurity FCA Case\_ATT_Current_MAL\RG - ATT Cross-Matter Master Attorney List (20260224)(20260224-0358).xlsx"
    attMal = ATT_MasterAttorneyList(masterAttorneyListFileName)
    test = attMal.malPeopleList.search_by_email('ahickey@mayerbrown.com'.upper())
    ## Other lookups that have been used here:
    #test = attMal.malPeopleList.search_by_email('LMENRIQUEZ@DIRECTV.COM')
    #test = attMal.malPeopleList.return_list_of_matching_values('work_email_addresses','LMENRIQUEZ@DIRECTV.COM')
    #test = attMal.malPeopleList.search_by_login_id('JS6637')
    print(test)
    ## search_by_email returns None when nothing matches; there are no name variations to show then.
    if test is not None:
        print(attMal.malPeopleList.return_person_all_name_variations(test))
    attMal.RunMalEmailAddressIntegrityCheck()
    attMal.RunMalLoginIdIntegrityCheck()
    attMal.RunUniqueAttorneyRowNumberCheck()
628 nino.borges 913 ## emailCheckList = []
629     ## contents = open(r"C:\Test_Dir\ATT\JoshVerifyPrivResponse\Not_Verified_by_ATT.txt").readlines()
630     ## for line in contents:
631     ## line = line.replace("\n","")
632     ## emailCheckList.append(line)
633     ## print(f"There are {len(emailCheckList)} email addresses to check.")
634     ## notFoundList = []
635     ## for i in emailCheckList:
636     ## test = attMal.malPeopleList.search_by_email(i)
637     ## if test:
638     ## print(f"{test.first_name} {test.last_name} | {i}")
639     ## else:
640     ## notFoundList.append(i)
641     ## print("\n\n")
642     ## print(f"There are {len(notFoundList)} email addresses not found:")
643     ## notFoundList.sort()
644     ## for x in notFoundList:
645     ## print(x)
646    
647    
648    
649     ## lastNameCheckList = []
650     ## notFoundList = []
651     ## contents = open(r"C:\Test_Dir\ATT\JoshVerifyPrivResponse\Josh-ATT_No_title.txt").readlines()
652     ## for line in contents:
653     ## line = line.replace("\n","")
654     ## lName = line.split(",")[0]
655     ## lName = lName.strip()
656     ## lName = lName.upper()
657     ## test = attMal.malPeopleList.return_list_of_matching_values('last_name',lName)
658     ## if test:
659     ## #print(f"Possible match for {line}:")
660     ## for x in test:
661     ## print(f"{line}|{x.last_name},{x.first_name}|{x.company}|{x.job_title}|{x.is_attorney}")
662     ## else:
663     ## notFoundList.append(line)
664     ##
665     ## print("\n\n")
666     ## print(f"There are {len(notFoundList)} email addresses not found:")
667     ## notFoundList.sort()
668     ## for x in notFoundList:
669     ## print(x)
670    
671    
672    
673    
674     ## findList = ["JON.GREER@DIRECTV.COM","AJJOHNSON@GIBSONDUNN.COM","RLANG@GIBSONDUNN.COM","MATT.MILLER@CROWDSTRIKE.COM","ADAM.MONTGOMERY@FLEISHMAN.COM","ALPESHP@AMDOCS.COM",
675     ## "ERIC.PRATT@CROWDSTRIKE.COM","MROBERTS@GIBSONDUNN.COM","VVARGAS@KTSLAW.COM","TIM.WILLIAMS@FLEISHMAN.COM"]
676     ## for s in findList:
677     ## test = attMal.malPeopleList.search_by_email(s)
678     ## print(f"{s}|{test.last_name},{test.first_name}|{test.company}")
679    
680    
681 nino.borges 916 ## foundList = []
682     ## outputFile = open(r"C:\Test_Dir\ATT\20250716_ESQLogTest\NormalizedFromDOJLog(DB MATCH REPORT)2.txt",'w',encoding='UTF-8')
683     ## contents = open(r"C:\Test_Dir\ATT\20250716_ESQLogTest\NormalizedFromDOJLog(all-deduplicated-cleaned)2.txt").readlines()
684     ## for line in contents:
685     ## line = line.replace("\n","")
686     ## line = line.upper()
687     ## line = line.strip()
688     ## if "@" in line:
689     ## result = attMal.malPeopleList.search_by_email(line)
690     ## if result:
691     ## outputFile.write(f"{line}|{result.last_name},{result.first_name}|{result.is_attorney}\n")
692     ## results = attMal.malPeopleList.return_list_of_matching_values('full_name_preferred',line)
693     ## if results:
694     ## for result in results:
695     ## outputFile.write(f"{line}|{result.last_name},{result.first_name}|{result.is_attorney}\n")
696     ## elif "," in line:
697     ## lName = line.split(",")[0]
698     ## results = attMal.malPeopleList.return_list_of_matching_values('last_name',lName)
699     ## if results:
700     ## for result in results:
701     ## outputFile.write(f"{line}|{result.last_name},{result.first_name}|{result.is_attorney}\n")
702     ## else:
703     ## lName = line.split(" ")[-1]
704     ## results = attMal.malPeopleList.return_list_of_matching_values('last_name',lName)
705     ## if results:
706     ## for result in results:
707     ## outputFile.write(f"{line}|{result.last_name},{result.first_name}|{result.is_attorney}\n")
708     ## outputFile.close()