ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/ATT_MasterAttorneyList.py
Revision: 913
Committed: Fri Aug 1 21:27:27 2025 UTC (7 months, 3 weeks ago) by nino.borges
Content type: text/x-python
File size: 23591 byte(s)
Log Message:
A library for the creation and management of the ATT MAL.

File Contents

# User Rev Content
1 nino.borges 913 """
2    
3     ATT_MasterAttorneyList
4    
5     Created by:
6     Emanuel Borges
7     07.14.2025
8    
9     A library for the creation and management of the ATT MAL.
10    
11     """
12    
13     import os, uuid, pickle, re
14     from dataclasses import dataclass, field, fields
15     from typing import List, Tuple, Optional
16     from collections import namedtuple
17     from win32com.client import Dispatch
18    
19    
20     version = "1.0"
21    
@dataclass
class Person:
    """One row of the Master Attorney List (attorney or non-attorney).

    Every field defaults to ``None`` except ``_id``, which receives a fresh
    ``uuid.uuid4()`` per instance.  After construction, the string fields
    named in ``_NORMALIZED_FIELDS`` are stripped and upper-cased so lookups
    can compare values directly.
    """

    ## Fields that __post_init__ strips and upper-cases. full_name_overide,
    ## unique_attorney_row_number, dates_as_counsel and _id are deliberately left as given.
    ## (No annotation on purpose: an annotated name would become a dataclass field.)
    _NORMALIZED_FIELDS = (
        "first_name",
        "alt_first_name",
        "last_name",
        "work_email_addresses",
        "raw_email_addresses",
        "is_attorney",
        "split_role_date_range",
        "middle_initial",
        "company",
        "category",
        "job_title",
        "full_name_preferred",
        "user_id",
    )

    first_name: Optional[str] = None
    last_name: Optional[str] = None
    alt_first_name: Optional[str] = None
    ## TODO: Make these email addresses lists instead of strings
    work_email_addresses: Optional[str] = None
    #alt_work_email_addresses: Optional[str] = None
    raw_email_addresses: Optional[str] = None
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    middle_initial: Optional[str] = None
    company: Optional[str] = None
    category: Optional[str] = None
    job_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    user_id: Optional[str] = None
    full_name_overide: Optional[str] = None
    ## Only gather unique_attorney_row_number from the attorney and split role attorney tabs. NEVER from Non-Attorneys.
    unique_attorney_row_number: Optional[str] = None
    ## Will be saving this as a list of tuple pairs (startdate,enddate). Allowing None for now but may update this to forcing an empty list, to avoid mutable default issues.
    dates_as_counsel: Optional[List[Tuple[str, str]]] = None

    def __post_init__(self):
        """Strip and upper-case the string fields listed in ``_NORMALIZED_FIELDS``.

        Empty/None values are left untouched.  Non-string values (for example
        a numeric spreadsheet cell delivered as a float) are also left as-is
        instead of raising ``AttributeError`` on ``.strip()``.
        """
        for fieldName in self._NORMALIZED_FIELDS:
            value = getattr(self, fieldName)
            if value and isinstance(value, str):
                setattr(self, fieldName, value.strip().upper())
76    
77    
@dataclass
class PeopleList:
    """An in-memory collection of Person records with simple linear lookups."""

    people: List["Person"] = field(default_factory=list)

    def add_person(self, person: "Person"):
        """Append *person* to the list (no de-duplication is performed)."""
        self.people.append(person)

    def search_by_email(self, emailAddress: str) -> Optional["Person"]:
        """Returns the first person whose work_email_addresses matches. Assumes emailAddresses are unique.

        The value is compared both as given and stripped/upper-cased, because
        Person stores its e-mail field stripped and upper-cased; callers no
        longer have to pre-normalize the query.
        """
        if isinstance(emailAddress, str):
            normalized = emailAddress.strip().upper()
        else:
            normalized = emailAddress
        for person in self.people:
            if person.work_email_addresses in (emailAddress, normalized):
                return person
        return None

    def search_by_unique_attorney_row_number(self, uniqueAttorneyRowNumber: str) -> Optional["Person"]:
        """Returns the first matching uniqueAttorneyRowNumber value. Assumes uniqueAttorneyRowNumbers are unique"""
        for person in self.people:
            if person.unique_attorney_row_number == uniqueAttorneyRowNumber:
                return person
        return None

    def search_by_id(self, idNumber):
        """Returns the first matching idNumber value. Must be in format UUID('7414f78c-8289-4c9f-bd49-a5aaac35545f')."""
        for person in self.people:
            if person._id == idNumber:
                return person
        return None

    def return_list_of_matching_values(self, fieldName, value: str):
        """Returns a full list of people whose attribute *fieldName* equals *value* (exact comparison)."""
        return [person for person in self.people if getattr(person, fieldName) == value]

    def list_people(self):
        """Print every person in the list, one per line."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress: str, fullNameOverideValue) -> None:
        """Set full_name_overide on every person whose work e-mail matches *emailAddress*.

        Both arguments must be strings; they are stripped and upper-cased
        before use, so padded values such as "a@b.com | Doe, Jane" still match.
        Prints a warning when the override's last name ("LAST, FIRST" or
        "FIRST LAST" form) differs from the person's last_name, and another
        when no person matches the e-mail address.  Returns nothing.
        """
        targetEmail = emailAddress.strip().upper()
        overideValue = fullNameOverideValue.strip().upper()

        ## Work out the last name once: text before the comma, otherwise the final word.
        if "," in overideValue:
            lastName = overideValue.split(",")[0].strip()
        else:
            nameParts = overideValue.split()
            lastName = nameParts[-1] if nameParts else ""

        valueUpdated = False
        for person in self.people:
            if person.work_email_addresses != targetEmail:
                continue
            person.full_name_overide = overideValue
            valueUpdated = True
            ## Give a quick warning as you add the override value into the database if the last name differs.
            if lastName != person.last_name:
                print(f"WARNING: Overide last name value {lastName} does not match {person.last_name}.")
        if not valueUpdated:
            print(f"WARNING: No email address match for {emailAddress} found.")
140    
class ATT_MasterAttorneyList(object):
    """A class for building and performing functions against the ATT Master Attorney List."""
    version = '0.01.0'

    ## Person field name -> header text (row 1) on the "Attorneys" and "Non-Attorneys" tabs.
    ## Order matters only in that cells are read in this order for each row.
    _PERSON_FIELD_HEADERS = {
        "is_attorney": "Is Attorney",
        "company": "Company",
        "category": "Category",
        "last_name": "Last Name",
        "first_name": "First Name",
        "alt_first_name": "Alt First Name",
        "middle_initial": "Middle Initial",
        "work_email_addresses": "Email Addresses",
        "raw_email_addresses": "EmailAddress RAW",
        "job_title": "Title",
        "full_name_preferred": "Full Name (Preferred)",
        "user_id": "UserID",
        "unique_attorney_row_number": "UID",
    }

    def __init__(self, masterAttorneyListFileName,fullNameOveridesFileName = False, forceNewPklFile = False, Encoding = 'UTF8'):
        """Assumes the MAL is a spreadsheet (for now). MAL gets saved to a pkl file for performance reasons. pkl will be used unless forceNewPklFile is set to true.

        masterAttorneyListFileName -- path to the MAL workbook; the pickle cache is the same path with a ".pkl" extension.
        fullNameOveridesFileName   -- optional pipe-delimited "email|Full Name" file. It is only applied when the
                                      MAL is (re)built from the spreadsheet, not when an existing pickle is loaded.
        forceNewPklFile            -- rebuild from the spreadsheet even if a pickle already exists.
        Encoding                   -- accepted for interface compatibility; not used by this class.
        """
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
            return

        if not forceNewPklFile:
            print("Pickle file doesnt exist.")
        self.__BuildMalFromSpreadsheet(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)

    def __BuildMalFromSpreadsheet(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Pseudo-private method which ingests the workbook, applies any full name overides, then writes the pickle cache."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        if fullNameOveridesFileName:
            print("Loading full name overide values...")
            self.__LoadFullNameOverideValues(fullNameOveridesFileName)
            print("Full name overide values loaded.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")

    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass.

        Uses Excel through COM (win32com Dispatch). The workbook is closed even if ingestion fails.
        """
        ## There doesnt seem to be a consistent value in the "row" column in the MAL, so setting these parameters here to avoid gap issues.
        ## excelTabParametersList should always be an ordered list because now order matters.
        ## TODO: Refactor the excelTabParametersList later. Didnt realize columns were not consistent.
        excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":221, "beginColNumber":1, "endColNumber":14},
                                  {"tabName":"Non-Attorneys", "beginRowNumber":2, "endRowNumber":1211, "beginColNumber":1, "endColNumber":14},
                                  {"tabName":"Split Role Attorneys", "beginRowNumber":2, "endRowNumber":21, "beginColNumber":1, "endColNumber":10}]

        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        try:
            for excelTab in excelTabParametersList:
                sht = xlBook.Worksheets(excelTab['tabName'])
                print(f"Ingesting sheet {excelTab['tabName']}.")

                ## Map each header found in row 1 to its column number.
                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    excelFieldPositionMatrix[sht.Cells(1, col).Value] = col

                if excelTab['tabName'] == 'Attorneys':
                    fieldHeaders = self._PERSON_FIELD_HEADERS
                elif excelTab['tabName'] == 'Non-Attorneys':
                    ## Make sure to NOT grab the unique attorney row number from here.
                    ## (Previously the UID column was still read for this tab, contradicting this rule.)
                    fieldHeaders = {fieldName: header
                                    for fieldName, header in self._PERSON_FIELD_HEADERS.items()
                                    if fieldName != 'unique_attorney_row_number'}
                elif excelTab['tabName'] == 'Split Role Attorneys':
                    ## Skip this tab for now. Intended logic, kept for reference:
                    ## unique_attorney_row_number = sht.Cells(row,excelFieldPositionMatrix['Attorney Row']).Value
                    ## matchedPerson = self.malPeopleList.search_by_unique_attorney_row_number(unique_attorney_row_number)
                    ## if matchedPerson:
                    ##     ## dates_as_counsel should always be a two string value tuple (startdate,enddate).
                    ##     datesAsCounselValue = sht.Cells(row,excelFieldPositionMatrix['Dates as Counsel']).Value
                    ##     datesAsCounselList = []
                    ##     ## First get rid of any extra data that is on a new line. Note that they shouldnt be seperating the date ranges by newline.
                    ##     datesAsCounselValue = datesAsCounselValue.split("\n")[0]
                    ##     ## Next split the ranges correctly by semicolon
                    ##     dateRanges = datesAsCounselValue.split(";")
                    ##     for dateRange in dateRanges:
                    ##         ## Split out the start and end, allowing non-date words. (current, present, etc) however force these to be uppercase.
                    ##         counselStartDate, counselEndDate = dateRange.split("-")
                    ##         counselStartDate = counselStartDate.upper().strip()
                    ##         counselEndDate = counselEndDate.upper().strip()
                    ##         datesAsCounselList.append((counselStartDate,counselEndDate))
                    ##     matchedPerson.dates_as_counsel = datesAsCounselList
                    continue
                else:
                    print(f"ERROR UNKNOWN TAB! {excelTab['tabName']} HAVE NEEDED TAB NAMES CHANGED?")
                    continue

                for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1):
                    personValues = {fieldName: sht.Cells(row, excelFieldPositionMatrix[header]).Value
                                    for fieldName, header in fieldHeaders.items()}
                    self.malPeopleList.add_person(Person(**personValues))
        finally:
            ## Always release the workbook, even when a header is missing or a cell read fails.
            xlBook.Close()

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName, 'wb') as outputFile:
            pickle.dump(self.malPeopleList, outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons."""
        ## SECURITY NOTE: unpickling can execute arbitrary code. Only load .pkl files written by this tool.
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which will update the MAL people list object with the full name overide values.

        Each non-blank line must be "emailAddress|fullNameOverideValue"; a line with
        a different number of "|" separators raises ValueError. Blank lines are skipped.
        """
        with open(fullNameOveridesFileName) as contents:
            for line in contents:
                line = line.replace("\n", "")
                if not line.strip():
                    continue
                emailAddress, fullNameOverideValue = line.split("|")
                self.malPeopleList.update_full_Name_overide(emailAddress, fullNameOverideValue)

    def SmartDedupeSet(self, currentSet):
        """A method that attempts to do some additional deduplication of the values in a set by lowering all values and deduplicating. Returns a lowered deduplicated set."""
        return {val.lower() for val in currentSet}

    def RunMalEmailAddressIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing and looking for duplicate email addresses.

        Prints one "ISSUE" line for every work_email_addresses value (after stripping)
        that was already seen on an earlier person. People without a work address are ignored.
        """
        ## The alt-work-email checks were removed: that field is no longer populated, so they could never fire.
        seenWorkAddrs = set()
        print("Performing MAL email address integrity check...")
        for person in self.malPeopleList.people:
            workAddr = person.work_email_addresses
            if workAddr is None:
                continue
            workAddr = workAddr.strip()
            if workAddr in seenWorkAddrs:
                print(f"ISSUE:{workAddr} is a dupe!")
            else:
                seenWorkAddrs.add(workAddr)
        print("\nEmail address integrity check complete.\n\n")
331    
332    
# Script entry point: ad-hoc driver code that only runs when this module is executed directly.
333     if __name__ == '__main__':
# NOTE(review): hard-coded, machine-specific workbook path; the .pkl cache is looked for next to it
# (same path with a ".pkl" extension).
334     masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\AT&T\Cybersecurity FCA Case\_ATT_Current_MAL\RG - ATT Master Attorney List (20250718)(20250718-0100).xlsx"
# No overrides file and no forceNewPklFile are passed, so an existing pickle is loaded if present;
# otherwise the MAL is built from the spreadsheet without full-name overrides.
335     attMal = ATT_MasterAttorneyList(masterAttorneyListFileName)
336     #test = attMal.malPeopleList.search_by_email('DD0281@ATT.COM')
337     #print(test)
338     #attMal.RunMalEmailAddressIntegrityCheck()
339     ## emailCheckList = []
340     ## contents = open(r"C:\Test_Dir\ATT\JoshVerifyPrivResponse\Not_Verified_by_ATT.txt").readlines()
341     ## for line in contents:
342     ## line = line.replace("\n","")
343     ## emailCheckList.append(line)
344     ## print(f"There are {len(emailCheckList)} email addresses to check.")
345     ## notFoundList = []
346     ## for i in emailCheckList:
347     ## test = attMal.malPeopleList.search_by_email(i)
348     ## if test:
349     ## print(f"{test.first_name} {test.last_name} | {i}")
350     ## else:
351     ## notFoundList.append(i)
352     ## print("\n\n")
353     ## print(f"There are {len(notFoundList)} email addresses not found:")
354     ## notFoundList.sort()
355     ## for x in notFoundList:
356     ## print(x)
357    
358    
359    
360     ## lastNameCheckList = []
361     ## notFoundList = []
362     ## contents = open(r"C:\Test_Dir\ATT\JoshVerifyPrivResponse\Josh-ATT_No_title.txt").readlines()
363     ## for line in contents:
364     ## line = line.replace("\n","")
365     ## lName = line.split(",")[0]
366     ## lName = lName.strip()
367     ## lName = lName.upper()
368     ## test = attMal.malPeopleList.return_list_of_matching_values('last_name',lName)
369     ## if test:
370     ## #print(f"Possible match for {line}:")
371     ## for x in test:
372     ## print(f"{line}|{x.last_name},{x.first_name}|{x.company}|{x.job_title}|{x.is_attorney}")
373     ## else:
374     ## notFoundList.append(line)
375     ##
376     ## print("\n\n")
377     ## print(f"There are {len(notFoundList)} email addresses not found:")
378     ## notFoundList.sort()
379     ## for x in notFoundList:
380     ## print(x)
381    
382    
383    
384    
385     ## findList = ["JON.GREER@DIRECTV.COM","AJJOHNSON@GIBSONDUNN.COM","RLANG@GIBSONDUNN.COM","MATT.MILLER@CROWDSTRIKE.COM","ADAM.MONTGOMERY@FLEISHMAN.COM","ALPESHP@AMDOCS.COM",
386     ## "ERIC.PRATT@CROWDSTRIKE.COM","MROBERTS@GIBSONDUNN.COM","VVARGAS@KTSLAW.COM","TIM.WILLIAMS@FLEISHMAN.COM"]
387     ## for s in findList:
388     ## test = attMal.malPeopleList.search_by_email(s)
389     ## print(f"{s}|{test.last_name},{test.first_name}|{test.company}")
390    
391    
# Ad-hoc match report: each line of the cleaned DOJ-log list is looked up in the MAL and every hit is
# written to the report file as "<line>|<last_name>,<first_name>|<is_attorney>".
# NOTE(review): this export has lost all indentation, so the nesting of the branches below cannot be
# determined from this view -- in particular whether `elif "," in line` pairs with `if "@" in line`
# or with `if results`. Confirm against the working copy before changing any of this logic.
# foundList is assigned here but is not read anywhere in the visible lines.
392     foundList = []
# The report is written as UTF-8; the input file below is opened with the platform default encoding
# and its handle is never explicitly closed. Neither file is protected by a `with` block.
393     outputFile = open(r"C:\Test_Dir\ATT\20250716_ESQLogTest\NormalizedFromDOJLog(DB MATCH REPORT)2.txt",'w',encoding='UTF-8')
394     contents = open(r"C:\Test_Dir\ATT\20250716_ESQLogTest\NormalizedFromDOJLog(all-deduplicated-cleaned)2.txt").readlines()
395     for line in contents:
# Normalize the lookup value: drop the newline, upper-case, trim -- presumably so it compares equal
# to the stripped, upper-cased values that Person stores.
396     line = line.replace("\n","")
397     line = line.upper()
398     line = line.strip()
# Values containing "@" are treated as e-mail addresses and matched on work_email_addresses
# (first match only).
399     if "@" in line:
400     result = attMal.malPeopleList.search_by_email(line)
401     if result:
402     outputFile.write(f"{line}|{result.last_name},{result.first_name}|{result.is_attorney}\n")
# Exact match of the whole line against full_name_preferred; every matching person is reported.
403     results = attMal.malPeopleList.return_list_of_matching_values('full_name_preferred',line)
404     if results:
405     for result in results:
406     outputFile.write(f"{line}|{result.last_name},{result.first_name}|{result.is_attorney}\n")
# "LAST, FIRST" form: the text before the first comma is used as the last name (not re-stripped).
407     elif "," in line:
408     lName = line.split(",")[0]
409     results = attMal.malPeopleList.return_list_of_matching_values('last_name',lName)
410     if results:
411     for result in results:
412     outputFile.write(f"{line}|{result.last_name},{result.first_name}|{result.is_attorney}\n")
# Otherwise the final space-separated token is used as the last name.
413     else:
414     lName = line.split(" ")[-1]
415     results = attMal.malPeopleList.return_list_of_matching_values('last_name',lName)
416     if results:
417     for result in results:
418     outputFile.write(f"{line}|{result.last_name},{result.first_name}|{result.is_attorney}\n")
419     outputFile.close()