ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/Amazon_NamesNormQC.py
Revision: 956
Committed: Fri Dec 5 16:50:27 2025 UTC (3 months, 3 weeks ago) by nino.borges
Content type: text/x-python
File size: 39285 byte(s)
Log Message:
Added return_str_string2, which is a more complicated version of the original method.  I kept the original in there, but I'm using this updated method when ExportFullSTRList is called.

File Contents

# User Rev Content
1 nino.borges 834 """
2    
3 nino.borges 847 Amazon_NamesNormQC
4 nino.borges 834
5     Created by:
6     Emanuel Borges
7     11.21.2024
8    
9 nino.borges 847 This Library will assist with the process of performing Names Normalization QC on the Amazon privilege logs.
10 nino.borges 834
11     """
12    
13     import os, uuid, pickle, re
14 nino.borges 847 #import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
15 nino.borges 877 from dataclasses import dataclass, field, fields
16 nino.borges 854 from typing import List, Tuple, Optional
17 nino.borges 834 from collections import namedtuple
18     from win32com.client import Dispatch
19    
20    
@dataclass
class Person:
    """One entry from the master attorney list (MAL).

    Every attribute is optional and defaults to None, except ``_id`` which
    receives a freshly generated UUID for each instance.
    """
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    work_email_address: Optional[str] = None
    alt_work_email_address: Optional[str] = None
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    sidley_validated: Optional[str] = None
    category: Optional[str] = None
    organization: Optional[str] = None
    job_title: Optional[str] = None
    business_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    login: Optional[str] = None
    department_fine: Optional[str] = None
    addressed_during_caag: Optional[str] = None
    #last_updated: Optional[str] = None
    full_name_overide: Optional[str] = None
    ## Only gather unique_attorney_row_number from the attorney and split role attorney tabs. NEVER from downgrades.
    unique_attorney_row_number: Optional[str] = None
    ## Saved as a list of tuple pairs (startdate,enddate). None is allowed for now; this may later
    ## be changed to force an empty list.
    dates_as_counsel: Optional[List[Tuple[str, str]]] = None

    def __post_init__(self):
        """Strip surrounding whitespace from, and uppercase, the text fields listed below.

        Falsy values (None or an empty string) are left untouched.
        full_name_overide, unique_attorney_row_number, dates_as_counsel and _id
        are not normalized here.
        """
        fieldsToNormalize = (
            "first_name",
            "last_name",
            "work_email_address",
            "alt_work_email_address",
            "is_attorney",
            "split_role_date_range",
            "sidley_validated",
            "category",
            "organization",
            "job_title",
            "business_title",
            "full_name_preferred",
            "login",
            "department_fine",
            "addressed_during_caag",
        )
        for attrName in fieldsToNormalize:
            rawValue = getattr(self, attrName)
            if rawValue:
                setattr(self, attrName, rawValue.strip().upper())
80 nino.borges 834
@dataclass
class PeopleList:
    """A searchable, in-memory collection of Person entries.

    Lookups compare against the values stored on each Person exactly as given.
    Person.__post_init__ uppercases its text fields, so callers normally need to
    pass uppercase search values.
    """
    people: List["Person"] = field(default_factory=list)

    def add_person(self, person: "Person") -> None:
        """Appends person to the collection."""
        self.people.append(person)

    def search_by_email(self, emailAddress: str) -> Optional["Person"]:
        """Returns the first person whose work or alt work email equals emailAddress, else None. Assumes emailAddresses are unique"""
        for person in self.people:
            if person.work_email_address == emailAddress or person.alt_work_email_address == emailAddress:
                return person
        return None

    def search_by_unique_attorney_row_number(self, uniqueAttorneyRowNumber: str) -> Optional["Person"]:
        """Returns the first matching uniqueAttorneyRowNumber value, else None. Assumes uniqueAttorneyRowNumbers are unique"""
        for person in self.people:
            if person.unique_attorney_row_number == uniqueAttorneyRowNumber:
                return person
        return None

    def search_by_id(self, idNumber) -> Optional["Person"]:
        """Returns the first matching idNumber value, else None. Must be in format UUID('7414f78c-8289-4c9f-bd49-a5aaac35545f')."""
        for person in self.people:
            if person._id == idNumber:
                return person
        return None

    def return_list_of_matching_values(self, fieldName, value: str) -> List["Person"]:
        """Returns a new list of every person whose fieldName attribute equals value."""
        return [person for person in self.people if getattr(person, fieldName) == value]

    def return_list_of_partial_email_matches(self, emailAddress: str) -> List["Person"]:
        """Returns a list of partial email address matches, comparing only the user name part (the text before the first @).

        A person is returned once if the user name part of either the work email
        address or the alt work email address matches.
        """
        ## Grab the username part of the email address
        emailAddressPart = emailAddress.split("@")[0]
        matchingPeopleList = []
        for person in self.people:
            ## Check both addresses independently, so a person is found through the alt address
            ## whether or not a (non matching) work address also exists.
            for candidateAddress in (person.work_email_address, person.alt_work_email_address):
                if candidateAddress and emailAddressPart == candidateAddress.split("@")[0]:
                    matchingPeopleList.append(person)
                    break
        return matchingPeopleList

    def return_soppy_search_list(self, fieldName, value: str) -> List["Person"]:
        """Performs a sloppy search where the value is IN the field, returns full list of possible matches"""
        ## Be very careful in using this because you can get a lot of false positives.
        matchingPeopleList = []
        for person in self.people:
            fieldValue = getattr(person, fieldName)
            if fieldValue is not None and value in fieldValue:
                matchingPeopleList.append(person)
        return matchingPeopleList

    def list_people(self) -> None:
        """Prints every person in the collection, one per line."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress: str, fullNameOverideValue) -> None:
        """Sets full_name_overide (uppercased) on every person whose work email equals emailAddress (uppercased).

        Only work_email_address is compared; the alt address is not consulted.
        Prints a warning when the override's last name differs from the person's
        last_name, and another warning when no person matched at all.
        """
        valueUpdated = False
        emailAddressUpper = emailAddress.upper()
        ## The override is either "LAST, FIRST" or "FIRST ... LAST".
        if "," in fullNameOverideValue:
            lastName = fullNameOverideValue.split(",")[0]
        else:
            lastName = fullNameOverideValue.split(" ")[-1]
        for person in self.people:
            if person.work_email_address != emailAddressUpper:
                continue
            person.full_name_overide = fullNameOverideValue.upper()
            valueUpdated = True
            ## Give a quick warning as you add the override value into the database if the last name differs.
            if lastName.upper() != person.last_name:
                print(f"WARNING: Overide last name value {lastName.upper()} does not match {person.last_name}.")
        if not valueUpdated:
            print(f"WARNING: No email address match for {emailAddress} found.")

    @staticmethod
    def _assemble_str_text(namesList, emailAddrList, withinPhrase):
        """Joins the name terms with withinPhrase and ORs them with the email terms; returns "NONE" when both are empty."""
        if not namesList:
            if emailAddrList:
                return f"{' OR '.join(emailAddrList)}"
            return "NONE"
        namesText = withinPhrase.join(namesList)
        if not emailAddrList:
            return f"{namesText}"
        emailText = ' OR '.join(emailAddrList)
        ## Only wrap the email terms in parentheses when there is more than one of them.
        if len(emailAddrList) > 1:
            return f"({namesText}) OR ({emailText})"
        return f"({namesText}) OR {emailText}"

    def return_str_string(self, person):
        """Returns the STR search string for a given person: last name W/3 first name, OR'd with the email addresses."""
        namesList = [name for name in (person.last_name, person.first_name) if name]
        emailAddrList = [addr for addr in (person.work_email_address, person.alt_work_email_address) if addr]
        return self._assemble_str_text(namesList, emailAddrList, ' W/3 ')

    def return_str_string2(self, person):
        """Returns the STR search string for a given person, also using full_name_preferred and login.

        Name variants come from last_name/first_name plus the last/first parts
        parsed out of full_name_preferred ("LAST, FIRST" or "FIRST ... LAST").
        Variants are de-duplicated in the order they were found, so the output is
        the same on every run. Empty parsed parts are ignored. The W/n distance
        is the total number of space separated words across all first and last
        name variants (an empty group counts as 1). The person object is not
        modified.
        """
        firstNames = []
        lastNames = []

        def addUnique(bucket, name):
            ## Keeps insertion order, unlike a set, so the OR groups are reproducible.
            if name and name not in bucket:
                bucket.append(name)

        addUnique(lastNames, person.last_name)
        addUnique(firstNames, person.first_name)

        preferredName = person.full_name_preferred
        if preferredName:
            ## Work on a local copy. These two replacements stop embedded commas from confusing the split below.
            preferredName = preferredName.replace('(SHE, HER)', "(SHE HER)")
            preferredName = preferredName.replace(",(LEGAL),", " (LEGAL),")
            if "," in preferredName:
                parsedLast, parsedFirst = preferredName.split(",", 1)
            else:
                *firstParts, parsedLast = preferredName.split(" ")
                parsedFirst = " ".join(firstParts)
            addUnique(lastNames, parsedLast.strip())
            addUnique(firstNames, parsedFirst.strip())

        namesList = []
        for nameGroup in (lastNames, firstNames):
            if len(nameGroup) > 1:
                namesList.append(f'({" OR ".join(nameGroup)})')
            elif nameGroup:
                namesList.append(nameGroup[0])

        def wordTotal(nameGroup):
            return sum(len(name.split(" ")) for name in nameGroup) or 1

        withinWordCount = wordTotal(firstNames) + wordTotal(lastNames)
        withinPhrase = f' W/{str(withinWordCount)} '

        emailAddrList = [addr for addr in (person.work_email_address, person.alt_work_email_address) if addr]
        strText = self._assemble_str_text(namesList, emailAddrList, withinPhrase)

        ## Only consider the login if it's longer than 4 characters. We may adjust this cutoff to more than 5 in the future.
        ## The login is skipped when it already appears (as a substring) in the name terms.
        login = person.login
        if login and len(login) > 4 and login not in str(namesList):
            strText = strText + f" OR {login.lower()}"
        return strText
265    
class NamesVerification(object):
    """A class for automating the process of performing QC on the names within the Amazon privilege logs."""
    version = '0.12.0'

    def __init__(self, cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName, forceNewPklFile = False, Encoding = 'UTF8'):
        """Initializes the MAL (master attorney list) people list.

        masterAttorneyListFileName is the MAL spreadsheet (for now). The ingested
        MAL gets saved to a pkl file next to the spreadsheet (same name, .pkl
        extension) for performance reasons. That pkl will be loaded instead of
        the spreadsheet unless forceNewPklFile is true or the pkl does not exist.
        fullNameOveridesFileName is only read when the MAL is (re)built from the
        spreadsheet.

        cleanedDatExportFileName and Encoding are accepted but not used by this
        constructor.
        """
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
        else:
            if not forceNewPklFile:
                print("Pickle file doesnt exist.")
            self.__BuildMalFromSources(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)

    def __BuildMalFromSources(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Pseudo-private method which ingests the spreadsheet, applies the full name overides, then writes the pkl backup."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        print("Loading full name overide values...")
        self.__LoadFullNameOverideValues(fullNameOveridesFileName)
        print("Full name overide values loaded.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")

    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass.

        The workbook is closed even when ingestion raises. The Excel application
        object itself is not quit by this method.
        """
        ## There doesnt seem to be a consistent value in the "row" column in the MAL, so setting these parameters here to avoid gap issues.
        ## excelTabParametersList should always be an ordered list because now order matters:
        ## the Split Role Attorneys tab updates people that were added from the Attorneys tab.
        excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":11082, "beginColNumber":1, "endColNumber":17},
                                  {"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":814, "beginColNumber":1, "endColNumber":16},
                                  {"tabName":"Split Role Attorneys", "beginRowNumber":2, "endRowNumber":46, "beginColNumber":1, "endColNumber":10}]

        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        try:
            for excelTab in excelTabParametersList:
                sht = xlBook.Worksheets(excelTab['tabName'])
                print(f"Ingesting sheet {excelTab['tabName']}.")
                ## Map each header text found in row 1 to its column number.
                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    excelFieldPositionMatrix[sht.Cells(1, col).Value] = col

                def cellValue(rowNumber, headerText, sht=sht, positions=excelFieldPositionMatrix):
                    return sht.Cells(rowNumber, positions[headerText]).Value

                for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1):
                    if row == 5000:
                        print("5,000 row mark reached.")
                    elif row == 10000:
                        print("10,000 row mark reached.")
                    ## TODO: Refactor the excelTabParametersList later. Didnt realize columns were not consistent.
                    if excelTab['tabName'] == 'Attorneys':
                        ## Note the header ' Validated by OC?' has a leading space on this tab.
                        self.malPeopleList.add_person(Person(is_attorney = cellValue(row, 'Is Attorney'),
                                                             split_role_date_range = cellValue(row, 'Split Role - Dates as Counsel'),
                                                             sidley_validated = cellValue(row, ' Validated by OC?'),
                                                             category = cellValue(row, 'Category'),
                                                             organization = cellValue(row, 'Organization'),
                                                             last_name = cellValue(row, 'Last Name'),
                                                             first_name = cellValue(row, 'First Name'),
                                                             work_email_address = cellValue(row, 'Work Email'),
                                                             alt_work_email_address = cellValue(row, 'Alt Work Email'),
                                                             job_title = cellValue(row, 'Job Title'),
                                                             business_title = cellValue(row, 'Business Title'),
                                                             full_name_preferred = cellValue(row, 'Full Name (Preferred)'),
                                                             login = cellValue(row, 'Login'),
                                                             department_fine = cellValue(row, 'Department (Fine)'),
                                                             unique_attorney_row_number = cellValue(row, 'Row'),
                                                             addressed_during_caag = cellValue(row, 'Comments')))
                    elif excelTab['tabName'] == 'Downgrades':
                        ## Make sure to NOT grab the unique attorney row number from here
                        self.malPeopleList.add_person(Person(is_attorney = cellValue(row, 'Is Attorney'),
                                                             sidley_validated = cellValue(row, 'Validated by OC?'),
                                                             organization = cellValue(row, 'Organization'),
                                                             last_name = cellValue(row, 'Last Name'),
                                                             first_name = cellValue(row, 'First Name'),
                                                             work_email_address = cellValue(row, 'Work Email'),
                                                             alt_work_email_address = cellValue(row, 'Alt Work Email'),
                                                             job_title = cellValue(row, 'Job Title'),
                                                             business_title = cellValue(row, 'Business Title'),
                                                             full_name_preferred = cellValue(row, 'Full Name (Preferred)'),
                                                             login = cellValue(row, 'Login'),
                                                             department_fine = cellValue(row, 'Department (Fine)'),
                                                             addressed_during_caag = cellValue(row, 'Addressed during CAAG')))
                    elif excelTab['tabName'] == 'Split Role Attorneys':
                        unique_attorney_row_number = cellValue(row, 'Attorney Row')
                        if unique_attorney_row_number is None:
                            ## A blank cell would otherwise match the first person without a row number
                            ## (every Downgrades entry), attaching the dates to the wrong person.
                            continue
                        matchedPerson = self.malPeopleList.search_by_unique_attorney_row_number(unique_attorney_row_number)
                        if matchedPerson:
                            datesAsCounselValue = cellValue(row, 'Dates as Counsel')
                            if not datesAsCounselValue:
                                print(f"WARNING: Empty Dates as Counsel value for attorney row {unique_attorney_row_number}.")
                                continue
                            ## dates_as_counsel entries should always be a two string value tuple (startdate,enddate).
                            datesAsCounselList = []
                            ## First get rid of any extra data that is on a new line. Note that they shouldnt be seperating the date ranges by newline.
                            datesAsCounselValue = datesAsCounselValue.split("\n")[0]
                            ## Next split the ranges correctly by semicolon
                            for dateRange in datesAsCounselValue.split(";"):
                                if not dateRange.strip():
                                    ## Ignore empty pieces, such as the one left by a trailing semicolon.
                                    continue
                                ## Split out the start and end, allowing non-date words. (current, present, etc) however force these to be uppercase.
                                ## A range that does not contain exactly one "-" still raises ValueError here.
                                counselStartDate, counselEndDate = dateRange.split("-")
                                datesAsCounselList.append((counselStartDate.upper().strip(), counselEndDate.upper().strip()))
                            matchedPerson.dates_as_counsel = datesAsCounselList
                    else:
                        print(f"ERROR UNKNOWN TAB! {excelTab['tabName']} HAVE NEEDED TAB NAMES CHANGED?")
        finally:
            xlBook.Close()

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName, 'wb') as outputFile:
            pickle.dump(self.malPeopleList, outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons."""
        ## NOTE(review): pickle.load executes whatever the file describes; only load pkl files this tool wrote itself.
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which will update the MAL people list object with the full name overide values.

        Each non-blank line of the file must be emailAddress|fullNameOverideValue.
        """
        with open(fullNameOveridesFileName) as contents:
            for line in contents:
                line = line.replace("\n", "")
                if not line.strip():
                    continue
                emailAddress, fullNameOverideValue = line.split("|")
                self.malPeopleList.update_full_Name_overide(emailAddress, fullNameOverideValue)

    def SmartDedupeSet(self, currentSet):
        """A method that attempts to do some additional deduplication of the values in a set by lowering all values and deduplicating. Returns a lowered deduplicated set."""
        return {val.lower() for val in currentSet}

    def RunMalEmailAddressIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing and looking for duplicate email addresses.

        Issues are printed; nothing is returned. People are examined in list
        order, so an address is only reported against addresses seen earlier.
        """
        seenWorkAddrs = set()
        seenAltAddrs = set()
        print("Performing MAL email address integrity check...")
        for person in self.malPeopleList.people:
            altAddr = person.alt_work_email_address
            workAddr = person.work_email_address
            if altAddr is not None:
                altAddr = altAddr.strip()
                if altAddr in seenWorkAddrs:
                    print(f"ISSUE:{altAddr} is a dupe of an workAddr.")
                if altAddr in seenAltAddrs:
                    print(f"ISSUE:{altAddr} is a dupe!")
                else:
                    seenAltAddrs.add(altAddr)
            if workAddr is not None:
                workAddr = workAddr.strip()
                if workAddr in seenAltAddrs:
                    print(f"ISSUE:{workAddr} is a dupe of an altAddr.")
                if workAddr in seenWorkAddrs:
                    print(f"ISSUE:{workAddr} is a dupe!")
                else:
                    seenWorkAddrs.add(workAddr)
        print("\nEmail address integrity check complete.\n\n")

    def RunMalEmailOutsideEmailFieldsIntegrityCheck(self):
        """This method performs an integrity check on the MAL by looking for email addresses that exist in fields other than the email address fields."""
        ## Right now this looks for the @ symbol.
        ## Editable list of fields that should be excluded from this test, especially those that should already have email addresses
        fieldsToExcludeList = ['work_email_address', 'alt_work_email_address','_id','dates_as_counsel','unique_attorney_row_number']
        print("Performing MAL email addresses outside of email address fields integrity check...")
        fieldsToSearchList = [f.name for f in fields(Person) if f.name not in fieldsToExcludeList]
        for person in self.malPeopleList.people:
            for fieldName in fieldsToSearchList:
                testValue = getattr(person, fieldName)
                ## Only text values can contain an email address.
                if testValue and isinstance(testValue, str) and "@" in testValue:
                    print(f"ISSUE: The email address {testValue} exists in the non-email field {fieldName} for unique row# {person.unique_attorney_row_number}.")
        print("\nEmail addresss outside of email fields integrity check complete.\n\n")

    def RunRowNumberIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing the hard-coded row numbers of the attorneys, including split role.

        Prints a warning for every attorney without a row number, then prints
        the set of numbers missing between the lowest and highest row number
        found (or a message that there are no gaps). Non-attorneys are not
        checked here; they are ingested without a row number.
        """
        print("Performing MAL hard-coded row number integrity check...")
        attorneyPeopleList = self.malPeopleList.return_list_of_matching_values('is_attorney','YES')
        splitRolePeopleList = self.malPeopleList.return_list_of_matching_values('is_attorney','SPLIT ROLE')
        fullAttorneyPeopleList = [*attorneyPeopleList, *splitRolePeopleList]
        print(f"Analyzing all {len(fullAttorneyPeopleList)} attorneys items...")
        ## Gather all attorneys and add hc row number to a list, looking for any that are missing a value.
        ## The list has to be created once, before the loop, so that it accumulates every row number.
        hcRowNumberList = []
        for attorneyPerson in fullAttorneyPeopleList:
            hcRowNumber = attorneyPerson.unique_attorney_row_number
            if hcRowNumber is None:
                print(f"WARNING: Empty hard coded row number for {attorneyPerson.first_name} {attorneyPerson.last_name} in the Attorneys Tab.")
            else:
                hcRowNumberList.append(int(hcRowNumber))
        if not hcRowNumberList:
            print("No hard coded row numbers were found, so there is nothing to check for gaps.")
            return
        ## Next export a list of the missing numbers
        hcRowNumberList.sort()
        compareSet = set(range(hcRowNumberList[0], hcRowNumberList[-1]))
        attorneyDiffs = compareSet - set(hcRowNumberList)
        if attorneyDiffs:
            print(attorneyDiffs)
        else:
            print("There are no gaps in the hard coded row numbers in the Attorneys tab.")

    def ExportFullSTRList(self, attorneyOnly = True, outputFileName = r"C:\Test_Dir\Amazon\Attorneys_SR_STR_v2.txt"):
        """Exports a STR file, one search string per line, built with PeopleList.return_str_string2.

        attorneyOnly (the default) exports the people whose is_attorney is YES
        followed by those that are SPLIT ROLE. When attorneyOnly is false every
        person in the MAL is exported. outputFileName is the UTF-8 text file to
        (over)write.
        """
        if attorneyOnly:
            peopleToExport = self.malPeopleList.return_list_of_matching_values('is_attorney','YES')
            peopleToExport = peopleToExport + self.malPeopleList.return_list_of_matching_values('is_attorney','SPLIT ROLE')
        else:
            peopleToExport = list(self.malPeopleList.people)
        with open(outputFileName, 'w', encoding='UTF-8') as outputFile:
            for personToExport in peopleToExport:
                outputText = self.malPeopleList.return_str_string2(personToExport)
                outputFile.write(outputText + "\n")
545 nino.borges 853
546 nino.borges 834 if __name__ == '__main__':
547 nino.borges 847 pass
548     ## cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\VEAS\VEAS_Log_Data_Export_Converted.txt"
549     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241202)\PLOG All IDs (20241202)_Converted_SubSetOnly.txt"
550     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241202)\TEST-PLOG.txt"
551     ## #cleanedDatExportFileName = r"C:\Users\eborges\AppData\Local\Programs\Python\Python312\MyCode\JN\_Temp2\20241115_PrivLogWorking_CAAG\PrivLogExport_20241113_CAAG_Converted.txt"
552     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241122 - VEAS CAAG 20241206\export_20241122_160117_Converted.txt"
553     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241122 - VEAS CAAG 20241206\TEST.txt"
554     ## #masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.11.06(7045550.3).xlsx"
555     ## masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.12.1(7045413.15).xlsx"
556     ## #masterAttorneyListFileName = r"C:\Test_Dir\Amazon\TEST-MAL.xlsx"
557     ## #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides.txt"
558     ## #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides - Copy.txt"
559     ## fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\FullNameOverides.txt"
560     ##
561     ##
562     ##
563     ## nv = NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)
564     ## #nv.malPeopleList.list_people()
565     ##
566     ## qcP = MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC.QcPrivLog(cleanedDatExportFileName)
567     ## print(nv.malPeopleList.search_by_email('crespojp@amazon.com'.upper()))
568     ## #print(nv.malPeopleList.search_by_email('crespojp@amazon.com'.upper()))
569     ## workList = qcP.metadataValuesDict.keys()
570     ## outputFile = open(r"C:\Test_Dir\Amazon\NameNormOutputText.txt",'w')
571     ## for docID in workList:
572     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['toValues']
573     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['toValues']
574     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['ccValues']
575     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['ccValues']
576     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['bccValues']
577     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['bccValues']
578     ## metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['fromValues']
579     ## formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['fromValues']
580     ## formattedAttorneyValues = set()
581     ## for formattedValue in formattedFieldValues:
582     ## if "*" in formattedValue:
583     ## formattedAttorneyValues.add(formattedValue.upper())
584     ##
585     ## if metadataFieldValues:
586     ## matchedMetadataValues = set()
587     ## for nameItem in metadataFieldValues:
588     ## ## First test to see if there is a valid email address.
589     ## resultSet = set()
590     ## results = re.findall(qcP.allPossibleEmailAddressesRegExPattern, nameItem)
591     ## if results:
592     ## for result in results:
593     ## resultSet.add(result)
594     ## if len(resultSet) >1:
595     ## resultSet = nv.SmartDedupeSet(resultSet)
596     ## if len(resultSet) >1:
597     ## print("ERROR multiple email **unique** email addresses in one item.")
598     ## print(resultSet)
599     ## print("\n")
600     ## else:
601     ## personMatch = nv.malPeopleList.search_by_email(resultSet.pop().upper())
602     ## if personMatch:
603     ## if personMatch.full_name_overide:
604     ## fullName = personMatch.full_name_overide
605     ## elif personMatch.full_name_preferred:
606     ## #print(personMatch.full_name_preferred)
607     ## ## Going to need to do a bit of replacing to remove some information that is just never in the formatted field.
608     ## fullPreferredName = personMatch.full_name_preferred
609     ## fullPreferredName = fullPreferredName.replace('(LEGAL)','')
610     ## fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
611     ## fullPreferredName = fullPreferredName.replace('(SHE HER)','')
612     ## preferedLastName, preferedFirstName = fullPreferredName.split(',')
613     ## preferedLastName = preferedLastName.strip()
614     ## preferedFirstName = preferedFirstName.strip()
615     ## preferedFirstName = preferedFirstName.split(" ")[0]
616     ## fullName = f"{preferedFirstName} {preferedLastName}"
617     ## #fullName = f"{preferedLastName}, {preferedFirstName}"
618     ## else:
619     ## fullName = f"{personMatch.first_name} {personMatch.last_name}"
620     ## #fullName = f"{personMatch.last_name}, {personMatch.first_name}"
621     ## if personMatch.is_attorney == 'YES':
622     ## #outputFile.write(f"{docID} has match {personMatch.first_name} {personMatch.last_name}* ({personMatch.work_email_address.split('@')[-1]})\n")
623     ## matchedMetadataValues.add(f"{fullName}* ({personMatch.work_email_address.split('@')[-1]})")
624     ## else:
625     ## #outputFile.write(f"{docID} has match {personMatch.first_name} {personMatch.last_name} ({personMatch.work_email_address.split('@')[-1]})\n")
626     ## matchedMetadataValues.add(f"{fullName} ({personMatch.work_email_address.split('@')[-1]})")
627     ## else:
628     ## outputFile.write(f"{docID} contains a non-email item {nameItem}\n\n")
629     ## missingFromFormatted = matchedMetadataValues - formattedAttorneyValues
630     ## missingFromMeta = formattedAttorneyValues - matchedMetadataValues
631     ## if missingFromFormatted:
632     ## for missingItem in missingFromFormatted:
633     ## outputFile.write(f"{docID} has {missingItem} missing from the formatted field\n")
634     ## if missingFromMeta:
635     ## for missingItem in missingFromMeta:
636     ## outputFile.write(f"{docID} has {missingItem} missing from the metadata field\n")
637     ## if missingFromFormatted:
638     ## outputFile.write("\n")
639     ## elif missingFromMeta:
640     ## outputFile.write("\n")
641     ## outputFile.close()
642 nino.borges 834