ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/Amazon_NamesNormQC.py
Revision: 877
Committed: Fri Jan 10 22:03:53 2025 UTC (14 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 32676 byte(s)
Log Message:
Adds a new integrity check that will look for email addresses outside of email address fields in the MAL.  Also updated the existing email address integrity check to do a better job of finding duplicates across work email and alt email.

File Contents

# User Rev Content
1 nino.borges 834 """
2    
3 nino.borges 847 Amazon_NamesNormQC
4 nino.borges 834
5     Created by:
6     Emanuel Borges
7     11.21.2024
8    
9 nino.borges 847 This Library will assist with the process of performing Names Normalization QC on the Amazon privilege logs.
10 nino.borges 834
11     """
12    
13     import os, uuid, pickle, re
14 nino.borges 847 #import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
15 nino.borges 877 from dataclasses import dataclass, field, fields
16 nino.borges 854 from typing import List, Tuple, Optional
17 nino.borges 834 from collections import namedtuple
18     from win32com.client import Dispatch
19    
20    
@dataclass
class Person:
    """One person (row) from the Master Attorney List (MAL).

    Every field defaults to None except ``_id``, which receives a fresh
    ``uuid.uuid4()`` per instance. After construction, ``__post_init__``
    strips and uppercases a fixed set of text fields so later comparisons
    can be plain ``==`` checks.
    """
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    work_email_address: Optional[str] = None
    alt_work_email_address: Optional[str] = None
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    sidley_validated: Optional[str] = None
    category: Optional[str] = None
    organization: Optional[str] = None
    job_title: Optional[str] = None
    business_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    login: Optional[str] = None
    department_fine: Optional[str] = None
    addressed_during_caag: Optional[str] = None
    #last_updated: Optional[str] = None
    full_name_overide: Optional[str] = None
    ## Only gather unique_attorney_row_number from the attorney and split role attorney tabs. NEVER from downgrades.
    unique_attorney_row_number: Optional[str] = None
    ## Will be saving this as a list of tuple pairs (startdate,enddate). Allowing None for now but may update this to forcing an empty list, to avoid mutable default issues.
    dates_as_counsel: Optional[List[Tuple[str, str]]] = None

    def __post_init__(self):
        """Strip and uppercase the text fields listed below.

        Only ``str`` values are touched. Anything else (None, or a number /
        date handed over by the spreadsheet reader) is left exactly as given
        instead of raising AttributeError on ``.strip()``.
        ``full_name_overide``, ``unique_attorney_row_number``,
        ``dates_as_counsel`` and ``_id`` are never modified here.
        """
        normalizedFieldNames = (
            "first_name",
            "last_name",
            "work_email_address",
            "alt_work_email_address",
            "is_attorney",
            "split_role_date_range",
            "sidley_validated",
            "category",
            "organization",
            "job_title",
            "business_title",
            "full_name_preferred",
            "login",
            "department_fine",
            "addressed_during_caag",
        )
        for fieldName in normalizedFieldNames:
            currentValue = getattr(self, fieldName)
            # Empty strings are already "normalized"; non-strings cannot be.
            if isinstance(currentValue, str) and currentValue:
                setattr(self, fieldName, currentValue.strip().upper())
80 nino.borges 834
@dataclass
class PeopleList:
    """In-memory collection of Person records with simple linear lookups."""
    people: List["Person"] = field(default_factory=list)

    def add_person(self, person: "Person"):
        """Append person to the end of the list."""
        self.people.append(person)

    def search_by_email(self, emailAddress: str) -> Optional["Person"]:
        """Return the first person whose work or alt work email equals emailAddress.

        The value is stripped and uppercased before comparing, matching how
        Person stores its email fields. A blank or non-string value returns
        None rather than matching a person who simply has no email on file.
        Assumes email addresses are unique.
        """
        if not isinstance(emailAddress, str):
            return None
        wantedAddress = emailAddress.strip().upper()
        if not wantedAddress:
            return None
        for person in self.people:
            if wantedAddress in (person.work_email_address, person.alt_work_email_address):
                return person
        return None

    def search_by_unique_attorney_row_number(self, uniqueAttorneyRowNumber: str) -> Optional["Person"]:
        """Return the first person whose unique_attorney_row_number equals the given value.

        None is never treated as a match: people without a row number all
        hold None, so matching on it would return an arbitrary person.
        Assumes uniqueAttorneyRowNumbers are unique.
        """
        if uniqueAttorneyRowNumber is None:
            return None
        for person in self.people:
            if person.unique_attorney_row_number == uniqueAttorneyRowNumber:
                return person
        return None

    def search_by_id(self, idNumber):
        """Return the first person whose _id equals idNumber, else None.

        idNumber must be in format UUID('7414f78c-8289-4c9f-bd49-a5aaac35545f').
        """
        for person in self.people:
            if person._id == idNumber:
                return person
        return None

    def return_list_of_matching_values(self, fieldName, value: str):
        """Return every person whose attribute fieldName equals value (list may be empty)."""
        return [person for person in self.people if getattr(person, fieldName) == value]

    def list_people(self):
        """Print each person, one per line."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress: str, fullNameOverideValue) -> None:
        """Set full_name_overide on every person whose work email equals emailAddress.

        Both inputs are stripped (so a trailing space or carriage return from
        the overrides file cannot cause a silent miss) and the stored override
        is uppercased. Prints a warning when the last name derived from the
        override differs from the person's last_name, and another when no
        person matched at all. Only work_email_address is consulted.
        """
        wantedAddress = emailAddress.strip().upper()
        overrideValue = fullNameOverideValue.strip()

        ## "LAST, FIRST" keeps the last name before the comma; otherwise it is the final word.
        if "," in overrideValue:
            lastName = overrideValue.split(",")[0].strip()
        else:
            nameParts = overrideValue.split()
            lastName = nameParts[-1] if nameParts else ""

        valueUpdated = False
        for person in self.people:
            if person.work_email_address != wantedAddress:
                continue
            person.full_name_overide = overrideValue.upper()
            valueUpdated = True
            ## Give a quick warning as the override value is stored if the last name differs.
            if lastName.upper() != person.last_name:
                print(f"WARNING: Overide last name value {lastName.upper()} does not match {person.last_name}.")
        if not valueUpdated:
            print(f"WARNING: No email address match for {emailAddress} found.")
143 nino.borges 834
144 nino.borges 837
class NamesVerification(object):
    """A class for automating the process of performing QC on the names within the Amazon privilege logs.

    On construction the Master Attorney List (MAL) is loaded into
    ``self.malPeopleList`` -- either from a pickle cache sitting next to the
    spreadsheet or by reading the spreadsheet through Excel. The ``Run...``
    methods then print integrity findings about that list.
    """
    version = '0.10.0'

    def __init__(self, cleanedDatExportFileName, masterAttorneyListFileName,fullNameOveridesFileName, forceNewPklFile = False, Encoding = 'UTF8'):
        """Initializes the data structures.

        masterAttorneyListFileName should be the full path to the MAL
        spreadsheet; the pickle cache uses the same path with a ``.pkl``
        extension. The cache is used when it exists unless forceNewPklFile is
        true, in which case (or when the cache is missing) the spreadsheet and
        the full name override file are re-read and a new cache is written.

        cleanedDatExportFileName and Encoding are accepted for interface
        compatibility; nothing in this class reads them.
        """
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
            return

        if not forceNewPklFile:
            print("Pickle file doesnt exist.")
        self.__BuildMalFromSources(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)

    def __BuildMalFromSources(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Pseudo-private method which rebuilds the MAL from the spreadsheet and override file, then writes the pickle cache."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        print("Loading full name overide values...")
        self.__LoadFullNameOverideValues(fullNameOveridesFileName)
        print("Full name overide values loaded.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")

    @staticmethod
    def _parse_dates_as_counsel(rawValue):
        """Turn a 'Dates as Counsel' cell into a list of (start, end) string tuples.

        Only the first line of the cell is used. Ranges are separated by ';'
        and each range is 'start-end'. Both halves are stripped and uppercased,
        so non-date words (current, present, etc) are allowed. A None/blank
        cell and blank segments (e.g. after a trailing ';') yield no entries.

        Raises ValueError when a non-blank segment does not contain exactly one '-'.
        """
        if rawValue is None:
            return []
        ## Get rid of any extra data that is on a new line. Ranges should not be separated by newline.
        firstLine = str(rawValue).split("\n")[0]
        datesAsCounselList = []
        for dateRange in firstLine.split(";"):
            if not dateRange.strip():
                continue
            rangeParts = dateRange.split("-")
            if len(rangeParts) != 2:
                raise ValueError(f"Cannot split date range {dateRange!r} into a start and an end.")
            counselStartDate, counselEndDate = (part.upper().strip() for part in rangeParts)
            datesAsCounselList.append((counselStartDate, counselEndDate))
        return datesAsCounselList

    @staticmethod
    def _person_from_row(sht, row, excelFieldPositionMatrix, columnMap):
        """Build a Person from one worksheet row; columnMap maps Person field name -> header text."""
        cellValues = {}
        for fieldName, headerText in columnMap.items():
            cellValues[fieldName] = sht.Cells(row, excelFieldPositionMatrix[headerText]).Value
        return Person(**cellValues)

    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass.

        Tabs are read in the order listed because the Split Role tab updates
        people created from the Attorneys tab. The workbook is closed even if
        ingestion raises part-way through.
        """
        ## There doesnt seem to be a consistent value in the "row" column in the MAL, so the row/column bounds are set here to avoid gap issues.
        ## excelTabParametersList should always be an ordered list because now order matters.
        excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":10923, "beginColNumber":1, "endColNumber":17},
                                  {"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":726, "beginColNumber":1, "endColNumber":16},
                                  {"tabName":"Split Role Attorneys", "beginRowNumber":2, "endRowNumber":21, "beginColNumber":1, "endColNumber":10}]

        ## Header text is matched exactly, including the leading space in the Attorneys tab's ' Validated by OC?'.
        attorneyColumnMap = {
            "is_attorney": 'Is Attorney',
            "split_role_date_range": 'Split Role - Dates as Counsel',
            "sidley_validated": ' Validated by OC?',
            "category": 'Category',
            "organization": 'Organization',
            "last_name": 'Last Name',
            "first_name": 'First Name',
            "work_email_address": 'Work Email',
            "alt_work_email_address": 'Alt Work Email',
            "job_title": 'Job Title',
            "business_title": 'Business Title',
            "full_name_preferred": 'Full Name (Preferred)',
            "login": 'Login',
            "department_fine": 'Department (Fine)',
            "unique_attorney_row_number": 'Row',
            "addressed_during_caag": 'Comments',
        }
        ## Make sure to NOT grab the unique attorney row number from the Downgrades tab.
        downgradeColumnMap = {
            "is_attorney": 'Is Attorney',
            "sidley_validated": 'Validated by OC?',
            "organization": 'Organization',
            "last_name": 'Last Name',
            "first_name": 'First Name',
            "work_email_address": 'Work Email',
            "alt_work_email_address": 'Alt Work Email',
            "job_title": 'Job Title',
            "business_title": 'Business Title',
            "full_name_preferred": 'Full Name (Preferred)',
            "login": 'Login',
            "department_fine": 'Department (Fine)',
            "addressed_during_caag": 'Addressed during CAAG',
        }

        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        try:
            for excelTab in excelTabParametersList:
                tabName = excelTab['tabName']
                sht = xlBook.Worksheets(tabName)
                print(f"Ingesting sheet {tabName}.")
                ## Map each header in row 1 to its column number.
                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    excelFieldPositionMatrix[sht.Cells(1, col).Value] = col
                for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1):
                    ## TODO: Refactor the excelTabParametersList later. Didnt realize columns were not consistent.
                    if tabName == 'Attorneys':
                        self.malPeopleList.add_person(self._person_from_row(sht, row, excelFieldPositionMatrix, attorneyColumnMap))
                    elif tabName == 'Downgrades':
                        self.malPeopleList.add_person(self._person_from_row(sht, row, excelFieldPositionMatrix, downgradeColumnMap))
                    elif tabName == 'Split Role Attorneys':
                        unique_attorney_row_number = sht.Cells(row, excelFieldPositionMatrix['Attorney Row']).Value
                        ## A blank 'Attorney Row' cell must not match people who have no row number.
                        if unique_attorney_row_number is None:
                            continue
                        matchedPerson = self.malPeopleList.search_by_unique_attorney_row_number(unique_attorney_row_number)
                        if matchedPerson:
                            ## dates_as_counsel is a list of two string value tuples (startdate,enddate).
                            datesAsCounselValue = sht.Cells(row, excelFieldPositionMatrix['Dates as Counsel']).Value
                            matchedPerson.dates_as_counsel = self._parse_dates_as_counsel(datesAsCounselValue)
                    else:
                        print(f"ERROR UNKNOWN TAB! {excelTab['tabName']} HAVE NEEDED TAB NAMES CHANGED?")
        finally:
            xlBook.Close()

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName, 'wb') as outputFile:
            pickle.dump(self.malPeopleList, outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons."""
        ## NOTE(review): pickle.load can execute code embedded in the file; only load pkl files written by __SaveMalToPkl.
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which will update the MAL people list object with the full name overide values.

        Each non-blank line must be ``emailAddress|fullNameOverideValue``.
        Blank lines are skipped; a line without exactly one '|' raises ValueError.
        """
        with open(fullNameOveridesFileName) as contents:
            for line in contents:
                line = line.replace("\n", "")
                if not line.strip():
                    continue
                emailAddress, fullNameOverideValue = line.split("|")
                self.malPeopleList.update_full_Name_overide(emailAddress, fullNameOverideValue)

    def SmartDedupeSet(self, currentSet):
        """A method that attempts to do some additional deduplication of the values in a set by lowering all values and deduplicating. Returns a lowered deduplicated set."""
        return {val.lower() for val in currentSet}

    def RunMalEmailAddressIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing and looking for duplicate email addresses.

        People are visited in list order, alt address before work address for
        each person. An address is reported when it was already seen in the
        same column or in the other column. Blank or non-string addresses are
        ignored. Findings are printed; nothing is returned.
        """
        seenWorkAddresses = set()
        seenAltAddresses = set()
        print("Performing MAL email address integrity check...")
        for person in self.malPeopleList.people:
            altAddr = person.alt_work_email_address
            workAddr = person.work_email_address
            altAddr = altAddr.strip() if isinstance(altAddr, str) else ""
            workAddr = workAddr.strip() if isinstance(workAddr, str) else ""
            if altAddr:
                if altAddr in seenWorkAddresses:
                    print(f"ISSUE:{altAddr} is a dupe of an workAddr.")
                if altAddr in seenAltAddresses:
                    print(f"ISSUE:{altAddr} is a dupe!")
                else:
                    seenAltAddresses.add(altAddr)
            if workAddr:
                if workAddr in seenAltAddresses:
                    print(f"ISSUE:{workAddr} is a dupe of an altAddr.")
                if workAddr in seenWorkAddresses:
                    print(f"ISSUE:{workAddr} is a dupe!")
                else:
                    seenWorkAddresses.add(workAddr)
        print("\nEmail address integrity check complete.\n\n")

    def RunMalEmailOutsideEmailFieldsIntegrityCheck(self):
        """This method performs an integrity check on the MAL by looking for email addresses that exist in fields other than the email address fields.

        Right now this looks for the @ symbol in every string-valued Person
        field that is not in the exclusion list. Findings are printed.
        """
        ## Editable list of fields that should be excluded from this test, especially those that should already have email addresses
        fieldsToExcludeList = ['work_email_address', 'alt_work_email_address','_id','dates_as_counsel','unique_attorney_row_number']
        print("Performing MAL email addresses outside of email address fields integrity check...")
        fieldsToSearchList = [f.name for f in fields(Person) if f.name not in fieldsToExcludeList]
        for person in self.malPeopleList.people:
            for fieldName in fieldsToSearchList:
                testValue = getattr(person, fieldName)
                ## Only strings can hold an address; `"@" in 12.0` would raise TypeError.
                if isinstance(testValue, str) and "@" in testValue:
                    print(f"ISSUE: The email address {testValue} exists in the non-email field {fieldName} for unique row# {person.unique_attorney_row_number}.")
        print("\nEmail addresss outside of email fields integrity check complete.\n\n")

    def RunRowNumberIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing the hard-coded row numbers for attorneys, including split role.

        Prints a warning for every attorney without a row number, then prints
        the set of integers missing between the smallest and largest row
        number found (or a message that there are no gaps).
        """
        print("Performing MAL hard-coded row number integrity check...")
        attorneyPeopleList = self.malPeopleList.return_list_of_matching_values('is_attorney','YES')
        splitRolePeopleList = self.malPeopleList.return_list_of_matching_values('is_attorney','SPLIT ROLE')
        fullAttorneyPeopleList = [*attorneyPeopleList, *splitRolePeopleList]
        print(f"Analyzing all {len(fullAttorneyPeopleList)} attorneys items...")
        ## Collect every row number first; the list must outlive the loop or only the last value survives.
        hcRowNumberList = []
        for attorneyPerson in fullAttorneyPeopleList:
            hcRowNumber = attorneyPerson.unique_attorney_row_number
            if hcRowNumber is None:
                print(f"WARNING: Empty hard coded row number for {attorneyPerson.first_name} {attorneyPerson.last_name} in the Attorneys Tab.")
            else:
                hcRowNumberList.append(int(hcRowNumber))
        if not hcRowNumberList:
            print("WARNING: No hard coded row numbers were found to analyze.")
            return
        ## Next export a list of the missing numbers
        hcRowNumberList.sort()
        compareSet = set(range(hcRowNumberList[0], hcRowNumberList[-1]))
        attorneyDiffs = compareSet - set(hcRowNumberList)
        if attorneyDiffs:
            print(attorneyDiffs)
        else:
            print("There are no gaps in the hard coded row numbers in the Attorneys tab.")
407 nino.borges 853
408    
409 nino.borges 869
if __name__ == "__main__":
    # Nothing runs when this module is executed directly; the scratch driver
    # that follows is kept commented out.
    pass
412     ## cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\VEAS\VEAS_Log_Data_Export_Converted.txt"
413     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241202)\PLOG All IDs (20241202)_Converted_SubSetOnly.txt"
414     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241202)\TEST-PLOG.txt"
415     ## #cleanedDatExportFileName = r"C:\Users\eborges\AppData\Local\Programs\Python\Python312\MyCode\JN\_Temp2\20241115_PrivLogWorking_CAAG\PrivLogExport_20241113_CAAG_Converted.txt"
416     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241122 - VEAS CAAG 20241206\export_20241122_160117_Converted.txt"
417     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241122 - VEAS CAAG 20241206\TEST.txt"
418     ## #masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.11.06(7045550.3).xlsx"
419     ## masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.12.1(7045413.15).xlsx"
420     ## #masterAttorneyListFileName = r"C:\Test_Dir\Amazon\TEST-MAL.xlsx"
421     ## #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides.txt"
422     ## #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides - Copy.txt"
423     ## fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\FullNameOverides.txt"
424     ##
425     ##
426     ##
427     ## nv = NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)
428     ## #nv.malPeopleList.list_people()
429     ##
430     ## qcP = MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC.QcPrivLog(cleanedDatExportFileName)
431     ## print(nv.malPeopleList.search_by_email('crespojp@amazon.com'.upper()))
432     ## #print(nv.malPeopleList.search_by_email('crespojp@amazon.com'.upper()))
433     ## workList = qcP.metadataValuesDict.keys()
434     ## outputFile = open(r"C:\Test_Dir\Amazon\NameNormOutputText.txt",'w')
435     ## for docID in workList:
436     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['toValues']
437     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['toValues']
438     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['ccValues']
439     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['ccValues']
440     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['bccValues']
441     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['bccValues']
442     ## metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['fromValues']
443     ## formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['fromValues']
444     ## formattedAttorneyValues = set()
445     ## for formattedValue in formattedFieldValues:
446     ## if "*" in formattedValue:
447     ## formattedAttorneyValues.add(formattedValue.upper())
448     ##
449     ## if metadataFieldValues:
450     ## matchedMetadataValues = set()
451     ## for nameItem in metadataFieldValues:
452     ## ## First test to see if there is a valid email address.
453     ## resultSet = set()
454     ## results = re.findall(qcP.allPossibleEmailAddressesRegExPattern, nameItem)
455     ## if results:
456     ## for result in results:
457     ## resultSet.add(result)
458     ## if len(resultSet) >1:
459     ## resultSet = nv.SmartDedupeSet(resultSet)
460     ## if len(resultSet) >1:
461     ## print("ERROR multiple email **unique** email addresses in one item.")
462     ## print(resultSet)
463     ## print("\n")
464     ## else:
465     ## personMatch = nv.malPeopleList.search_by_email(resultSet.pop().upper())
466     ## if personMatch:
467     ## if personMatch.full_name_overide:
468     ## fullName = personMatch.full_name_overide
469     ## elif personMatch.full_name_preferred:
470     ## #print(personMatch.full_name_preferred)
471     ## ## Going to need to do a bit of replacing to remove some information that is just never in the formatted.
472     ## fullPreferredName = personMatch.full_name_preferred
473     ## fullPreferredName = fullPreferredName.replace('(LEGAL)','')
474     ## fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
475     ## fullPreferredName = fullPreferredName.replace('(SHE HER)','')
476     ## preferedLastName, preferedFirstName = fullPreferredName.split(',')
477     ## preferedLastName = preferedLastName.strip()
478     ## preferedFirstName = preferedFirstName.strip()
479     ## preferedFirstName = preferedFirstName.split(" ")[0]
480     ## fullName = f"{preferedFirstName} {preferedLastName}"
481     ## #fullName = f"{preferedLastName}, {preferedFirstName}"
482     ## else:
483     ## fullName = f"{personMatch.first_name} {personMatch.last_name}"
484     ## #fullName = f"{personMatch.last_name}, {personMatch.first_name}"
485     ## if personMatch.is_attorney == 'YES':
486     ## #outputFile.write(f"{docID} has match {personMatch.first_name} {personMatch.last_name}* ({personMatch.work_email_address.split('@')[-1]})\n")
487     ## matchedMetadataValues.add(f"{fullName}* ({personMatch.work_email_address.split('@')[-1]})")
488     ## else:
489     ## #outputFile.write(f"{docID} has match {personMatch.first_name} {personMatch.last_name} ({personMatch.work_email_address.split('@')[-1]})\n")
490     ## matchedMetadataValues.add(f"{fullName} ({personMatch.work_email_address.split('@')[-1]})")
491     ## else:
492     ## outputFile.write(f"{docID} contains a non-email item {nameItem}\n\n")
493     ## missingFromFormatted = matchedMetadataValues - formattedAttorneyValues
494     ## missingFromMeta = formattedAttorneyValues - matchedMetadataValues
495     ## if missingFromFormatted:
496     ## for missingItem in missingFromFormatted:
497     ## outputFile.write(f"{docID} has {missingItem} missing from the formatted field\n")
498     ## if missingFromMeta:
499     ## for missingItem in missingFromMeta:
500     ## outputFile.write(f"{docID} has {missingItem} missing from the metadata field\n")
501     ## if missingFromFormatted:
502     ## outputFile.write("\n")
503     ## elif missingFromMeta:
504     ## outputFile.write("\n")
505     ## outputFile.close()
506 nino.borges 834