ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/Amazon_NamesNormQC.py
Revision: 854
Committed: Thu Dec 12 18:26:19 2024 UTC (15 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 28072 byte(s)
Log Message:
Updated the dates_as_counsel attribute so that it is now a list of two-value tuples instead of just a raw string.  This parsed version will work much better when I attempt to determine whether split-role attorneys fall within one of those ranges.  Also updated the search_by_email method to search both work_email_address and alt_work_email_address, exiting as soon as it finds the first match.  There shouldn't be any duplicates, including the alt email addresses, across rows.

File Contents

# User Rev Content
1 nino.borges 834 """
2    
3 nino.borges 847 Amazon_NamesNormQC
4 nino.borges 834
5     Created by:
6     Emanuel Borges
7     11.21.2024
8    
9 nino.borges 847 This Library will assist with the process of performing Names Normalization QC on the Amazon privilege logs.
10 nino.borges 834
11     """
12    
13     import os, uuid, pickle, re
14 nino.borges 847 #import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
15 nino.borges 834 from dataclasses import dataclass, field
16 nino.borges 854 from typing import List, Tuple, Optional
17 nino.borges 834 from collections import namedtuple
18     from win32com.client import Dispatch
19    
20    
@dataclass
class Person:
    """A single person record from the Master Attorney List (MAL).

    Every attribute defaults to None except ``_id``, which receives a fresh
    ``uuid.uuid4()`` value for each instance.  After construction, the text
    attributes named in ``_NORMALIZED_FIELDS`` are stripped of surrounding
    whitespace and uppercased (see ``__post_init__``).
    """

    ## Names of the attributes that __post_init__ strips and uppercases.
    ## Deliberately left unannotated so the dataclass machinery does not treat it as a field.
    _NORMALIZED_FIELDS = (
        "first_name",
        "last_name",
        "work_email_address",
        "alt_work_email_address",
        "is_attorney",
        "split_role_date_range",
        "sidley_validated",
        "category",
        "organization",
        "job_title",
        "business_title",
        "full_name_preferred",
        "login",
        "department_fine",
        "addressed_during_caag",
    )

    first_name: Optional[str] = None
    last_name: Optional[str] = None
    work_email_address: Optional[str] = None
    alt_work_email_address: Optional[str] = None
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    sidley_validated: Optional[str] = None
    category: Optional[str] = None
    organization: Optional[str] = None
    job_title: Optional[str] = None
    business_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    login: Optional[str] = None
    department_fine: Optional[str] = None
    addressed_during_caag: Optional[str] = None
    #last_updated: Optional[str] = None
    full_name_overide: Optional[str] = None
    ## Only gather unique_attorney_row_number from the attorney and split role attorney tabs. NEVER from downgrades.
    unique_attorney_row_number: Optional[str] = None
    ## Saved as a list of tuple pairs (startdate, enddate). None is allowed for now; this may later
    ## be changed to always hold a list (via default_factory, to avoid mutable default issues).
    dates_as_counsel: Optional[List[Tuple[str, str]]] = None

    def __post_init__(self):
        """Strip and uppercase the text attributes listed in ``_NORMALIZED_FIELDS``.

        Empty or None values are left as they are.  Values that are not
        strings (for example a number read from a spreadsheet cell) are also
        left untouched instead of raising AttributeError on ``.strip()``.
        ``full_name_overide``, ``unique_attorney_row_number``,
        ``dates_as_counsel`` and ``_id`` are never modified here.
        """
        for fieldName in self._NORMALIZED_FIELDS:
            currentValue = getattr(self, fieldName)
            if currentValue and isinstance(currentValue, str):
                setattr(self, fieldName, currentValue.strip().upper())
80 nino.borges 834
@dataclass
class PeopleList:
    """An in-memory collection of Person records with simple linear lookups.

    All searches walk ``people`` in insertion order.
    """

    people: List["Person"] = field(default_factory=list)

    def add_person(self, person: "Person"):
        """Append person to the end of the list."""
        self.people.append(person)

    def search_by_email(self, emailAddress: str) -> Optional["Person"]:
        """Returns the first person whose work or alt work email address matches emailAddress.

        Assumes email addresses are unique across rows.  The lookup value is
        stripped and uppercased first, because Person stores its addresses
        that way.  An empty or None emailAddress never matches; otherwise it
        would "match" the first person that simply has no address on file.
        """
        if not emailAddress:
            return None
        wantedAddress = emailAddress.strip().upper() if isinstance(emailAddress, str) else emailAddress
        for person in self.people:
            if wantedAddress in (person.work_email_address, person.alt_work_email_address):
                return person
        return None

    def search_by_unique_attorney_row_number(self, uniqueAttorneyRowNumber: str) -> Optional["Person"]:
        """Returns the first person whose unique_attorney_row_number equals uniqueAttorneyRowNumber.

        Assumes uniqueAttorneyRowNumbers are unique.  None never matches:
        people without a row number carry None in that field, so a None
        lookup would otherwise return an arbitrary one of them.
        """
        if uniqueAttorneyRowNumber is None:
            return None
        for person in self.people:
            if person.unique_attorney_row_number == uniqueAttorneyRowNumber:
                return person
        return None

    def search_by_id(self, idNumber):
        """Returns the first matching idNumber value. Must be in format UUID('7414f78c-8289-4c9f-bd49-a5aaac35545f')."""
        for person in self.people:
            if person._id == idNumber:
                return person
        return None

    def return_list_of_matching_values(self, fieldName, value: str):
        """Returns a list of every person whose attribute fieldName equals value.

        Raises AttributeError if fieldName is not an attribute of the stored objects.
        """
        return [person for person in self.people if getattr(person, fieldName) == value]

    def list_people(self):
        """Print every person in the list, one per line."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress: str, fullNameOverideValue) -> Optional["Person"]:
        """Set full_name_overide on every person whose work email address matches emailAddress.

        Both values are stripped before use; the email is compared in
        uppercase and the override is stored in uppercase.  A warning is
        printed when the last name taken from the override (the text before
        the first comma, or else the last space-separated word) differs from
        the person's last_name, and when no person matches at all.

        Returns the first person that was updated, or None when nothing matched.
        """
        wantedAddress = emailAddress.strip().upper()
        overrideValue = fullNameOverideValue.strip()

        ## Work out the last name once; it only depends on the override text.
        if "," in overrideValue:
            lastName = overrideValue.split(",")[0]
        else:
            lastName = overrideValue.split(" ")[-1]
        lastName = lastName.strip().upper()

        firstUpdated = None
        for person in self.people:
            if person.work_email_address != wantedAddress:
                continue
            person.full_name_overide = overrideValue.upper()
            if firstUpdated is None:
                firstUpdated = person
            ## Give a quick warning as the override value goes in if the last name differs.
            if lastName != person.last_name:
                print(f"WARNING: Overide last name value {lastName} does not match {person.last_name}.")

        if firstUpdated is None:
            print(f"WARNING: No email address match for {emailAddress} found.")
        return firstUpdated
143 nino.borges 834
144 nino.borges 837
class NamesVerification(object):
    """A class for automating the process of performing QC on the names within the Amazon privilege logs.

    The Master Attorney List (MAL) is kept in ``self.malPeopleList``.  It is
    either ingested from an Excel workbook (and then cached to a pickle file
    next to the workbook) or loaded back from that pickle file.
    """
    version = '0.9.0'

    def __init__(self, cleanedDatExportFileName, masterAttorneyListFileName,fullNameOveridesFileName, forceNewPklFile = False, Encoding = 'UTF8'):
        """Initializes the data structures.

        cleanedDatExportFileName: full path to the cleaned data export.  Accepted for
            interface compatibility; this initializer does not read it.
        masterAttorneyListFileName: full path to the MAL spreadsheet.  The pickle cache
            uses the same path with the extension replaced by ".pkl".
        fullNameOveridesFileName: pipe-delimited "email|full name" text file, applied
            only when the MAL is (re)built from the spreadsheet.
        forceNewPklFile: when true, always rebuild from the spreadsheet and overwrite the
            pickle.  Otherwise the pickle is used if it exists.
        Encoding: accepted for interface compatibility; this initializer does not use it.
        """
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
            return

        if not forceNewPklFile:
            print("Pickle file doesnt exist.")
        self.__BuildMalFromSpreadsheet(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)

    def __BuildMalFromSpreadsheet(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Pseudo-private method which will ingest the MAL spreadsheet, apply the full name overides and save the pickle backup."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        print("Loading full name overide values...")
        self.__LoadFullNameOverideValues(fullNameOveridesFileName)
        print("Full name overide values loaded.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")

    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass.

        Row 1 of each tab is read as the header row, and cells are then looked up by
        header text, so a renamed column raises KeyError.  The workbook is closed
        even when ingestion fails part-way through.
        """
        ## There doesnt seem to be a consistent value in the "row" column in the MAL, so setting these parameters here to avoid gap issues.
        ## excelTabParametersList should always be an ordered list because order matters:
        ## the split role tab looks up people that the Attorneys tab must already have added.
        excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":10926, "beginColNumber":1, "endColNumber":17},
                                  {"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":719, "beginColNumber":1, "endColNumber":16},
                                  {"tabName":"Split Role Attorneys", "beginRowNumber":2, "endRowNumber":21, "beginColNumber":1, "endColNumber":10}]

        ## Person attribute -> spreadsheet header text, per tab. The headers are not consistent
        ## between tabs (note the leading space in the Attorneys ' Validated by OC?' header).
        attorneyFieldMap = {"is_attorney":"Is Attorney",
                            "split_role_date_range":"Split Role - Attorney Capacity Date Range",
                            "sidley_validated":" Validated by OC?",
                            "category":"Category",
                            "organization":"Organization",
                            "last_name":"Last Name",
                            "first_name":"First Name",
                            "work_email_address":"Work Email",
                            "alt_work_email_address":"Alt Work Email",
                            "job_title":"Job Title",
                            "business_title":"Business Title",
                            "full_name_preferred":"Full Name (Preferred)",
                            "login":"Login",
                            "department_fine":"Department (Fine)",
                            "unique_attorney_row_number":"Row",
                            "addressed_during_caag":"Comments"}
        ## Make sure to NOT grab the unique attorney row number from the Downgrades tab.
        downgradeFieldMap = {"is_attorney":"Is Attorney",
                             "sidley_validated":"Validated by OC?",
                             "organization":"Organization",
                             "last_name":"Last Name",
                             "first_name":"First Name",
                             "work_email_address":"Work Email",
                             "alt_work_email_address":"Alt Work Email",
                             "job_title":"Job Title",
                             "business_title":"Business Title",
                             "full_name_preferred":"Full Name (Preferred)",
                             "login":"Login",
                             "department_fine":"Department (Fine)",
                             "addressed_during_caag":"Addressed during CAAG"}
        personFieldMaps = {"Attorneys":attorneyFieldMap, "Downgrades":downgradeFieldMap}

        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        try:
            for excelTab in excelTabParametersList:
                tabName = excelTab['tabName']
                sht = xlBook.Worksheets(tabName)
                print(f"Ingesting sheet {tabName}.")

                ## Header text -> column number, read from row 1.
                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    excelFieldPositionMatrix[sht.Cells(1,col).Value] = col
                rowNumbers = range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1)

                if tabName in personFieldMaps:
                    fieldMap = personFieldMaps[tabName]
                    for row in rowNumbers:
                        cellValues = {attributeName: sht.Cells(row,excelFieldPositionMatrix[headerText]).Value
                                      for attributeName, headerText in fieldMap.items()}
                        self.malPeopleList.add_person(Person(**cellValues))
                elif tabName == 'Split Role Attorneys':
                    self.__IngestSplitRoleRows(sht, rowNumbers, excelFieldPositionMatrix)
                else:
                    print(f"ERROR UNKNOWN TAB! {excelTab['tabName']} HAVE NEEDED TAB NAMES CHANGED?")
        finally:
            ## Always release the workbook, even if a row failed to ingest.
            xlBook.Close()

    def __IngestSplitRoleRows(self, sht, rowNumbers, excelFieldPositionMatrix):
        """Pseudo-private method which will attach the parsed 'Dates as Counsel' ranges to people already in the MAL list.

        Rows are matched to people through the 'Attorney Row' value.  Rows with an
        empty 'Attorney Row' cell are skipped, because a None lookup could match any
        person that has no row number.  Rows that match nobody produce a warning.
        """
        for row in rowNumbers:
            attorneyRowNumber = sht.Cells(row,excelFieldPositionMatrix['Attorney Row']).Value
            if attorneyRowNumber is None:
                continue
            matchedPerson = self.malPeopleList.search_by_unique_attorney_row_number(attorneyRowNumber)
            if not matchedPerson:
                print(f"WARNING: No attorney found for split role attorney row {attorneyRowNumber}.")
                continue
            datesAsCounselValue = sht.Cells(row,excelFieldPositionMatrix['Dates as Counsel']).Value
            matchedPerson.dates_as_counsel = self.__ParseDatesAsCounsel(datesAsCounselValue)

    @staticmethod
    def __ParseDatesAsCounsel(datesAsCounselValue):
        """Pseudo-private method which will parse a raw 'Dates as Counsel' value into a list of (startdate, enddate) string tuples.

        Only the first line of the value is used.  Ranges are separated by semicolons
        and each range is "start-end".  Both halves are stripped and uppercased, so
        non-date words (current, present, etc) are allowed.  An empty value gives an
        empty list and empty segments (such as after a trailing semicolon) are ignored.

        Raises ValueError when a range does not contain exactly one hyphen.
        """
        if not datesAsCounselValue:
            return []
        ## Get rid of any extra data that is on a new line. Date ranges shouldnt be separated by newline.
        firstLine = datesAsCounselValue.split("\n")[0]
        datesAsCounselList = []
        for dateRange in firstLine.split(";"):
            if not dateRange.strip():
                continue
            rangeParts = dateRange.split("-")
            if len(rangeParts) != 2:
                raise ValueError(f"Cannot parse date range {dateRange.strip()!r}; expected 'start-end'.")
            counselStartDate, counselEndDate = rangeParts
            datesAsCounselList.append((counselStartDate.upper().strip(), counselEndDate.upper().strip()))
        return datesAsCounselList

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName,'wb') as outputFile:
            pickle.dump(self.malPeopleList,outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons."""
        ## NOTE(review): pickle.load can execute arbitrary code from the file. Only point this
        ## at pkl files written by __SaveMalToPkl, never at files from an untrusted location.
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which will update the MAL people list object with the full name overide values.

        Each non-blank line must be "emailAddress|fullNameOverideValue".  Blank lines
        are skipped.  Raises ValueError for a line without exactly one "|".
        """
        with open(fullNameOveridesFileName) as overidesFile:
            for lineNumber, line in enumerate(overidesFile, start=1):
                line = line.replace("\n","")
                if not line.strip():
                    continue
                lineParts = line.split("|")
                if len(lineParts) != 2:
                    raise ValueError(f"Line {lineNumber} of {fullNameOveridesFileName} is not in the form email|full name: {line!r}")
                emailAddress, fullNameOverideValue = lineParts
                self.malPeopleList.update_full_Name_overide(emailAddress, fullNameOverideValue)

    def SmartDedupeSet(self, currentSet):
        """A method that attempts to do some additional deduplication of the values in a set by lowering all values and deduplicating. Returns a lowered deduplicated set."""
        return {val.lower() for val in currentSet}

    def RunMalEmailAddressIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing and looking for duplicate email addresses.

        Findings are printed; nothing is returned.
        """
        ## NOTE(review): a work address is only compared against alt addresses seen so far
        ## (including the same person's own alt address), and alt addresses are never compared
        ## against work addresses. Confirm whether a full cross-check is wanted.
        seenWorkAddresses = set()
        seenAltAddresses = set()
        print("Performing MAL email address integrity check...")
        for person in self.malPeopleList.people:
            altAddr = person.alt_work_email_address
            workAddr = person.work_email_address
            if altAddr is not None:
                if altAddr in seenAltAddresses:
                    print(f"{altAddr} is a dupe!")
                else:
                    seenAltAddresses.add(altAddr)
            if workAddr is not None:
                if workAddr in seenAltAddresses:
                    print(f"{workAddr} is a dupe of an altAddr.")
                if workAddr in seenWorkAddresses:
                    print(f"{workAddr} is a dupe!")
                else:
                    seenWorkAddresses.add(workAddr)
        print("\n\nIntegrity check complete.")
340    
341    
342    
343    
344    
if __name__ == "__main__":
    ## Running this module directly does nothing; the driver script kept below is commented out.
    pass
347     ## cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\VEAS\VEAS_Log_Data_Export_Converted.txt"
348     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241202)\PLOG All IDs (20241202)_Converted_SubSetOnly.txt"
349     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241202)\TEST-PLOG.txt"
350     ## #cleanedDatExportFileName = r"C:\Users\eborges\AppData\Local\Programs\Python\Python312\MyCode\JN\_Temp2\20241115_PrivLogWorking_CAAG\PrivLogExport_20241113_CAAG_Converted.txt"
351     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241122 - VEAS CAAG 20241206\export_20241122_160117_Converted.txt"
352     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241122 - VEAS CAAG 20241206\TEST.txt"
353     ## #masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.11.06(7045550.3).xlsx"
354     ## masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.12.1(7045413.15).xlsx"
355     ## #masterAttorneyListFileName = r"C:\Test_Dir\Amazon\TEST-MAL.xlsx"
356     ## #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides.txt"
357     ## #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides - Copy.txt"
358     ## fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\FullNameOverides.txt"
359     ##
360     ##
361     ##
362     ## nv = NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)
363     ## #nv.malPeopleList.list_people()
364     ##
365     ## qcP = MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC.QcPrivLog(cleanedDatExportFileName)
366     ## print(nv.malPeopleList.search_by_email('crespojp@amazon.com'.upper()))
367     ## #print(nv.malPeopleList.search_by_email('crespojp@amazon.com'.upper()))
368     ## workList = qcP.metadataValuesDict.keys()
369     ## outputFile = open(r"C:\Test_Dir\Amazon\NameNormOutputText.txt",'w')
370     ## for docID in workList:
371     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['toValues']
372     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['toValues']
373     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['ccValues']
374     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['ccValues']
375     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['bccValues']
376     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['bccValues']
377     ## metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['fromValues']
378     ## formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['fromValues']
379     ## formattedAttorneyValues = set()
380     ## for formattedValue in formattedFieldValues:
381     ## if "*" in formattedValue:
382     ## formattedAttorneyValues.add(formattedValue.upper())
383     ##
384     ## if metadataFieldValues:
385     ## matchedMetadataValues = set()
386     ## for nameItem in metadataFieldValues:
387     ## ## First test to see if there is a valid email address.
388     ## resultSet = set()
389     ## results = re.findall(qcP.allPossibleEmailAddressesRegExPattern, nameItem)
390     ## if results:
391     ## for result in results:
392     ## resultSet.add(result)
393     ## if len(resultSet) >1:
394     ## resultSet = nv.SmartDedupeSet(resultSet)
395     ## if len(resultSet) >1:
396     ## print("ERROR multiple email **unique** email addresses in one item.")
397     ## print(resultSet)
398     ## print("\n")
399     ## else:
400     ## personMatch = nv.malPeopleList.search_by_email(resultSet.pop().upper())
401     ## if personMatch:
402     ## if personMatch.full_name_overide:
403     ## fullName = personMatch.full_name_overide
404     ## elif personMatch.full_name_preferred:
405     ## #print(personMatch.full_name_preferred)
406     ## ## Going to need to do a bit of replacing to remove some information that is just never in the formatted.
407     ## fullPreferredName = personMatch.full_name_preferred
408     ## fullPreferredName = fullPreferredName.replace('(LEGAL)','')
409     ## fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
410     ## fullPreferredName = fullPreferredName.replace('(SHE HER)','')
411     ## preferedLastName, preferedFirstName = fullPreferredName.split(',')
412     ## preferedLastName = preferedLastName.strip()
413     ## preferedFirstName = preferedFirstName.strip()
414     ## preferedFirstName = preferedFirstName.split(" ")[0]
415     ## fullName = f"{preferedFirstName} {preferedLastName}"
416     ## #fullName = f"{preferedLastName}, {preferedFirstName}"
417     ## else:
418     ## fullName = f"{personMatch.first_name} {personMatch.last_name}"
419     ## #fullName = f"{personMatch.last_name}, {personMatch.first_name}"
420     ## if personMatch.is_attorney == 'YES':
421     ## #outputFile.write(f"{docID} has match {personMatch.first_name} {personMatch.last_name}* ({personMatch.work_email_address.split('@')[-1]})\n")
422     ## matchedMetadataValues.add(f"{fullName}* ({personMatch.work_email_address.split('@')[-1]})")
423     ## else:
424     ## #outputFile.write(f"{docID} has match {personMatch.first_name} {personMatch.last_name} ({personMatch.work_email_address.split('@')[-1]})\n")
425     ## matchedMetadataValues.add(f"{fullName} ({personMatch.work_email_address.split('@')[-1]})")
426     ## else:
427     ## outputFile.write(f"{docID} contains a non-email item {nameItem}\n\n")
428     ## missingFromFormatted = matchedMetadataValues - formattedAttorneyValues
429     ## missingFromMeta = formattedAttorneyValues - matchedMetadataValues
430     ## if missingFromFormatted:
431     ## for missingItem in missingFromFormatted:
432     ## outputFile.write(f"{docID} has {missingItem} missing from the formatted field\n")
433     ## if missingFromMeta:
434     ## for missingItem in missingFromMeta:
435     ## outputFile.write(f"{docID} has {missingItem} missing from the metadata field\n")
436     ## if missingFromFormatted:
437     ## outputFile.write("\n")
438     ## elif missingFromMeta:
439     ## outputFile.write("\n")
440     ## outputFile.close()
441 nino.borges 834