ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/Amazon_NamesNormQC.py
Revision: 853
Committed: Thu Dec 12 16:18:41 2024 UTC (15 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 26788 byte(s)
Log Message:
This version adds the method RunMalEmailAddressIntegrityCheck which will test and report on email address duplicates across rows and tabs.

File Contents

# User Rev Content
1 nino.borges 834 """
2    
3 nino.borges 847 Amazon_NamesNormQC
4 nino.borges 834
5     Created by:
6     Emanuel Borges
7     11.21.2024
8    
9 nino.borges 847 This Library will assist with the process of performing Names Normalization QC on the Amazon privilege logs.
10 nino.borges 834
11     """
12    
13     import os, uuid, pickle, re
14 nino.borges 847 #import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
15 nino.borges 834 from dataclasses import dataclass, field
16     from typing import List, Optional
17     from collections import namedtuple
18     from win32com.client import Dispatch
19    
20    
@dataclass
class Person:
    """One individual's record from the Master Attorney List (MAL).

    Every attribute is optional so a record can be built from whichever
    columns a given spreadsheet tab provides.  After construction the text
    fields listed in ``__post_init__`` are stripped and upper-cased so that
    later comparisons (for example on email address) are case-insensitive.
    """

    first_name: Optional[str] = None
    last_name: Optional[str] = None
    work_email_address: Optional[str] = None
    alt_work_email_address: Optional[str] = None
    # Generated fresh for every instance; used by PeopleList.search_by_id.
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    sidley_validated: Optional[str] = None
    category: Optional[str] = None
    organization: Optional[str] = None
    job_title: Optional[str] = None
    business_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    login: Optional[str] = None
    department_fine: Optional[str] = None
    addressed_during_caag: Optional[str] = None
    # A "last_updated" field existed previously and is intentionally disabled.
    full_name_overide: Optional[str] = None
    ## Only gather unique_attorney_row_number from the attorney and split role attorney tabs. NEVER from downgrades.
    unique_attorney_row_number: Optional[str] = None
    ## Going to leave dates_as_counsel as an unparsed raw string for now.
    dates_as_counsel: Optional[str] = None

    def __post_init__(self):
        """Strip and upper-case the text fields that take part in matching.

        full_name_overide, unique_attorney_row_number, dates_as_counsel and
        _id are deliberately left exactly as supplied.
        """
        normalizedFieldNames = (
            "first_name",
            "last_name",
            "work_email_address",
            "alt_work_email_address",
            "is_attorney",
            "split_role_date_range",
            "sidley_validated",
            "category",
            "organization",
            "job_title",
            "business_title",
            "full_name_preferred",
            "login",
            "department_fine",
            "addressed_during_caag",
        )
        for fieldName in normalizedFieldNames:
            rawValue = getattr(self, fieldName)
            # Empty/None values are left alone.  Values read from spreadsheet
            # cells are not guaranteed to be text (a numeric cell can arrive
            # as a number), so only real strings are normalized instead of
            # crashing on .strip().
            if rawValue and isinstance(rawValue, str):
                setattr(self, fieldName, rawValue.strip().upper())
80 nino.borges 834
@dataclass
class PeopleList:
    """An ordered, in-memory collection of Person records with linear lookups.

    Person normalizes its email and name fields to stripped upper-case, so the
    exact-match searches below expect their arguments in that same form.
    """

    # Quoted forward references keep this class independent of where Person
    # is defined relative to it.
    people: List["Person"] = field(default_factory=list)

    def add_person(self, person: "Person"):
        """Append person to the end of the list."""
        self.people.append(person)

    def search_by_email(self, emailAddress: str) -> Optional["Person"]:
        """Returns the first person whose work_email_address equals emailAddress, else None.

        Assumes emailAddresses are unique.  The comparison is exact, so pass
        the address upper-cased.  Only the work address is checked, not the
        alternate one.
        """
        for person in self.people:
            if person.work_email_address == emailAddress:
                return person
        return None

    def search_by_unique_attorney_row_number(self, uniqueAttorneyRowNumber: str) -> Optional["Person"]:
        """Returns the first matching uniqueAttorneyRowNumber value. Assumes uniqueAttorneyRowNumbers are unique."""
        for person in self.people:
            if person.unique_attorney_row_number == uniqueAttorneyRowNumber:
                return person
        return None

    def search_by_id(self, idNumber):
        """Returns the first matching idNumber value. Must be in format UUID('7414f78c-8289-4c9f-bd49-a5aaac35545f')."""
        for person in self.people:
            if person._id == idNumber:
                return person
        return None

    def return_list_of_matching_values(self, fieldName, value: str):
        """Returns a full list of people whose attribute fieldName equals value."""
        return [person for person in self.people if getattr(person, fieldName) == value]

    def list_people(self):
        """Print every person in the list, one per line."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress: str, fullNameOverideValue) -> None:
        """Set full_name_overide on every person whose work email matches emailAddress.

        Both arguments are stripped and upper-cased before use so stray
        whitespace in the overrides file cannot cause a missed match or a
        false last-name warning.  A warning is printed when the last name in
        the override differs from the person's last_name, and another when no
        person matched at all.  Nothing is returned.
        """
        targetEmail = emailAddress.strip().upper()
        overrideValue = fullNameOverideValue.strip().upper()

        ## "LAST, FIRST" form keeps the last name before the comma; otherwise
        ## the last space-separated word is taken as the last name.
        if "," in overrideValue:
            lastName = overrideValue.split(",")[0].strip()
        else:
            lastName = overrideValue.split(" ")[-1]

        valueUpdated = False
        for person in self.people:
            if person.work_email_address != targetEmail:
                continue
            person.full_name_overide = overrideValue
            valueUpdated = True
            ## Give a quick warning as you add the override value into the database if the last name differs.
            if lastName != person.last_name:
                print(f"WARNING: Overide last name value {lastName} does not match {person.last_name}.")
        if not valueUpdated:
            print(f"WARNING: No email address match for {emailAddress} found.")
141 nino.borges 834
142 nino.borges 837
class NamesVerification(object):
    """A class for automating the process of performing QC on the names within the Amazon privilege logs."""
    version = '0.8.0'

    def __init__(self, cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName, forceNewPklFile = False, Encoding = 'UTF8'):
        """Initializes the data structures. cleanedDatExportFileName should be the full path to the file.
        Assumes the first row of the data file is the header and first column is DocID.
        Assumes the MAL is a spreadsheet (for now).
        MAL gets saved to a pkl file for performance reasons. pkl will be used unless forceNewPklFile is set to true.

        The pkl file sits next to the MAL spreadsheet, with the same base name.
        cleanedDatExportFileName and Encoding are accepted but not used by
        this initializer.
        """
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
            return

        if not forceNewPklFile:
            print("Pickle file doesnt exist.")
        self.__BuildMalFromSources(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)

    def __BuildMalFromSources(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Pseudo-private method which builds the MAL from the spreadsheet, applies the full name overides, then writes the pkl backup."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        print("Loading full name overide values...")
        self.__LoadFullNameOverideValues(fullNameOveridesFileName)
        print("Full name overide values loaded.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")

    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass."""
        ## There doesnt seem to be a consistent value in the "row" column in the MAL, so the row and
        ## column extents of each tab are set here explicitly to avoid gap issues.
        ## excelTabParametersList should always be an ordered list because now order matters:
        ## the Split Role tab updates people that the Attorneys tab already added.
        excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":10926, "beginColNumber":1, "endColNumber":17},
                                  {"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":675, "beginColNumber":1, "endColNumber":15},
                                  {"tabName":"Split Role Attorneys", "beginRowNumber":2, "endRowNumber":19, "beginColNumber":1, "endColNumber":26}]

        ## Person attribute -> column header, per tab. The columns are not consistent between tabs
        ## (note the leading space in the Attorneys tab's ' Validated by OC?' header).
        attorneyFieldHeaders = {"is_attorney": 'Is Attorney',
                                "split_role_date_range": 'Split Role - Attorney Capacity Date Range',
                                "sidley_validated": ' Validated by OC?',
                                "category": 'Category',
                                "organization": 'Organization',
                                "last_name": 'Last Name',
                                "first_name": 'First Name',
                                "work_email_address": 'Work Email',
                                "alt_work_email_address": 'Alt Work Email',
                                "job_title": 'Job Title',
                                "business_title": 'Business Title',
                                "full_name_preferred": 'Full Name (Preferred)',
                                "login": 'Login',
                                "department_fine": 'Department (Fine)',
                                "unique_attorney_row_number": 'Row',
                                "addressed_during_caag": 'Comments'}
        ## Make sure to NOT grab the unique attorney row number from the Downgrades tab.
        downgradeFieldHeaders = {"is_attorney": 'Is Attorney',
                                 "sidley_validated": 'Validated by OC?',
                                 "organization": 'Organization',
                                 "last_name": 'Last Name',
                                 "first_name": 'First Name',
                                 "work_email_address": 'Work Email',
                                 "alt_work_email_address": 'Alt Work Email',
                                 "job_title": 'Job Title',
                                 "business_title": 'Business Title',
                                 "full_name_preferred": 'Full Name (Preferred)',
                                 "login": 'Login',
                                 "department_fine": 'Department (Fine)',
                                 "addressed_during_caag": 'Addressed during CAAG'}
        personFieldHeadersByTab = {"Attorneys": attorneyFieldHeaders, "Downgrades": downgradeFieldHeaders}

        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        ## The Excel application itself is deliberately not quit here: Dispatch may have attached
        ## to an Excel instance the user already has open. Only the workbook is closed, and it is
        ## closed even when ingestion fails part way through.
        try:
            for excelTab in excelTabParametersList:
                tabName = excelTab['tabName']
                sht = xlBook.Worksheets(tabName)
                print(f"Ingesting sheet {tabName}.")

                ## Row 1 holds the headers; remember which column each header lives in.
                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    excelFieldPositionMatrix[sht.Cells(1, col).Value] = col

                rowNumbers = range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1)
                if tabName in personFieldHeadersByTab:
                    fieldHeaders = personFieldHeadersByTab[tabName]
                    for row in rowNumbers:
                        cellValues = {fieldName: sht.Cells(row, excelFieldPositionMatrix[header]).Value
                                      for fieldName, header in fieldHeaders.items()}
                        self.malPeopleList.add_person(Person(**cellValues))
                elif tabName == 'Split Role Attorneys':
                    for row in rowNumbers:
                        unique_attorney_row_number = sht.Cells(row, excelFieldPositionMatrix['Attorney Row']).Value
                        matchedPerson = self.malPeopleList.search_by_unique_attorney_row_number(unique_attorney_row_number)
                        if matchedPerson:
                            ## Going to leave dates_as_counsel as an unparsed raw string for now.
                            matchedPerson.dates_as_counsel = sht.Cells(row, excelFieldPositionMatrix['Dates as Counsel']).Value
                else:
                    print(f"ERROR UNKNOWN TAB! {tabName} HAVE NEEDED TAB NAMES CHANGED?")
        finally:
            xlBook.Close()

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName, 'wb') as outputFile:
            pickle.dump(self.malPeopleList, outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons."""
        ## NOTE: unpickling can execute arbitrary code. Only load pkl files written by __SaveMalToPkl.
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which will update the MAL people list object with the full name overide values.

        Each non-blank line of the file must be 'emailAddress|fullNameOverideValue'.
        Blank lines are skipped. Any other line shape raises ValueError.
        """
        with open(fullNameOveridesFileName) as overridesFile:
            for line in overridesFile:
                line = line.replace("\n", "")
                if not line.strip():
                    continue
                emailAddress, fullNameOverideValue = line.split("|")
                self.malPeopleList.update_full_Name_overide(emailAddress, fullNameOverideValue)

    def SmartDedupeSet(self, currentSet):
        """A method that attempts to do some additional deduplication of the values in a set by lowering all values and deduplicating. Returns a lowered deduplicated set."""
        return {val.lower() for val in currentSet}

    def RunMalEmailAddressIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing and looking for duplicate email addresses.

        People are walked in list order and a line is printed for:
          * an alternate address already seen as an alternate address,
          * a work address already seen as an alternate address,
          * a work address already seen as a work address.
        Nothing is returned; the findings are only printed.
        """
        ## NOTE(review): an alternate address that repeats an earlier WORK address is not reported. Confirm whether that is intended.
        seenWorkAddresses = set()
        seenAltAddresses = set()
        print("Performing MAL email address integrity check...")
        for person in self.malPeopleList.people:
            altAddr = person.alt_work_email_address
            workAddr = person.work_email_address
            if altAddr is not None:
                if altAddr in seenAltAddresses:
                    print(f"{altAddr} is a dupe!")
                else:
                    seenAltAddresses.add(altAddr)
            if workAddr is not None:
                if workAddr in seenAltAddresses:
                    print(f"{workAddr} is a dupe of an altAddr.")
                if workAddr in seenWorkAddresses:
                    print(f"{workAddr} is a dupe!")
                else:
                    seenWorkAddresses.add(workAddr)
        print("\n\nIntegrity check complete.")
325    
326    
327    
328    
329    
## Script entry point. Running this module directly is intentionally a no-op;
## the ad-hoc QC driver that used to run here is kept below, commented out, for reference.
if __name__ == '__main__':
    pass
332     ## cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\VEAS\VEAS_Log_Data_Export_Converted.txt"
333     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241202)\PLOG All IDs (20241202)_Converted_SubSetOnly.txt"
334     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241202)\TEST-PLOG.txt"
335     ## #cleanedDatExportFileName = r"C:\Users\eborges\AppData\Local\Programs\Python\Python312\MyCode\JN\_Temp2\20241115_PrivLogWorking_CAAG\PrivLogExport_20241113_CAAG_Converted.txt"
336     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241122 - VEAS CAAG 20241206\export_20241122_160117_Converted.txt"
337     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241122 - VEAS CAAG 20241206\TEST.txt"
338     ## #masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.11.06(7045550.3).xlsx"
339     ## masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.12.1(7045413.15).xlsx"
340     ## #masterAttorneyListFileName = r"C:\Test_Dir\Amazon\TEST-MAL.xlsx"
341     ## #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides.txt"
342     ## #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides - Copy.txt"
343     ## fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\FullNameOverides.txt"
344     ##
345     ##
346     ##
347     ## nv = NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)
348     ## #nv.malPeopleList.list_people()
349     ##
350     ## qcP = MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC.QcPrivLog(cleanedDatExportFileName)
351     ## print(nv.malPeopleList.search_by_email('crespojp@amazon.com'.upper()))
352     ## #print(nv.malPeopleList.search_by_email('crespojp@amazon.com'.upper()))
353     ## workList = qcP.metadataValuesDict.keys()
354     ## outputFile = open(r"C:\Test_Dir\Amazon\NameNormOutputText.txt",'w')
355     ## for docID in workList:
356     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['toValues']
357     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['toValues']
358     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['ccValues']
359     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['ccValues']
360     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['bccValues']
361     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['bccValues']
362     ## metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['fromValues']
363     ## formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['fromValues']
364     ## formattedAttorneyValues = set()
365     ## for formattedValue in formattedFieldValues:
366     ## if "*" in formattedValue:
367     ## formattedAttorneyValues.add(formattedValue.upper())
368     ##
369     ## if metadataFieldValues:
370     ## matchedMetadataValues = set()
371     ## for nameItem in metadataFieldValues:
372     ## ## First test to see if there is a valid email address.
373     ## resultSet = set()
374     ## results = re.findall(qcP.allPossibleEmailAddressesRegExPattern, nameItem)
375     ## if results:
376     ## for result in results:
377     ## resultSet.add(result)
378     ## if len(resultSet) >1:
379     ## resultSet = nv.SmartDedupeSet(resultSet)
380     ## if len(resultSet) >1:
381     ## print("ERROR multiple email **unique** email addresses in one item.")
382     ## print(resultSet)
383     ## print("\n")
384     ## else:
385     ## personMatch = nv.malPeopleList.search_by_email(resultSet.pop().upper())
386     ## if personMatch:
387     ## if personMatch.full_name_overide:
388     ## fullName = personMatch.full_name_overide
389     ## elif personMatch.full_name_preferred:
390     ## #print(personMatch.full_name_preferred)
391     ## ## Going to need to do a bit of replacing to remove some information that is just never in the formatted.
392     ## fullPreferredName = personMatch.full_name_preferred
393     ## fullPreferredName = fullPreferredName.replace('(LEGAL)','')
394     ## fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
395     ## fullPreferredName = fullPreferredName.replace('(SHE HER)','')
396     ## preferedLastName, preferedFirstName = fullPreferredName.split(',')
397     ## preferedLastName = preferedLastName.strip()
398     ## preferedFirstName = preferedFirstName.strip()
399     ## preferedFirstName = preferedFirstName.split(" ")[0]
400     ## fullName = f"{preferedFirstName} {preferedLastName}"
401     ## #fullName = f"{preferedLastName}, {preferedFirstName}"
402     ## else:
403     ## fullName = f"{personMatch.first_name} {personMatch.last_name}"
404     ## #fullName = f"{personMatch.last_name}, {personMatch.first_name}"
405     ## if personMatch.is_attorney == 'YES':
406     ## #outputFile.write(f"{docID} has match {personMatch.first_name} {personMatch.last_name}* ({personMatch.work_email_address.split('@')[-1]})\n")
407     ## matchedMetadataValues.add(f"{fullName}* ({personMatch.work_email_address.split('@')[-1]})")
408     ## else:
409     ## #outputFile.write(f"{docID} has match {personMatch.first_name} {personMatch.last_name} ({personMatch.work_email_address.split('@')[-1]})\n")
410     ## matchedMetadataValues.add(f"{fullName} ({personMatch.work_email_address.split('@')[-1]})")
411     ## else:
412     ## outputFile.write(f"{docID} contains a non-email item {nameItem}\n\n")
413     ## missingFromFormatted = matchedMetadataValues - formattedAttorneyValues
414     ## missingFromMeta = formattedAttorneyValues - matchedMetadataValues
415     ## if missingFromFormatted:
416     ## for missingItem in missingFromFormatted:
417     ## outputFile.write(f"{docID} has {missingItem} missing from the formatted field\n")
418     ## if missingFromMeta:
419     ## for missingItem in missingFromMeta:
420     ## outputFile.write(f"{docID} has {missingItem} missing from the metadata field\n")
421     ## if missingFromFormatted:
422     ## outputFile.write("\n")
423     ## elif missingFromMeta:
424     ## outputFile.write("\n")
425     ## outputFile.close()
426 nino.borges 834