ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/Amazon_NamesNormQC.py
Revision: 944
Committed: Wed Nov 5 18:00:35 2025 UTC (4 months, 3 weeks ago) by nino.borges
Content type: text/x-python
File size: 36247 byte(s)
Log Message:
adjusting the caps on the return_str_string method.

File Contents

# User Rev Content
"""

Amazon_NamesNormQC

Created by:
Emanuel Borges
11.21.2024

This library assists with performing Names Normalization QC on the Amazon privilege logs.

"""
12    
import os
import pickle
import re
import uuid
#import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
from collections import namedtuple
from dataclasses import dataclass, field, fields
from typing import List, Tuple, Optional

from win32com.client import Dispatch
19    
20    
@dataclass
class Person:
    """A single person record held in a PeopleList.

    Every attribute is optional.  ``_id`` is generated with ``uuid.uuid4`` for each new
    instance.  On construction, the descriptive text fields handled by ``__post_init__``
    are stripped of surrounding whitespace and upper-cased.
    """
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    work_email_address: Optional[str] = None
    alt_work_email_address: Optional[str] = None
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    sidley_validated: Optional[str] = None
    category: Optional[str] = None
    organization: Optional[str] = None
    job_title: Optional[str] = None
    business_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    login: Optional[str] = None
    department_fine: Optional[str] = None
    addressed_during_caag: Optional[str] = None
    ## NOTE: a last_updated field used to live here; it is currently disabled.
    full_name_overide: Optional[str] = None
    ## Only gather unique_attorney_row_number from the attorney and split role attorney tabs. NEVER from downgrades.
    unique_attorney_row_number: Optional[str] = None
    ## Saved as a list of tuple pairs (startdate, enddate). None is allowed for now; this may later
    ## become an empty list created through a default_factory, to avoid mutable default issues.
    dates_as_counsel: Optional[List[Tuple[str, str]]] = None

    def __post_init__(self):
        """Strip and upper-case the descriptive text fields.

        full_name_overide, unique_attorney_row_number, dates_as_counsel and _id are left
        exactly as supplied.  Values that are not strings (for example a number read from
        a numeric spreadsheet cell) are also left untouched rather than raising
        AttributeError on ``.strip()``.
        """
        normalisedFieldNames = (
            "first_name",
            "last_name",
            "work_email_address",
            "alt_work_email_address",
            "is_attorney",
            "split_role_date_range",
            "sidley_validated",
            "category",
            "organization",
            "job_title",
            "business_title",
            "full_name_preferred",
            "login",
            "department_fine",
            "addressed_during_caag",
        )
        for fieldName in normalisedFieldNames:
            currentValue = getattr(self, fieldName)
            ## Empty strings and None stay as they are, matching the previous truthiness test.
            if currentValue and isinstance(currentValue, str):
                setattr(self, fieldName, currentValue.strip().upper())
80 nino.borges 834
@dataclass
class PeopleList:
    """An ordered collection of Person records with simple linear search helpers.

    Person normalises its e-mail fields to stripped upper case, so callers of the exact
    match searches are expected to pass upper-cased values.
    """
    people: List["Person"] = field(default_factory=list)

    def add_person(self, person: "Person"):
        """Appends person to the end of the list."""
        self.people.append(person)

    def search_by_email(self, emailAddress: str) -> Optional["Person"]:
        """Returns the first person whose work or alt work email equals emailAddress. Assumes emailAddresses are unique.

        Returns None when nothing matches, or when emailAddress is empty/None (otherwise a
        None query would "match" the first person that simply has no email address)."""
        if not emailAddress:
            return None
        for person in self.people:
            if emailAddress in (person.work_email_address, person.alt_work_email_address):
                return person
        return None

    def search_by_unique_attorney_row_number(self, uniqueAttorneyRowNumber: str) -> Optional["Person"]:
        """Returns the first matching uniqueAttorneyRowNumber value, or None. Assumes uniqueAttorneyRowNumbers are unique"""
        for person in self.people:
            if person.unique_attorney_row_number == uniqueAttorneyRowNumber:
                return person
        return None

    def search_by_id(self, idNumber):
        """Returns the first matching idNumber value, or None. Must be in format UUID('7414f78c-8289-4c9f-bd49-a5aaac35545f')."""
        for person in self.people:
            if person._id == idNumber:
                return person
        return None

    def return_list_of_matching_values(self, fieldName, value: str):
        """Returns a full list of people whose fieldName attribute equals value exactly."""
        return [person for person in self.people if getattr(person, fieldName) == value]

    def return_list_of_partial_email_matches(self, emailAddress: str) -> List["Person"]:
        """Returns a full list of partial email address matches by attempting to match the user name part of the email address.

        The part before the first "@" of emailAddress is compared with the same part of both
        the work and the alt work address of every person; a person is listed once if either
        address matches."""
        ## Grab the username part of the email address
        emailAddressPart = emailAddress.split("@")[0]
        matchingPeopleList = []
        for person in self.people:
            candidateAddresses = (person.work_email_address, person.alt_work_email_address)
            ## Test both addresses independently so the alt address is never skipped.
            if any(address and address.split("@")[0] == emailAddressPart for address in candidateAddresses):
                matchingPeopleList.append(person)
        return matchingPeopleList

    def return_soppy_search_list(self, fieldName, value: str):
        """Peforms a sloppy search where the value is IN the field, returns full list of possible matches"""
        ## Be very careful in using this because you can get a lot of false positives.
        matchingPeopleList = []
        for person in self.people:
            fieldValue = getattr(person, fieldName)
            if fieldValue is not None and value in fieldValue:
                matchingPeopleList.append(person)
        return matchingPeopleList

    def list_people(self):
        """Prints every person in the list, one per line."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress: str, fullNameOverideValue) -> None:
        """Sets full_name_overide on every person whose work email equals emailAddress.

        Both values are stripped and upper-cased before use. Prints a warning when the last
        name taken from the override differs from the person's last_name, and another when
        no person has the given work email address. Returns nothing."""
        targetAddress = emailAddress.strip().upper()
        overrideValue = fullNameOverideValue.strip().upper()
        ## "LAST, FIRST" puts the last name before the comma; otherwise treat the final word as the last name.
        if "," in overrideValue:
            lastName = overrideValue.split(",")[0].strip()
        else:
            lastName = overrideValue.split(" ")[-1]
        valueUpdated = False
        for person in self.people:
            if person.work_email_address != targetAddress:
                continue
            person.full_name_overide = overrideValue
            valueUpdated = True
            ## Give a quick warning as you add the override value into the database if the last name differs.
            if lastName != person.last_name:
                print(f"WARNING: Overide last name value {lastName} does not match {person.last_name}.")
        if not valueUpdated:
            print(f"WARNING: No email address match for {emailAddress} found.")

    def return_str_string(self, person):
        """returns the STR search string for a given person

        Names are joined with " W/3 " (last name first), email addresses with " OR ".
        Returns the literal "NONE" when the person has neither a name nor an email address."""
        namesList = [name for name in (person.last_name, person.first_name) if name]
        emailAddrList = [address for address in (person.work_email_address, person.alt_work_email_address) if address]
        nameClause = " W/3 ".join(namesList)
        emailClause = " OR ".join(emailAddrList)
        if namesList and emailAddrList:
            ## Only wrap the email clause in parentheses when it actually contains an OR.
            if len(emailAddrList) > 1:
                return f"({nameClause}) OR ({emailClause})"
            return f"({nameClause}) OR {emailClause}"
        if namesList:
            return nameClause
        if emailAddrList:
            return emailClause
        return "NONE"
200    
201    
class NamesVerification(object):
    """A class for automating the process of performing QC on the names within the Amazon privilege logs."""
    version = '0.11.0'

    ## Spreadsheet column header -> Person field, per tab. Headers must match the workbook exactly
    ## (note the leading space in the Attorneys tab's ' Validated by OC?' header).
    _ATTORNEY_TAB_COLUMNS = {
        'Is Attorney': 'is_attorney',
        'Split Role - Dates as Counsel': 'split_role_date_range',
        ' Validated by OC?': 'sidley_validated',
        'Category': 'category',
        'Organization': 'organization',
        'Last Name': 'last_name',
        'First Name': 'first_name',
        'Work Email': 'work_email_address',
        'Alt Work Email': 'alt_work_email_address',
        'Job Title': 'job_title',
        'Business Title': 'business_title',
        'Full Name (Preferred)': 'full_name_preferred',
        'Login': 'login',
        'Department (Fine)': 'department_fine',
        'Row': 'unique_attorney_row_number',
        'Comments': 'addressed_during_caag',
    }
    ## Make sure to NOT grab the unique attorney row number from the Downgrades tab.
    _DOWNGRADE_TAB_COLUMNS = {
        'Is Attorney': 'is_attorney',
        'Validated by OC?': 'sidley_validated',
        'Organization': 'organization',
        'Last Name': 'last_name',
        'First Name': 'first_name',
        'Work Email': 'work_email_address',
        'Alt Work Email': 'alt_work_email_address',
        'Job Title': 'job_title',
        'Business Title': 'business_title',
        'Full Name (Preferred)': 'full_name_preferred',
        'Login': 'login',
        'Department (Fine)': 'department_fine',
        'Addressed during CAAG': 'addressed_during_caag',
    }

    def __init__(self, cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName, forceNewPklFile = False, Encoding = 'UTF8'):
        """Initializes the data structures.

        masterAttorneyListFileName is the MAL spreadsheet (for now). The MAL gets saved to a
        pkl file next to it (same path, .pkl extension) for performance reasons, and that pkl
        is used on later runs unless forceNewPklFile is set to true or the pkl is missing.
        fullNameOveridesFileName is a text file of "email|full name" lines applied to the MAL
        whenever it is rebuilt from the spreadsheet.
        cleanedDatExportFileName and Encoding are accepted but not used by this method."""
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
            return
        if not forceNewPklFile:
            print("Pickle file doesnt exist.")
        self.__BuildMalFromSources(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)

    def __BuildMalFromSources(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Pseudo-private method which rebuilds the MAL from the spreadsheet, applies the overrides and writes the pkl backup."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        print("Loading full name overide values...")
        self.__LoadFullNameOverideValues(fullNameOveridesFileName)
        print("Full name overide values loaded.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")

    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass.

        The Attorneys and Downgrades tabs each add one Person per row. The Split Role
        Attorneys tab adds nobody; it attaches dates_as_counsel to people already ingested
        from the Attorneys tab. The workbook is closed even if ingestion fails."""
        ## There doesnt seem to be a consistent value in the "row" column in the MAL, so setting these parameters here to avoid gap issues.
        ## excelTabParametersList should always be an ordered list because now order matters
        ## (split role rows look up people that the Attorneys tab must already have added).
        excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":11078, "beginColNumber":1, "endColNumber":17},
                                  {"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":812, "beginColNumber":1, "endColNumber":16},
                                  {"tabName":"Split Role Attorneys", "beginRowNumber":2, "endRowNumber":46, "beginColNumber":1, "endColNumber":10}]
        tabColumnMappings = {"Attorneys": self._ATTORNEY_TAB_COLUMNS, "Downgrades": self._DOWNGRADE_TAB_COLUMNS}

        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        try:
            for excelTab in excelTabParametersList:
                tabName = excelTab['tabName']
                columnMapping = tabColumnMappings.get(tabName)
                if columnMapping is None and tabName != 'Split Role Attorneys':
                    print(f"ERROR UNKNOWN TAB! {tabName} HAVE NEEDED TAB NAMES CHANGED?")
                    continue
                sht = xlBook.Worksheets(tabName)
                print(f"Ingesting sheet {tabName}.")
                ## Row 1 holds the headers; remember which column each header sits in.
                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    excelFieldPositionMatrix[sht.Cells(1, col).Value] = col
                for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1):
                    if row == 5000:
                        print("5,000 row mark reached.")
                    elif row == 10000:
                        print("10,000 row mark reached.")
                    if columnMapping is None:
                        self._ApplySplitRoleRow(sht, row, excelFieldPositionMatrix)
                    else:
                        personValues = {}
                        for columnHeader, personFieldName in columnMapping.items():
                            personValues[personFieldName] = sht.Cells(row, excelFieldPositionMatrix[columnHeader]).Value
                        self.malPeopleList.add_person(Person(**personValues))
        finally:
            xlBook.Close()

    def _ApplySplitRoleRow(self, sht, row, excelFieldPositionMatrix):
        """Attaches the 'Dates as Counsel' of one Split Role Attorneys row to the already-ingested attorney it refers to."""
        attorneyRowNumber = sht.Cells(row, excelFieldPositionMatrix['Attorney Row']).Value
        if attorneyRowNumber is None:
            ## A blank key would otherwise "match" the first person without a row number (every downgrade).
            print(f"WARNING: Split Role Attorneys row {row} has no Attorney Row value. Row skipped.")
            return
        matchedPerson = self.malPeopleList.search_by_unique_attorney_row_number(attorneyRowNumber)
        if not matchedPerson:
            print(f"WARNING: Split Role Attorneys row {row} refers to unknown Attorney Row {attorneyRowNumber}. Row skipped.")
            return
        datesAsCounselValue = sht.Cells(row, excelFieldPositionMatrix['Dates as Counsel']).Value
        matchedPerson.dates_as_counsel = self._ParseDatesAsCounsel(datesAsCounselValue)

    @staticmethod
    def _ParseDatesAsCounsel(datesAsCounselValue):
        """Parses a 'Dates as Counsel' cell into a list of (startdate, enddate) string tuples.

        Only the first line of the cell is used; ranges are separated by ";" and the two ends
        of a range by "-". Non-date words (current, present, etc) are allowed; both ends are
        stripped and upper-cased. Empty cells and empty segments (e.g. after a trailing ";")
        yield nothing. Raises ValueError when a segment does not contain exactly one "-"."""
        if not datesAsCounselValue:
            return []
        ## First get rid of any extra data that is on a new line. Note that they shouldnt be seperating the date ranges by newline.
        firstLine = datesAsCounselValue.split("\n")[0]
        datesAsCounselList = []
        ## Next split the ranges correctly by semicolon
        for dateRange in firstLine.split(";"):
            if not dateRange.strip():
                continue
            rangeParts = dateRange.split("-")
            if len(rangeParts) != 2:
                raise ValueError(f"Cannot parse dates as counsel range {dateRange.strip()!r}; expected 'start - end'.")
            counselStartDate = rangeParts[0].upper().strip()
            counselEndDate = rangeParts[1].upper().strip()
            datesAsCounselList.append((counselStartDate, counselEndDate))
        return datesAsCounselList

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName, 'wb') as outputFile:
            pickle.dump(self.malPeopleList, outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons."""
        ## NOTE(review): unpickling executes whatever the file describes. Only load pkl files this tool wrote itself.
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which will update the MAL people list object with the full name overide values.

        Each non-blank line must be "emailAddress|fullNameOverideValue"; blank lines are skipped."""
        with open(fullNameOveridesFileName) as overridesFile:
            for line in overridesFile:
                line = line.replace("\n", "")
                if not line.strip():
                    continue
                emailAddress, fullNameOverideValue = line.split("|")
                self.malPeopleList.update_full_Name_overide(emailAddress, fullNameOverideValue)

    def SmartDedupeSet(self, currentSet):
        """A method that attempts to do some additional deduplication of the values in a set by lowering all values and deduplicating. Returns a lowered deduplicated set."""
        return {val.lower() for val in currentSet}

    def RunMalEmailAddressIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing and looking for duplicate email addresses.

        People are visited in list order and each address is compared against the addresses
        seen so far, so every duplicate is reported at its second (and later) occurrence.
        Findings are printed; nothing is returned."""
        seenWorkAddresses = set()
        seenAltAddresses = set()
        print("Performing MAL email address integrity check...")
        for person in self.malPeopleList.people:
            altAddr = person.alt_work_email_address
            workAddr = person.work_email_address
            if altAddr is not None:
                altAddr = altAddr.strip()
                if altAddr in seenWorkAddresses:
                    print(f"ISSUE:{altAddr} is a dupe of an workAddr.")
                if altAddr in seenAltAddresses:
                    print(f"ISSUE:{altAddr} is a dupe!")
                else:
                    seenAltAddresses.add(altAddr)
            if workAddr is not None:
                workAddr = workAddr.strip()
                if workAddr in seenAltAddresses:
                    print(f"ISSUE:{workAddr} is a dupe of an altAddr.")
                if workAddr in seenWorkAddresses:
                    print(f"ISSUE:{workAddr} is a dupe!")
                else:
                    seenWorkAddresses.add(workAddr)
        print("\nEmail address integrity check complete.\n\n")

    def RunMalEmailOutsideEmailFieldsIntegrityCheck(self):
        """This method performs an integrity check on the MAL by looking for email addresses that exist in fields other than the email address fields.

        Findings are printed; nothing is returned."""
        ## Right now this looks for the @ symbol.
        ## Editable list of fields that should be excluded from this test, especially those that should already have email addresses
        fieldsToExcludeList = ['work_email_address', 'alt_work_email_address','_id','dates_as_counsel','unique_attorney_row_number']
        print("Performing MAL email addresses outside of email address fields integrity check...")
        fieldsToSearchList = [f.name for f in fields(Person) if f.name not in fieldsToExcludeList]
        for person in self.malPeopleList.people:
            for fieldName in fieldsToSearchList:
                testValue = getattr(person, fieldName)
                ## Only text can hold an email address; skip None and any non-string cell values.
                if isinstance(testValue, str) and "@" in testValue:
                    print(f"ISSUE: The email address {testValue} exists in the non-email field {fieldName} for unique row# {person.unique_attorney_row_number}.")
        print("\nEmail addresss outside of email fields integrity check complete.\n\n")

    def RunRowNumberIntegrityCheck(self):
        """This method performs an integrity check on the MAL by analyzing the hard-coded row numbers of the attorneys (including split role).

        Prints a warning for every attorney without a hard-coded row number, then prints the
        set of numbers missing between the lowest and highest row number found (or a message
        that there are no gaps). Nothing is returned."""
        print("Performing MAL hard-coded row number integrity check...")
        ## TODO: a similar check for the non-attorneys (Downgrades tab) is not implemented.
        attorneyPeopleList = self.malPeopleList.return_list_of_matching_values('is_attorney','YES')
        splitRolePeopleList = self.malPeopleList.return_list_of_matching_values('is_attorney','SPLIT ROLE')
        fullAttorneyPeopleList = [*attorneyPeopleList, *splitRolePeopleList]
        print(f"Analyzing all {len(fullAttorneyPeopleList)} attorneys items...")
        ## Gather all attorneys and add hc row number to a list, looking for any that are missing a value.
        ## The list must be created once, before the loop, so that every attorney is kept.
        hcRowNumberList = []
        for attorneyPerson in fullAttorneyPeopleList:
            hcRowNumber = attorneyPerson.unique_attorney_row_number
            if hcRowNumber is None:
                print(f"WARNING: Empty hard coded row number for {attorneyPerson.first_name} {attorneyPerson.last_name} in the Attorneys Tab.")
            else:
                hcRowNumberList.append(int(hcRowNumber))
        if not hcRowNumberList:
            print("WARNING: No hard coded row numbers were found for any attorney.")
            return
        ## Next export a list of the missing numbers
        hcRowNumberList.sort()
        compareSet = set(range(hcRowNumberList[0], hcRowNumberList[-1]))
        attorneyDiffs = compareSet - set(hcRowNumberList)
        if attorneyDiffs:
            print(attorneyDiffs)
        else:
            print("There are no gaps in the hard coded row numbers in the Attorneys tab.")

    def ExportFullSTRList(self, attorneyOnly = True, outputFileName = r"C:\Test_Dir\Amazon\Attorneys_STR.txt"):
        """Exports a full STR file, one search string per line, for entries in the data class.

        Defaults to only attorneys (is_attorney == 'YES'); pass attorneyOnly = False to export
        every person. outputFileName is overwritten if it already exists."""
        if attorneyOnly:
            peopleToExport = self.malPeopleList.return_list_of_matching_values('is_attorney','YES')
        else:
            peopleToExport = self.malPeopleList.people
        with open(outputFileName, 'w') as outputFile:
            for person in peopleToExport:
                outputFile.write(self.malPeopleList.return_str_string(person) + "\n")
479 nino.borges 853
if __name__ == '__main__':
    ## Nothing runs when this module is executed directly; the example driver that
    ## follows is entirely commented out.
    pass
482     ## cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\VEAS\VEAS_Log_Data_Export_Converted.txt"
483     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241202)\PLOG All IDs (20241202)_Converted_SubSetOnly.txt"
484     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241202)\TEST-PLOG.txt"
485     ## #cleanedDatExportFileName = r"C:\Users\eborges\AppData\Local\Programs\Python\Python312\MyCode\JN\_Temp2\20241115_PrivLogWorking_CAAG\PrivLogExport_20241113_CAAG_Converted.txt"
486     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241122 - VEAS CAAG 20241206\export_20241122_160117_Converted.txt"
487     ## #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241122 - VEAS CAAG 20241206\TEST.txt"
488     ## #masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.11.06(7045550.3).xlsx"
489     ## masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.12.1(7045413.15).xlsx"
490     ## #masterAttorneyListFileName = r"C:\Test_Dir\Amazon\TEST-MAL.xlsx"
491     ## #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides.txt"
492     ## #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides - Copy.txt"
493     ## fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\FullNameOverides.txt"
494     ##
495     ##
496     ##
497     ## nv = NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)
498     ## #nv.malPeopleList.list_people()
499     ##
500     ## qcP = MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC.QcPrivLog(cleanedDatExportFileName)
501     ## print(nv.malPeopleList.search_by_email('crespojp@amazon.com'.upper()))
502     ## #print(nv.malPeopleList.search_by_email('crespojp@amazon.com'.upper()))
503     ## workList = qcP.metadataValuesDict.keys()
504     ## outputFile = open(r"C:\Test_Dir\Amazon\NameNormOutputText.txt",'w')
505     ## for docID in workList:
506     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['toValues']
507     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['toValues']
508     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['ccValues']
509     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['ccValues']
510     ## #metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['bccValues']
511     ## #formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['bccValues']
512     ## metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['fromValues']
513     ## formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['fromValues']
514     ## formattedAttorneyValues = set()
515     ## for formattedValue in formattedFieldValues:
516     ## if "*" in formattedValue:
517     ## formattedAttorneyValues.add(formattedValue.upper())
518     ##
519     ## if metadataFieldValues:
520     ## matchedMetadataValues = set()
521     ## for nameItem in metadataFieldValues:
522     ## ## First test to see if there is a valid email address.
523     ## resultSet = set()
524     ## results = re.findall(qcP.allPossibleEmailAddressesRegExPattern, nameItem)
525     ## if results:
526     ## for result in results:
527     ## resultSet.add(result)
528     ## if len(resultSet) >1:
529     ## resultSet = nv.SmartDedupeSet(resultSet)
530     ## if len(resultSet) >1:
531     ## print("ERROR multiple email **unique** email addresses in one item.")
532     ## print(resultSet)
533     ## print("\n")
534     ## else:
535     ## personMatch = nv.malPeopleList.search_by_email(resultSet.pop().upper())
536     ## if personMatch:
537     ## if personMatch.full_name_overide:
538     ## fullName = personMatch.full_name_overide
539     ## elif personMatch.full_name_preferred:
540     ## #print(personMatch.full_name_preferred)
541     ## ## Going to need to do a bit of replacing to remove some information that is just never in the formatted.
542     ## fullPreferredName = personMatch.full_name_preferred
543     ## fullPreferredName = fullPreferredName.replace('(LEGAL)','')
544     ## fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
545     ## fullPreferredName = fullPreferredName.replace('(SHE HER)','')
546     ## preferedLastName, preferedFirstName = fullPreferredName.split(',')
547     ## preferedLastName = preferedLastName.strip()
548     ## preferedFirstName = preferedFirstName.strip()
549     ## preferedFirstName = preferedFirstName.split(" ")[0]
550     ## fullName = f"{preferedFirstName} {preferedLastName}"
551     ## #fullName = f"{preferedLastName}, {preferedFirstName}"
552     ## else:
553     ## fullName = f"{personMatch.first_name} {personMatch.last_name}"
554     ## #fullName = f"{personMatch.last_name}, {personMatch.first_name}"
555     ## if personMatch.is_attorney == 'YES':
556     ## #outputFile.write(f"{docID} has match {personMatch.first_name} {personMatch.last_name}* ({personMatch.work_email_address.split('@')[-1]})\n")
557     ## matchedMetadataValues.add(f"{fullName}* ({personMatch.work_email_address.split('@')[-1]})")
558     ## else:
559     ## #outputFile.write(f"{docID} has match {personMatch.first_name} {personMatch.last_name} ({personMatch.work_email_address.split('@')[-1]})\n")
560     ## matchedMetadataValues.add(f"{fullName} ({personMatch.work_email_address.split('@')[-1]})")
561     ## else:
562     ## outputFile.write(f"{docID} contains a non-email item {nameItem}\n\n")
563     ## missingFromFormatted = matchedMetadataValues - formattedAttorneyValues
564     ## missingFromMeta = formattedAttorneyValues - matchedMetadataValues
565     ## if missingFromFormatted:
566     ## for missingItem in missingFromFormatted:
567     ## outputFile.write(f"{docID} has {missingItem} missing from the formatted field\n")
568     ## if missingFromMeta:
569     ## for missingItem in missingFromMeta:
570     ## outputFile.write(f"{docID} has {missingItem} missing from the metadata field\n")
571     ## if missingFromFormatted:
572     ## outputFile.write("\n")
573     ## elif missingFromMeta:
574     ## outputFile.write("\n")
575     ## outputFile.close()
576 nino.borges 834