ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/Amazon_NameSearches.py
Revision: 842
Committed: Fri Dec 6 21:47:35 2024 UTC (15 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 22252 byte(s)
Log Message:
This program to test searching the MAL.

File Contents

# User Rev Content
1 nino.borges 842 """
2    
3     Amazon_NameSearches
4    
5     Created by:
6     Emanuel Borges
7     12.3.2024
8    
9     This program tests searching the MAL (Master Attorney List).
10    
11     """
12    
13    
14    
15     import os, uuid, pickle, re
16     import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
17     from dataclasses import dataclass, field
18     from typing import List, Optional
19     from collections import namedtuple
20     from win32com.client import Dispatch
21    
22    
@dataclass
class Person:
    """A single person record from the Master Attorney List (MAL).

    Every field is optional.  After construction, each field that holds a
    string is stripped of surrounding whitespace and upper-cased so that later
    comparisons (for example on e-mail address) are case-insensitive.  Values
    that are not strings (``None``, the ``_id`` UUID, or any non-text value a
    caller passes in) are left untouched instead of raising on ``.strip()``.
    """

    # NOTE: field order is part of the positional constructor signature;
    # do not reorder.
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    work_email_address: Optional[str] = None
    alt_work_email_address: Optional[str] = None
    # Unique identifier generated per instance (uuid4).
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    # Compared against 'YES' / 'NO' by the search script in this file.
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    # Populated from the "Validated by OC?" spreadsheet column.
    sidley_validated: Optional[str] = None
    category: Optional[str] = None
    organization: Optional[str] = None
    job_title: Optional[str] = None
    business_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    login: Optional[str] = None
    department_fine: Optional[str] = None
    addressed_during_caag: Optional[str] = None
    #last_updated: Optional[str] = None
    full_name_overide: Optional[str] = None

    def __post_init__(self):
        """Strip and upper-case every field that currently holds a string."""
        # Snapshot the items first so attributes can be reassigned while looping.
        for attr_name, attr_value in list(vars(self).items()):
            if isinstance(attr_value, str):
                setattr(self, attr_name, attr_value.strip().upper())
78    
@dataclass
class PeopleList:
    """An ordered collection of ``Person`` records with e-mail based lookups.

    Lookups compare against ``work_email_address`` only.  The supplied address
    is stripped and upper-cased before comparing, so stored addresses are
    expected to be in that same form.
    """

    people: List["Person"] = field(default_factory=list)

    def add_person(self, person: "Person"):
        """Append ``person`` to the end of the list."""
        self.people.append(person)

    def search_by_email(self, emailAddress:str) -> Optional["Person"]:
        """Return the first person whose work e-mail equals ``emailAddress``.

        Returns ``None`` when nothing matches or when ``emailAddress`` is empty
        or ``None``.  Without that guard a ``None`` argument would "match" the
        first person that simply has no work e-mail recorded.
        """
        if not emailAddress:
            return None
        wantedAddress = emailAddress.strip().upper()
        if not wantedAddress:
            return None
        for person in self.people:
            if person.work_email_address == wantedAddress:
                return person
        return None

    def list_people(self):
        """Print every person in the list, one per line."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress:str, fullNameOverideValue) -> Optional["Person"]:
        """Set ``full_name_overide`` on every person whose work e-mail matches.

        The override value is stored stripped and upper-cased.  Returns the
        first person that was updated, or ``None`` when no one matched or
        ``emailAddress`` is empty.
        """
        if not emailAddress:
            return None
        # Normalise once instead of on every loop iteration.
        wantedAddress = emailAddress.strip().upper()
        if not wantedAddress:
            return None
        firstUpdated = None
        for person in self.people:
            if person.work_email_address == wantedAddress:
                person.full_name_overide = fullNameOverideValue.strip().upper()
                if firstUpdated is None:
                    firstUpdated = person
        return firstUpdated
102    
103    
class NamesVerification(object):
    """A class for automating the process of performing QC on the names within the Amazon privilege logs.

    On construction the Master Attorney List (MAL) is loaded into
    ``self.malPeopleList``: either from a pickle cache that sits next to the
    spreadsheet, or by reading the spreadsheet through Excel and then applying
    the full-name override file.
    """
    version = '0.5.0'

    # Person field name -> spreadsheet header text (compared after stripping
    # surrounding whitespace from the header cell).
    _MAL_FIELD_HEADERS = {
        "is_attorney": "Is Attorney",
        "split_role_date_range": "Split Role - Attorney Capacity Date Range",
        "sidley_validated": "Validated by OC?",
        "category": "Category",
        "organization": "Organization",
        "last_name": "Last Name",
        "first_name": "First Name",
        "work_email_address": "Work Email",
        "alt_work_email_address": "Alt Work Email",
        "job_title": "Job Title",
        "business_title": "Business Title",
        "full_name_preferred": "Full Name (Preferred)",
        "login": "Login",
        "department_fine": "Department (Fine)",
        "addressed_during_caag": "Addressed during CAAG",
    }

    def __init__(self, cleanedDatExportFileName, masterAttorneyListFileName,fullNameOveridesFileName, forceNewPklFile = False, Encoding = 'UTF8'):
        """Initializes the data structures.

        cleanedDatExportFileName should be the full path to the file.
        Assumes the first row of the data file is the header and first column is DocID.
        Assumes the MAL is a spreadsheet (for now).
        MAL gets saved to a pkl file for performance reasons. The pkl will be
        used unless forceNewPklFile is set to true or the pkl does not exist.

        NOTE: cleanedDatExportFileName and Encoding are accepted for interface
        compatibility but are not used by this constructor.
        """
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
            return

        if not forceNewPklFile:
            print("Pickle file doesnt exist.")
        self.__BuildMalFromSources(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)

    def __BuildMalFromSources(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Pseudo-private method which builds the MAL from the spreadsheet and override file, then writes the pkl cache."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        print("Loading full name overide values...")
        self.__LoadFullNameOverideValues(fullNameOveridesFileName)
        print("Full name overide values loaded.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")

    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass.

        Row 1 of each tab is read as the header row.  Header text is stripped
        of surrounding whitespace before matching, so a header such as
        " Validated by OC?" and "Validated by OC?" resolve to the same field.
        A missing expected header raises KeyError.
        """
        ## There doenst seem to be a consistent value in the "row" column in the MAL, so setting these parameters here to avoid gap issues.
        ## "omitFields" lists Person fields that are not read from that tab.
        excelTabParametersList = [
            {"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":10919, "beginColNumber":2, "endColNumber":17, "omitFields":()},
            {"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":572, "beginColNumber":2, "endColNumber":16, "omitFields":("category",)},
        ]

        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        try:
            for excelTab in excelTabParametersList:
                sht = xlBook.Worksheets(excelTab['tabName'])
                print(f"Ingesting sheet {excelTab['tabName']}.")

                # Map each (stripped) header in row 1 to its column number.
                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    headerValue = sht.Cells(1, col).Value
                    if isinstance(headerValue, str):
                        headerValue = headerValue.strip()
                    excelFieldPositionMatrix[headerValue] = col

                # Resolve the column of every field once per tab, not per row.
                fieldColumns = {
                    fieldName: excelFieldPositionMatrix[headerText]
                    for fieldName, headerText in self._MAL_FIELD_HEADERS.items()
                    if fieldName not in excelTab['omitFields']
                }

                for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1):
                    rowValues = {
                        fieldName: sht.Cells(row, col).Value
                        for fieldName, col in fieldColumns.items()
                    }
                    self.malPeopleList.add_person(Person(**rowValues))
        finally:
            # Always release the workbook, even when ingestion fails part-way.
            # NOTE(review): the Excel application itself is intentionally not
            # quit here; Dispatch may have attached to an instance the user
            # already has open -- confirm before adding xlApp.Quit().
            xlBook.Close()

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName, 'wb') as outputFile:
            pickle.dump(self.malPeopleList, outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons."""
        # SECURITY NOTE: pickle.load executes whatever the file tells it to.
        # Only load pkl files this program wrote itself.
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which will update the MAL people list object with the full name overide values.

        Each non-blank line must be "emailAddress|fullNameOverideValue".
        Blank lines are skipped; any other malformed line raises ValueError
        naming the offending line number.
        """
        with open(fullNameOveridesFileName) as overidesFile:
            for lineNumber, line in enumerate(overidesFile, start=1):
                line = line.replace("\n", "")
                if not line.strip():
                    continue
                parts = line.split("|")
                if len(parts) != 2:
                    raise ValueError(
                        f"Line {lineNumber} of {fullNameOveridesFileName} is not in "
                        f"'email|full name' format: {line!r}")
                emailAddress, fullNameOverideValue = parts
                self.malPeopleList.update_full_Name_overide(emailAddress.strip(), fullNameOverideValue.strip())

    def SmartDedupeSet(self, currentSet):
        """Returns a new set holding the lower-cased form of every value in currentSet, which collapses values that differ only by case."""
        return {val.lower() for val in currentSet}
247    
248    
if __name__ == '__main__':
    # Ad-hoc driver: load the MAL, then report which addresses from a
    # semicolon-delimited e-mail list match a downgrade or a possible
    # split-role entry.  Attorney matches are intentionally not printed.
    #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241203)\TEST-PLOG.txt"
    #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241203)\PLOG All IDs (20241203)_Converted_SubSetOnly.txt"
    cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\CAAG\CAAG_Log_Data_Export_Converted.txt"
    masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.12.1(7045413.15).xlsx"
    fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides - Copy.txt"
    #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides.txt"
    allPossibleEmailAddressesRegExPattern = r"[\w.+-]+@[\w-]+\.[\w.-]+"
    #outputFile = open(r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\CAAG\namesSearchResults.txt",'w')

    nv = NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)

    ## This is just some simple searching of email addresses
    emailAddressesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\2024-12-05_EmailAddressesOnly.txt"
    with open(emailAddressesFileName, encoding='UTF-8') as emailAddressesFile:
        # The first line is a header row; skip it.
        next(emailAddressesFile, None)
        for line in emailAddressesFile:
            for token in line.replace("\n", "").split(";"):
                emailAddress = token.strip()
                if not emailAddress:
                    # Trailing ';' or blank lines produce empty tokens.
                    continue
                personMatch = nv.malPeopleList.search_by_email(emailAddress.upper())
                if not personMatch:
                    continue
                if personMatch.is_attorney == 'YES':
                    # Attorney matches are expected; nothing to report.
                    #print(f"{emailAddress} is an attorney match!")
                    pass
                elif personMatch.is_attorney == 'NO':
                    print(f"{emailAddress} is an downgrade match!")
                else:
                    print(f"{emailAddress} is a possible Split Role match!")
280    
281    
282     ## This section was used for comparing the names in legal sources to any email addresses I could locate on that same line.
283     ## contents = open(cleanedDatExportFileName,encoding='UTF-8').readlines()
284     ## contents = contents[1:]
285     ## for line in contents:
286     ## attorneysInDocumentSet = set()
287     ## downgradesInSet = set()
288     ## line = line.replace("\n","")
289     ## #singleLine = contents[3]
290     ## resultSet = set()
291     ## results = re.findall(allPossibleEmailAddressesRegExPattern, line)
292     ## for result in results:
293     ## resultSet.add(result)
294     ## line = line.split("|")
295     ## legalSources = line[11].upper()
296     ## if legalSources:
297     ## legalSources =legalSources.split(";")
298     ## #print(f"\n\n{legalSources}")
299     ## for r in list(resultSet):
300     ## #print(r)
301     ## personMatch = nv.malPeopleList.search_by_email(r.upper())
302     ## if personMatch:
303     ## #print("entry exists")
304     ## if personMatch.full_name_overide:
305     ## if f"{personMatch.full_name_overide}*" in legalSources:
306     ## if personMatch.is_attorney == 'YES':
307     ## attorneysInDocumentSet.add(f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is an Attorney.")
308     ## #print (f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is an Attorney.")
309     ## else:
310     ## downgradesInSet.add(f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is a DOWNGRADE.")
311     ## #print (f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is a DOWNGRADE.")
312     ## if personMatch.full_name_preferred:
313     ## fullPreferredName = personMatch.full_name_preferred
314     ## fullPreferredName = fullPreferredName.replace('(LEGAL)','')
315     ## fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
316     ## fullPreferredName = fullPreferredName.replace('(SHE HER)','')
317     ## fullPreferredName = fullPreferredName.replace(',,',',')
318     ## #print(fullPreferredName)
319     ## preferedLastName, preferedFirstName = fullPreferredName.split(',')
320     ## preferedLastName = preferedLastName.strip()
321     ## preferedFirstName = preferedFirstName.strip()
322     ## preferedFirstName = preferedFirstName.split(" ")[0]
323     ## fullName = f"{preferedFirstName} {preferedLastName}"
324     ## #if f"{preferedLastName}, {preferedFirstName}*" in legalSources:
325     ## if f"{preferedFirstName} {preferedLastName}*" in legalSources:
326     ## if personMatch.is_attorney == 'YES':
327     ## #attorneysInDocumentSet.add(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is an Attorney.")
328     ## attorneysInDocumentSet.add(f"{preferedFirstName} {preferedLastName}* is a match on {r.upper()}. This is an Attorney.")
329     ## #print(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is an Attorney.")
330     ## else:
331     ## #downgradesInSet.add(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is a DOWNGRADE.")
332     ## downgradesInSet.add(f"{preferedFirstName} {preferedLastName}* is a match on {r.upper()}. This is a DOWNGRADE.")
333     ## #print(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is a DOWNGRADE.")
334     ## #if f"{personMatch.last_name}, {personMatch.first_name}*" in legalSources:
335     ## if f"{personMatch.first_name} {personMatch.last_name}*" in legalSources:
336     ## if personMatch.is_attorney == 'YES':
337     ## #attorneysInDocumentSet.add(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is an Attorney.")
338     ## attorneysInDocumentSet.add(f"{personMatch.first_name} {personMatch.last_name}* is a match on {r.upper()}. This is an Attorney.")
339     ## #print(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is an Attorney.")
340     ## else:
341     ## #downgradesInSet.add(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
342     ## downgradesInSet.add(f"{personMatch.first_name} {personMatch.last_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
343     ## #print(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
344     ## outputFile.write(f"{line[0]} has {len(attorneysInDocumentSet)} matching attorneys and {len(downgradesInSet)} matching downgrades. ")
345     ## if len(attorneysInDocumentSet) > 0:
346     ## outputFile.write("There is at least 1 matching attorney.\n")
347     ## else:
348     ## outputFile.write("There are NO matching attorneys!\n")
349     ## else:
350     ## pass
351     ## #print("\n\nEmpty legal sources field")