ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/Amazon_NameSearches.py
Revision: 842
Committed: Fri Dec 6 21:47:35 2024 UTC (15 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 22252 byte(s)
Log Message:
This program to test searching the MAL.

File Contents

# Content
1 """
2
3 Amazon_NameSearches
4
5 Created by:
6 Emanuel Borges
7 12.3.2024
8
9 This program tests searching the MAL (Master Attorney List).
10
11 """
12
13
14
15 import os, uuid, pickle, re
16 import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
17 from dataclasses import dataclass, field
18 from typing import List, Optional
19 from collections import namedtuple
20 from win32com.client import Dispatch
21
22
@dataclass
class Person:
    """One entry of the Master Attorney List (MAL).

    Every attribute defaults to None except ``_id``, which receives a fresh
    ``uuid.uuid4()`` per instance.  After construction, the attributes named
    in ``_UPPERCASED_FIELDS`` are whitespace-stripped and upper-cased so that
    later comparisons can be done on a single canonical form.
    ``full_name_overide`` is not normalised here.
    """
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    work_email_address: Optional[str] = None
    alt_work_email_address: Optional[str] = None
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    sidley_validated: Optional[str] = None
    category: Optional[str] = None
    organization: Optional[str] = None
    job_title: Optional[str] = None
    business_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    login: Optional[str] = None
    department_fine: Optional[str] = None
    addressed_during_caag: Optional[str] = None
    #last_updated: Optional[str] = None
    full_name_overide: Optional[str] = None

    # Names of the attributes normalised by __post_init__.  This is a plain
    # (un-annotated) class attribute, so the dataclass machinery does not
    # treat it as a field.
    _UPPERCASED_FIELDS = (
        "first_name",
        "last_name",
        "work_email_address",
        "alt_work_email_address",
        "is_attorney",
        "split_role_date_range",
        "sidley_validated",
        "category",
        "organization",
        "job_title",
        "business_title",
        "full_name_preferred",
        "login",
        "department_fine",
        "addressed_during_caag",
    )

    def __post_init__(self):
        """Strip and upper-case every populated string field listed in _UPPERCASED_FIELDS.

        None and empty strings are left untouched.  Non-string values are
        also left untouched instead of raising AttributeError on ``.strip()``.
        """
        for fieldName in self._UPPERCASED_FIELDS:
            value = getattr(self, fieldName)
            if value and isinstance(value, str):
                setattr(self, fieldName, value.strip().upper())
78
@dataclass
class PeopleList:
    """An ordered collection of Person records with lookups keyed on work email."""
    people: List[Person] = field(default_factory=list)

    def add_person(self, person: Person):
        """Append person to the end of the list."""
        self.people.append(person)

    def search_by_email(self, emailAddress:str) -> Optional[Person]:
        """Return the first person whose work_email_address equals emailAddress, else None.

        The query is stripped and upper-cased before comparing, which is the
        same normalisation Person applies to its stored addresses, so the
        caller does not have to pre-normalise.  An empty or None query never
        matches.
        """
        if not emailAddress:
            return None
        target = emailAddress.strip().upper()
        for person in self.people:
            if person.work_email_address == target:
                return person
        return None

    def list_people(self):
        """Print every person in the list, one per line."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress:str, fullNameOverideValue) -> Optional[Person]:
        """Set full_name_overide (upper-cased) on every person whose work email matches.

        The email is stripped and upper-cased once before the scan.  All
        matching entries are updated; the first one updated is returned, or
        None when nobody matched.
        """
        target = emailAddress.strip().upper()
        overideValue = fullNameOverideValue.upper()
        firstMatch = None
        for person in self.people:
            if person.work_email_address == target:
                person.full_name_overide = overideValue
                if firstMatch is None:
                    firstMatch = person
        return firstMatch
102
103
class NamesVerification(object):
    """A class for automating the process of performing QC on the names within the Amazon privilege logs."""
    version = '0.5.0'

    def __init__(self, cleanedDatExportFileName, masterAttorneyListFileName,fullNameOveridesFileName, forceNewPklFile = False, Encoding = 'UTF8'):
        """Populate self.malPeopleList from a pickle cache or from the MAL spreadsheet.

        The cache path is masterAttorneyListFileName with its extension
        replaced by ".pkl".  When forceNewPklFile is false and that file
        exists, the people list is loaded from it.  Otherwise the list is
        rebuilt from the spreadsheet, the full name overide values are
        applied, and the cache is (re)written.

        cleanedDatExportFileName and Encoding are accepted but not used by
        this constructor.
        """
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
            return

        if not forceNewPklFile:
            print("Pickle file doesnt exist.")
        self.__BuildMalFromSources(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)

    def __BuildMalFromSources(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Pseudo-private method which rebuilds the people list from the spreadsheet, applies the overides and writes the pickle cache."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        print("Loading full name overide values...")
        self.__LoadFullNameOverideValues(fullNameOveridesFileName)
        print("Full name overide values loaded.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")

    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass.

        Row 1 of each tab is read as the header row to find each column's
        position; one Person is then added per row in the configured range.
        Raises KeyError when a tab lacks one of the expected headers.
        """
        # Headers shared by both tabs, mapped to the Person field they fill.
        commonHeaderToField = {
            'Is Attorney': 'is_attorney',
            'Split Role - Attorney Capacity Date Range': 'split_role_date_range',
            'Organization': 'organization',
            'Last Name': 'last_name',
            'First Name': 'first_name',
            'Work Email': 'work_email_address',
            'Alt Work Email': 'alt_work_email_address',
            'Job Title': 'job_title',
            'Business Title': 'business_title',
            'Full Name (Preferred)': 'full_name_preferred',
            'Login': 'login',
            'Department (Fine)': 'department_fine',
            'Addressed during CAAG': 'addressed_during_caag',
        }

        ## There doesnt seem to be a consistent value in the "row" column in the MAL, so the ranges are set here to avoid gap issues.
        ## The two tabs are not consistent: "Attorneys" has a Category column and its validation header starts with a space.
        ## For a quick test run, lower the endRowNumber values.
        excelTabParametersList = [
            {"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":10919, "beginColNumber":2, "endColNumber":17,
             "headerToField": {**commonHeaderToField, ' Validated by OC?': 'sidley_validated', 'Category': 'category'}},
            {"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":572, "beginColNumber":2, "endColNumber":16,
             "headerToField": {**commonHeaderToField, 'Validated by OC?': 'sidley_validated'}},
        ]

        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        # NOTE(review): the Excel application itself is never quit here - confirm whether that is intended.
        try:
            for excelTab in excelTabParametersList:
                sht = xlBook.Worksheets(excelTab['tabName'])
                print(f"Ingesting sheet {excelTab['tabName']}.")

                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    excelFieldPositionMatrix[sht.Cells(1, col).Value] = col

                # Fail before reading any row, naming the headers that are absent.
                missingHeaders = [header for header in excelTab['headerToField'] if header not in excelFieldPositionMatrix]
                if missingHeaders:
                    raise KeyError(f"Sheet {excelTab['tabName']!r} is missing expected header(s): {missingHeaders}")

                # Resolve header -> column once per tab rather than once per cell.
                fieldColumns = {fieldName: excelFieldPositionMatrix[header]
                                for header, fieldName in excelTab['headerToField'].items()}

                for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1):
                    rowValues = {fieldName: sht.Cells(row, col).Value for fieldName, col in fieldColumns.items()}
                    self.malPeopleList.add_person(Person(**rowValues))
        finally:
            # Close the workbook even when ingestion fails part-way through.
            xlBook.Close()

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName, 'wb') as outputFile:
            pickle.dump(self.malPeopleList, outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons."""
        # NOTE(review): pickle.load can execute arbitrary code; only load pkl files this program wrote itself.
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which will update the MAL people list object with the full name overide values.

        Each non-blank line must be "emailAddress|fullNameOverideValue".
        Blank lines are skipped; any other shape raises ValueError naming the
        offending line.
        """
        with open(fullNameOveridesFileName) as overidesFile:
            for lineNumber, line in enumerate(overidesFile, start=1):
                line = line.replace("\n","")
                if not line.strip():
                    continue
                parts = line.split("|")
                if len(parts) != 2:
                    raise ValueError(f"{fullNameOveridesFileName}, line {lineNumber}: expected 'email|full name', got {line!r}")
                emailAddress, fullNameOverideValue = parts
                # Stored addresses are stripped, so a padded address could never match.
                self.malPeopleList.update_full_Name_overide(emailAddress.strip(), fullNameOverideValue)

    def SmartDedupeSet(self, currentSet):
        """Return a new set holding the lower-cased form of every value in currentSet, so values differing only by case collapse into one."""
        return {val.lower() for val in currentSet}
247
248
if __name__ == '__main__':
    #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241203)\TEST-PLOG.txt"
    #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241203)\PLOG All IDs (20241203)_Converted_SubSetOnly.txt"
    cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\CAAG\CAAG_Log_Data_Export_Converted.txt"
    masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.12.1(7045413.15).xlsx"
    fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides - Copy.txt"
    #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides.txt"
    allPossibleEmailAddressesRegExPattern = r"[\w.+-]+@[\w-]+\.[\w.-]+"
    #outputFile = open(r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\CAAG\namesSearchResults.txt",'w')

    nv = NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)

    ## This is just some simple searching of email addresses.
    ## Each line after the first holds one or more addresses separated by ";".
    emailAddressesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\2024-12-05_EmailAddressesOnly.txt"
    with open(emailAddressesFileName, encoding='UTF-8') as emailAddressesFile:
        contents = emailAddressesFile.readlines()
    ## The first line is skipped (presumably a header row).
    contents = contents[1:]
    for line in contents:
        for i in line.replace("\n","").split(";"):
            # Stored addresses are stripped, so padding around a ";" would otherwise prevent any match.
            i = i.strip()
            if not i:
                continue
            personMatch = nv.malPeopleList.search_by_email(i.upper())
            if not personMatch:
                continue
            if personMatch.is_attorney == 'YES':
                # Attorney matches are expected and deliberately not reported.
                #print(f"{i} is an attorney match!")
                continue
            if personMatch.is_attorney == 'NO':
                print(f"{i} is an downgrade match!")
            else:
                print(f"{i} is a possible Split Role match!")
280
281
282 ## This section was used for comparing the names in legal sources to any email addresses I could locate on that same line.
283 ## contents = open(cleanedDatExportFileName,encoding='UTF-8').readlines()
284 ## contents = contents[1:]
285 ## for line in contents:
286 ## attorneysInDocumentSet = set()
287 ## downgradesInSet = set()
288 ## line = line.replace("\n","")
289 ## #singleLine = contents[3]
290 ## resultSet = set()
291 ## results = re.findall(allPossibleEmailAddressesRegExPattern, line)
292 ## for result in results:
293 ## resultSet.add(result)
294 ## line = line.split("|")
295 ## legalSources = line[11].upper()
296 ## if legalSources:
297 ## legalSources =legalSources.split(";")
298 ## #print(f"\n\n{legalSources}")
299 ## for r in list(resultSet):
300 ## #print(r)
301 ## personMatch = nv.malPeopleList.search_by_email(r.upper())
302 ## if personMatch:
303 ## #print("entry exists")
304 ## if personMatch.full_name_overide:
305 ## if f"{personMatch.full_name_overide}*" in legalSources:
306 ## if personMatch.is_attorney == 'YES':
307 ## attorneysInDocumentSet.add(f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is an Attorney.")
308 ## #print (f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is an Attorney.")
309 ## else:
310 ## downgradesInSet.add(f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is a DOWNGRADE.")
311 ## #print (f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is a DOWNGRADE.")
312 ## if personMatch.full_name_preferred:
313 ## fullPreferredName = personMatch.full_name_preferred
314 ## fullPreferredName = fullPreferredName.replace('(LEGAL)','')
315 ## fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
316 ## fullPreferredName = fullPreferredName.replace('(SHE HER)','')
317 ## fullPreferredName = fullPreferredName.replace(',,',',')
318 ## #print(fullPreferredName)
319 ## preferedLastName, preferedFirstName = fullPreferredName.split(',')
320 ## preferedLastName = preferedLastName.strip()
321 ## preferedFirstName = preferedFirstName.strip()
322 ## preferedFirstName = preferedFirstName.split(" ")[0]
323 ## fullName = f"{preferedFirstName} {preferedLastName}"
324 ## #if f"{preferedLastName}, {preferedFirstName}*" in legalSources:
325 ## if f"{preferedFirstName} {preferedLastName}*" in legalSources:
326 ## if personMatch.is_attorney == 'YES':
327 ## #attorneysInDocumentSet.add(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is an Attorney.")
328 ## attorneysInDocumentSet.add(f"{preferedFirstName} {preferedLastName}* is a match on {r.upper()}. This is an Attorney.")
329 ## #print(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is an Attorney.")
330 ## else:
331 ## #downgradesInSet.add(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is a DOWNGRADE.")
332 ## downgradesInSet.add(f"{preferedFirstName} {preferedLastName}* is a match on {r.upper()}. This is a DOWNGRADE.")
333 ## #print(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is a DOWNGRADE.")
334 ## #if f"{personMatch.last_name}, {personMatch.first_name}*" in legalSources:
335 ## if f"{personMatch.first_name} {personMatch.last_name}*" in legalSources:
336 ## if personMatch.is_attorney == 'YES':
337 ## #attorneysInDocumentSet.add(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is an Attorney.")
338 ## attorneysInDocumentSet.add(f"{personMatch.first_name} {personMatch.last_name}* is a match on {r.upper()}. This is an Attorney.")
339 ## #print(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is an Attorney.")
340 ## else:
341 ## #downgradesInSet.add(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
342 ## downgradesInSet.add(f"{personMatch.first_name} {personMatch.last_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
343 ## #print(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
344 ## outputFile.write(f"{line[0]} has {len(attorneysInDocumentSet)} matching attorneys and {len(downgradesInSet)} matching downgrades. ")
345 ## if len(attorneysInDocumentSet) > 0:
346 ## outputFile.write("There is at least 1 matching attorney.\n")
347 ## else:
348 ## outputFile.write("There are NO matching attorneys!\n")
349 ## else:
350 ## pass
351 ## #print("\n\nEmpty legal sources field")