Active_prgs/Redgrave/Amazon_NameSearches.py

"""

Amazon_NameSearches

Created by:
Emanuel Borges
12.3.2024

This program tests searching the MAL (Master Attorney List).

"""


import os, uuid, pickle, re
import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
from dataclasses import dataclass, field
from typing import List, Optional
from collections import namedtuple
from win32com.client import Dispatch


@dataclass
class Person:
    """A single person record, populated from one row of the Master Attorney List (MAL).

    Every field is optional and defaults to ``None`` except ``_id``, which is a
    freshly generated UUID for each instance.  String values are normalised
    (stripped and upper-cased) at construction time so that later comparisons
    can be done with plain equality.
    """
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    work_email_address: Optional[str] = None
    alt_work_email_address: Optional[str] = None
    _id: uuid.UUID = field(default_factory=uuid.uuid4)
    is_attorney: Optional[str] = None
    split_role_date_range: Optional[str] = None
    sidley_validated: Optional[str] = None
    category: Optional[str] = None
    organization: Optional[str] = None
    job_title: Optional[str] = None
    business_title: Optional[str] = None
    full_name_preferred: Optional[str] = None
    login: Optional[str] = None
    department_fine: Optional[str] = None
    addressed_during_caag: Optional[str] = None
    #last_updated: Optional[str] = None
    full_name_overide: Optional[str] = None

    def __post_init__(self):
        """Strip surrounding whitespace from, and upper-case, every string field.

        Values that are not ``str`` (``None``, the ``_id`` UUID, or e.g. a
        numeric value handed over from a spreadsheet cell) are left untouched
        instead of raising ``AttributeError`` on ``.strip()``.
        """
        # Snapshot the items first so the instance dict is not mutated while
        # it is being iterated.
        for fieldName, fieldValue in list(vars(self).items()):
            if isinstance(fieldValue, str):
                setattr(self, fieldName, fieldValue.strip().upper())

@dataclass
class PeopleList:
    """An in-memory collection of ``Person`` records with lookups keyed on work email.

    Lookups compare against ``Person.work_email_address`` only; the alternate
    work email is not consulted.  Stored addresses are expected to be
    upper-cased (``Person`` does this on construction), so queries are
    normalised the same way before comparing.
    """
    people: List[Person] = field(default_factory=list)

    def add_person(self, person: Person):
        """Append ``person`` to the end of the collection."""
        self.people.append(person)

    def search_by_email(self, emailAddress:str) -> Optional[Person]:
        """Return the first person whose work email equals ``emailAddress``.

        The query is stripped and upper-cased before comparison, so callers no
        longer need to pre-normalise it (doing so remains harmless).  An empty
        or ``None`` query returns ``None`` rather than matching people whose
        work email is blank.
        """
        normalizedEmail = emailAddress.strip().upper() if emailAddress else ""
        if not normalizedEmail:
            return None
        for person in self.people:
            if person.work_email_address == normalizedEmail:
                return person
        return None

    def list_people(self):
        """Print every person in the collection, one per line."""
        for person in self.people:
            print(person)

    def update_full_Name_overide(self, emailAddress:str, fullNameOverideValue) -> Optional[Person]:
        """Set ``full_name_overide`` on every person whose work email matches.

        Both arguments are stripped and upper-cased before use.  All matching
        people are updated; the first one updated is returned, or ``None`` when
        nobody matched (or the email was empty).
        """
        normalizedEmail = emailAddress.strip().upper() if emailAddress else ""
        if not normalizedEmail:
            return None
        # Normalise once, outside the loop, instead of once per person.
        normalizedOveride = fullNameOverideValue.strip().upper()
        firstUpdated = None
        for person in self.people:
            if person.work_email_address == normalizedEmail:
                person.full_name_overide = normalizedOveride
                if firstUpdated is None:
                    firstUpdated = person
        return firstUpdated


class NamesVerification(object):
    """A class for automating the process of performing QC on the names within the Amazon privilege logs."""
    version = '0.5.0'


    def __init__(self, cleanedDatExportFileName, masterAttorneyListFileName,fullNameOveridesFileName, forceNewPklFile = False, Encoding = 'UTF8'):
        """Initializes the data structures, leaving the MAL in ``self.malPeopleList``.

        Assumes the first row of the data file is the header and first column is DocID.
        Assumes the MAL is a spreadsheet (for now).
        The MAL gets saved to a pkl file for performance reasons; the pkl is used
        unless forceNewPklFile is set to true or the pkl does not exist yet.

        Args:
            cleanedDatExportFileName: Full path to the cleaned DAT export.  Accepted
                as part of the interface; this constructor does not read it.
            masterAttorneyListFileName: Full path to the MAL Excel workbook.  The
                pickle cache uses the same path with a ".pkl" extension.
            fullNameOveridesFileName: Path to a text file of ``email|full name`` lines.
            forceNewPklFile: When true, rebuild from the workbook even if a pickle exists.
            Encoding: Accepted as part of the interface; currently unused.

        Note:
            Full name overide values are only applied when the MAL is rebuilt from
            the workbook.  When an existing pickle is loaded, whatever overides
            were pickled with it are used as-is.
        """
        pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"

        print("Initializing data structures...")
        if not forceNewPklFile and os.path.exists(pklFileName):
            print("Loading MAL structure from pickle file...")
            self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
            print("MAL structure loaded.")
            return

        if not forceNewPklFile:
            print("Pickle file doesnt exist.")
        self.__BuildMalAndPickle(masterAttorneyListFileName, fullNameOveridesFileName, pklFileName)


    def __BuildMalAndPickle(self, masterAttorneyListFileName, fullNameOveridesFileName, pklFileName):
        """Pseudo-private method which rebuilds the MAL from the workbook, applies the overides and writes the pickle cache."""
        print("Creating MAL structure...")
        self.malPeopleList = PeopleList()
        self.__IngestMALSpreadsheet(masterAttorneyListFileName)
        print("MAL structure created.")
        print("Loading full name overide values...")
        self.__LoadFullNameOverideValues(fullNameOveridesFileName)
        print("Full name overide values loaded.")
        print("Creating pickle backup...")
        self.__SaveMalToPkl(pklFileName)
        print("Pickle backup created.")


    def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
        """Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass.

        Each configured tab is read row by row; row 1 is treated as the header
        row and is used to locate each column by its header text.  A header
        that cannot be found raises ``KeyError`` before any row of that tab is
        ingested.  The workbook is always closed (without saving), even when
        ingestion fails.  The Excel application itself is not quit.
        """
        ## There doesn't seem to be a consistent value in the "row" column in the MAL, so setting these parameters here to avoid gap issues.

        # Person field name -> spreadsheet header text, for the columns both tabs share.
        commonFieldHeaders = {"is_attorney":"Is Attorney",
                              "split_role_date_range":"Split Role -  Attorney Capacity Date Range",
                              "organization":"Organization",
                              "last_name":"Last Name",
                              "first_name":"First Name",
                              "work_email_address":"Work Email",
                              "alt_work_email_address":"Alt Work Email",
                              "job_title":"Job Title",
                              "business_title":"Business Title",
                              "full_name_preferred":"Full Name (Preferred)",
                              "login":"Login",
                              "department_fine":"Department (Fine)",
                              "addressed_during_caag":"Addressed during CAAG"}

        # The tabs are not consistent: the Attorneys header for the validated
        # column has a leading space, and only Attorneys has a Category column.
        excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":10919, "beginColNumber":2, "endColNumber":17,
                                   "fieldHeaders":dict(commonFieldHeaders, sidley_validated=" Validated by OC?", category="Category")},
                                  {"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":572, "beginColNumber":2, "endColNumber":16,
                                   "fieldHeaders":dict(commonFieldHeaders, sidley_validated="Validated by OC?")}]

        xlApp = Dispatch('Excel.Application')
        xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
        try:
            for excelTab in excelTabParametersList:
                sht = xlBook.Worksheets(excelTab['tabName'])
                print(f"Ingesting sheet {excelTab['tabName']}.")

                # Header text -> column number, taken from row 1 of this tab.
                excelFieldPositionMatrix = {}
                for col in range(excelTab['beginColNumber'], excelTab['endColNumber'] + 1):
                    excelFieldPositionMatrix[sht.Cells(1, col).Value] = col

                # Resolve every needed column once per tab instead of once per row.
                fieldColumns = {fieldName: excelFieldPositionMatrix[headerText]
                                for fieldName, headerText in excelTab['fieldHeaders'].items()}

                for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] + 1):
                    cellValues = {fieldName: sht.Cells(row, col).Value
                                  for fieldName, col in fieldColumns.items()}
                    self.malPeopleList.add_person(Person(**cellValues))
        finally:
            # SaveChanges=False: the workbook is only read, so never prompt to save.
            xlBook.Close(False)

    def __SaveMalToPkl(self, pklFileName):
        """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons."""
        with open(pklFileName, 'wb') as outputFile:
            pickle.dump(self.malPeopleList, outputFile)

    def __LoadMalFromPkl(self, pklFileName):
        """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons.

        NOTE: unpickling can execute arbitrary code; only load pkl files that
        this program wrote itself.
        """
        with open(pklFileName, 'rb') as contents:
            return pickle.load(contents)

    def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
        """Pseudo-private method which will update the MAL people list object with the full name overide values.

        The file is expected to hold one ``email|full name`` pair per line.
        Blank lines are skipped.  Any other line that does not contain exactly
        one ``|`` raises ``ValueError`` naming the offending line number.
        """
        with open(fullNameOveridesFileName) as overidesFile:
            for lineNumber, line in enumerate(overidesFile, start=1):
                line = line.replace("\n","")
                if not line.strip():
                    continue
                parts = line.split("|")
                if len(parts) != 2:
                    raise ValueError(f"{fullNameOveridesFileName}, line {lineNumber}: expected 'email|full name' but found {line!r}")
                emailAddress, fullNameOverideValue = parts
                self.malPeopleList.update_full_Name_overide(emailAddress, fullNameOverideValue)

    def SmartDedupeSet(self, currentSet):
        """Attempts to do some additional deduplication of the values in a set by lowering all values and deduplicating.  Returns a new lowered deduplicated set; currentSet is not modified."""
        return {val.lower() for val in currentSet}


if __name__ == '__main__':
    # Ad-hoc driver: load the MAL, then report which addresses in a
    # semicolon-delimited email list match a downgrade or possible split-role entry.
    #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241203)\TEST-PLOG.txt"
    #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241203)\PLOG All IDs (20241203)_Converted_SubSetOnly.txt"
    cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\CAAG\CAAG_Log_Data_Export_Converted.txt"
    masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List  2024.12.1(7045413.15).xlsx"
    fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides - Copy.txt"
    #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides.txt"
    allPossibleEmailAddressesRegExPattern = r"[\w.+-]+@[\w-]+\.[\w.-]+"
    #outputFile = open(r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\CAAG\namesSearchResults.txt",'w')


    nv = NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)


## This is just some simple searching of email addresses
    emailAddressesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\2024-12-05_EmailAddressesOnly.txt"
    with open(emailAddressesFileName, encoding='UTF-8') as emailAddressesFile:
        # The first line is skipped, as before (it is not searched).
        next(emailAddressesFile, None)
        for line in emailAddressesFile:
            for i in line.replace("\n","").split(";"):
                i = i.strip()
                if not i:
                    # Trailing ';' or a blank line yields empty tokens; nothing to search for.
                    continue
                personMatch = nv.malPeopleList.search_by_email(i.upper())
                if not personMatch:
                    continue
                if personMatch.is_attorney == 'YES':
                    # Attorney matches are deliberately not reported.
                    pass
                    #print(f"{i} is an attorney match!")
                elif personMatch.is_attorney == 'NO':
                    print(f"{i} is an downgrade match!")
                else:
                    print(f"{i} is a possible Split Role match!")

## This section was used for comparing the names in legal sources to any email addresses I could locate on that same line.
##    contents = open(cleanedDatExportFileName,encoding='UTF-8').readlines()
##    contents = contents[1:]
##    for line in contents:
##        attorneysInDocumentSet = set()
##        downgradesInSet = set()
##        line = line.replace("\n","")
##        #singleLine = contents[3]
##        resultSet = set()
##        results = re.findall(allPossibleEmailAddressesRegExPattern, line)
##        for result in results:
##            resultSet.add(result)
##        line = line.split("|")
##        legalSources = line[11].upper()
##        if legalSources:
##            legalSources =legalSources.split(";")
##            #print(f"\n\n{legalSources}")
##            for r in list(resultSet):
##                #print(r)
##                personMatch = nv.malPeopleList.search_by_email(r.upper())
##                if personMatch:
##                    #print("entry exists")
##                    if personMatch.full_name_overide:
##                        if f"{personMatch.full_name_overide}*" in legalSources:
##                            if personMatch.is_attorney == 'YES':
##                                attorneysInDocumentSet.add(f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is an Attorney.")
##                                #print (f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is an Attorney.")
##                            else:
##                                downgradesInSet.add(f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is a DOWNGRADE.")
##                                #print (f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is a DOWNGRADE.")
##                    if personMatch.full_name_preferred:
##                        fullPreferredName = personMatch.full_name_preferred
##                        fullPreferredName = fullPreferredName.replace('(LEGAL)','')
##                        fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
##                        fullPreferredName = fullPreferredName.replace('(SHE HER)','')
##                        fullPreferredName = fullPreferredName.replace(',,',',')
##                        #print(fullPreferredName) 
##                        preferedLastName, preferedFirstName = fullPreferredName.split(',')
##                        preferedLastName = preferedLastName.strip()
##                        preferedFirstName = preferedFirstName.strip()
##                        preferedFirstName = preferedFirstName.split(" ")[0]
##                        fullName = f"{preferedFirstName} {preferedLastName}"
##                        #if f"{preferedLastName}, {preferedFirstName}*" in legalSources:
##                        if f"{preferedFirstName} {preferedLastName}*" in legalSources:
##                            if personMatch.is_attorney == 'YES':
##                                #attorneysInDocumentSet.add(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is an Attorney.")
##                                attorneysInDocumentSet.add(f"{preferedFirstName} {preferedLastName}* is a match on {r.upper()}. This is an Attorney.")
##                                #print(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is an Attorney.")
##                            else:
##                                #downgradesInSet.add(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is a DOWNGRADE.")
##                                downgradesInSet.add(f"{preferedFirstName} {preferedLastName}* is a match on {r.upper()}. This is a DOWNGRADE.")
##                                #print(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is a DOWNGRADE.")
##                    #if f"{personMatch.last_name}, {personMatch.first_name}*" in legalSources:
##                    if f"{personMatch.first_name} {personMatch.last_name}*" in legalSources:
##                        if personMatch.is_attorney == 'YES':
##                            #attorneysInDocumentSet.add(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is an Attorney.")
##                            attorneysInDocumentSet.add(f"{personMatch.first_name} {personMatch.last_name}* is a match on {r.upper()}. This is an Attorney.")
##                            #print(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is an Attorney.")
##                        else:
##                            #downgradesInSet.add(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
##                            downgradesInSet.add(f"{personMatch.first_name} {personMatch.last_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
##                            #print(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
##            outputFile.write(f"{line[0]} has {len(attorneysInDocumentSet)} matching attorneys and {len(downgradesInSet)} matching downgrades.  ")
##            if len(attorneysInDocumentSet) > 0:
##                outputFile.write("There is at least 1 matching attorney.\n")
##            else:
##                outputFile.write("There are NO matching attorneys!\n")
##        else:
##            pass
##            #print("\n\nEmpty legal sources field")
Revision:	842
Committed:	Fri Dec 6 21:47:35 2024 UTC (15 months, 2 weeks ago) by nino.borges
Content type:	text/x-python
File size:	22252 byte(s)
Log Message:	This program to test searching the MAL.
#	Content
1	"""
2
3	Amazon_NameSearches
4
5	Created by:
6	Emanuel Borges
7	12.3.2024
8
9	This program to test searching the MAL.
10
11	"""
12
13
14
15	import os, uuid, pickle, re
16	import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
17	from dataclasses import dataclass, field
18	from typing import List, Optional
19	from collections import namedtuple
20	from win32com.client import Dispatch
21
22
23	@dataclass
24	class Person:
25	first_name: Optional[str] = None
26	last_name: Optional[str] = None
27	work_email_address: Optional[str] = None
28	alt_work_email_address: Optional[str] = None
29	_id: uuid.UUID = field(default_factory=uuid.uuid4)
30	is_attorney: Optional[str] = None
31	split_role_date_range: Optional[str] = None
32	sidley_validated: Optional[str] = None
33	category: Optional[str] = None
34	organization: Optional[str] = None
35	job_title: Optional[str] = None
36	business_title: Optional[str] = None
37	full_name_preferred: Optional[str] = None
38	login: Optional[str] = None
39	department_fine: Optional[str] = None
40	addressed_during_caag: Optional[str] = None
41	#last_updated: Optional[str] = None
42	full_name_overide: Optional[str] = None
43
44	def __post_init__(self):
45	"""Convert all string fields to uppercase."""
46	if self.first_name:
47	self.first_name = self.first_name.strip().upper()
48	if self.last_name:
49	self.last_name = self.last_name.strip().upper()
50	if self.work_email_address:
51	self.work_email_address = self.work_email_address.strip().upper()
52	if self.alt_work_email_address:
53	self.alt_work_email_address = self.alt_work_email_address.strip().upper()
54	if self.is_attorney:
55	self.is_attorney = self.is_attorney.strip().upper()
56	if self.split_role_date_range:
57	self.split_role_date_range = self.split_role_date_range.strip().upper()
58	if self.sidley_validated:
59	self.sidley_validated = self.sidley_validated.strip().upper()
60	if self.category:
61	self.category = self.category.strip().upper()
62	if self.organization:
63	self.organization = self.organization.strip().upper()
64	if self.job_title:
65	self.job_title = self.job_title.strip().upper()
66	if self.business_title:
67	self.business_title = self.business_title.strip().upper()
68	if self.full_name_preferred:
69	self.full_name_preferred = self.full_name_preferred.strip().upper()
70	if self.login:
71	self.login = self.login.strip().upper()
72	if self.department_fine:
73	self.department_fine = self.department_fine.strip().upper()
74	if self.addressed_during_caag:
75	self.addressed_during_caag = self.addressed_during_caag.strip().upper()
76	#if self.last_updated:
77	# self.last_updated = self.last_updated.strip().upper()
78
79	@dataclass
80	class PeopleList:
81	people: List[Person] = field(default_factory=list)
82
83	def add_person(self, person: Person):
84	self.people.append(person)
85	#print(f"Added person: {person}")
86
87
88	def search_by_email(self, emailAddress:str) -> Optional[Person]:
89	for person in self.people:
90	if person.work_email_address == emailAddress:
91	return person
92	return None
93
94	def list_people(self):
95	for person in self.people:
96	print(person)
97
98	def update_full_Name_overide(self, emailAddress:str, fullNameOverideValue) -> Optional[Person]:
99	for person in self.people:
100	if person.work_email_address == emailAddress.upper():
101	person.full_name_overide = fullNameOverideValue.upper()
102
103
104	class NamesVerification(object):
105	"""A class for automating the process of performing QC on the names within the Amazon privilege logs."""
106	version = '0.5.0'
107
108
109	def __init__(self, cleanedDatExportFileName, masterAttorneyListFileName,fullNameOveridesFileName, forceNewPklFile = False, Encoding = 'UTF8'):
110	"""Initializes the data structures. cleanedDatExportFileName should be the full path to the file.
111	Assumes the first row of the data file is the header and first column is DocID.
112	Assumes the MAL is a spreadsheet (for now).
113	MAL gets saved to a pkl file for performance reasons. pkl will be used unless forceNewPklFile is set to true"""
114	pklFileName = os.path.splitext(masterAttorneyListFileName)[0] + ".pkl"
115
116	print("Initializing data structures...")
117	if forceNewPklFile:
118	print("Creating MAL structure...")
119	self.malPeopleList = PeopleList()
120	self.__IngestMALSpreadsheet(masterAttorneyListFileName)
121	print("MAL structure created.")
122	print("Loading full name overide values...")
123	self.__LoadFullNameOverideValues(fullNameOveridesFileName)
124	print("Full name overide values loaded.")
125	print("Creating pickle backup...")
126	self.__SaveMalToPkl(pklFileName)
127	print("Pickle backup created.")
128	else:
129	if os.path.exists(pklFileName):
130	print("Loading MAL structure from pickle file...")
131	self.malPeopleList = self.__LoadMalFromPkl(pklFileName)
132	print("MAL structure loaded.")
133	else:
134	print("Pickle file doesnt exist.")
135	print("Creating MAL structure...")
136	self.malPeopleList = PeopleList()
137	self.__IngestMALSpreadsheet(masterAttorneyListFileName)
138	print("MAL structure created.")
139	print("Loading full name overide values...")
140	self.__LoadFullNameOverideValues(fullNameOveridesFileName)
141	print("Full name overide values loaded.")
142	print("Creating pickle backup...")
143	self.__SaveMalToPkl(pklFileName)
144	print("Pickle backup created.")
145
146	## self.malPeopleList = PeopleList()
147	##
148	## print("Creating MAL structure...")
149	## self.__IngestMALSpreadsheet(masterAttorneyListFileName)
150	## print("MAL structure created.")
151	## print("Creating pickle backup...")
152
153
154
155
156
157	def __IngestMALSpreadsheet(self, masterAttorneyListFileName):
158	"""Pseudo-private method which will open an Excel spreadsheet and ingest the values into the peoplelist dataclass."""
159	## There doenst seem to be a consistent value in the "row" column in the MAL, so setting these parameters here to avoid gap issues.
160
161	excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":10919, "beginColNumber":2, "endColNumber":17},
162	{"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":572, "beginColNumber":2, "endColNumber":16}]
163
164	# excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":30, "beginColNumber":2, "endColNumber":16},
165	# {"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":30, "beginColNumber":2, "endColNumber":15}]
166
167	spreadsheetFileMappingMatrix = {"First Name":"first_name", "Last Name":"last_name", "Work Email":"work_email_address", "Alt Work Email":"alt_work_email_address", "Is Attorney": "is_attorney",
168	"Split Role - Attorney Capacity Date Range":"split_role_date_range", " Validated by OC??":"sidley_validated", "Category": "category", "Organization":"organization", "Job Title":"job_title",
169	"Business Title":"business_title", "Full Name (Preferred)":"full_name_preferred", "Login":"login", "Department (Fine)":"department_fine", "Addressed during CAAG":"addressed_during_caag",
170	"Last Updated":"last_updated"}
171
172	xlApp = Dispatch('Excel.Application')
173	xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
174
175	for excelTab in excelTabParametersList:
176	sht = xlBook.Worksheets(excelTab['tabName'])
177	print(f"Ingesting sheet {excelTab['tabName']}.")
178	excelFieldPositionMatrix = {}
179	for col in range (excelTab['beginColNumber'], excelTab['endColNumber'] +1):
180	excelFieldPositionMatrix[sht.Cells(1,col).Value] = col
181	for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] +1):
182	#print(row)
183	## TODO: Refactor the excelTabParametersList later. Didnt realize columns were not consistent.
184	if excelTab['tabName'] == 'Attorneys':
185	self.malPeopleList.add_person(Person(is_attorney = sht.Cells(row,excelFieldPositionMatrix['Is Attorney']).Value,
186	split_role_date_range = sht.Cells(row,excelFieldPositionMatrix['Split Role - Attorney Capacity Date Range']).Value,
187	sidley_validated = sht.Cells(row,excelFieldPositionMatrix[' Validated by OC?']).Value,
188	category = sht.Cells(row,excelFieldPositionMatrix['Category']).Value,
189	organization = sht.Cells(row,excelFieldPositionMatrix['Organization']).Value,
190	last_name = sht.Cells(row,excelFieldPositionMatrix['Last Name']).Value,
191	first_name = sht.Cells(row,excelFieldPositionMatrix['First Name']).Value,
192	work_email_address = sht.Cells(row,excelFieldPositionMatrix['Work Email']).Value,
193	alt_work_email_address = sht.Cells(row,excelFieldPositionMatrix['Alt Work Email']).Value,
194	job_title = sht.Cells(row,excelFieldPositionMatrix['Job Title']).Value,
195	business_title = sht.Cells(row,excelFieldPositionMatrix['Business Title']).Value,
196	full_name_preferred = sht.Cells(row,excelFieldPositionMatrix['Full Name (Preferred)']).Value,
197	login = sht.Cells(row,excelFieldPositionMatrix['Login']).Value,
198	department_fine = sht.Cells(row,excelFieldPositionMatrix['Department (Fine)']).Value,
199	addressed_during_caag = sht.Cells(row,excelFieldPositionMatrix['Addressed during CAAG']).Value))
200	#last_updated = sht.Cells(row,excelFieldPositionMatrix['Last Updated']).Value ))
201
202	else:
203	self.malPeopleList.add_person(Person(is_attorney = sht.Cells(row,excelFieldPositionMatrix['Is Attorney']).Value,
204	split_role_date_range = sht.Cells(row,excelFieldPositionMatrix['Split Role - Attorney Capacity Date Range']).Value,
205	sidley_validated = sht.Cells(row,excelFieldPositionMatrix['Validated by OC?']).Value,
206	organization = sht.Cells(row,excelFieldPositionMatrix['Organization']).Value,
207	last_name = sht.Cells(row,excelFieldPositionMatrix['Last Name']).Value,
208	first_name = sht.Cells(row,excelFieldPositionMatrix['First Name']).Value,
209	work_email_address = sht.Cells(row,excelFieldPositionMatrix['Work Email']).Value,
210	alt_work_email_address = sht.Cells(row,excelFieldPositionMatrix['Alt Work Email']).Value,
211	job_title = sht.Cells(row,excelFieldPositionMatrix['Job Title']).Value,
212	business_title = sht.Cells(row,excelFieldPositionMatrix['Business Title']).Value,
213	full_name_preferred = sht.Cells(row,excelFieldPositionMatrix['Full Name (Preferred)']).Value,
214	login = sht.Cells(row,excelFieldPositionMatrix['Login']).Value,
215	department_fine = sht.Cells(row,excelFieldPositionMatrix['Department (Fine)']).Value,
216	addressed_during_caag = sht.Cells(row,excelFieldPositionMatrix['Addressed during CAAG']).Value))
217
218	xlBook.Close()
219
def __SaveMalToPkl(self, pklFileName):
    """Pseudo-private method which will save the current MAL people list object to a pkl file, for performance reasons.

    Args:
        pklFileName: Path of the pickle file to create (or overwrite).
    """
    # The context manager guarantees the handle is closed even when
    # pickle.dump raises (e.g. an unpicklable attribute), which the previous
    # open()/close() pair did not.
    with open(pklFileName, 'wb') as outputFile:
        pickle.dump(self.malPeopleList, outputFile)
225
def __LoadMalFromPkl(self, pklFileName):
    """Pseudo-private method which will load a MAL people list object from a pkl file, for performance reasons.

    Args:
        pklFileName: Path of the pickle file to read.

    Returns:
        The object that was unpickled from the file.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file, so this must only ever be pointed at pkl files this program wrote.
    # The context manager closes the handle even if unpickling fails.
    with open(pklFileName, 'rb') as contents:
        return pickle.load(contents)
232
def __LoadFullNameOverideValues(self, fullNameOveridesFileName):
    """Pseudo-private method which will update the MAL people list object with the full name overide values.

    Every non-empty line of the file must contain exactly two fields, an
    email address followed by the full name overide value, separated by a
    backslash immediately followed by a pipe character. Each pair is passed
    to ``self.malPeopleList.update_full_Name_overide``.

    Args:
        fullNameOveridesFileName: Path of the text file holding the overides.

    Raises:
        ValueError: If a non-empty line does not split into exactly two fields.
    """
    # Raw string with the same runtime value as the previous "\|" literal,
    # which relied on an invalid escape sequence (a SyntaxWarning on recent
    # Python versions).
    # NOTE(review): confirm the data file really uses backslash+pipe rather
    # than a bare pipe as its separator.
    delimiter = r"\|"

    # The context manager closes the file; the previous open(...).readlines()
    # never released the handle.
    with open(fullNameOveridesFileName) as overidesFile:
        for line in overidesFile:
            line = line.replace("\n", "")
            if not line:
                # A blank line (typically a trailing one) carries no pair and
                # would otherwise fail the two-value unpacking below.
                continue
            emailAddress, fullNameOverideValue = line.split(delimiter)
            self.malPeopleList.update_full_Name_overide(emailAddress, fullNameOverideValue)
240
def SmartDedupeSet(self, currentSet):
    """Collapse values that differ only by letter case.

    Every value in ``currentSet`` is lower-cased and collected into a new
    set, so entries such as "ABC" and "abc" end up as one element. The input
    collection itself is not modified.

    Args:
        currentSet: Iterable of strings to deduplicate.

    Returns:
        A new set holding the lower-cased values.
    """
    return {entry.lower() for entry in currentSet}
247
248
if __name__ == '__main__':
    # Ad-hoc driver: builds a NamesVerification object from the hard-coded
    # input files below, then looks up every email address listed in a
    # semicolon-delimited text file against the MAL people list and prints
    # the non-attorney hits.
    #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241203)\TEST-PLOG.txt"
    #cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241202 - FTC-CID\PLOG All IDs (20241203)\PLOG All IDs (20241203)_Converted_SubSetOnly.txt"
    cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\CAAG\CAAG_Log_Data_Export_Converted.txt"
    masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.12.1(7045413.15).xlsx"
    fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides - Copy.txt"
    #fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\FullNameOverides.txt"
    # Only referenced by the commented-out legal-sources comparison further down.
    allPossibleEmailAddressesRegExPattern = r"[\w.+-]+@[\w-]+\.[\w.-]+"
    #outputFile = open(r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Data Exports\CAAG\namesSearchResults.txt",'w')

    nv = NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)

    ## This is just some simple searching of email addresses
    emailAddressesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\2024-12-05_EmailAddressesOnly.txt"
    # Read inside a context manager so the handle is released; the previous
    # open(...).readlines() never closed the file.
    with open(emailAddressesFileName, encoding='UTF-8') as emailAddressesFile:
        contents = emailAddressesFile.readlines()

    # The first line is skipped (presumably a header row).
    contents = contents[1:]
    for line in contents:
        line = line.replace("\n", "")
        for emailAddress in line.split(";"):
            # The lookup is done on the upper-cased address.
            personMatch = nv.malPeopleList.search_by_email(emailAddress.upper())
            if not personMatch:
                continue
            if personMatch.is_attorney == 'YES':
                # Attorney matches are intentionally not reported.
                #print(f"{emailAddress} is an attorney match!")
                continue
            if personMatch.is_attorney == 'NO':
                print(f"{emailAddress} is an downgrade match!")
            else:
                # Anything other than YES/NO is reported as a possible split role.
                print(f"{emailAddress} is a possible Split Role match!")
280
281
282	## This section was used for comparing the names in legal sources to any email addresses I could locate on that same line.
283	## contents = open(cleanedDatExportFileName,encoding='UTF-8').readlines()
284	## contents = contents[1:]
285	## for line in contents:
286	## attorneysInDocumentSet = set()
287	## downgradesInSet = set()
288	## line = line.replace("\n","")
289	## #singleLine = contents[3]
290	## resultSet = set()
291	## results = re.findall(allPossibleEmailAddressesRegExPattern, line)
292	## for result in results:
293	## resultSet.add(result)
294	## line = line.split("\|")
295	## legalSources = line[11].upper()
296	## if legalSources:
297	## legalSources =legalSources.split(";")
298	## #print(f"\n\n{legalSources}")
299	## for r in list(resultSet):
300	## #print(r)
301	## personMatch = nv.malPeopleList.search_by_email(r.upper())
302	## if personMatch:
303	## #print("entry exists")
304	## if personMatch.full_name_overide:
305	## if f"{personMatch.full_name_overide}*" in legalSources:
306	## if personMatch.is_attorney == 'YES':
307	## attorneysInDocumentSet.add(f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is an Attorney.")
308	## #print (f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is an Attorney.")
309	## else:
310	## downgradesInSet.add(f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is a DOWNGRADE.")
311	## #print (f"{personMatch.full_name_overide}* is a match on {r.upper()}. This is a DOWNGRADE.")
312	## if personMatch.full_name_preferred:
313	## fullPreferredName = personMatch.full_name_preferred
314	## fullPreferredName = fullPreferredName.replace('(LEGAL)','')
315	## fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
316	## fullPreferredName = fullPreferredName.replace('(SHE HER)','')
317	## fullPreferredName = fullPreferredName.replace(',,',',')
318	## #print(fullPreferredName)
319	## preferedLastName, preferedFirstName = fullPreferredName.split(',')
320	## preferedLastName = preferedLastName.strip()
321	## preferedFirstName = preferedFirstName.strip()
322	## preferedFirstName = preferedFirstName.split(" ")[0]
323	## fullName = f"{preferedFirstName} {preferedLastName}"
324	## #if f"{preferedLastName}, {preferedFirstName}*" in legalSources:
325	## if f"{preferedFirstName} {preferedLastName}*" in legalSources:
326	## if personMatch.is_attorney == 'YES':
327	## #attorneysInDocumentSet.add(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is an Attorney.")
328	## attorneysInDocumentSet.add(f"{preferedFirstName} {preferedLastName}* is a match on {r.upper()}. This is an Attorney.")
329	## #print(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is an Attorney.")
330	## else:
331	## #downgradesInSet.add(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is a DOWNGRADE.")
332	## downgradesInSet.add(f"{preferedFirstName} {preferedLastName}* is a match on {r.upper()}. This is a DOWNGRADE.")
333	## #print(f"{preferedLastName}, {preferedFirstName}* is a match on {r.upper()}. This is a DOWNGRADE.")
334	## #if f"{personMatch.last_name}, {personMatch.first_name}*" in legalSources:
335	## if f"{personMatch.first_name} {personMatch.last_name}*" in legalSources:
336	## if personMatch.is_attorney == 'YES':
337	## #attorneysInDocumentSet.add(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is an Attorney.")
338	## attorneysInDocumentSet.add(f"{personMatch.first_name} {personMatch.last_name}* is a match on {r.upper()}. This is an Attorney.")
339	## #print(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is an Attorney.")
340	## else:
341	## #downgradesInSet.add(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
342	## downgradesInSet.add(f"{personMatch.first_name} {personMatch.last_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
343	## #print(f"{personMatch.last_name}, {personMatch.first_name}* is a match on {r.upper()}. This is a DOWNGRADE.")
344	## outputFile.write(f"{line[0]} has {len(attorneysInDocumentSet)} matching attorneys and {len(downgradesInSet)} matching downgrades. ")
345	## if len(attorneysInDocumentSet) > 0:
346	## outputFile.write("There is at least 1 matching attorney.\n")
347	## else:
348	## outputFile.write("There are NO matching attorneys!\n")
349	## else:
350	## pass
351	## #print("\n\nEmpty legal sources field")