Active_prgs/Redgrave/Amazon_PerformDeepNamesNormQC.py

"""

Amazon_PerformDeepNamesNormQC

Created by:
Emanuel Borges
12.11.2024

This program is similar to Amazon_PerformNamesNormQC, but it performs a deeper level of names norm QC. I may just replace Amazon_PerformNamesNormQC with this file, but for now I'd
like to keep both.

"""

import os, re, datetime, calendar
from uuid import UUID
import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
import MyCode.Active_prgs.Redgrave.Amazon_NamesNormQC

version = '0.5.0'

issuesMatrix = {}

def GatherAllPossibleVariations(personMatch):
    """Takes a personMatch, which is the results of a person match, and attempts to make all possible name match variations that may exist in the formatted field.

    The following attributes are read from personMatch: work_email_address, alt_work_email_address,
    full_name_overide, full_name_preferred, first_name and last_name.

    Candidate names are gathered in this order:
      1. full_name_overide, used exactly as stored.
      2. full_name_preferred, stored as "LAST, FIRST ..." and rearranged to "FIRST LAST" (only the first
         word of the first-name part is kept).  A preferred name without a comma is reported with print().
      3. "first_name last_name", or just last_name when there is no first name.
    Each candidate name is paired with every distinct email domain (the text after the last "@" of the
    work and alt work email addresses).  If the person has no email address, no pairs are produced.

    returns deduplicated list of tuple pairs (fullname, parenthetical), in first-seen order."""
    ##  Collect the distinct email domains, preserving order (dict.fromkeys deduplicates).
    allDomainsList = []
    for emailAddress in (personMatch.work_email_address, personMatch.alt_work_email_address):
        if emailAddress:
            allDomainsList.append(f"{emailAddress.split('@')[-1]}")
    allDomainsList = list(dict.fromkeys(allDomainsList))

    ##  Collect every candidate full name, in priority order.
    allNamesList = []
    if personMatch.full_name_overide:
        allNamesList.append(personMatch.full_name_overide)
    if personMatch.full_name_preferred:
        ##  Going to need to do a bit of replacing to remove some information that is just never in the formatted.
        fullPreferredName = personMatch.full_name_preferred
        for neverInFormatted in ('(LEGAL)', '(SHE, HER)', '(SHE HER)'):
            fullPreferredName = fullPreferredName.replace(neverInFormatted, '')
        if "," in fullPreferredName:
            ##  Split on the FIRST comma only, so a name with extra commas ("DOE, JOHN, JR.") no longer
            ##  raises ValueError when unpacking.
            preferedLastName, preferedFirstName = fullPreferredName.split(',', 1)
            preferedLastName = preferedLastName.strip()
            ##  Keep only the first word of the first name; rstrip drops a comma left over from a suffix.
            preferedFirstName = preferedFirstName.strip().split(" ")[0].rstrip(',')
            allNamesList.append(f"{preferedFirstName} {preferedLastName}")
        else:
            print(f"ERROR in this name {fullPreferredName}")
    if personMatch.last_name:
        if personMatch.first_name:
            allNamesList.append(f"{personMatch.first_name} {personMatch.last_name}")
        else:
            allNamesList.append(f"{personMatch.last_name}")

    ##  Pair every name with every domain, then return a deduplicated list by using dict to deduplicate.
    return list(dict.fromkeys((fullName, domain) for fullName in allNamesList for domain in allDomainsList))


def AddToIssuesList(docID,issueMessage):
    """This function will add a single issue to the issues matrix.

    docID: key of the document the issue belongs to.
    issueMessage: text of the issue; appended to that document's list in call order.

    The module-level issuesMatrix dict is updated in place; nothing is returned."""
    ##  setdefault creates the per-document list the first time a docID is seen.  This is a single
    ##  hashed lookup instead of copying every key into a list and scanning it on each call.
    issuesMatrix.setdefault(docID, []).append(issueMessage)
    

def CleanMetadataValueToName(val, emailAddress):
    """Reduce a raw metadata value to just the (uppercased) name text.

    val: one raw metadata value, e.g. 'Doe, John (jdoe@example.com)'.
    emailAddress: the email address that was extracted from val.

    When the value carries a "(LEGAL)" marker, only the email address and the then-empty "()" are
    removed so the marker survives.  Otherwise every parenthetical, including its contents, is removed.

    returns the uppercased, stripped remainder (may be an empty string)."""
    val = val.upper()
    if "(LEGAL)" in val:
        ##  Attempt to only remove the email parenthetical, including the now empty paren.
        val = val.replace(emailAddress.upper(), "")
        val = val.replace("()", '')
    else:
        ##  Remove all parentheticals, including any character in that paren, from value.
        val = re.sub(r"\([^)]*\)", "", val)
    return val.strip()


def SplitMetadataName(val):
    """Split cleaned name text into (firstName, lastName).

    "LAST, FIRST" is split on the first comma and both parts are stripped.  Without a comma the text
    is split on the first space: everything before it is the first name, everything after it is the
    last name.

    returns a (firstName, lastName) tuple, or None when val has neither a comma nor a space."""
    if "," in val:
        ##  maxsplit=1 so a value with more than one comma can still be unpacked.
        lastName, firstName = val.split(",", 1)
        return firstName.strip(), lastName.strip()
    if " " in val:
        firstName, lastName = val.split(" ", 1)
        return firstName, lastName
    return None


def ParseCounselDate(dateText, isEndDate=False):
    """Convert one boundary of a dates-as-counsel range to a datetime.date.

    dateText: 'M/D/YYYY', or 'M/YYYY' when the day is missing.  For an end date the words
              'CURRENT' and 'PRESENT' are also accepted and mean today.
    isEndDate: a missing day becomes the last day of that month for an end date, and day 1 otherwise.

    raises ValueError when dateText is in none of the accepted forms."""
    if isEndDate and dateText in ("CURRENT", "PRESENT"):
        return datetime.date.today()
    if dateText.count("/") < 2:
        monthText, yearText = dateText.split("/")
        if isEndDate:
            ##  monthrange returns (weekday of the 1st, number of days in the month).
            missingDayValue = calendar.monthrange(int(yearText), int(monthText))[1]
        else:
            missingDayValue = 1
        dateText = f"{monthText}/{missingDayValue}/{yearText}"
    return datetime.datetime.strptime(dateText, '%m/%d/%Y').date()


if __name__ == '__main__':
    cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Testing_3\PrivLogExport_20241204_VEAS_Converted.txt"
    masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List  2024.12.12(20241212-1151).xlsx"
    fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\FullNameOverides.txt"
    outputFileName = r"C:\Test_Dir\Amazon\NameNormDeepOutputText.txt"


    nv = MyCode.Active_prgs.Redgrave.Amazon_NamesNormQC.NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)

    qcP = MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC.QcPrivLog(cleanedDatExportFileName)

    print(f"\nThere are {len(qcP.formattedValuesDict)} documents in the formatted values dictionary.")
    print(f"There are {len(qcP.metadataValuesDict)} documents in the metadata values dictionary.")

    workList = qcP.metadataValuesDict.keys()
    for docID in workList:
        metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['toValues']
        formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['toValues']
        ##  remember to convert all values in formattedFieldValues to uppercase (perhaps eventually do some of the formatted cleaning that eli mentioned.
        ##  This is a fresh list, so matched values can be removed from it without touching qcP's data.
        formattedFieldValues = [xVal.upper() for xVal in formattedFieldValues]
        ##  This will change once you start iterating across all of the field values names
        currentMetadataValues = metadataFieldValues
        for val in currentMetadataValues:
            ##  First try to locate an email address in this val and if found, try to find that in the MAL.
            results = re.findall(qcP.allPossibleEmailAddressesRegExPattern, val)
            if results:
                ##  Use some smart deduplication to remove duplicates.
                results = nv.SmartDedupeSet(results)
                for result in results:
                    ##  Try to find a match in the MAL by email. There shouldnt be rows with duplicative email addresses.
                    ##  TODO:DONE: Update search_by_email to search both workemail and alt email.
                    personMatch = nv.malPeopleList.search_by_email(result.upper())
                    if personMatch:
                        ##  Person match found in MAL.  Now try to match a value in the formatted field by pulling various values from the MAL.
                        ##  For each of these match attempts, try using the correct designation and incorrect designation (* vs no *) and note that.
                        allPossibleVariationsList = GatherAllPossibleVariations(personMatch)
                        matchFlag = False
                        for variationPair in allPossibleVariationsList:
                            ##  The two ways this variation can appear in the formatted field: with and without the attorney *.
                            starredValue = f"{variationPair[0]}* ({variationPair[1]})"
                            plainValue = f"{variationPair[0]} ({variationPair[1]})"
                            if personMatch.is_attorney == 'YES':
                                if starredValue in formattedFieldValues:
                                    ##  This variation was found in the list of formatted values, which is fine, so just remove it.
                                    if matchFlag:
                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                    formattedFieldValues.remove(starredValue)
                                    matchFlag = True
                                elif plainValue in formattedFieldValues:
                                    ##  This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
                                    if matchFlag:
                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                    formattedFieldValues.remove(plainValue)
                                    matchFlag = True
                                    ##  TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
                                    AddToIssuesList(docID,f"{plainValue} in To Field is High Confidence Potential Upgrade")

                            elif personMatch.is_attorney == 'NO':
                                if plainValue in formattedFieldValues:
                                    ##  This variation was found in the list of formatted values, which is fine, so just remove it.
                                    if matchFlag:
                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                    formattedFieldValues.remove(plainValue)
                                    matchFlag = True
                                elif starredValue in formattedFieldValues:
                                    ##  This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
                                    if matchFlag:
                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                    formattedFieldValues.remove(starredValue)
                                    matchFlag = True
                                    ##  TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
                                    AddToIssuesList(docID,f"{starredValue} in To Field is High Confidence Potential downgrade")
                            else:
                                ##  This means they are a split role, so additional work will need to be done with the dates.
                                ##  First, determine if this document date is between the dates where this person was an attorney.
                                ##  TODO: wasAttorneyAtThatTime is worked out and printed but not yet used to match or flag anything.
                                wasAttorneyAtThatTime = False
                                documentDateValue = qcP.additionalValuesDict[docID]._asdict()['dateValue']
                                documentDateValue = datetime.datetime.strptime(documentDateValue,'%m/%d/%Y').date()
                                print(f"\ndocumentDateValue is {documentDateValue}")
                                for wasAttorneyStartDate,wasAttorneyEndDate in personMatch.dates_as_counsel:
                                    print(f"Attorney {wasAttorneyStartDate}-{wasAttorneyEndDate}")
                                    counselStartDate = ParseCounselDate(wasAttorneyStartDate)
                                    counselEndDate = ParseCounselDate(wasAttorneyEndDate, isEndDate=True)
                                    print(f"{counselStartDate} - {documentDateValue} - {counselEndDate}")
                                    if counselStartDate <= documentDateValue <= counselEndDate:
                                        wasAttorneyAtThatTime = True
                                        print("Person WAS attorney at this doc date.")
                                    else:
                                        print("Person WAS NOT attorney at this doc date.")

                    else:
                        ##  Person match, using email, not found in MAL.  Try extracting a name from this metadata value and try matching the MAL using that.
                        ##  Deal with the extra parentheticals that keep coming up, handling (LEGAL) differently.
                        ##  val is deliberately overwritten: later emails from this same value see the already cleaned text.
                        val = CleanMetadataValueToName(val, result)
                        ##  with the email address and the paren stripped out of the val, only move forward if anything still exists.
                        if not val:
                            continue
                        nameParts = SplitMetadataName(val)
                        if nameParts is None:
                            ##  A single word cannot be split into first/last name.  Skip it instead of
                            ##  searching with names that are unset or left over from an earlier value.
                            continue
                        firstName, lastName = nameParts
                        ##  With the name now parsed, try searching for all values that match on the last name.
                        personMatchList = nv.malPeopleList.return_list_of_matching_values('last_name',lastName)
                        if personMatchList:
                            ##  Keep every MAL row that also matches on first name, keyed by _id so a repeated row counts once.
                            ##  The row itself is stored so the person used below is the one that matched,
                            ##  not simply the last row in personMatchList.
                            possiblePeopleMatchesMatrix = {}
                            for candidateMatch in personMatchList:
                                if candidateMatch.first_name == firstName:
                                    possiblePeopleMatchesMatrix[candidateMatch._id] = candidateMatch
                            ##  If the list of possible matches is just 1, we are okay doing a simple match attempt.
                            ##  TODO: if more than 1, we need to test for conflicting designations in the list of possible matches.
                            if len(possiblePeopleMatchesMatrix) == 1:
                                personMatch = next(iter(possiblePeopleMatchesMatrix.values()))
                                allPossibleVariationsList = GatherAllPossibleVariations(personMatch)
                                matchFlag = False
                                for variationPair in allPossibleVariationsList:
                                    starredValue = f"{variationPair[0]}* ({variationPair[1]})"
                                    plainValue = f"{variationPair[0]} ({variationPair[1]})"
                                    if personMatch.is_attorney == 'YES':
                                        if starredValue in formattedFieldValues:
                                            ##  This variation was found in the list of formatted values, which is fine, so just remove it.
                                            if matchFlag:
                                                print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE name VALUE??")
                                            formattedFieldValues.remove(starredValue)
                                            matchFlag = True
                                        elif plainValue in formattedFieldValues:
                                            ##  This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
                                            if matchFlag:
                                                print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE name VALUE??")
                                            formattedFieldValues.remove(plainValue)
                                            matchFlag = True
                                            ##  TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
                                            AddToIssuesList(docID,f"{plainValue} in To Field is Low Confidence Potential Upgrade")
                                        else:
                                            ##  This variation failed to match a value in the formatted field HOWEVER, this is an attorney so flag it as low confidence.
                                            ##  NOTE(review): this fires once per non-matching variation, even when another
                                            ##  variation of the same person did match - confirm that is intended.
                                            AddToIssuesList(docID,f"{plainValue} in To Field did not directly match value in formatted however is a Low Confidence Potential Attorney")

                                    else:
                                        ##  TODO: will need to split this out to include split role soon.
                                        if plainValue in formattedFieldValues:
                                            ##  This variation was found in the list of formatted values, which is fine, so just remove it.
                                            if matchFlag:
                                                print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                            formattedFieldValues.remove(plainValue)
                                            matchFlag = True
                                        elif starredValue in formattedFieldValues:
                                            ##  This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
                                            if matchFlag:
                                                print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                            formattedFieldValues.remove(starredValue)
                                            matchFlag = True
                                            ##  TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
                                            AddToIssuesList(docID,f"{starredValue} in To Field is Low Confidence Potential downgrade")

                        else:
                            ##  TODO: Need to ask Eli whether values that find no MAL match by first and last name need to be flagged.
                            #AddToIssuesList(docID,f"first name: {firstName} - last name: {lastName} is an email in metadata that I couldnt match in MAL")
                            pass

            else:
                ##  No email address could be extracted from this val, so report the value itself.
                AddToIssuesList(docID,f"{val} is a value in metadata that I couldnt extract an email address from")


        ##  Since you iterated over the metadata values but didnt iterate over the formatted values, check for any remaining formatted values that exist in the list
        for val in formattedFieldValues:
            ##  TODO: Confirm with Eli but we should only report these remaining values if they have a *
            ##  From Eli: the Highest risk is the * values because these are the potential overdesignations so yes but in a perfect world we would check both.
            if "*" in val:
                ##  TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
                AddToIssuesList(docID,f"{val} in To Field is an attorney but couldnt be matched to any metadata value.")


    ##  Now just unpack and write the issues, per DocID, to the output file separated by semicolon.
    ##  The with block closes the file even if a write fails part way through.
    with open(outputFileName,'w') as outputFile:
        for docID, docIssues in issuesMatrix.items():
            outputFile.write(f"{docID}|{';'.join(docIssues)}\n")
Revision:	858
Committed:	Fri Dec 13 17:52:35 2024 UTC (15 months, 1 week ago) by nino.borges
Content type:	text/x-python
File size:	21276 byte(s)
Log Message:	I'm adding some code to work out if a split role attorney was an attorney at the time of the doc date. I'm just getting it to print out here, so this is not yet complete.
#	User	Rev	Content
1	nino.borges	855	"""
2
3			Amazon_PerformDeepNamesNormQC
4
5			Created by:
6			Emanuel Borges
7			12.11.2024
8
9			This program is similar to Amazon_PerformNamesNormQC but it will perform a deeper level of names norm QC. I may just replace Amazon_PerformNamesNormQC with this file but for now i'd
10			like to keep both.
11
12			"""
13
14	nino.borges	858	import os, re, datetime, calendar
15	nino.borges	855	from uuid import UUID
16			import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
17			import MyCode.Active_prgs.Redgrave.Amazon_NamesNormQC
18
19	nino.borges	858	version = '0.5.0'
20	nino.borges	855
21			issuesMatrix = {}
22
23			def GatherAllPossibleVariations(personMatch):
24			"""Takes a personMatch, which is the results of a person match, and attempts to make all possible name match variations that may exist in the formatted field.
25			returns deduplicated list of tuple pairs (fullname, parenthetical)"""
26			## Start as a plain list of all possible tuple pairs.
27			allPossibleVariationsList = []
28
29			allDomainsList = []
30			if personMatch.work_email_address:
31			allDomainsList.append(f"{personMatch.work_email_address.split('@')[-1]}")
32			if personMatch.alt_work_email_address:
33			allDomainsList.append(f"{personMatch.alt_work_email_address.split('@')[-1]}")
34			allDomainsList = list(dict.fromkeys(allDomainsList))
35
36			if personMatch.full_name_overide:
37			fullName = personMatch.full_name_overide
38			for domain in allDomainsList:
39			allPossibleVariationsList.append((fullName,domain))
40			if personMatch.full_name_preferred:
41			## Going to need to do a bit of replacing to remove some information that is just never in the formatted.
42			fullPreferredName = personMatch.full_name_preferred
43			fullPreferredName = fullPreferredName.replace('(LEGAL)','')
44			fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
45			fullPreferredName = fullPreferredName.replace('(SHE HER)','')
46			if "," in fullPreferredName:
47			preferedLastName, preferedFirstName = fullPreferredName.split(',')
48			preferedLastName = preferedLastName.strip()
49			preferedFirstName = preferedFirstName.strip()
50			preferedFirstName = preferedFirstName.split(" ")[0]
51			fullName = f"{preferedFirstName} {preferedLastName}"
52			#fullName = f"{preferedLastName}, {preferedFirstName}"
53			for domain in allDomainsList:
54			allPossibleVariationsList.append((fullName,domain))
55			else:
56			print(f"ERROR in this name {fullPreferredName}")
57			if personMatch.last_name:
58			if personMatch.first_name:
59			fullName = f"{personMatch.first_name} {personMatch.last_name}"
60			#fullName = f"{personMatch.last_name}, {personMatch.first_name}"
61			else:
62			fullName = f"{personMatch.last_name}"
63			for domain in allDomainsList:
64			allPossibleVariationsList.append((fullName,domain))
65
66
67			## Now return a deduplicated list by using dict to deduplicate.
68			return list(dict.fromkeys(allPossibleVariationsList))
69
70
71	nino.borges	856	def AddToIssuesList(docID,issueMessage):
72			"""This function will add a single issue to the issues matrix."""
73			if docID in list(issuesMatrix.keys()):
74			issuesMatrix[docID].append(issueMessage)
75			else:
76			issuesMatrix[docID] = [issueMessage,]
77	nino.borges	855
78
79
80			if __name__ == '__main__':
81			cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Testing_3\PrivLogExport_20241204_VEAS_Converted.txt"
82			masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.12.12(20241212-1151).xlsx"
83			fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\FullNameOverides.txt"
84			outputFileName = r"C:\Test_Dir\Amazon\NameNormDeepOutputText.txt"
85
86
87			nv = MyCode.Active_prgs.Redgrave.Amazon_NamesNormQC.NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)
88
89			qcP = MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC.QcPrivLog(cleanedDatExportFileName)
90
91			#issuesMatrix = {}
92
93			print(f"\nThere are {len(qcP.formattedValuesDict)} documents in the formatted values dictionary.")
94			print(f"There are {len(qcP.metadataValuesDict)} documents in the metadata values dictionary.")
95
96			workList = qcP.metadataValuesDict.keys()
97			for docID in workList:
98			metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['toValues']
99			formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['toValues']
100			## remember to convert all values in formattedFieldValues to uppercase (perhaps eventually do some of the formatted cleaning that eli mentioned.
101			formattedFieldValues = [xVal.upper() for xVal in formattedFieldValues]
102			## This will change once you start itterating acroll all of the field values names
103			currentMetadataValues = metadataFieldValues
104			for val in currentMetadataValues:
105			## First try to locate an email address in this val and if found, try to find that in the MAL.
106			results = re.findall(qcP.allPossibleEmailAddressesRegExPattern, val)
107			if results:
108			## Use some smart deduplication to remove duplicates.
109			results = nv.SmartDedupeSet(results)
110			for result in results:
111			## Try to find a match in the MAL by email. There shouldnt rows with duplicative email addresses.
112			## TODO:DONE: Update search_by_email to search both workemail and alt email.
113
114			personMatch = nv.malPeopleList.search_by_email(result.upper())
115			if personMatch:
116			## Person match found in MAL. Now try to match a value in the formatted field by pulling various values from the MAL.
117			## For each of these match attempts, try using the correct designation and incorrect designation (* vs no *) and note that.
118			allPossibleVariationsList = GatherAllPossibleVariations(personMatch)
119			matchFlag = False
120			for variationPair in allPossibleVariationsList:
121			if personMatch.is_attorney == 'YES':
122			if f"{variationPair[0]}* ({variationPair[1]})" in formattedFieldValues:
123			## This variation was found in the list of formatted values, which is fine, so just remove it.
124			if matchFlag:
125			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
126			formattedFieldValues.remove(f"{variationPair[0]}* ({variationPair[1]})")
127			matchFlag = True
128			elif f"{variationPair[0]} ({variationPair[1]})" in formattedFieldValues:
129			## This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
130			if matchFlag:
131			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
132			formattedFieldValues.remove(f"{variationPair[0]} ({variationPair[1]})")
133			matchFlag = True
134	nino.borges	856	## TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
135			AddToIssuesList(docID,f"{variationPair[0]} ({variationPair[1]}) in To Field is High Confidence Potential Upgrade")
136	nino.borges	857
137	nino.borges	858	elif personMatch.is_attorney == 'NO':
138	nino.borges	855	if f"{variationPair[0]} ({variationPair[1]})" in formattedFieldValues:
139			if matchFlag:
140			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
141			## This variation was found in the list of formatted values, which is fine, so just remove it.
142			formattedFieldValues.remove(f"{variationPair[0]} ({variationPair[1]})")
143			matchFlag = True
144			elif f"{variationPair[0]}* ({variationPair[1]})" in formattedFieldValues:
145			## This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
146			if matchFlag:
147			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
148			formattedFieldValues.remove(f"{variationPair[0]}* ({variationPair[1]})")
149			matchFlag = True
150	nino.borges	856	## TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
151			AddToIssuesList(docID,f"{variationPair[0]}* ({variationPair[1]}) in To Field is High Confidence Potential downgrade")
152	nino.borges	858	else:
153			## This means they are a split role, so additional work will need to be done with the dates.
154			## First, determin if this document date is between the dates where this person was an attorney
155			wasAttorneyAtThatTime = False
156			documentDateValue = qcP.additionalValuesDict[docID]._asdict()['dateValue']
157			documentDateValue = datetime.datetime.strptime(documentDateValue,'%m/%d/%Y').date()
158			print(f"\ndocumentDateValue is {documentDateValue}")
159			personWasAttorneyDates = personMatch.dates_as_counsel
160			for wasAttorneyStartDate,wasAttorneyEndDate in personWasAttorneyDates:
161			print(f"Attorney {wasAttorneyStartDate}-{wasAttorneyEndDate}")
162			if wasAttorneyStartDate.count("/") < 2:
163			wasAttorneyStartDate = wasAttorneyStartDate.replace("/","/1/")
164			wasAttorneyStartDate = datetime.datetime.strptime(wasAttorneyStartDate,'%m/%d/%Y').date()
165
166			if wasAttorneyEndDate == "CURRENT":
167			wasAttorneyEndDate = datetime.datetime.today().strftime('%m/%d/%Y')
168			elif wasAttorneyEndDate == "PRESENT":
169			wasAttorneyEndDate = datetime.datetime.today().strftime('%m/%d/%Y')
170			if wasAttorneyEndDate.count("/") < 2:
171			missingDayValue = calendar.monthrange(int(wasAttorneyEndDate.split("/")[1]),int(wasAttorneyEndDate.split("/")[0]))[1]
172			wasAttorneyEndDate = wasAttorneyEndDate.replace("/",f"/{missingDayValue}/")
173			wasAttorneyEndDate = datetime.datetime.strptime(wasAttorneyEndDate,'%m/%d/%Y').date()
174			print(f"{wasAttorneyStartDate} - {documentDateValue} - {wasAttorneyEndDate}")
175			if wasAttorneyStartDate <= documentDateValue <= wasAttorneyEndDate:
176			print("Person WAS attorney at this doc date.")
177			else:
178			print("Person WAS NOT attorney at this doc date.")
179	nino.borges	855
180	nino.borges	858
181	nino.borges	855	else:
182			## Person match, using email, not found in MAL. Try extracting a name from this metadata value and try matching the MAL using that.
183			val = val.upper()
184			## First lets try dealing with the extra parentheticals that keep coming up but make sure to handle (LEGAL) differently.
185			if "(LEGAL)" in val:
186			## Attempt to only remove the email parenthetical, including the now empty paren.
187			val = val.replace(result.upper(),"")
188			val = val.replace("()",'')
189			#val = val.replace(")","")
190			else:
191			## Remove all parenthicals, including any character in that paren, from value.
192			val = re.sub(r"\([^)]*\)","",val)
193			val = val.strip()
194			## with the email address and the paren stripped out of the val, only move forward if anything still exists.
195			if val:
196			## if there is a comma, parse to last name, first name
197			if "," in val:
198			lastName, firstName = val.split(",")
199			lastName = lastName.strip()
200			firstName = firstName.strip()
201			elif " " in val:
202			## For now, try just splitting by the first space and take everything after as the first name.
203			firstName, lastName = val.split(" ",1)
204			## With the name now parse, try searching for all values that match on the last name.
205
206			personMatchList = nv.malPeopleList.return_list_of_matching_values('last_name',lastName)
207			if personMatchList:
208			possiblePeopleMatchesMatrix = {}
209			## For each personMatch in the list, now attempt to also see if the first name matches and, if so, put that into the possible people list
210			for personMatch in personMatchList:
211			if personMatch.first_name == firstName:
212			## This is a personMatch that matches the first and last name
213			possiblePeopleMatchesMatrix[personMatch._id] = 1
214			if possiblePeopleMatchesMatrix.keys():
215			## If the list of possible matches is just 1, we are okay doing a simple match attempt. if more than 1, we need to test for conflicting designations in the list of possible matches.
216			if len(list(possiblePeopleMatchesMatrix.keys())) < 2:
217			allPossibleVariationsList = GatherAllPossibleVariations(personMatch)
218			matchFlag = False
219			for variationPair in allPossibleVariationsList:
220			if personMatch.is_attorney == 'YES':
221			if f"{variationPair[0]}* ({variationPair[1]})" in formattedFieldValues:
222			## This variation was found in the list of formatted values, which is fine, so just remove it.
223			if matchFlag:
224			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE name VALUE??")
225			formattedFieldValues.remove(f"{variationPair[0]}* ({variationPair[1]})")
226			matchFlag = True
227			elif f"{variationPair[0]} ({variationPair[1]})" in formattedFieldValues:
228			## This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
229			if matchFlag:
230			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE name VALUE??")
231			formattedFieldValues.remove(f"{variationPair[0]} ({variationPair[1]})")
232			matchFlag = True
233	nino.borges	856	## TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
234			AddToIssuesList(docID,f"{variationPair[0]} ({variationPair[1]}) in To Field is Low Confidence Potential Upgrade")
235	nino.borges	858	else:
236			## This means it failed to match a value at all in the formatted field HOWEVER, this is an attorney so we should flag this as a low confidence flag.
237			AddToIssuesList(docID,f"{variationPair[0]} ({variationPair[1]}) in To Field did not directly match value in formatted however is a Low Confidence Potential Attorney")
238	nino.borges	857
239	nino.borges	855	else:
240			## TODO: will need to split this out to include split role soon.
241			if f"{variationPair[0]} ({variationPair[1]})" in formattedFieldValues:
242			if matchFlag:
243			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
244			## This variation was found in the list of formatted values, which is fine, so just remove it.
245			formattedFieldValues.remove(f"{variationPair[0]} ({variationPair[1]})")
246			matchFlag = True
247			elif f"{variationPair[0]}* ({variationPair[1]})" in formattedFieldValues:
248			## This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
249			if matchFlag:
250			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
251			formattedFieldValues.remove(f"{variationPair[0]}* ({variationPair[1]})")
252			matchFlag = True
253	nino.borges	856	## TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
254			AddToIssuesList(docID,f"{variationPair[0]}* ({variationPair[1]}) in To Field is Low Confidence Potential downgrade")
255	nino.borges	855
256
257			else:
258			## TODO: Need to ask Eli if I dont a match by checking first and last name for a match if it's needed to flag these.
259	nino.borges	857	#AddToIssuesList(docID,f"first name: {firstName} - last name: {lastName} is an email in metadata that I couldnt match in MAL")
260	nino.borges	855	pass
261	nino.borges	857
262	nino.borges	855	else:
263			## No email address could be extracted from this val. Try extracting a name from this metadata value and try matching the MAL using that.
264	nino.borges	856	AddToIssuesList(docID,f"{val} is a value in metadata that I couldnt extract an email address from")
265	nino.borges	857
266	nino.borges	855
267			## Since you itterated over the metadata values but didnt itterate over the formatted values, check for any remaining formatted values that exist in the list
268			if formattedFieldValues:
269			for val in formattedFieldValues:
270			## TODO: Confirm with Eli but we should only report these remaining values if they have a *
271			## From Eliu: the Highest risk is the * values because these are the potential overdesignations so yes but in a perfect world we would check both.
272			if "*" in val:
273	nino.borges	856	## TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
274			AddToIssuesList(docID,f"{val} in To Field is an attorney but couldnt be matched to any metadata value.")
275	nino.borges	855
276
277	nino.borges	857
278	nino.borges	855	## Now just unpack and write the issues, per DocID, to the output file separated by semicolon.
279			outputFile = open(outputFileName,'w')
280			for docID in list(issuesMatrix.keys()):
281			outputFile.write(f"{docID}\|{';'.join(issuesMatrix[docID])}\n")
282			outputFile.close()