Active_prgs/Redgrave/Amazon_PerformDeepNamesNormQC.py

"""

Amazon_PerformDeepNamesNormQC

Created by:
Emanuel Borges
12.11.2024

This program is similar to Amazon_PerformNamesNormQC, but it performs a deeper level of names norm QC. I may eventually replace Amazon_PerformNamesNormQC with this file, but for now I'd
like to keep both.

"""

import os, re, datetime, calendar
from uuid import UUID
import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
import MyCode.Active_prgs.Redgrave.Amazon_NamesNormQC

##  Version string for this script.
version = '0.6.0'

##  Module-level store of QC findings: maps a docID to the list of issue message strings recorded for it.
##  Populated through AddToIssuesList and written to the output file at the end of the run.
issuesMatrix = {}

def GatherAllPossibleVariations(personMatch):
    """Build every (fullname, parenthetical) pair that may appear in the formatted field for a person.

    personMatch is the result of a person match. The attributes read here are
    work_email_address, alt_work_email_address, full_name_overide,
    full_name_preferred, first_name and last_name; any of them may be empty.

    The parenthetical is the domain portion (text after the last '@') of each
    email address.  Candidate names are, in order: the full name override, the
    preferred name rearranged from "LAST, FIRST ..." to "FIRST LAST", and the
    plain "first_name last_name" (or just last_name when there is no first name).

    Returns a deduplicated list of tuple pairs (fullname, parenthetical), in first-seen order.
    If the person has no email address the list is empty, because there is no parenthetical to pair with."""
    ##  Collect the distinct email domains, work address first, preserving order.
    allDomainsList = []
    for emailAddress in (personMatch.work_email_address, personMatch.alt_work_email_address):
        if emailAddress:
            allDomainsList.append(f"{emailAddress.split('@')[-1]}")
    allDomainsList = list(dict.fromkeys(allDomainsList))

    ##  Collect the candidate full names in priority order.
    candidateNamesList = []

    if personMatch.full_name_overide:
        candidateNamesList.append(personMatch.full_name_overide)

    if personMatch.full_name_preferred:
        ##  Going to need to do a bit of replacing to remove some information that is just never in the formatted.
        fullPreferredName = personMatch.full_name_preferred
        for neverInFormatted in ('(LEGAL)', '(SHE, HER)', '(SHE HER)'):
            fullPreferredName = fullPreferredName.replace(neverInFormatted, '')
        if "," in fullPreferredName:
            ##  Only the first two comma separated pieces are used (last name, then first name).
            ##  Anything after a second comma (for example a suffix) is ignored instead of raising
            ##  a ValueError on unpacking, which is what a bare two-name unpack of split(',') did.
            preferedLastName, preferedFirstName = fullPreferredName.split(',')[:2]
            preferedLastName = preferedLastName.strip()
            ##  Keep only the first token of the first name portion (drops middle names and initials).
            preferedFirstName = preferedFirstName.strip().split(" ")[0]
            candidateNamesList.append(f"{preferedFirstName} {preferedLastName}")
        else:
            print(f"ERROR in this name {fullPreferredName}")

    if personMatch.last_name:
        if personMatch.first_name:
            candidateNamesList.append(f"{personMatch.first_name} {personMatch.last_name}")
        else:
            candidateNamesList.append(f"{personMatch.last_name}")

    ##  Pair every candidate name with every domain, name-major so the original ordering is kept.
    allPossibleVariationsList = [(fullName, domain) for fullName in candidateNamesList for domain in allDomainsList]

    ##  Now return a deduplicated list by using dict to deduplicate.
    return list(dict.fromkeys(allPossibleVariationsList))


def AddToIssuesList(docID,issueMessage):
    """Add a single issue message for docID to the module-level issues matrix.

    The first issue recorded for a docID creates its list; later issues are
    appended to that list in the order they are reported."""
    ##  setdefault does the lookup-or-create in a single dict operation, instead of
    ##  building a throwaway list of every key and scanning it on each call.
    issuesMatrix.setdefault(docID, []).append(issueMessage)
    

if __name__ == '__main__':
    ##  Script entry point.  For every document, compare the names found in the metadata "to" values
    ##  against the formatted "to" values, using the Master Attorney List (MAL) to decide whether each
    ##  formatted name carries the right attorney designation (a trailing * marks an attorney).
    ##  Findings are collected through AddToIssuesList and written to outputFileName at the end.
    cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Testing_3\PrivLogExport_20241204_VEAS_Converted.txt"
    masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List  2024.12.12(20241212-1151).xlsx"
    fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\FullNameOverides.txt"
    outputFileName = r"C:\Test_Dir\Amazon\NameNormDeepOutputText.txt"


    nv = MyCode.Active_prgs.Redgrave.Amazon_NamesNormQC.NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)

    qcP = MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC.QcPrivLog(cleanedDatExportFileName)

    print(f"\nThere are {len(qcP.formattedValuesDict)} documents in the formatted values dictionary.")
    print(f"There are {len(qcP.metadataValuesDict)} documents in the metadata values dictionary.")

    workList = qcP.metadataValuesDict.keys()
    for docID in workList:
        metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['toValues']
        formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['toValues']
        ##  Remember to convert all values in formattedFieldValues to uppercase (perhaps eventually do some of the formatted cleaning that Eli mentioned).
        ##  This also makes a fresh list, so the removals below do not touch the list held by qcP.
        formattedFieldValues = [xVal.upper() for xVal in formattedFieldValues]
        ##  This will change once you start iterating across all of the field value names.
        currentMetadataValues = metadataFieldValues
        for val in currentMetadataValues:
            ##  First try to locate an email address in this val and if found, try to find that in the MAL.
            results = re.findall(qcP.allPossibleEmailAddressesRegExPattern, val)
            if results:
                ##  Use some smart deduplication to remove duplicates.
                results = nv.SmartDedupeSet(results)
                for result in results:
                    ##  Try to find a match in the MAL by email.  There shouldn't be rows with duplicative email addresses.
                    personMatch = nv.malPeopleList.search_by_email(result.upper())
                    if personMatch:
                        ##  Person match found in MAL.  Now try to match a value in the formatted field by pulling various values from the MAL.
                        ##  For each of these match attempts, try using the correct designation and incorrect designation (* vs no *) and note that.
                        allPossibleVariationsList = GatherAllPossibleVariations(personMatch)
                        matchFlag = False
                        for variationName, variationDomain in allPossibleVariationsList:
                            ##  The two spellings this variation can take in the formatted field.
                            starredValue = f"{variationName}* ({variationDomain})"
                            plainValue = f"{variationName} ({variationDomain})"
                            if personMatch.is_attorney == 'YES':
                                if starredValue in formattedFieldValues:
                                    ##  This variation was found in the list of formatted values, which is fine, so just remove it.
                                    if matchFlag:
                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                    formattedFieldValues.remove(starredValue)
                                    matchFlag = True
                                elif plainValue in formattedFieldValues:
                                    ##  This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
                                    if matchFlag:
                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                    formattedFieldValues.remove(plainValue)
                                    matchFlag = True
                                    ##  TODO: change the hard coded "To Field" here once you change to iterate over all field groups.
                                    AddToIssuesList(docID,f"{plainValue} in To Field is High Confidence Potential Upgrade")

                            elif personMatch.is_attorney == 'NO':
                                if plainValue in formattedFieldValues:
                                    if matchFlag:
                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                    ##  This variation was found in the list of formatted values, which is fine, so just remove it.
                                    formattedFieldValues.remove(plainValue)
                                    matchFlag = True
                                elif starredValue in formattedFieldValues:
                                    ##  This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
                                    if matchFlag:
                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                    formattedFieldValues.remove(starredValue)
                                    matchFlag = True
                                    ##  TODO: change the hard coded "To Field" here once you change to iterate over all field groups.
                                    AddToIssuesList(docID,f"{starredValue} in To Field is High Confidence Potential downgrade")
                            else:
                                ##  This means they are a split role, so additional work will need to be done with the dates.
                                ##  First, determine if this document date is between the dates where this person was an attorney.
                                ##  For now the result is only printed; nothing is added to the issues yet.
                                wasAttorneyAtThatTime = False
                                documentDateValue = qcP.additionalValuesDict[docID]._asdict()['dateValue']
                                documentDateValue = datetime.datetime.strptime(documentDateValue,'%m/%d/%Y').date()
                                print(f"\ndocumentDateValue is {documentDateValue}")
                                personWasAttorneyDates = personMatch.dates_as_counsel
                                for wasAttorneyStartDate,wasAttorneyEndDate in personWasAttorneyDates:
                                    print(f"Attorney {wasAttorneyStartDate}-{wasAttorneyEndDate}")
                                    ##  A month/year start date gets the first day of that month.
                                    if wasAttorneyStartDate.count("/") < 2:
                                        wasAttorneyStartDate = wasAttorneyStartDate.replace("/","/1/")
                                    wasAttorneyStartDate = datetime.datetime.strptime(wasAttorneyStartDate,'%m/%d/%Y').date()

                                    ##  An open-ended range ends today.
                                    if wasAttorneyEndDate in ("CURRENT", "PRESENT"):
                                        wasAttorneyEndDate = datetime.datetime.today().strftime('%m/%d/%Y')
                                    ##  A month/year end date gets the last day of that month (monthrange takes year, month).
                                    if wasAttorneyEndDate.count("/") < 2:
                                        endMonthText, endYearText = wasAttorneyEndDate.split("/")
                                        missingDayValue = calendar.monthrange(int(endYearText),int(endMonthText))[1]
                                        wasAttorneyEndDate = wasAttorneyEndDate.replace("/",f"/{missingDayValue}/")
                                    wasAttorneyEndDate = datetime.datetime.strptime(wasAttorneyEndDate,'%m/%d/%Y').date()

                                    if wasAttorneyStartDate <= documentDateValue <= wasAttorneyEndDate:
                                        wasAttorneyAtThatTime = True

                                if wasAttorneyAtThatTime:
                                    print("Person WAS attorney at this doc date.")
                                else:
                                    print("Person WAS NOT attorney at this doc date.")


                    else:
                        ##  Person match, using email, not found in MAL.  Try extracting a name from this metadata value and try matching the MAL using that.
                        val = val.upper()
                        ##  First lets try dealing with the extra parentheticals that keep coming up but make sure to handle (LEGAL) differently.
                        if "(LEGAL)" in val:
                            ##  Attempt to only remove the email parenthetical, including the now empty paren.
                            val = val.replace(result.upper(),"")
                            val = val.replace("()",'')
                        else:
                            ##  Remove all parentheticals, including any character in that paren, from value.
                            val = re.sub(r"\([^)]*\)","",val)
                        val = val.strip()
                        ##  With the email address and the paren stripped out of the val, only move forward if anything still exists.
                        if val:
                            if "," in val:
                                ##  "LAST, FIRST": split on the first comma only so extra commas cannot break the unpacking.
                                lastName, firstName = val.split(",",1)
                                lastName = lastName.strip()
                                firstName = firstName.strip()
                            elif " " in val:
                                ##  For now, try just splitting by the first space and take everything after it as the last name.
                                firstName, lastName = val.split(" ",1)
                            else:
                                ##  A single token: treat it as a last name with no first name, so the names are
                                ##  always bound here and never left over from an earlier value.
                                lastName = val
                                firstName = ""
                            ##  With the name now parsed, try searching for all values that match on the last name.

                            personMatchList = nv.malPeopleList.return_list_of_matching_values('last_name',lastName)
                            if personMatchList:
                                ##  Maps the _id of each MAL person whose first name also matches to that person record.
                                possiblePeopleMatchesMatrix = {}
                                for candidateMatch in personMatchList:
                                    if candidateMatch.first_name == firstName:
                                        ##  This is a person that matches the first and last name.
                                        possiblePeopleMatchesMatrix[candidateMatch._id] = candidateMatch
                                if possiblePeopleMatchesMatrix:
                                    ##  If the list of possible matches is just 1, we are okay doing a simple match attempt.  If more than 1, we need to test for conflicting designations in the list of possible matches.
                                    if len(possiblePeopleMatchesMatrix) < 2:
                                        ##  Use the record that actually matched on first name, not whichever record
                                        ##  happened to be last in the last-name list.
                                        personMatch = next(iter(possiblePeopleMatchesMatrix.values()))
                                        allPossibleVariationsList = GatherAllPossibleVariations(personMatch)
                                        matchFlag = False
                                        for variationName, variationDomain in allPossibleVariationsList:
                                            starredValue = f"{variationName}* ({variationDomain})"
                                            plainValue = f"{variationName} ({variationDomain})"
                                            if personMatch.is_attorney == 'YES':
                                                if starredValue in formattedFieldValues:
                                                    ##  This variation was found in the list of formatted values, which is fine, so just remove it.
                                                    if matchFlag:
                                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE name VALUE??")
                                                    formattedFieldValues.remove(starredValue)
                                                    matchFlag = True
                                                elif plainValue in formattedFieldValues:
                                                    ##  This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
                                                    if matchFlag:
                                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE name VALUE??")
                                                    formattedFieldValues.remove(plainValue)
                                                    matchFlag = True
                                                    ##  TODO: change the hard coded "To Field" here once you change to iterate over all field groups.
                                                    AddToIssuesList(docID,f"{plainValue} in To Field is Low Confidence Potential Upgrade")
                                                else:
                                                    ##  This means it failed to match a value at all in the formatted field HOWEVER, this is an attorney so we should flag this as a low confidence flag.
                                                    AddToIssuesList(docID,f"{plainValue} in To Field did not directly match value in formatted however is a Low Confidence Potential Attorney")

                                            else:
                                                ##  TODO: will need to split this out to include split role soon.
                                                if plainValue in formattedFieldValues:
                                                    if matchFlag:
                                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                                    ##  This variation was found in the list of formatted values, which is fine, so just remove it.
                                                    formattedFieldValues.remove(plainValue)
                                                    matchFlag = True
                                                elif starredValue in formattedFieldValues:
                                                    ##  This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
                                                    if matchFlag:
                                                        print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
                                                    formattedFieldValues.remove(starredValue)
                                                    matchFlag = True
                                                    ##  TODO: change the hard coded "To Field" here once you change to iterate over all field groups.
                                                    AddToIssuesList(docID,f"{starredValue} in To Field is Low Confidence Potential downgrade")


                            else:
                                ##  TODO: Need to ask Eli whether values that cannot be matched on first and last name need to be flagged.
                                #AddToIssuesList(docID,f"first name: {firstName} - last name: {lastName} is an email in metadata that I couldnt match in MAL")
                                pass

            else:
                ##  No email address could be extracted from this val, so report it.
                AddToIssuesList(docID,f"{val} is a value in metadata that I couldnt extract an email address from")


        ##  Since you iterated over the metadata values but didn't iterate over the formatted values, check for any remaining formatted values that exist in the list.
        if formattedFieldValues:
            for val in formattedFieldValues:
                ##  TODO: Confirm with Eli but we should only report these remaining values if they have a *
                ##  From Eli: the highest risk is the * values because these are the potential overdesignations so yes, but in a perfect world we would check both.
                if "*" in val:
                    ##  TODO: change the hard coded "To Field" here once you change to iterate over all field groups.
                    AddToIssuesList(docID,f"{val} in To Field is an attorney but couldnt be matched to any metadata value.")


    ##  Now just unpack and write the issues, one line per DocID, with that document's issues separated by semicolons.
    ##  The with block closes the file even if a write fails part way through.
    with open(outputFileName,'w') as outputFile:
        for docID, docIssuesList in issuesMatrix.items():
            outputFile.write(f"{docID}|{';'.join(docIssuesList)}\n")
Revision:	859
Committed:	Fri Dec 13 17:58:37 2024 UTC (15 months, 1 week ago) by nino.borges
Content type:	text/x-python
File size:	21474 byte(s)
Log Message:	I'm still just printing the results of the split role date check but it's working correctly now. Next step will be to action on these decisions.
#	User	Rev	Content
1	nino.borges	855	"""
2
3			Amazon_PerformDeepNamesNormQC
4
5			Created by:
6			Emanuel Borges
7			12.11.2024
8
9			This program is similar to Amazon_PerformNamesNormQC but it will perform a deeper level of names norm QC. I may just replace Amazon_PerformNamesNormQC with this file but for now i'd
10			like to keep both.
11
12			"""
13
14	nino.borges	858	import os, re, datetime, calendar
15	nino.borges	855	from uuid import UUID
16			import MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC
17			import MyCode.Active_prgs.Redgrave.Amazon_NamesNormQC
18
19	nino.borges	859	version = '0.6.0'
20	nino.borges	855
21			issuesMatrix = {}
22
23			def GatherAllPossibleVariations(personMatch):
24			"""Takes a personMatch, which is the results of a person match, and attempts to make all possible name match variations that may exist in the formatted field.
25			returns deduplicated list of tuple pairs (fullname, parenthetical)"""
26			## Start as a plain list of all possible tuple pairs.
27			allPossibleVariationsList = []
28
29			allDomainsList = []
30			if personMatch.work_email_address:
31			allDomainsList.append(f"{personMatch.work_email_address.split('@')[-1]}")
32			if personMatch.alt_work_email_address:
33			allDomainsList.append(f"{personMatch.alt_work_email_address.split('@')[-1]}")
34			allDomainsList = list(dict.fromkeys(allDomainsList))
35
36			if personMatch.full_name_overide:
37			fullName = personMatch.full_name_overide
38			for domain in allDomainsList:
39			allPossibleVariationsList.append((fullName,domain))
40			if personMatch.full_name_preferred:
41			## Going to need to do a bit of replacing to remove some information that is just never in the formatted.
42			fullPreferredName = personMatch.full_name_preferred
43			fullPreferredName = fullPreferredName.replace('(LEGAL)','')
44			fullPreferredName = fullPreferredName.replace('(SHE, HER)','')
45			fullPreferredName = fullPreferredName.replace('(SHE HER)','')
46			if "," in fullPreferredName:
47			preferedLastName, preferedFirstName = fullPreferredName.split(',')
48			preferedLastName = preferedLastName.strip()
49			preferedFirstName = preferedFirstName.strip()
50			preferedFirstName = preferedFirstName.split(" ")[0]
51			fullName = f"{preferedFirstName} {preferedLastName}"
52			#fullName = f"{preferedLastName}, {preferedFirstName}"
53			for domain in allDomainsList:
54			allPossibleVariationsList.append((fullName,domain))
55			else:
56			print(f"ERROR in this name {fullPreferredName}")
57			if personMatch.last_name:
58			if personMatch.first_name:
59			fullName = f"{personMatch.first_name} {personMatch.last_name}"
60			#fullName = f"{personMatch.last_name}, {personMatch.first_name}"
61			else:
62			fullName = f"{personMatch.last_name}"
63			for domain in allDomainsList:
64			allPossibleVariationsList.append((fullName,domain))
65
66
67			## Now return a deduplicated list by using dict to deduplicate.
68			return list(dict.fromkeys(allPossibleVariationsList))
69
70
71	nino.borges	856	def AddToIssuesList(docID,issueMessage):
72			"""This function will add a single issue to the issues matrix."""
73			if docID in list(issuesMatrix.keys()):
74			issuesMatrix[docID].append(issueMessage)
75			else:
76			issuesMatrix[docID] = [issueMessage,]
77	nino.borges	855
78
79
80			if __name__ == '__main__':
81			cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Testing_3\PrivLogExport_20241204_VEAS_Converted.txt"
82			masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2024.12.12(20241212-1151).xlsx"
83			fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\VEAS-MasterAttorneyList\FullNameOverides.txt"
84			outputFileName = r"C:\Test_Dir\Amazon\NameNormDeepOutputText.txt"
85
86
87			nv = MyCode.Active_prgs.Redgrave.Amazon_NamesNormQC.NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)
88
89			qcP = MyCode.Active_prgs.Redgrave.Amazon_PrivLogQC.QcPrivLog(cleanedDatExportFileName)
90
91			#issuesMatrix = {}
92
93			print(f"\nThere are {len(qcP.formattedValuesDict)} documents in the formatted values dictionary.")
94			print(f"There are {len(qcP.metadataValuesDict)} documents in the metadata values dictionary.")
95
96			workList = qcP.metadataValuesDict.keys()
97			for docID in workList:
98			metadataFieldValues = qcP.metadataValuesDict[docID]._asdict()['toValues']
99			formattedFieldValues = qcP.formattedValuesDict[docID]._asdict()['toValues']
100			## remember to convert all values in formattedFieldValues to uppercase (perhaps eventually do some of the formatted cleaning that eli mentioned.
101			formattedFieldValues = [xVal.upper() for xVal in formattedFieldValues]
102			## This will change once you start itterating acroll all of the field values names
103			currentMetadataValues = metadataFieldValues
104			for val in currentMetadataValues:
105			## First try to locate an email address in this val and if found, try to find that in the MAL.
106			results = re.findall(qcP.allPossibleEmailAddressesRegExPattern, val)
107			if results:
108			## Use some smart deduplication to remove duplicates.
109			results = nv.SmartDedupeSet(results)
110			for result in results:
111			## Try to find a match in the MAL by email. There shouldnt rows with duplicative email addresses.
112			## TODO:DONE: Update search_by_email to search both workemail and alt email.
113
114			personMatch = nv.malPeopleList.search_by_email(result.upper())
115			if personMatch:
116			## Person match found in MAL. Now try to match a value in the formatted field by pulling various values from the MAL.
117			## For each of these match attempts, try using the correct designation and incorrect designation (* vs no *) and note that.
118			allPossibleVariationsList = GatherAllPossibleVariations(personMatch)
119			matchFlag = False
120			for variationPair in allPossibleVariationsList:
121			if personMatch.is_attorney == 'YES':
122			if f"{variationPair[0]}* ({variationPair[1]})" in formattedFieldValues:
123			## This variation was found in the list of formatted values, which is fine, so just remove it.
124			if matchFlag:
125			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
126			formattedFieldValues.remove(f"{variationPair[0]}* ({variationPair[1]})")
127			matchFlag = True
128			elif f"{variationPair[0]} ({variationPair[1]})" in formattedFieldValues:
129			## This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
130			if matchFlag:
131			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
132			formattedFieldValues.remove(f"{variationPair[0]} ({variationPair[1]})")
133			matchFlag = True
134	nino.borges	856	## TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
135			AddToIssuesList(docID,f"{variationPair[0]} ({variationPair[1]}) in To Field is High Confidence Potential Upgrade")
136	nino.borges	857
137	nino.borges	858	elif personMatch.is_attorney == 'NO':
138	nino.borges	855	if f"{variationPair[0]} ({variationPair[1]})" in formattedFieldValues:
139			if matchFlag:
140			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
141			## This variation was found in the list of formatted values, which is fine, so just remove it.
142			formattedFieldValues.remove(f"{variationPair[0]} ({variationPair[1]})")
143			matchFlag = True
144			elif f"{variationPair[0]}* ({variationPair[1]})" in formattedFieldValues:
145			## This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
146			if matchFlag:
147			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
148			formattedFieldValues.remove(f"{variationPair[0]}* ({variationPair[1]})")
149			matchFlag = True
150	nino.borges	856	## TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
151			AddToIssuesList(docID,f"{variationPair[0]}* ({variationPair[1]}) in To Field is High Confidence Potential downgrade")
152	nino.borges	858	else:
153			## This means they are a split role, so additional work will need to be done with the dates.
154			## First, determin if this document date is between the dates where this person was an attorney
155			wasAttorneyAtThatTime = False
156			documentDateValue = qcP.additionalValuesDict[docID]._asdict()['dateValue']
157			documentDateValue = datetime.datetime.strptime(documentDateValue,'%m/%d/%Y').date()
158			print(f"\ndocumentDateValue is {documentDateValue}")
159			personWasAttorneyDates = personMatch.dates_as_counsel
160			for wasAttorneyStartDate,wasAttorneyEndDate in personWasAttorneyDates:
161			print(f"Attorney {wasAttorneyStartDate}-{wasAttorneyEndDate}")
162			if wasAttorneyStartDate.count("/") < 2:
163			wasAttorneyStartDate = wasAttorneyStartDate.replace("/","/1/")
164			wasAttorneyStartDate = datetime.datetime.strptime(wasAttorneyStartDate,'%m/%d/%Y').date()
165
166			if wasAttorneyEndDate == "CURRENT":
167			wasAttorneyEndDate = datetime.datetime.today().strftime('%m/%d/%Y')
168			elif wasAttorneyEndDate == "PRESENT":
169			wasAttorneyEndDate = datetime.datetime.today().strftime('%m/%d/%Y')
170			if wasAttorneyEndDate.count("/") < 2:
171			missingDayValue = calendar.monthrange(int(wasAttorneyEndDate.split("/")[1]),int(wasAttorneyEndDate.split("/")[0]))[1]
172			wasAttorneyEndDate = wasAttorneyEndDate.replace("/",f"/{missingDayValue}/")
173			wasAttorneyEndDate = datetime.datetime.strptime(wasAttorneyEndDate,'%m/%d/%Y').date()
174	nino.borges	859
175			#print(f"{wasAttorneyStartDate} - {documentDateValue} - {wasAttorneyEndDate}")
176	nino.borges	858	if wasAttorneyStartDate <= documentDateValue <= wasAttorneyEndDate:
177	nino.borges	859	wasAttorneyAtThatTime = True
178
179			if wasAttorneyAtThatTime:
180			print("Person WAS attorney at this doc date.")
181			else:
182			print("Person WAS NOT attorney at this doc date.")
183	nino.borges	855
184	nino.borges	858
185	nino.borges	855	else:
186			## Person match, using email, not found in MAL. Try extracting a name from this metadata value and try matching the MAL using that.
187			val = val.upper()
188			## First lets try dealing with the extra parentheticals that keep coming up but make sure to handle (LEGAL) differently.
189			if "(LEGAL)" in val:
190			## Attempt to only remove the email parenthetical, including the now empty paren.
191			val = val.replace(result.upper(),"")
192			val = val.replace("()",'')
193			#val = val.replace(")","")
194			else:
195			## Remove all parentheticals, including any characters inside the parens, from the value.
196			val = re.sub(r"\([^)]*\)","",val)
197			val = val.strip()
198			## with the email address and the paren stripped out of the val, only move forward if anything still exists.
199			if val:
200			## if there is a comma, parse to last name, first name
201			if "," in val:
202			lastName, firstName = val.split(",")
203			lastName = lastName.strip()
204			firstName = firstName.strip()
205			elif " " in val:
206			## For now, just split on the first space: the text before it is the first name and everything after it is the last name.
207			firstName, lastName = val.split(" ",1)
208			## With the name now parse, try searching for all values that match on the last name.
209
210			personMatchList = nv.malPeopleList.return_list_of_matching_values('last_name',lastName)
211			if personMatchList:
212			possiblePeopleMatchesMatrix = {}
213			## For each personMatch in the list, now attempt to also see if the first name matches and, if so, put that into the possible people list
214			for personMatch in personMatchList:
215			if personMatch.first_name == firstName:
216			## This is a personMatch that matches the first and last name
217			possiblePeopleMatchesMatrix[personMatch._id] = 1
218			if possiblePeopleMatchesMatrix.keys():
219			## If the list of possible matches is just 1, we are okay doing a simple match attempt. if more than 1, we need to test for conflicting designations in the list of possible matches.
220			if len(list(possiblePeopleMatchesMatrix.keys())) < 2:
221			allPossibleVariationsList = GatherAllPossibleVariations(personMatch)
222			matchFlag = False
223			for variationPair in allPossibleVariationsList:
224			if personMatch.is_attorney == 'YES':
225			if f"{variationPair[0]}* ({variationPair[1]})" in formattedFieldValues:
226			## This variation was found in the list of formatted values, which is fine, so just remove it.
227			if matchFlag:
228			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE name VALUE??")
229			formattedFieldValues.remove(f"{variationPair[0]}* ({variationPair[1]})")
230			matchFlag = True
231			elif f"{variationPair[0]} ({variationPair[1]})" in formattedFieldValues:
232			## This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
233			if matchFlag:
234			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE name VALUE??")
235			formattedFieldValues.remove(f"{variationPair[0]} ({variationPair[1]})")
236			matchFlag = True
237	nino.borges	856	## TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
238			AddToIssuesList(docID,f"{variationPair[0]} ({variationPair[1]}) in To Field is Low Confidence Potential Upgrade")
239	nino.borges	858	else:
240			## This means it failed to match a value at all in the formatted field HOWEVER, this is an attorney so we should flag this as a low confidence flag.
241			AddToIssuesList(docID,f"{variationPair[0]} ({variationPair[1]}) in To Field did not directly match value in formatted however is a Low Confidence Potential Attorney")
242	nino.borges	857
243	nino.borges	855	else:
244			## TODO: will need to split this out to include split role soon.
245			if f"{variationPair[0]} ({variationPair[1]})" in formattedFieldValues:
246			if matchFlag:
247			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
248			## This variation was found in the list of formatted values, which is fine, so just remove it.
249			formattedFieldValues.remove(f"{variationPair[0]} ({variationPair[1]})")
250			matchFlag = True
251			elif f"{variationPair[0]}* ({variationPair[1]})" in formattedFieldValues:
252			## This variation was found in the list of formatted values, however it's a bad match, so remove it but also add this to the issuesList.
253			if matchFlag:
254			print("WARNING: TWO SEPARATE FORMATTED NAME MATCHES FROM A SINGLE EMAIL VALUE??")
255			formattedFieldValues.remove(f"{variationPair[0]}* ({variationPair[1]})")
256			matchFlag = True
257	nino.borges	856	## TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
258			AddToIssuesList(docID,f"{variationPair[0]}* ({variationPair[1]}) in To Field is Low Confidence Potential downgrade")
259	nino.borges	855
260
261			else:
262			## TODO: Need to ask Eli whether values that get no match when checking first and last name need to be flagged.
263	nino.borges	857	#AddToIssuesList(docID,f"first name: {firstName} - last name: {lastName} is an email in metadata that I couldnt match in MAL")
264	nino.borges	855	pass
265	nino.borges	857
266	nino.borges	855	else:
267			## No email address could be extracted from this val, so record it as an issue for this docID.
268	nino.borges	856	AddToIssuesList(docID,f"{val} is a value in metadata that I couldnt extract an email address from")
269	nino.borges	857
270	nino.borges	855
271			## Since you iterated over the metadata values but didn't iterate over the formatted values, check for any remaining formatted values that exist in the list
272			if formattedFieldValues:
273			for val in formattedFieldValues:
274			## TODO: Confirm with Eli but we should only report these remaining values if they have a *
275			## From Eliu: the Highest risk is the * values because these are the potential overdesignations so yes but in a perfect world we would check both.
276			if "*" in val:
277	nino.borges	856	## TODO: change the hard coded "To Field" here once you change to itterate over all field groups.
278			AddToIssuesList(docID,f"{val} in To Field is an attorney but couldnt be matched to any metadata value.")
279	nino.borges	855
280
281	nino.borges	857
282	nino.borges	855	## Now just unpack and write the issues, per DocID, to the output file separated by semicolon.
283			outputFile = open(outputFileName,'w')
284			for docID in list(issuesMatrix.keys()):
285			outputFile.write(f"{docID}\|{';'.join(issuesMatrix[docID])}\n")
286			outputFile.close()