[ViewVC] Diff of: ns_dev/Python/NinoCode/Active

Comparing Python/NinoCode/Active_prgs/Redgrave/ATT-PrivLogQC.py (file contents):
Revision 948 by nino.borges, Thu May 22 20:04:49 2025 UTC vs.
Revision 949 by nino.borges, Wed Nov 5 18:18:53 2025 UTC

+import os, re
+from collections import namedtuple
+from MyCode.Tool_Box import FileEncodingLib
-+
+import MyCode.Active_prgs.Redgrave.ATT_MasterAttorneyList
+class QcPrivLog(object):
+    """A class for automating the process of performing QC on the AT&T privilege logs, including names normalization analysis"""
-<
+    version = '0.1.0'
->
+    version = '0.2.0'
+    def __init__(self, cleanedDatExportFileName, metaFromFieldName, plogFromFieldName, metaToFieldName, plogToFieldName, metaCcFieldName, plogCcFieldName, metaBccFieldName, plogBccFieldName, metaAuthorFieldName, plogAuthorFieldName, fileEncoding = 'UTF8'):
+        """Initializes the data structures. cleanedDatExportFileName should be the full path to the file.  Assumes the first row of the data file is the header and first column is DocID."""
+        print("Initializing data structures...")
-+
+        self.issuesMatrix = {}
+        self.metadataValuesDict = {}
+        self.formattedValuesDict = {}
+        self.additionalValuesDict = {}
+                                                           self.__SplitAndClean(line[self.cleanedInputDataFileHeaderPositionalMatrix[plogCcFieldName]]),
+                                                           self.__SplitAndClean(line[self.cleanedInputDataFileHeaderPositionalMatrix[plogBccFieldName]]),
+                                                           self.__SplitAndClean(line[self.cleanedInputDataFileHeaderPositionalMatrix[plogAuthorFieldName]]))
-+
+            #print(self.formattedValuesDict[docID])
+        print("Data structures created.")
+        return len(newSet)
-+
+    def __AddToIssuesMatrix(self,docID,issueMessage):
-+
+        """This method will add a single issue to the issues matrix."""
-+
+        if docID in list(self.issuesMatrix.keys()):
-+
+            self.issuesMatrix[docID].append(issueMessage)
-+
+        else:
-+
+            self.issuesMatrix[docID] = [issueMessage,]
-+
+    def PerformValueCountChecks(self, countsOnly = True):
+        """Performs the inital value count checks between the metadata values and formatted values, looking for red flags and warnings. By default reports numbers. Set countsOnly to false to export reports."""
+        workList = self.metadataValuesDict.keys()
+                duplicatesInFormattedOutputFile.write(f"{z} | {*duplicatesInFormattedMatrix[z],}\n")
+            duplicatesInFormattedOutputFile.close()
-+
+    def PerformNamesToMalChecks(self):
-+
+        """This method will compare the normalized names to the MAL, using the metadata values."""
-+
+        masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\AT&T\Cybersecurity FCA Case\_ATT_Current_MAL\RG - ATT Cross-Matter Master Attorney List (20251104)(20251104-0207).xlsx"
-+
+        attMal = MyCode.Active_prgs.Redgrave.ATT_MasterAttorneyList.ATT_MasterAttorneyList(masterAttorneyListFileName)
-+
+        workList = self.metadataValuesDict.keys()
-+
+        matches = []
-+
+        used_full_names = set()
-+
+        remaining_emails = []
-+
-+
+        for docID in workList:
-+
+            for fieldName in self.recordValuesFieldList:
-+
+                matches = []
-+
+                used_full_names = set()
-+
+                remaining_emails = []
-+
+                #print(docID)
-+
+                #print(fieldName)
-+
+                metadataFieldValues = self.metadataValuesDict[docID]._asdict()[fieldName]
-+
+                #print(metadataFieldValues)
-+
+                formattedFieldValues = self.formattedValuesDict[docID]._asdict()[fieldName]
-+
+                #print(formattedFieldValues)
-+
+                normalized_full_names = {name.upper().replace("(ESQ.)","").strip(): name for name in formattedFieldValues}
-+
+                #print(normalized_full_names)
-+
+                #formattedFieldValues = [x.upper().replace("(ESQ)","").strip() for x in formattedFieldValues]
-+
+                if metadataFieldValues:
-+
+                    for metadataFieldValue in metadataFieldValues:
-+
+                        result = re.findall(self.allPossibleEmailAddressesRegExPattern, metadataFieldValue)
-+
+                        if result:
-+
+                            for email in result:
-+
+                                person = attMal.malPeopleList.search_by_email(email.upper().strip())
-+
+                                if not person:
-+
+                                    #remaining_emails.append(email)
-+
+                                    ##self.__AddToIssuesMatrix(docID,f"There is no MAL match for email address {email}.")
-+
+                                    continue
-+
+                                ##  With this single email address, generate all possible names
-+
+                                possible_names = [name.upper() for name in attMal.malPeopleList.return_person_all_name_variations(person)]
-+
+                                #print(possible_names)
-+
+                                attorneyStatus = person.is_attorney
-+
-+
+                                ##  Attempt to find a full name match.
-+
+                                found_match = None
-+
+                                for candidate in possible_names:
-+
+                                    #print(candidate)
-+
+                                    if candidate in normalized_full_names:
-+
+                                        full_name = normalized_full_names[candidate]
-+
+                                        if full_name not in used_full_names:
-+
+                                            #matches.append((email, full_name))
-+
+                                            ## TO DO: Change these next 2 lines as soon as you support SPLIT ROLE
-+
+                                            if attorneyStatus == "SPLIT ROLE":
-+
+                                                attorneyStatus = "YES"
-+
-+
+                                            if attorneyStatus == "YES" and "(ESQ.)" in full_name:
-+
+                                                pass
-+
+                                            elif attorneyStatus == "NO" and "(ESQ.)" not in full_name:
-+
+                                                pass
-+
+                                            elif attorneyStatus == "NO":
-+
+                                                self.__AddToIssuesMatrix(docID,f"{full_name} has an ESQ but is a high confidence non-attorney match.")
-+
+                                            else:
-+
+                                                self.__AddToIssuesMatrix(docID,f"{full_name} does not have an ESQ but is a high confidence attorney match.")
-+
+                                            used_full_names.add(full_name)
-+
+                                            found_match = full_name
-+
+                                            break
-+
+                                if not found_match:
-+
+                                    #remaining_emails.append(email)
-+
+                                    if attorneyStatus == "YES":
-+
+                                        self.__AddToIssuesMatrix(docID,f"{email} from {fieldName} does not have a corresponding normalized name match but is a high confidence attorney match.")
-+
+                                    else:
-+
+                                        pass
-+
+                                        #self.__AddToIssuesMatrix(docID,f"{email} from {fieldName} does not have a corresponding normalized name match but is a high confidence non-attorney match.")
-+
+                        elif "EXCHANGE" in metadataFieldValue.upper():
-+
+                            ##  The metadata field parsed value didnt have an email address.  Try a username lookup.
-+
+                            userNameId = metadataFieldValue.split("-")[-1]
-+
+                            ## Finding that there is garbage in the userIDs...  cleaning that.
-+
+                            userNameId = userNameId.replace("]","")
-+
+                            userNameId = userNameId.replace('">','')
-+
+                            userNameId = userNameId.replace('"','')
-+
+                            if userNameId:
-+
+                                userNameId = userNameId.upper()
-+
+                                person = attMal.malPeopleList.search_by_login_id(userNameId.upper().strip())
-+
+                                if not person:
-+
+                                    #remaining_emails.append(email)
-+
+                                    ##self.__AddToIssuesMatrix(docID,f"There is no MAL match for User Name Id {userNameId}.")
-+
+                                    continue
-+
-+
+                                ##  With this user login id, generate all possible names
-+
+                                possible_names = [name.upper() for name in attMal.malPeopleList.return_person_all_name_variations(person)]
-+
+                                attorneyStatus = person.is_attorney
-+
-+
+                                ##  Attempt to find a full name match.
-+
+                                found_match = None
-+
+                                for candidate in possible_names:
-+
+                                    if candidate in normalized_full_names:
-+
+                                        full_name = normalized_full_names[candidate]
-+
+                                        if full_name not in used_full_names:
-+
+                                            #matches.append((email, full_name))
-+
+                                            ## TO DO: Change these next 2 lines as soon as you support SPLIT ROLE
-+
+                                            if attorneyStatus == "SPLIT ROLE":
-+
+                                                attorneyStatus = "YES"
-+
-+
+                                            if attorneyStatus == "YES" and "(ESQ.)" in full_name:
-+
+                                                pass
-+
+                                            elif attorneyStatus == "NO" and "(ESQ.)" not in full_name:
-+
+                                                pass
-+
+                                            elif attorneyStatus == "NO":
-+
+                                                self.__AddToIssuesMatrix(docID,f"{full_name} has an ESQ but is a high confidence non-attorney match.")
-+
+                                            else:
-+
+                                                self.__AddToIssuesMatrix(docID,f"{full_name} does not have an ESQ but is a high confidence attorney match.")
-+
+                                            used_full_names.add(full_name)
-+
+                                            found_match = full_name
-+
+                                            break
-+
+                                if not found_match:
-+
+                                    #remaining_emails.append(email)
-+
+                                    if attorneyStatus == "YES":
-+
+                                        self.__AddToIssuesMatrix(docID,f"User Login ID {userNameId} from {fieldName} does not have a corresponding normalized name match but is a high confidence attorney match.")
-+
+                                    else:
-+
+                                        pass
-+
-+
+                ## Compute remaining full names
-+
+                remaining_full_names = [name for name in formattedFieldValues if name not in used_full_names]
-+
+                for y in remaining_full_names:
-+
+                    pass
-+
+                    #self.__AddToIssuesMatrix(docID,f"{y} from {fieldName} normalized field has no matching metadata value.")
-+
+        return self.issuesMatrix
-+
+                #return matches, remaining_emails, remaining_full_names
-+
-+
if __name__ == '__main__':
    ##  Ad hoc test run: load one cleaned export, run the value count checks, then the
    ##  names to MAL checks, and write the resulting issues to a pipe delimited text file.
    cleanedDatExportFileName = r"C:\Test_Dir\ATT\PrivLogTest\ESI_Custodial\export_20250930_215204_Converted.txt"
    #cleanedDatExportFileName = r"C:\Test_Dir\ATT\PrivLogTest\export_20250807_175927_Converted(SHORT).txt"
    qcP = QcPrivLog(cleanedDatExportFileName, "From", "MA Normalized From::Full Name", "To", "MA Normalized To::Full Name",
                    "CC", "MA Normalized Cc::Full Name", "BCC", "MA Normalized Bcc::Full Name", "DocAuthor", "Privilege Log From/Author", fileEncoding = 'utf-8')
    print(qcP.cleanedInputDataFileHeaderPositionalMatrix)
    qcP.PerformValueCountChecks(countsOnly = False)
    ##  PerformNamesToMalChecks loads the master attorney list itself, so it is not loaded here.
    issuesMatrix = qcP.PerformNamesToMalChecks()
    ##  One line per document: docID|issue;issue;...  The with block closes the file even if a write fails.
    with open(r"C:\Test_Dir\ATT\namesNormTestOutput.txt",'w',encoding='utf-8') as outputFile:
        for docID, issues in issuesMatrix.items():
            outputFile.write(f"{docID}|{';'.join(issues)}\n")
    #qcP.PerformValueCountChecks()

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines (old)
->
+Changed lines (new)

Comparing Python/NinoCode/Active_prgs/Redgrave/ATT-PrivLogQC.py (file contents): Revision 948 by nino.borges, Thu May 22 20:04:49 2025 UTC vs. Revision 949 by nino.borges, Wed Nov 5 18:18:53 2025 UTC

Diff Legend

Comparing Python/NinoCode/Active_prgs/Redgrave/ATT-PrivLogQC.py (file contents):
Revision 948 by nino.borges, Thu May 22 20:04:49 2025 UTC vs.
Revision 949 by nino.borges, Wed Nov 5 18:18:53 2025 UTC