ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/Amazon_PerformMalIntegrityChecks.py
Revision: 880
Committed: Fri Jan 31 19:41:10 2025 UTC (13 months, 3 weeks ago) by nino.borges
Content type: text/x-python
File size: 8706 byte(s)
Log Message:
Added support for looking for duplicate numbers on the attorney and Downgrade tabs and looking for rows where Last name is missing but first name exists.

File Contents

# User Rev Content
1 nino.borges 870 """
2    
3     Amazon_PerformMalIntegrityChecks
4    
5     Created by:
6     Emanuel Borges
7     12.18.2024
8    
9     This program performs the various integrity checks on the MAL.
10 nino.borges 876 Currently looks for:
11     Rows on any of the 3 tabs where the unique row number is empty.
12     Rows on any of the 3 tabs where the unique row number skips a number. (gap test) This is skipped for Split Role tab
13     Verifies that all rows on the Attorneys Tab has the Is Attorney set to either YES or Split Role.
14     Verifies that all rows on the Downgrades Tab has the Is Attorney set to NO.
15     Rows on the Attorneys tab where more than one Split Role row references a duplicate value. (two sep rows that point to the same unique row number) this shouldnt be the case unless there are duplicate row numbers
16     Verifies that the row in the split role tab is referencing the correct attorney row number in the Attorneys tab by comparing the firstname, lastname and work email value.
17 nino.borges 879 Looks for any duplicates in the emails, across the tabs and within, both within fields and across the two separate email fields.
18     Looks for any email addresses that exist outside of the email address fields.
19 nino.borges 880 Looks for duplicate numbers on the attorney and Downgrade tabs.
20     Looks for rows where Last name is missing but first name exists.
21 nino.borges 870
22 nino.borges 880 TODO:
23    
24    
25 nino.borges 870 """
26    
27    
28     import os
29     import MyCode.Active_prgs.Redgrave.Amazon_NamesNormQC
30     from win32com.client import Dispatch
31 nino.borges 880 from collections import Counter
32 nino.borges 870
33 nino.borges 880 version = '0.3.0'
34 nino.borges 876
35 nino.borges 870 if __name__ == '__main__':
36     cleanedDatExportFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\20241215\PrivLogExports\PrivLogExport_20241211_CAAG_Converted.txt"
37 nino.borges 880 masterAttorneyListFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Amazon_ Master Attorney List 2025.01.30(20250131-0140).xlsx"
38 nino.borges 870 fullNameOveridesFileName = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\_PrivLogQCProcess\Consilio\CAAG-MasterAttorneyList\FullNameOverides.txt"
39    
40     ## TODO: Soon, change this to pulling it from the nv object.
41 nino.borges 880 excelTabParametersList = [{"tabName":"Attorneys", "beginRowNumber":2, "endRowNumber":10920, "beginColNumber":1, "endColNumber":17},
42     {"tabName":"Downgrades", "beginRowNumber":2, "endRowNumber":742, "beginColNumber":1, "endColNumber":16},
43     {"tabName":"Split Role Attorneys", "beginRowNumber":2, "endRowNumber":35, "beginColNumber":1, "endColNumber":10}]
44 nino.borges 870
45     nv = MyCode.Active_prgs.Redgrave.Amazon_NamesNormQC.NamesVerification(cleanedDatExportFileName, masterAttorneyListFileName, fullNameOveridesFileName)
46    
47     nv.RunMalEmailAddressIntegrityCheck()
48 nino.borges 878 nv.RunMalEmailOutsideEmailFieldsIntegrityCheck()
49 nino.borges 870
50     #nv.RunRowNumberIntegrityCheck()
51 nino.borges 879 print("Performing raw MAL spreadsheet integrity checks...")
52     xlApp = Dispatch('Excel.Application')
53     xlBook = xlApp.Workbooks.Open(masterAttorneyListFileName)
54     splitRoleMatrix = {}
55     for excelTab in excelTabParametersList:
56     hcRowNumberList = []
57     sht = xlBook.Worksheets(excelTab['tabName'])
58     print(f"Analyzing sheet {excelTab['tabName']}.")
59     excelFieldPositionMatrix = {}
60     for col in range (excelTab['beginColNumber'], excelTab['endColNumber'] +1):
61     excelFieldPositionMatrix[sht.Cells(1,col).Value] = col
62     for row in range(excelTab['beginRowNumber'], excelTab['endRowNumber'] +1):
63     if excelTab['tabName'] == 'Attorneys':
64     hcRowNumber = sht.Cells(row,excelFieldPositionMatrix['Row']).Value
65     ## First start by grabbing the hard coded row number into a list that you can test. Also check for rows where this is empty.
66     if hcRowNumber == None:
67     print(f"WARNING: Empty hard coded row number for Excel row {row}.")
68     else:
69     hcRowNumberList.append(int(hcRowNumber))
70     ## Next verify that these all have their is attorney set to either yes or split role. for split role, add to the matrix for testing when you get to that tab.
71     isAttorneyValue = sht.Cells(row,excelFieldPositionMatrix['Is Attorney']).Value
72     isAttorneyValue = isAttorneyValue.strip()
73     if isAttorneyValue.upper() == "YES":
74     pass
75     elif isAttorneyValue.upper() == "SPLIT ROLE":
76     if hcRowNumber in list(splitRoleMatrix.keys()):
77     print(f"ERROR: duplicate split role number found!")
78     else:
79    
80     splitRoleMatrix[hcRowNumber] = [sht.Cells(row,excelFieldPositionMatrix['Last Name']).Value.strip().upper(), sht.Cells(row,excelFieldPositionMatrix['First Name']).Value.strip().upper(), sht.Cells(row,excelFieldPositionMatrix['Work Email']).Value.strip().upper()]
81     else:
82     print(f"ERROR: is attorney contains value that shouldnt be here, which is {isAttorneyValue.upper()}!!")
83 nino.borges 880 ## Next check to see if the row has a Firstname value but is missing the LastName value
84     if sht.Cells(row,excelFieldPositionMatrix['First Name']).Value:
85     if sht.Cells(row,excelFieldPositionMatrix['Last Name']).Value:
86     pass
87     else:
88     print(f"WARNING: First Name exists with no Last Name value for Excel row {row}.")
89 nino.borges 879 elif excelTab['tabName'] == 'Split Role Attorneys':
90     hcRowNumber = sht.Cells(row,excelFieldPositionMatrix['Attorney Row']).Value
91     if splitRoleMatrix[hcRowNumber] == [sht.Cells(row,excelFieldPositionMatrix['Last Name']).Value.strip().upper(), sht.Cells(row,excelFieldPositionMatrix['First Name']).Value.strip().upper(), sht.Cells(row,excelFieldPositionMatrix['Work Email ']).Value.strip().upper()]:
92     pass
93     else:
94     print(f"ERROR: {hcRowNumber} is pointing to a different attorney on the attorneys tab!! ")
95     elif excelTab['tabName'] == 'Downgrades':
96     hcRowNumber = sht.Cells(row,excelFieldPositionMatrix['Row']).Value
97     ## First start by grabbing the hard coded row number into a list that you can test. Also check for rows where this is empty.
98     if hcRowNumber == None:
99     print(f"WARNING: Empty hard coded row number for Excel row {row}.")
100     else:
101     hcRowNumberList.append(int(hcRowNumber))
102     ## Next verify that these all have their is attorney set to NO.
103     isAttorneyValue = sht.Cells(row,excelFieldPositionMatrix['Is Attorney']).Value.strip()
104     if isAttorneyValue.upper() == "NO":
105     pass
106     else:
107     print(f"ERROR: Excel row number {row} 'is attorney' contains value that shouldnt be here!!")
108 nino.borges 880 ## Next check to see if the row has a Firstname value but is missing the LastName value
109     if sht.Cells(row,excelFieldPositionMatrix['First Name']).Value:
110     if sht.Cells(row,excelFieldPositionMatrix['Last Name']).Value:
111     pass
112     else:
113     print(f"WARNING: First Name exists with no Last Name value for Excel row {row}.")
114 nino.borges 879
115     else:
116     print("ERROR")
117    
118     ## Now check for gaps and export a list of the missing numbers
119     print("Checking for number gaps in the hard coded row numbers...")
120     if hcRowNumberList:
121     hcRowNumberList.sort()
122     compareSet = set(range(hcRowNumberList[0], hcRowNumberList[-1]))
123     diffList = compareSet - set(hcRowNumberList)
124     if diffList:
125     print(diffList)
126     else:
127     print(f"There are no gaps in the hard coded row numbers in the {excelTab['tabName']} tab.")
128     else:
129     print(f"A hard coded row number analysis was not done for the {excelTab['tabName']} tab.")
130 nino.borges 870
131    
132 nino.borges 880 ## Now check for dupicate row numbers and export a list of the duplicate values
133     print("Checking row numbers for duplicate row number values...")
134     if hcRowNumberList:
135     hcRowNumberList.sort()
136     dupRowNumbVals = [k for k,v in Counter(hcRowNumberList).items() if v>1]
137     print(dupRowNumbVals)
138    
139    
140 nino.borges 879
141     xlBook.Close()
142    
143     ## Now run the checks that do not require the spreadsheet
144     print("\nRaw MAL spreadsheet integrity check complete.")
145    
146    
147 nino.borges 878