| 1 |
nino.borges |
779 |
"""
|
| 2 |
|
|
|
| 3 |
|
|
CsvRowCompare
|
| 4 |
|
|
|
| 5 |
|
|
Created by:
|
| 6 |
|
|
Emanuel Borges
|
| 7 |
|
|
05.27.2022
|
| 8 |
|
|
|
| 9 |
|
|
This program will take two CSV files and do a row to row compare. For this to work both CSVs need to have the same colums.
|
| 10 |
|
|
Set colOmitList to FALSE if you dont want it to parse at all and remove any columns. for that to work though you have to normalize the date
|
| 11 |
|
|
and number fields.
|
| 12 |
|
|
NOTE: the concur data HAS pipes in it, so verify each report.
|
| 13 |
|
|
|
| 14 |
|
|
"""
|
| 15 |
|
|
|
| 16 |
|
|
import os
|
| 17 |
|
|
|
| 18 |
|
|
|
| 19 |
|
|
if __name__ == '__main__':
|
| 20 |
|
|
workingDir = r"C:\Users\eborges\Documents\Cases\Endo\Temp\2022.05.26 Jurisdiction-Specific Concur files\_20220627_RowToRowAnalysis\20200803 - Missouri"
|
| 21 |
|
|
csvFileAName = "ENDO_DATA-OPIOID_MO-00000003_HIGHLY CONFIDENTIAL.csv"
|
| 22 |
|
|
csvFileBName = "UN-AllYears(Missouri-Only).csv"
|
| 23 |
|
|
|
| 24 |
|
|
|
| 25 |
|
|
## remember that it calculates the index based on each pop! so the second index needs to take into account that the first index was removed.
|
| 26 |
|
|
#colOmitList = [3,14]
|
| 27 |
|
|
#colOmitList = [15,]
|
| 28 |
|
|
colOmitList = False
|
| 29 |
|
|
|
| 30 |
|
|
csvFileASet = set()
|
| 31 |
|
|
csvFileBSet = set()
|
| 32 |
|
|
|
| 33 |
|
|
|
| 34 |
|
|
print("Now opening file A...")
|
| 35 |
|
|
contents = open(os.path.join(workingDir,csvFileAName)).readlines()
|
| 36 |
|
|
headderA = contents[0]
|
| 37 |
|
|
contents = contents[1:]
|
| 38 |
|
|
print("There are %s real rows in this file."% len(contents))
|
| 39 |
|
|
print("Creating SetA...")
|
| 40 |
|
|
for line in contents:
|
| 41 |
|
|
if colOmitList:
|
| 42 |
|
|
tempList = line.split("|")
|
| 43 |
|
|
for o in colOmitList:
|
| 44 |
|
|
tempList.pop(o)
|
| 45 |
|
|
line = "|".join(tempList)
|
| 46 |
|
|
csvFileASet.add(line.upper())
|
| 47 |
|
|
print("SetA created.")
|
| 48 |
|
|
print("\n")
|
| 49 |
|
|
|
| 50 |
|
|
|
| 51 |
|
|
print("Now opening file B...")
|
| 52 |
|
|
contents = open(os.path.join(workingDir,csvFileBName)).readlines()
|
| 53 |
|
|
headderB = contents[0]
|
| 54 |
|
|
if headderB == headderA:
|
| 55 |
|
|
print("Headders for these two files are identical.")
|
| 56 |
|
|
else:
|
| 57 |
|
|
print("WARNING: Headders for these two files are NOT identical.")
|
| 58 |
|
|
print ("\n")
|
| 59 |
|
|
|
| 60 |
|
|
contents = contents[1:]
|
| 61 |
|
|
print("There are %s real rows in this file."% len(contents))
|
| 62 |
|
|
print("Creating SetB...")
|
| 63 |
|
|
for line in contents:
|
| 64 |
|
|
if colOmitList:
|
| 65 |
|
|
tempList = line.split("|")
|
| 66 |
|
|
for o in colOmitList:
|
| 67 |
|
|
tempList.pop(o)
|
| 68 |
|
|
line = "|".join(tempList)
|
| 69 |
|
|
csvFileBSet.add(line.upper())
|
| 70 |
|
|
print("SetB created.")
|
| 71 |
|
|
|
| 72 |
|
|
|
| 73 |
|
|
|
| 74 |
|
|
uniqueToA = csvFileASet - csvFileBSet
|
| 75 |
|
|
uniqueToB = csvFileBSet - csvFileASet
|
| 76 |
|
|
|
| 77 |
|
|
|
| 78 |
|
|
print("There are %s rows that are unique to file A."%len(uniqueToA))
|
| 79 |
|
|
print("There are %s rows that are unique to file B."%len(uniqueToB))
|
| 80 |
|
|
uniqueToAList = list(uniqueToA)
|
| 81 |
|
|
uniqueToBList = list(uniqueToB)
|
| 82 |
|
|
#print(uniqueToAList[0])
|
| 83 |
|
|
#print(len(uniqueToAList))
|
| 84 |
|
|
|
| 85 |
|
|
|
| 86 |
|
|
outputFileA = open(os.path.join(workingDir, "UniqueTo"+csvFileAName),'w')
|
| 87 |
|
|
outputFileA.writelines(uniqueToAList)
|
| 88 |
|
|
outputFileA.close()
|
| 89 |
|
|
outputFileB = open(os.path.join(workingDir, "UniqueTo"+csvFileBName),'w')
|
| 90 |
|
|
outputFileB.writelines(uniqueToBList)
|
| 91 |
|
|
outputFileB.close() |