Active_prgs/Redgrave/CsvRowCompare.py

"""

CsvRowCompare

Created by:
Emanuel Borges
05.27.2022

This program will take two CSV files and do a row to row compare.  For this to work both CSVs need to have the same colums.
Set colOmitList to FALSE if you dont want it to parse at all and remove any columns.  for that to work though you have to normalize the date
and number fields.
NOTE: the concur data HAS pipes in it, so verify each report.

"""

import os


if __name__ == '__main__':
    workingDir = r"C:\Users\eborges\Documents\Cases\Endo\Temp\2022.05.26 Jurisdiction-Specific Concur files\_20220627_RowToRowAnalysis\20200803 - Missouri"
    csvFileAName = "ENDO_DATA-OPIOID_MO-00000003_HIGHLY CONFIDENTIAL.csv"
    csvFileBName = "UN-AllYears(Missouri-Only).csv"


    ## remember that it calculates the index based on each pop!  so the second index needs to take into account that the first index was removed.
    #colOmitList = [3,14]
    #colOmitList = [15,]
    colOmitList = False

    csvFileASet = set()
    csvFileBSet = set()


    print("Now opening file A...")
    contents = open(os.path.join(workingDir,csvFileAName)).readlines()
    headderA = contents[0]
    contents = contents[1:]
    print("There are %s real rows in this file."% len(contents))
    print("Creating SetA...")
    for line in contents:
        if colOmitList:
            tempList = line.split("|")
            for o in colOmitList:
                tempList.pop(o)
            line = "|".join(tempList)
        csvFileASet.add(line.upper())
    print("SetA created.")
    print("\n")


    print("Now opening file B...")
    contents = open(os.path.join(workingDir,csvFileBName)).readlines()
    headderB = contents[0]
    if headderB == headderA:
        print("Headders for these two files are identical.")
    else:
        print("WARNING: Headders for these two files are NOT identical.")
    print ("\n")
    
    contents = contents[1:]
    print("There are %s real rows in this file."% len(contents))
    print("Creating SetB...")
    for line in contents:
        if colOmitList:
            tempList = line.split("|")
            for o in colOmitList:
                tempList.pop(o)
            line = "|".join(tempList)
        csvFileBSet.add(line.upper())
    print("SetB created.")


    uniqueToA = csvFileASet - csvFileBSet
    uniqueToB = csvFileBSet - csvFileASet


    print("There are %s rows that are unique to file A."%len(uniqueToA))
    print("There are %s rows that are unique to file B."%len(uniqueToB))
    uniqueToAList = list(uniqueToA)
    uniqueToBList = list(uniqueToB)
    #print(uniqueToAList[0])
    #print(len(uniqueToAList))


    outputFileA = open(os.path.join(workingDir, "UniqueTo"+csvFileAName),'w')
    outputFileA.writelines(uniqueToAList)
    outputFileA.close()
    outputFileB = open(os.path.join(workingDir, "UniqueTo"+csvFileBName),'w')
    outputFileB.writelines(uniqueToBList)
    outputFileB.close()
Revision:	779
Committed:	Fri Aug 5 02:19:30 2022 UTC (3 years, 7 months ago) by nino.borges
Content type:	text/x-python
File size:	2994 byte(s)
Log Message:	This program will take two CSV files and do a row to row compare. For this to work both CSVs need to have the same colums. Set colOmitList to FALSE if you dont want it to parse at all and remove any columns. for that to work though you have to normalize the date and number fields. NOTE: the concur data HAS pipes in it, so verify each report.
#	User	Rev	Content
1	nino.borges	779	"""
2
3			CsvRowCompare
4
5			Created by:
6			Emanuel Borges
7			05.27.2022
8
9			This program will take two CSV files and do a row to row compare. For this to work both CSVs need to have the same colums.
10			Set colOmitList to FALSE if you dont want it to parse at all and remove any columns. for that to work though you have to normalize the date
11			and number fields.
12			NOTE: the concur data HAS pipes in it, so verify each report.
13
14			"""
15
16			import os
17
18
19			if __name__ == '__main__':
20			workingDir = r"C:\Users\eborges\Documents\Cases\Endo\Temp\2022.05.26 Jurisdiction-Specific Concur files\_20220627_RowToRowAnalysis\20200803 - Missouri"
21			csvFileAName = "ENDO_DATA-OPIOID_MO-00000003_HIGHLY CONFIDENTIAL.csv"
22			csvFileBName = "UN-AllYears(Missouri-Only).csv"
23
24
25			## remember that it calculates the index based on each pop! so the second index needs to take into account that the first index was removed.
26			#colOmitList = [3,14]
27			#colOmitList = [15,]
28			colOmitList = False
29
30			csvFileASet = set()
31			csvFileBSet = set()
32
33
34			print("Now opening file A...")
35			contents = open(os.path.join(workingDir,csvFileAName)).readlines()
36			headderA = contents[0]
37			contents = contents[1:]
38			print("There are %s real rows in this file."% len(contents))
39			print("Creating SetA...")
40			for line in contents:
41			if colOmitList:
42			tempList = line.split("\|")
43			for o in colOmitList:
44			tempList.pop(o)
45			line = "\|".join(tempList)
46			csvFileASet.add(line.upper())
47			print("SetA created.")
48			print("\n")
49
50
51			print("Now opening file B...")
52			contents = open(os.path.join(workingDir,csvFileBName)).readlines()
53			headderB = contents[0]
54			if headderB == headderA:
55			print("Headders for these two files are identical.")
56			else:
57			print("WARNING: Headders for these two files are NOT identical.")
58			print ("\n")
59
60			contents = contents[1:]
61			print("There are %s real rows in this file."% len(contents))
62			print("Creating SetB...")
63			for line in contents:
64			if colOmitList:
65			tempList = line.split("\|")
66			for o in colOmitList:
67			tempList.pop(o)
68			line = "\|".join(tempList)
69			csvFileBSet.add(line.upper())
70			print("SetB created.")
71
72
73
74			uniqueToA = csvFileASet - csvFileBSet
75			uniqueToB = csvFileBSet - csvFileASet
76
77
78			print("There are %s rows that are unique to file A."%len(uniqueToA))
79			print("There are %s rows that are unique to file B."%len(uniqueToB))
80			uniqueToAList = list(uniqueToA)
81			uniqueToBList = list(uniqueToB)
82			#print(uniqueToAList[0])
83			#print(len(uniqueToAList))
84
85
86			outputFileA = open(os.path.join(workingDir, "UniqueTo"+csvFileAName),'w')
87			outputFileA.writelines(uniqueToAList)
88			outputFileA.close()
89			outputFileB = open(os.path.join(workingDir, "UniqueTo"+csvFileBName),'w')
90			outputFileB.writelines(uniqueToBList)
91			outputFileB.close()