ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/CsvRowCompare.py
Revision: 779
Committed: Fri Aug 5 02:19:30 2022 UTC (3 years, 7 months ago) by nino.borges
Content type: text/x-python
File size: 2994 byte(s)
Log Message:
This program will take two CSV files and do a row to row compare.  For this to work both CSVs need to have the same colums.
Set colOmitList to FALSE if you dont want it to parse at all and remove any columns.  for that to work though you have to normalize the date
and number fields.
NOTE: the concur data HAS pipes in it, so verify each report.

File Contents

# User Rev Content
1 nino.borges 779 """
2    
3     CsvRowCompare
4    
5     Created by:
6     Emanuel Borges
7     05.27.2022
8    
9     This program will take two CSV files and do a row to row compare. For this to work both CSVs need to have the same colums.
10     Set colOmitList to FALSE if you dont want it to parse at all and remove any columns. for that to work though you have to normalize the date
11     and number fields.
12     NOTE: the concur data HAS pipes in it, so verify each report.
13    
14     """
15    
16     import os
17    
18    
19     if __name__ == '__main__':
20     workingDir = r"C:\Users\eborges\Documents\Cases\Endo\Temp\2022.05.26 Jurisdiction-Specific Concur files\_20220627_RowToRowAnalysis\20200803 - Missouri"
21     csvFileAName = "ENDO_DATA-OPIOID_MO-00000003_HIGHLY CONFIDENTIAL.csv"
22     csvFileBName = "UN-AllYears(Missouri-Only).csv"
23    
24    
25     ## remember that it calculates the index based on each pop! so the second index needs to take into account that the first index was removed.
26     #colOmitList = [3,14]
27     #colOmitList = [15,]
28     colOmitList = False
29    
30     csvFileASet = set()
31     csvFileBSet = set()
32    
33    
34     print("Now opening file A...")
35     contents = open(os.path.join(workingDir,csvFileAName)).readlines()
36     headderA = contents[0]
37     contents = contents[1:]
38     print("There are %s real rows in this file."% len(contents))
39     print("Creating SetA...")
40     for line in contents:
41     if colOmitList:
42     tempList = line.split("|")
43     for o in colOmitList:
44     tempList.pop(o)
45     line = "|".join(tempList)
46     csvFileASet.add(line.upper())
47     print("SetA created.")
48     print("\n")
49    
50    
51     print("Now opening file B...")
52     contents = open(os.path.join(workingDir,csvFileBName)).readlines()
53     headderB = contents[0]
54     if headderB == headderA:
55     print("Headders for these two files are identical.")
56     else:
57     print("WARNING: Headders for these two files are NOT identical.")
58     print ("\n")
59    
60     contents = contents[1:]
61     print("There are %s real rows in this file."% len(contents))
62     print("Creating SetB...")
63     for line in contents:
64     if colOmitList:
65     tempList = line.split("|")
66     for o in colOmitList:
67     tempList.pop(o)
68     line = "|".join(tempList)
69     csvFileBSet.add(line.upper())
70     print("SetB created.")
71    
72    
73    
74     uniqueToA = csvFileASet - csvFileBSet
75     uniqueToB = csvFileBSet - csvFileASet
76    
77    
78     print("There are %s rows that are unique to file A."%len(uniqueToA))
79     print("There are %s rows that are unique to file B."%len(uniqueToB))
80     uniqueToAList = list(uniqueToA)
81     uniqueToBList = list(uniqueToB)
82     #print(uniqueToAList[0])
83     #print(len(uniqueToAList))
84    
85    
86     outputFileA = open(os.path.join(workingDir, "UniqueTo"+csvFileAName),'w')
87     outputFileA.writelines(uniqueToAList)
88     outputFileA.close()
89     outputFileB = open(os.path.join(workingDir, "UniqueTo"+csvFileBName),'w')
90     outputFileB.writelines(uniqueToBList)
91     outputFileB.close()