ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Evidox/IncomingProdAnalyzer/Trunk/IncomingProdAnalyzer.py
Revision: 704
Committed: Tue Jul 14 00:14:58 2020 UTC (5 years, 8 months ago) by nino.borges
Content type: text/x-python
File size: 3505 byte(s)
Log Message:
Setting up the trunk and tags layout and tagging version 1.2, since this is a major version working before I do some big changes.

File Contents

# User Rev Content
1 nino.borges 672 """
2    
3     IncomingProdAnalyzer
4    
5     Created by
6     Emanuel Borges
7     2020.02.07
8    
9     A simple program that I can point to a DAT and it will analyze it for issues like columns not lining up,
10     which fields were given in the header, which of those contain data, and which are totally empty, etc.
11 nino.borges 687 Added support for UTF quote characters and delimiters, plus removal of the BOM at the beginning of the file.
12 nino.borges 672
13     """
14    
15 nino.borges 687 import chardet
16    
def AnalyzeDAT(datFilePath):
    """Analyze a DAT load file and report which header fields hold data.

    The first line of the file is treated as the header; every following
    line is treated as one record. Fields are separated by the 0x14
    delimiter and wrapped in a thorn quote character (0xFE in single-byte
    files, 0xC3 0xBE when the file is UTF-8 encoded).

    Args:
        datFilePath: Path of the DAT file to analyze.

    Returns:
        A tuple ``(totalRecordCount, fullFieldList, populatedFieldsList,
        emptyFieldsList, parsingErrorCount)`` where

        * totalRecordCount is the number of lines after the header,
        * fullFieldList is every field name from the header, in file order,
        * populatedFieldsList is the header fields that have a non-empty
          value in at least one record, in header order,
        * emptyFieldsList is the header fields that never have a value,
          in header order,
        * parsingErrorCount is the number of records whose field count
          differs from the header's.

        Field names are returned as the raw, undecoded byte strings read
        from the file (plain ``str`` on Python 2). A completely empty file
        yields ``(0, [], [], [], 0)``.
    """
    utf8Bom = b"\xef\xbb\xbf"
    delim = b"\x14"

    # Binary mode hands chardet the raw bytes it expects and keeps the
    # byte-level quote/delimiter matching below valid on any platform;
    # the context manager guarantees the handle is closed.
    with open(datFilePath, "rb") as datFile:
        contents = datFile.readlines()

    if not contents:
        # No header line at all: nothing to analyze.
        return 0, [], [], [], 0

    if contents[0].startswith(utf8Bom):
        # A UTF-8 byte order mark is conclusive on its own.
        charEncoding = "UTF-8-SIG"
    else:
        # Ask chardet line by line until one line gives a verdict. If no
        # line does, charEncoding stays empty and the file is handled as a
        # standard (single-byte) load file instead of running off the end
        # of the list.
        charEncoding = ""
        for rawLine in contents:
            detected = chardet.detect(rawLine)['encoding']
            if detected is not None:
                charEncoding = detected.upper()
                break

    if "UTF" in charEncoding:
        print("UTF found")
        quoteChar = b"\xc3\xbe"
        headerLine = contents[0].replace(utf8Bom, b"")
    else:
        print("Standard load file found")
        quoteChar = b"\xfe"
        headerLine = contents[0]

    def splitLine(rawLine):
        # Drop the line terminator (LF or CRLF) first so the last field is
        # not polluted by it, then remove the quotes and split on the
        # delimiter.
        return rawLine.rstrip(b"\r\n").replace(quoteChar, b"").split(delim)

    headder = splitLine(headerLine)
    numberOfFields = len(headder)
    records = contents[1:]

    totalRecordCount = len(records)
    parsingErrorCount = 0
    populatedSpots = set()
    for rawLine in records:
        fields = splitLine(rawLine)
        if len(fields) != numberOfFields:
            print("Warning: number of fields for this line doenst match.")
            parsingErrorCount += 1
        for itemSpot, value in enumerate(fields):
            # Columns past the end of the header have no field name to
            # report against, so they are ignored (the row was already
            # counted as a parsing error above).
            if value and itemSpot < numberOfFields:
                populatedSpots.add(itemSpot)

    # Build both lists in header order so the output is deterministic.
    populatedFieldsList = [
        fieldName for hSpot, fieldName in enumerate(headder)
        if hSpot in populatedSpots
    ]
    emptyFieldsList = [
        fieldName for hSpot, fieldName in enumerate(headder)
        if hSpot not in populatedSpots
    ]

    fullFieldList = headder

    return totalRecordCount, fullFieldList, populatedFieldsList, emptyFieldsList, parsingErrorCount
87    
if __name__ == '__main__':
    # Script entry point: analyze one DAT file and print a field report.
    import sys

    # A path given as the first command-line argument takes precedence;
    # with no argument the original hard-coded file is analyzed.
    defaultDatFilePath = r"\\sas12\sas12\30393\Inbound\11\099878\All American Title Final Distribution Ledger REport\data\All American Title Final Distribution Ledger REport.DAT"
    datFilePath = sys.argv[1] if len(sys.argv) > 1 else defaultDatFilePath

    def _asText(fieldName):
        # Field names may arrive as byte strings; decode those for display
        # only so they do not print as b'...'. Native strings pass through
        # untouched. NOTE(review): UTF-8 is assumed for display - confirm
        # for files whose field names contain non-ASCII characters.
        if isinstance(fieldName, str):
            return fieldName
        return fieldName.decode("utf-8", "replace")

    print("Analyzing file...")
    totalRecordCount, fullFieldList, populatedFieldsList, emptyFieldsList, parsingErrorCount = AnalyzeDAT(datFilePath)
    print("")
    print("There are %s records in this load." % totalRecordCount)
    # Surface the parse-error tally, which was previously returned but
    # never shown to the user.
    print("%s records did not match the header's field count." % parsingErrorCount)

    print("\nAnalysis completed.")
    print("")
    print("-" * 10)
    print("The following fields exist in this DAT:")
    for fieldName in fullFieldList:
        print(_asText(fieldName))

    print("-" * 10)
    print("")
    print("The following fields actually contains *some* data:")
    for fieldName in populatedFieldsList:
        print(_asText(fieldName))

    print("-" * 10)
    print("")
    print("The following fields are totally empty:")
    for fieldName in emptyFieldsList:
        print(_asText(fieldName))
115 nino.borges 688
116 nino.borges 695
117    
118