ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Evidox/IncomingProdAnalyzer/IncomingProdAnalyzer.py
Revision: 697
Committed: Wed May 13 21:48:24 2020 UTC (5 years, 10 months ago) by nino.borges
Content type: text/x-python
File size: 3505 byte(s)
Log Message:
Finished up the dat report dialog and added a bit more code to try to determine the encoding.

File Contents

# Content
1 """
2
3 IncomingProdAnalyzer
4
5 Created by
6 Emanuel Borges
7 2020.02.07
8
9 A simple program that I can point at a DAT file; it analyzes the file for issues such as columns not lining up,
10 which fields were given in the header, which of them contain data and which are totally empty, etc.
11 Added support for UTF quote chars and delimiters, plus removal of the little BOM at the beginning.
12
13 """
14
15 import chardet
16
def AnalyzeDAT(datFilePath):
    """Analyze a delimited DAT load file and report which fields hold data.

    The first line of the file is treated as the header.  Every following
    line is one record.  Fields are separated by byte 0x14 and wrapped in a
    quote character: 0xFE for a standard load file, or its UTF-8 encoding
    0xC3 0xBE when the file is detected as UTF.

    Args:
        datFilePath: Path of the DAT file to read.

    Returns:
        A tuple ``(totalRecordCount, fullFieldList, populatedFieldsList,
        emptyFieldsList, parsingErrorCount)`` where

        * totalRecordCount is the number of lines after the header,
        * fullFieldList is every header field name, in header order,
        * populatedFieldsList is the header fields that hold a non-empty
          value in at least one record, in header order,
        * emptyFieldsList is the header fields that are empty in every
          record, in header order,
        * parsingErrorCount is the number of records whose field count
          differs from the header's.

        Field names are the raw byte strings read from the file (plain
        ``str`` on Python 2).  An empty file yields ``(0, [], [], [], 0)``.
    """
    utf8Bom = b"\xef\xbb\xbf"
    delim = b"\x14"

    # Binary mode keeps the quote/delimiter bytes exactly as stored on disk,
    # and the with-block guarantees the handle is closed.
    with open(datFilePath, "rb") as datFile:
        contents = datFile.readlines()

    if not contents:
        # No header and no records: nothing to analyze.
        return 0, [], [], [], 0

    if contents[0].startswith(utf8Bom):
        # A UTF-8 byte order mark is conclusive; no need to guess.
        charEncoding = "UTF-8-SIG"
    else:
        # Ask chardet line by line until one line yields an answer.
        charEncoding = None
        for rawLine in contents:
            charEncoding = chardet.detect(rawLine)['encoding']
            if charEncoding is not None:
                break
        if charEncoding is None:
            # Nothing could be classified; fall through to the standard
            # (non-UTF) branch below instead of running off the end of the
            # line list.
            charEncoding = ""

    charEncoding = charEncoding.upper()
    if "UTF" in charEncoding:
        print("UTF found")
        quoteChar = b"\xc3\xbe"
        headder = contents[0].replace(utf8Bom, b"")
    else:
        print("Standard load file found")
        quoteChar = b"\xfe"
        headder = contents[0]

    # Strip the line ending first so the last field name does not carry a
    # trailing newline.
    headder = headder.rstrip(b"\r\n").replace(quoteChar, b"").split(delim)
    numberOfFields = len(headder)

    records = contents[1:]
    totalRecordCount = len(records)
    parsingErrorCount = 0
    populatedSpots = set()  # column positions seen with a non-empty value
    for line in records:
        values = line.rstrip(b"\r\n").replace(quoteChar, b"").split(delim)
        if len(values) != numberOfFields:
            print("Warning: number of fields for this line doenst match.")
            parsingErrorCount += 1
        for itemSpot, value in enumerate(values):
            if value:
                populatedSpots.add(itemSpot)

    # Walk the header rather than the populated positions: a malformed record
    # may have more columns than the header, and those extra positions have
    # no field name to report.
    populatedFieldsList = [
        fieldName for spot, fieldName in enumerate(headder)
        if spot in populatedSpots
    ]
    emptyFieldsList = [
        fieldName for spot, fieldName in enumerate(headder)
        if spot not in populatedSpots
    ]
    fullFieldList = headder

    return totalRecordCount, fullFieldList, populatedFieldsList, emptyFieldsList, parsingErrorCount
87
if __name__ == '__main__':
    # Command-line entry point: analyze one DAT file and print a report of
    # its record count, parsing errors, and full / populated / empty fields.
    # Usage: IncomingProdAnalyzer.py [path-to-DAT]
    import sys  # only the script entry point needs it

    # Used when no path is given on the command line.
    defaultDatFilePath = r"\\sas12\sas12\30393\Inbound\11\099878\All American Title Final Distribution Ledger REport\data\All American Title Final Distribution Ledger REport.DAT"
    datFilePath = sys.argv[1] if len(sys.argv) > 1 else defaultDatFilePath

    print("Analyzing file...")
    totalRecordCount, fullFieldList, populatedFieldsList, emptyFieldsList, parsingErrorCount = AnalyzeDAT(datFilePath)
    print("")
    print("There are %s records in this load." % totalRecordCount)
    # Surface the mismatch count so it is not lost among the per-line warnings.
    print("There are %s lines whose field count does not match the header." % parsingErrorCount)

    print("\nAnalysis completed.")
    print("")
    print("-" * 10)
    print("The following fields exist in this DAT:")
    for fieldName in fullFieldList:
        print(fieldName)

    print("-" * 10)
    print("")
    print("The following fields actually contains *some* data:")
    for fieldName in populatedFieldsList:
        print(fieldName)

    print("-" * 10)
    print("")
    print("The following fields are totally empty:")
    for fieldName in emptyFieldsList:
        print(fieldName)
115
116
117
118