ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Evidox/IncomingProdAnalyzer.py
Revision: 692
Committed: Wed May 13 15:35:21 2020 UTC (5 years, 10 months ago) by nino.borges
Content type: text/x-python
File size: 2616 byte(s)
Log Message:
Moving into its own folder

File Contents

# Content
1 """
2
3 IncomingProdAnalyzer
4
5 Created by
6 Emanuel Borges
7 2020.02.07
8
9 A simple program that I can point to a DAT and it will analyze it for issues like columns not lining up,
10 which fields were given in the header, which ones have data and which are totally empty, etc.
11 Added support for UTF quotechars and delims, plus removal of that little BOM at the beginning.
12
13 """
14
15 import chardet
16
def _field_text(raw, encoding):
    """Return a DAT field name in a printable form.

    On Python 2 the raw byte string is already ``str`` and is returned
    untouched. On Python 3 the bytes are decoded with *encoding*;
    undecodable bytes are replaced instead of raising.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode(encoding, "replace")


def AnalyzeDAT(datFilePath):
    """Analyze a DAT load file and print a report about its fields.

    The first line of the file is treated as the header. Every following
    line is split on the delimiter and compared against the header: a
    warning is printed for each line whose field count differs, and the
    report lists all header fields, the fields that hold data in at least
    one record, and the fields that are empty in every record.

    Args:
        datFilePath: Path of the DAT file to analyze.

    Returns:
        None. The report is written to standard output.
    """
    # Binary mode keeps the raw bytes for chardet and for the byte-level
    # quote/delimiter handling below; the handle is closed on exit.
    with open(datFilePath, "rb") as datFile:
        contents = datFile.readlines()

    if not contents:
        print("The DAT file is empty; there is nothing to analyze.")
        return

    # chardet reports None when it cannot guess an encoding; treat that
    # the same as a non-UTF load file.
    charEncoding = (chardet.detect(contents[0])['encoding'] or "").upper()
    if "UTF" in charEncoding:
        # Quote character as UTF-8 bytes; also drop the UTF-8 BOM.
        quoteChar = b"\xc3\xbe"
        headder = contents[0].replace(b"\xef\xbb\xbf", b"")
        textEncoding = "utf-8"
    else:
        quoteChar = b"\xfe"
        headder = contents[0]
        textEncoding = "latin-1"
    delim = b"\x14"
    lineEnd = b"\r\n"

    # Strip the line ending first so the last field name does not carry it.
    headder = headder.rstrip(lineEnd).replace(quoteChar, b"").split(delim)
    fieldNames = [_field_text(name, textEncoding) for name in headder]
    numberOfFields = len(fieldNames)
    contents = contents[1:]

    print("Analyzing file...")
    print("There are %s records in this load." % len(contents))
    populatedSpots = set()
    for line in contents:
        line = line.rstrip(lineEnd).replace(quoteChar, b"").split(delim)
        if len(line) != numberOfFields:
            print("Warning: number of fields for this line doenst match.")
        # Values beyond the header width have no field name to report
        # against (the warning above already flags the line), so only the
        # columns the header defines are recorded.
        for itemSpot, value in enumerate(line[:numberOfFields]):
            if value:
                populatedSpots.add(itemSpot)

    print("Analysis completed.")
    print("")
    print("-" * 10)
    print("The following fields exist in this DAT:")
    for fieldName in fieldNames:
        print(fieldName)
    print("-" * 10)
    print("")
    print("The following fields actually contains *some* data:")
    for spot in sorted(populatedSpots):
        print(fieldNames[spot])

    print("-" * 10)
    print("")
    print("The following fields are totally empty:")
    for spot, fieldName in enumerate(fieldNames):
        if spot not in populatedSpots:
            print(fieldName)
85
if __name__ == '__main__':
    import sys

    # Default load file, used when no path is given on the command line.
    datFilePath = r"\\sas44\sas44\34039\Deliverable\21\NAT006\DATA\NAT006.DAT"
    # An optional first command-line argument overrides the default path.
    if len(sys.argv) > 1:
        datFilePath = sys.argv[1]
    AnalyzeDAT(datFilePath)
90
91