ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Evidox/IncomingProdAnalyzer/Trunk/IncomingProdAnalyzer.py
Revision: 687
Committed: Wed May 13 00:00:36 2020 UTC (5 years, 10 months ago) by nino.borges
Content type: text/x-python
Original Path: Python/NinoCode/Active_prgs/Evidox/IncomingProdAnalyzer.py
File size: 2274 byte(s)
Log Message:
Added support for UTF dat files

File Contents

# User Rev Content
1 nino.borges 672 """
2    
3     IncomingProdAnalyzer
4    
5     Created by
6     Emanuel Borges
7     2020.02.07
8    
9     A simple program that I can point to a DAT and it will analyze it for issues like columns not lining up,
10     which fields they gave in the header, which ones have stuff and which are totally empty, etc.
11 nino.borges 687 Added support for UTF quote characters and delimiters, plus removal of the BOM at the beginning.
12 nino.borges 672
13     """
14    
15 nino.borges 687 import chardet
16    
def isUtfEncoding(encodingName):
    """Return True when a detected encoding name looks like a UTF encoding.

    encodingName is the 'encoding' value reported by chardet.detect(). It can
    be None when detection fails, and some chardet versions report lower-case
    names such as 'utf-8', so the comparison is case-insensitive.

    NOTE(review): only UTF-8 byte sequences are handled further down; any
    other UTF flavour would still get the UTF-8 quote character.
    """
    return bool(encodingName) and "UTF" in encodingName.upper()


def splitRecord(rawLine, quoteChar, delim):
    """Split one raw DAT line (a byte string) into its list of field values.

    The trailing line terminator is dropped first (both \\n and \\r, so a
    stray carriage return never counts as field data), then every quote
    character is removed and the remainder is split on delim.
    """
    return rawLine.rstrip(b"\r\n").replace(quoteChar, b"").split(delim)


def findPopulatedFields(recordLines, fieldCount, quoteChar, delim):
    """Scan the data records and report which column positions hold data.

    Returns a tuple (populated, mismatchCount):
      populated     - set of zero-based column positions that were non-empty
                      in at least one record.
      mismatchCount - number of records whose field count differs from
                      fieldCount.
    """
    populated = set()
    mismatchCount = 0
    for rawLine in recordLines:
        values = splitRecord(rawLine, quoteChar, delim)
        if len(values) != fieldCount:
            mismatchCount += 1
        for itemSpot, value in enumerate(values):
            if value:
                populated.add(itemSpot)
    return populated, mismatchCount


def asText(rawValue, isUtf):
    """Return rawValue in a form that prints cleanly.

    On Python 2 byte strings are already str and are returned untouched, so
    the printed bytes are exactly what was read from the file. On Python 3
    bytes are decoded (UTF-8 for UTF files, latin-1 otherwise) so that print
    does not show a b'...' repr.
    """
    if isinstance(rawValue, str):
        return rawValue
    return rawValue.decode("utf-8" if isUtf else "latin-1", "replace")


if __name__ == '__main__':
    datFilePath = r"\\sas44\sas44\34039\Deliverable\21\NAT006\DATA\NAT006.DAT"

    ## Read raw bytes: the quote char and delim are matched as byte sequences,
    ## and the with-block makes sure the file handle gets closed.
    with open(datFilePath, "rb") as datFile:
        contents = datFile.readlines()
    if not contents:
        raise SystemExit("The DAT file is empty; nothing to analyze.")

    isUtf = isUtfEncoding(chardet.detect(contents[0])['encoding'])
    if isUtf:
        ## UTF-8 encoded thorn, and drop the BOM from the header line.
        quoteChar = b"\xc3\xbe"
        headerLine = contents[0].replace(b"\xef\xbb\xbf", b"")
    else:
        quoteChar = b"\xfe"
        headerLine = contents[0]
    delim = b"\x14"

    ## The header goes through the same splitter as the records so the last
    ## field name does not keep the line terminator.
    header = splitRecord(headerLine, quoteChar, delim)
    numberOfFields = len(header)
    records = contents[1:]

    print("Analyzing file...")
    print("There are %s records in this load." % len(records))
    populated, mismatchCount = findPopulatedFields(records, numberOfFields, quoteChar, delim)
    ## One warning per offending record, same as printing it inside the scan.
    for _ in range(mismatchCount):
        print("Warning: number of fields for this line doenst match.")

    print("Analysis completed.")
    print("")
    print("-"*10)
    print("The following fields exist in this DAT:")
    for fieldName in header:
        print(asText(fieldName, isUtf))
    print("-"*10)
    print("")
    print("The following fields actually contains *some* data:")
    for spot in sorted(populated):
        ## Records with too many columns produce positions past the end of
        ## the header; those were already warned about and have no name.
        if spot < numberOfFields:
            print(asText(header[spot], isUtf))

    print("-"*10)
    print("")
    print("The following fields are totally empty:")
    for hSpot, hFieldName in enumerate(header):
        if hSpot not in populated:
            print(asText(hFieldName, isUtf))
77