ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Evidox/IncomingProdAnalyzer/Trunk/IncomingProdAnalyzer.py
Revision: 718
Committed: Thu Nov 5 22:08:59 2020 UTC (5 years, 4 months ago) by nino.borges
Content type: text/x-python
File size: 5372 byte(s)
Log Message:
Adding more support for UTF by using io library for better file open control. Updated ver to 2.1

File Contents

# Content
1 """
2
3 IncomingProdAnalyzer
4
5 Created by
6 Emanuel Borges
7 2020.02.07
8
9 A simple program that I can point to a DAT and it will analyze it for issues like columns not lining up,
10 what fields they gave in the headder and which ones have stuff and which are totally empty, etc.
11 Support for UTF quotechars and delims plus removing that little BOM at the beging added.
12
13 """
14
15 import chardet, ExcelLib, BatesRangeFunctions, io
16
17 def AnalyzeDAT(datFilePath):
18 """Returns totalRecordCount, fullFieldList, populatedFieldsList, emptyFieldsList, parsingErrorCount, contents, quoteChar, delim"""
19 matrix = {}
20 headderMatrix = {}
21 populatedFieldsList = []
22 emptyFieldsList = []
23
24 ## Dummy contents just to figure out encoding
25 contents = open(datFilePath).readlines()
26
27 charEncoding = None
28 charEncodingCount = 0
29 while charEncoding == None:
30 charEncoding = chardet.detect(contents[charEncodingCount])['encoding']
31 charEncodingCount = charEncodingCount +1
32
33 charEncoding = charEncoding.upper()
34
35
36 ## #print charEncoding
37 ## if "UTF" in charEncoding:
38 ## print "UTF found"
39 ## quoteChar = "\xc3\xbe"
40 ## headder = contents[0].replace("\xef\xbb\xbf","")
41 ## else:
42 ## print "Standard load file found"
43 ## quoteChar = "\xfe"
44 ## headder = contents[0]
45
46 if "UTF" in charEncoding:
47 print "UTF found"
48 contents = io.open(datFilePath, encoding = charEncoding).readlines()
49 else:
50 print "Standard load file found"
51 contents = io.open(datFilePath).readlines()
52
53 print charEncoding
54 delim = u"\x14"
55 quoteChar = u"\xfe"
56 headder = contents[0]
57 #print contents[0]
58
59
60 headder = headder.replace(quoteChar,"")
61 headder = headder.replace("\n","")
62 headder = headder.split(delim)
63 ## This headder Matrix is really to look up at the end. I dont use it for the main matrix below.
64 for hSpot, hFieldName in enumerate(headder):
65 headderMatrix[hSpot] = hFieldName
66 numberOfFields = len(headder)
67 contents = contents[1:]
68
69
70
71 totalRecordCount = len(contents)
72 parsingErrorCount = 0
73 for line in contents:
74 line = line.replace("\n","")
75 line = line.replace(quoteChar,"")
76 line = line.split(delim)
77 if len(line) == numberOfFields:
78 pass
79 else:
80 print "Warning: number of fields for this line doenst match."
81 parsingErrorCount = parsingErrorCount +1
82 for itemSpot, value in enumerate(line):
83 if value:
84 matrix[itemSpot] = 1
85
86
87 for spot in matrix.keys():
88 #print headder[spot]
89 populatedFieldsList.append(headder[spot])
90
91
92 for hSpot in headderMatrix.keys():
93 if hSpot in matrix.keys():
94 pass
95 else:
96 #print headderMatrix[hSpot]
97 emptyFieldsList.append(headderMatrix[hSpot])
98
99 fullFieldList = headder
100
101 return totalRecordCount, fullFieldList, populatedFieldsList, emptyFieldsList, parsingErrorCount, contents, quoteChar, delim
102
103 def CreateExcelDatReport(spreadSheetPath, fullFieldList, emptyFieldsList, totalRecordCount):
104 """Creates a spreadsheet as a pretty report of a field list, noting empty fields in Red highlight"""
105 appObj = ExcelLib.ExcelConnection()
106 count = 2
107 for field in fullFieldList:
108 appObj.setCell(1,count,1,field)
109 if field in emptyFieldsList:
110 appObj.setCellProperties(1,count,1,cellColor=3)
111 count = count + 1
112 appObj.forceAutoFitRow(1,"A","B")
113 appObj.setCell(1,count + 2, 4, "Cells in Red are empty fields")
114 appObj.setCell(1,count + 3, 4, "Total Documents: %s"% totalRecordCount)
115 appObj.save(spreadSheetPath)
116 appObj.close()
117
118 def CreateBatesRangeList(colsNumbList, contents, quoteChar, delim):
119 batesList = []
120 parsingErrorCount = 0
121 for line in contents:
122 line = line.replace("\n","")
123 line = line.replace(quoteChar,"")
124 line = line.split(delim)
125 for colNumb in colsNumbList:
126 batesList.append(line[colNumb])
127 batesRangesList = BatesRangeFunctions.GetBatesRanges(batesList)
128 return batesRangesList
129
130
131
132 if __name__ == '__main__':
133
134 datFilePath = r"\\sas12\sas12\30393\Inbound\11\099878\All American Title Final Distribution Ledger REport\data\All American Title Final Distribution Ledger REport.DAT"
135
136 print "Analyzing file..."
137 totalRecordCount, fullFieldList, populatedFieldsList, emptyFieldsList, parsingErroCount, contents, quoteChar, delim = AnalyzeDAT(datFilePath)
138 print ""
139 print "There are %s records in this load."%totalRecordCount
140 print fullFieldList
141
142 print "\nAnalysis completed."
143 print ""
144 print "-"*10
145 print "The following fields exist in this DAT:"
146 for i in fullFieldList:
147 print i
148
149 print "-"*10
150 print ""
151 print "The following fields actually contains *some* data:"
152 for x in populatedFieldsList:
153 print x
154
155 print "-"*10
156 print ""
157 print "The following fields are totally empty:"
158 for y in emptyFieldsList:
159 print y
160
161 CreateExcelDatReport(r"c:\temp\foo-FieldReport.xlsx", fullFieldList, emptyFieldsList, totalRecordCount)
162
163
164
165