ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Evidox/IncomingProdAnalyzer/Trunk/IncomingProdAnalyzer.py
Revision: 710
Committed: Tue Aug 18 22:05:33 2020 UTC (5 years, 7 months ago) by nino.borges
Content type: text/x-python
File size: 4942 byte(s)
Log Message:
Version 2 which is a major re-write allowing for more tools and functions to be usable on scanned dat.  Create excel report, create bates range report.

File Contents

# Content
1 """
2
3 IncomingProdAnalyzer
4
5 Created by
6 Emanuel Borges
7 2020.02.07
8
9 A simple program that I can point to a DAT and it will analyze it for issues like columns not lining up,
10 what fields they gave in the headder and which ones have stuff and which are totally empty, etc.
11 Support for UTF quotechars and delims plus removing that little BOM at the beging added.
12
13 """
14
15 import chardet, ExcelLib, BatesRangeFunctions
16
17 def AnalyzeDAT(datFilePath):
18 """Returns totalRecordCount, fullFieldList, populatedFieldsList, emptyFieldsList, parsingErrorCount, contents, quoteChar, delim"""
19 matrix = {}
20 headderMatrix = {}
21 populatedFieldsList = []
22 emptyFieldsList = []
23
24
25 contents = open(datFilePath).readlines()
26
27 charEncoding = None
28 charEncodingCount = 0
29 while charEncoding == None:
30 charEncoding = chardet.detect(contents[charEncodingCount])['encoding']
31 charEncodingCount = charEncodingCount +1
32
33 charEncoding = charEncoding.upper()
34 #print charEncoding
35 if "UTF" in charEncoding:
36 print "UTF found"
37 quoteChar = "\xc3\xbe"
38 headder = contents[0].replace("\xef\xbb\xbf","")
39 else:
40 print "Standard load file found"
41 quoteChar = "\xfe"
42 headder = contents[0]
43 delim = "\x14"
44
45
46 headder = headder.replace(quoteChar,"")
47 headder = headder.replace("\n","")
48 headder = headder.split(delim)
49 ## This headder Matrix is really to look up at the end. I dont use it for the main matrix below.
50 for hSpot, hFieldName in enumerate(headder):
51 headderMatrix[hSpot] = hFieldName
52 numberOfFields = len(headder)
53 contents = contents[1:]
54
55
56
57 totalRecordCount = len(contents)
58 parsingErrorCount = 0
59 for line in contents:
60 line = line.replace("\n","")
61 line = line.replace(quoteChar,"")
62 line = line.split(delim)
63 if len(line) == numberOfFields:
64 pass
65 else:
66 print "Warning: number of fields for this line doenst match."
67 parsingErrorCount = parsingErrorCount +1
68 for itemSpot, value in enumerate(line):
69 if value:
70 matrix[itemSpot] = 1
71
72
73 for spot in matrix.keys():
74 #print headder[spot]
75 populatedFieldsList.append(headder[spot])
76
77
78 for hSpot in headderMatrix.keys():
79 if hSpot in matrix.keys():
80 pass
81 else:
82 #print headderMatrix[hSpot]
83 emptyFieldsList.append(headderMatrix[hSpot])
84
85 fullFieldList = headder
86
87 return totalRecordCount, fullFieldList, populatedFieldsList, emptyFieldsList, parsingErrorCount, contents, quoteChar, delim
88
89 def CreateExcelDatReport(spreadSheetPath, fullFieldList, emptyFieldsList, totalRecordCount):
90 """Creates a spreadsheet as a pretty report of a field list, noting empty fields in Red highlight"""
91 appObj = ExcelLib.ExcelConnection()
92 count = 2
93 for field in fullFieldList:
94 appObj.setCell(1,count,1,field)
95 if field in emptyFieldsList:
96 appObj.setCellProperties(1,count,1,cellColor=3)
97 count = count + 1
98 appObj.forceAutoFitRow(1,"A","B")
99 appObj.setCell(1,count + 2, 4, "Cells in Red are empty fields")
100 appObj.setCell(1,count + 3, 4, "Total Documents: %s"% totalRecordCount)
101 appObj.save(spreadSheetPath)
102 appObj.close()
103
104 def CreateBatesRangeList(colsNumbList, contents, quoteChar, delim):
105 batesList = []
106 parsingErrorCount = 0
107 for line in contents:
108 line = line.replace("\n","")
109 line = line.replace(quoteChar,"")
110 line = line.split(delim)
111 for colNumb in colsNumbList:
112 batesList.append(line[colNumb])
113 batesRangesList = BatesRangeFunctions.GetBatesRanges(batesList)
114 return batesRangesList
115
116
117
118 if __name__ == '__main__':
119
120 datFilePath = r"\\sas12\sas12\30393\Inbound\11\099878\All American Title Final Distribution Ledger REport\data\All American Title Final Distribution Ledger REport.DAT"
121
122 print "Analyzing file..."
123 totalRecordCount, fullFieldList, populatedFieldsList, emptyFieldsList, parsingErroCount, contents, quoteChar, delim = AnalyzeDAT(datFilePath)
124 print ""
125 print "There are %s records in this load."%totalRecordCount
126 print fullFieldList
127
128 print "\nAnalysis completed."
129 print ""
130 print "-"*10
131 print "The following fields exist in this DAT:"
132 for i in fullFieldList:
133 print i
134
135 print "-"*10
136 print ""
137 print "The following fields actually contains *some* data:"
138 for x in populatedFieldsList:
139 print x
140
141 print "-"*10
142 print ""
143 print "The following fields are totally empty:"
144 for y in emptyFieldsList:
145 print y
146
147 CreateExcelDatReport(r"c:\temp\foo-FieldReport.xlsx", fullFieldList, emptyFieldsList, totalRecordCount)
148
149
150
151