ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Evidox/addErrorFileFixer.py
Revision: 746
Committed: Thu Apr 15 20:11:16 2021 UTC (4 years, 11 months ago) by nino.borges
Content type: text/x-python
File size: 4846 byte(s)
Log Message:
Updated to be compatible with python3, which made the bak files.

File Contents

# User Rev Content
1 nino.borges 633 #!/usr/bin/env python
2     # -*- coding: UTF-8 -*-
3    
4     """
5    
6     addErrorFileFixer.py
7    
8     Created by
9     Emanuel Borges
10     08.31.2017
11    
12     This program will take a directory of dat files and merge them into a 3 dimensional array, ending with 1 large dat file
13    
14     """
15    
16     import codecs, os
17     from collections import namedtuple
18    
def ConcordanceToList(line, delimChar, quoteChar):
    """Split one line of Concordance-delimited text into a list of field values.

    Carriage returns, newlines, the Unicode byte-order mark and every
    occurrence of quoteChar are removed from the line first; what is left
    is split on delimChar.
    """
    # Strip in the same order every time: line endings, BOM, then quotes.
    for unwanted in ("\r", "\n", "\ufeff", quoteChar):
        line = line.replace(unwanted, "")
    return line.split(delimChar)
27    
def GatherAllPossibleFields(pathToFiles, delimChar, quoteChar):
    """Collect the unique header field names used by the DAT files in a directory.

    Only the first line of each file is read.  Spaces in that line are
    replaced by underscores before it is split with ConcordanceToList.

    Args:
        pathToFiles: Directory whose regular files are read as UTF-8 text.
        delimChar: Field delimiter handed to ConcordanceToList.
        quoteChar: Quote character handed to ConcordanceToList.

    Returns:
        A dict whose keys are the distinct field names; every value is 1.
    """
    headderDict = {}
    for fileName in os.listdir(pathToFiles):
        fullPath = os.path.join(pathToFiles, fileName)
        # Sub-directories cannot be opened as text files; skip them.
        if not os.path.isfile(fullPath):
            continue
        # The context manager closes the handle even if decoding fails.
        with codecs.open(fullPath, "r", "UTF-8") as datFile:
            headderRow = datFile.readline()
        # An empty file has no header row and contributes no fields.
        if not headderRow:
            continue
        headderRow = headderRow.replace(" ", "_")
        for h in ConcordanceToList(headderRow, delimChar, quoteChar):
            headderDict[h] = 1
    return headderDict
39    
def MergeAllRecords(pathToFiles, delimChar, quoteChar, dataStoreObj, fullHeadderList):
    """Read every DAT file in a directory and build one record object per ID.

    The first column of each file is treated as the record ID.  It is removed
    from both the header row and each data row, and is used as the key of the
    returned dict.  Fields listed in fullHeadderList that a file does not
    carry are filled with "" so that dataStoreObj always receives a value for
    every field.

    Args:
        pathToFiles: Directory whose regular files are read as UTF-8 text.
        delimChar: Field delimiter handed to ConcordanceToList.
        quoteChar: Quote character handed to ConcordanceToList.
        dataStoreObj: Callable invoked as dataStoreObj(**fields) for each
            record (the script passes a namedtuple class).
        fullHeadderList: Every field name the record object expects,
            excluding the ID column.

    Returns:
        A (dataStoreObj, recordDict) tuple, where recordDict maps each record
        ID to the object built for it.

    Raises:
        IndexError: If a data row has fewer values than its file's header.
    """
    recordDict = {}
    # Sorted so that, when one ID appears in several files, the file that
    # wins is the same on every run (os.listdir order is arbitrary).
    for fileName in sorted(os.listdir(pathToFiles)):
        fullPath = os.path.join(pathToFiles, fileName)
        # Sub-directories cannot be opened as text files; skip them.
        if not os.path.isfile(fullPath):
            continue
        # The context manager closes the handle even if decoding fails.
        with codecs.open(fullPath, "r", "UTF-8") as datFile:
            contents = datFile.readlines()
        # An empty file has neither a header nor records.
        if not contents:
            continue

        headderRow = contents[0].replace(" ", "_")
        headderRow = ConcordanceToList(headderRow, delimChar, quoteChar)
        ## Remove the ID column from the header row
        headderRow = headderRow[1:]
        ## Fields this file lacks; the datastore obj cant have missing values
        remainderHeadderRow = set(fullHeadderList) - set(headderRow)

        # Data starts on the second line of the file, hence start=2.
        for lineNumber, rawLine in enumerate(contents[1:], start=2):
            # Blank lines (typically a trailing newline) carry no record.
            if not rawLine.strip():
                continue
            values = ConcordanceToList(rawLine, delimChar, quoteChar)
            eID = values[0]
            ## Now remove it so that the header is back in sync with the line.
            values = values[1:]
            if len(values) < len(headderRow):
                raise IndexError(
                    "%s, line %d: expected %d field value(s) after the ID but found %d"
                    % (fullPath, lineNumber, len(headderRow), len(values))
                )
            ## Pack every possible field, including the missing ones.
            ## zip stops at the header length, so surplus values are ignored.
            tempDict = dict(zip(headderRow, values))
            for rh in remainderHeadderRow:
                tempDict[rh] = ""
            ## Now create the obj entry, using the ** unpack method
            recordDict[eID] = dataStoreObj(**tempDict)
    return dataStoreObj, recordDict
74    
if __name__ == '__main__':
    # Concordance DAT conventions: \x14 separates fields, \xfe quotes them.
    delimChar = "\x14"
    quoteChar = "\xfe"
    mainDir = r"\\iadcifs01\iproshares01\EVDX-ADD Analytics Test-Ipro Template-02\QC Fixes\Overlays\20171122 - EBG"
    outputPath = r"\\iadcifs01\iproshares01\EVDX-ADD Analytics Test-Ipro Template-02\QC Fixes\Overlays\20171122 - EBG - Output"
    outputFileName = '20171122-Output.txt'
    outputHeaderFileName = '20171122-HeaddersOutput.txt'

    ## Gather all the field names used across the input files.
    headderDict = GatherAllPossibleFields(mainDir, delimChar, quoteChar)
    headderList = sorted(headderDict)

    ## Quickly make a text file with the headers, just to be nice.
    ## The with-block closes the file even if a write fails.
    with codecs.open(os.path.join(outputPath, outputHeaderFileName), 'w', "UTF-8") as outputHeadderFile:
        for h in headderList:
            outputHeadderFile.write(h + "\r\n")

    ## The ID column is written separately as the first column of the output,
    ## so it must not be one of the record fields.
    headderList.remove('EVDXID')

    ## One namedtuple field per remaining header, in sorted order.
    ds = namedtuple('EVDXID', headderList)

    ## Merge before creating the record file, so that a failure while reading
    ## the inputs does not leave a header-only output file behind.
    newDS, recordDict = MergeAllRecords(mainDir, delimChar, quoteChar, ds, headderList)

    with codecs.open(os.path.join(outputPath, outputFileName), 'w', "UTF-8") as outputRecordFile:
        ## Write the header row in the record output file too
        headerCells = ['EVDXID'] + headderList
        outputRecordFile.write(
            delimChar.join(quoteChar + cell + quoteChar for cell in headerCells) + "\r\n"
        )

        ## One row per record, ordered by ID.  Iterating a namedtuple yields
        ## its values in field order, which is the order of headderList.
        for record in sorted(recordDict):
            rowCells = [record] + list(recordDict[record])
            outputRecordFile.write(
                delimChar.join(quoteChar + cell + quoteChar for cell in rowCells) + "\r\n"
            )