ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/EndoFieldsFlaggedForProdReport.py
Revision: 781
Committed: Fri Aug 5 02:20:22 2022 UTC (3 years, 7 months ago) by nino.borges
Content type: text/x-python
File size: 8622 byte(s)
Log Message:
general updates

File Contents

# User Rev Content
1 nino.borges 764 """
2    
3     EndoFieldsFlaggedForProdReport
4    
5     Created by:
6     Emanuel Borges
7     07.30.2021
8    
9     This program will take the results from EndoMultipleTableFieldCompare and will generate a
10     spreadsheet report so that specific fields can be selected for production. It will use the
11     results in the table-field compare and prioritize the tables that are not subsumed to the left
12     of the workbook and the ones that are to the right, with the idea that you will have a separator
13     tab between them. It will then highlight in green the rows that were already selected for prod.
14     Be sure to convert the multiple csvs to pipe delimited and NOT utf-16
15    
16     """
17    
18     import os
19     from datetime import datetime
20     from win32com.client import Dispatch
21    
22    
23    
24     if __name__ == '__main__':
25 nino.borges 781 subSetOfFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sif_MannyAnalysis(subsetOf)b_e.txt"
26     identicalToFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sif_MannyAnalysis(identicalTo)c_e.txt"
27     exportedTableListFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\SIFTableList_e.csv"
28     finalSpreadsheetReportName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210731_SIF_TableFieldReport.xlsx"
29 nino.borges 764 ## This is the file you created based on the preselected list by A&P
30     apFieldsRequested = r"C:\Users\eborges\Documents\Cases\Endo\20210722_DatabaseSearches\20210722_DatabaseTablesFields_AP_Requested.txt"
31     ## This is the original directory of the csvs
32 nino.borges 781 csvStartDir = r"C:\Users\eborges\Downloads\all_csv_test\csv\EndoPharm"
33 nino.borges 764 ## This is where it saves the new copy of the CSVs. MAKE SURE THIS DIR IS EMPTY
34 nino.borges 781 csvOutputDir = r"C:\Users\eborges\Downloads\all_csv_test\csv\EndoPharm-MANNY"
35 nino.borges 764
36 nino.borges 781 csvNamePreString = "EndoPharm_"
37     csvNamePostString = "_top_50.csv"
38 nino.borges 764
39     ## Create the prioritized list
40     compareMatrix = {}
41     contents = open(subSetOfFileName).readlines()
42     for line in contents:
43     line = line.replace("\n","")
44     line = line.replace("The field list in ","")
45     line = line.split(" is a subset of ")
46     if line[0] in list(compareMatrix.keys()):
47     compareMatrix[line[0]].append(line[1])
48     else:
49     compareMatrix[line[0]] = [line[1],]
50    
51     print(len(compareMatrix.keys()))
52    
53    
54     tablesReferenced = set()
55     for i in list(compareMatrix.keys()):
56     tablesReferenced.add(i)
57     for x in compareMatrix[i]:
58     tablesReferenced.add(x)
59    
60     print(len(tablesReferenced))
61    
62    
63 nino.borges 781 identicalToMatrix = {}
64     contents = open(identicalToFileName).readlines()
65     for line in contents:
66     line = line.replace("\n","")
67     line = line.split("|")
68     tbls = line[0].replace("The field list in ","")
69     tbls = tbls.split(" is identical to fields in ")
70     if line[1] in list(identicalToMatrix.keys()):
71     for i in tbls:
72     identicalToMatrix[line[1]].add(i)
73     else:
74     identicalToMatrix[line[1]] = set(tbls)
75    
76     for id in list(identicalToMatrix.keys()):
77     for i in identicalToMatrix[id]:
78     tablesReferenced.add(i)
79     print(len(tablesReferenced))
80    
81 nino.borges 764
82     contents = open(exportedTableListFileName).readlines()
83     ## Removing the headderrow
84     contents = contents[1:]
85     sifBackupFullTableMatrix = {}
86     for line in contents:
87     line = line.replace("\n","")
88     line = line.split("|")
89     sifBackupFullTableMatrix[line[0]] = line[1]
90    
91     print(len(sifBackupFullTableMatrix.keys()))
92    
93    
94     tablesNotReferenced = []
95     for t in (sifBackupFullTableMatrix.keys()):
96     if sifBackupFullTableMatrix[t] == "0":
97     print("%s table dropped because it has zero rows."%t)
98     else:
99     if t in tablesReferenced:
100     pass
101     else:
102     tablesNotReferenced.append(t)
103    
104     print(len(tablesNotReferenced))
105    
106    
107     tablesReferencedPosMatrix = {}
108     for t in tablesReferenced:
109     tablesReferencedPosMatrix[t] = 1
110    
111     print(len(tablesReferencedPosMatrix.keys()))
112    
113    
114     contents = open(subSetOfFileName).readlines()
115     for line in contents:
116     line = line.replace("\n","")
117     line = line.replace("The field list in ","")
118     line = line.split(" is a subset of ")
119     tablesReferencedPosMatrix[line[0]] +=1
120 nino.borges 781
121     for id in list(identicalToMatrix.keys()):
122     dupTabs = list(identicalToMatrix[id])
123     dupTabs = dupTabs[1:]
124     for i in dupTabs:
125     tablesReferencedPosMatrix[i] +=1
126    
127    
128    
129 nino.borges 764 referencedNotSubset = []
130     for i in list(tablesReferencedPosMatrix.keys()):
131     if tablesReferencedPosMatrix[i] == 1:
132     referencedNotSubset.append(i)
133    
134     print("There are %s tables not referenced.\nThere are %s referenced but are not a subset.\nThat leaves %s subsets that will go on right of break tab."% (len(tablesNotReferenced),len(referencedNotSubset),(len(tablesReferencedPosMatrix.keys())-len(referencedNotSubset))))
135     #print("There are %s tables not referenced.\nThere are %s referenced but are not a subset."% (len(tablesNotReferenced),len(referencedNotSubset)))
136    
137     ## Prioritized List created so now write that out to excel as tabs only.
138    
139     posList = []
140     for i in list(tablesReferencedPosMatrix.keys()):
141     posList.append((tablesReferencedPosMatrix[i],i))
142    
143     posList.sort()
144     #print(posList)
145     print("\nPlace the separator between these two tabs.")
146     print(posList[len(referencedNotSubset)-1])
147     print(posList[len(referencedNotSubset)])
148     print("\n")
149    
150    
151     xlApp = Dispatch('Excel.Application')
152     xlBook = xlApp.Workbooks.Add()
153    
154     for t in tablesNotReferenced:
155     sh = xlBook.Worksheets.Add(Before = None, After = xlBook.Worksheets(xlBook.Worksheets.count))
156 nino.borges 781 #print(t)
157     if len(t) > 31:
158     t = t[:31]
159     print("%s is too long. Fixing"%t)
160 nino.borges 764 sh.Name = t
161    
162     for t in posList:
163     sh = xlBook.Worksheets.Add(Before = None, After = xlBook.Worksheets(xlBook.Worksheets.count))
164     nm = t[1]
165     if len(nm) > 31:
166 nino.borges 781 nm = nm[:31]
167 nino.borges 764 print("%s is too long. Fixing"%t[1])
168     sh.Name = nm
169    
170     xlBook.SaveAs(finalSpreadsheetReportName)
171     xlBook.Close()
172    
173     ## Tabs Created. Now populate the tabs with the csv data, making sure to first convert out of UTF-16 if they are AND pipe delimited
174 nino.borges 781 ## for f in os.listdir(csvStartDir):
175     ## contents = open(os.path.join(csvStartDir,f),encoding="UTF-16").readlines()
176     ## outputFile = open(os.path.join(csvOutputDir,f),'w', encoding='UTF-8')
177     ## for line in contents:
178     ## line = line.replace('","','|')
179     ## line = line.replace('"','')
180     ## nl = outputFile.write(line)
181     ## outputFile.close()
182 nino.borges 764
183    
184     xlBook = xlApp.Workbooks.Open(finalSpreadsheetReportName)
185     for f in os.listdir(csvOutputDir):
186     tempxlBook = xlApp.Workbooks.Open(os.path.join(csvOutputDir,f))
187 nino.borges 781 tblName = f.replace(csvNamePreString,"")
188     tblName = tblName.replace(csvNamePostString,"")
189 nino.borges 764 if len(tblName) >31:
190 nino.borges 781 tblName = tblName[:31]
191     print("%s is too long. Fixing."%tblName)
192     print ("now copying %s"%tblName)
193     try:
194     xlBook.Worksheets(tblName).Range("A1:ZZ100").Value = tempxlBook.Worksheets(1).Range("A1:ZZ100").Value
195     except:
196     print("%s FAILED TO COPY!!!"%tblName)
197 nino.borges 764 tempxlBook.Close()
198     xlBook.Close(SaveChanges=True)
199    
200     ## Tabs populated. Now pre-color fields based on their pre-selections.
201     preSelectedFieldsMatrix = {}
202     contents = open(apFieldsRequested).readlines()
203     for line in contents:
204     line = line.replace("\n","")
205     line = line.split("|")
206     preSelectedFieldsMatrix[line[0]] = []
207     itms = line[1].split("; ")
208     for i in itms:
209     preSelectedFieldsMatrix[line[0]].append(i)
210    
211     print(len(preSelectedFieldsMatrix.keys()))
212    
213    
214     xlBook = xlApp.Workbooks.Open(finalSpreadsheetReportName)
215     for tbl in list(preSelectedFieldsMatrix.keys()):
216     try:
217     sht = xlBook.Worksheets(tbl)
218     count = 1
219     while sht.Cells(2,count).value:
220     if sht.Cells(2,count).value.lower() in preSelectedFieldsMatrix[tbl]:
221     sht.Cells(2,count).Interior.ColorIndex = 50
222     count +=1
223     except:
224     print("%s not found."%tbl)