Active_prgs/Redgrave/EndoFieldsFlaggedForProdReport.py

"""

EndoFieldsFlaggedForProdReport

Created by:
Emanuel Borges
07.30.2021

This program will take the results from EndoMultipleTableFieldCompare and will generate a
spreadsheet report so that specific fields can be selected for production.  It will use the
results in the table-field compare and prioritize the tables that are not subsumed to the left
of the workbook and the ones that are to the right, with the idea that you will have a separator
tab between them.  It will then highlight in green the rows that were already selected for prod.
Be sure to convert the multiple csvs to pipe delimited and NOT utf-16

"""

import os
from datetime import datetime
from win32com.client import Dispatch


if __name__ == '__main__':
    subSetOfFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sif_MannyAnalysis(subsetOf)b_e.txt"
    identicalToFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sif_MannyAnalysis(identicalTo)c_e.txt"
    exportedTableListFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\SIFTableList_e.csv"
    finalSpreadsheetReportName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210731_SIF_TableFieldReport.xlsx"
    ##  This is the file you created based on the preselected list by A&P
    apFieldsRequested = r"C:\Users\eborges\Documents\Cases\Endo\20210722_DatabaseSearches\20210722_DatabaseTablesFields_AP_Requested.txt"
    ## This is the original directory of the csvs
    csvStartDir = r"C:\Users\eborges\Downloads\all_csv_test\csv\EndoPharm"
    ## This is where it saves the new copy of the CSVs.  MAKE SURE THIS DIR IS EMPTY
    csvOutputDir = r"C:\Users\eborges\Downloads\all_csv_test\csv\EndoPharm-MANNY"

    csvNamePreString = "EndoPharm_"
    csvNamePostString = "_top_50.csv"

    ##  Create the prioritized list
    compareMatrix = {}
    contents = open(subSetOfFileName).readlines()
    for line in contents:
        line = line.replace("\n","")
        line = line.replace("The field list in ","")
        line = line.split(" is a subset of ")
        if line[0] in list(compareMatrix.keys()):
            compareMatrix[line[0]].append(line[1])
        else:
            compareMatrix[line[0]] = [line[1],]

    print(len(compareMatrix.keys()))


    tablesReferenced = set()
    for i in list(compareMatrix.keys()):
        tablesReferenced.add(i)
        for x in compareMatrix[i]:
            tablesReferenced.add(x)

    print(len(tablesReferenced))


    identicalToMatrix = {}
    contents = open(identicalToFileName).readlines()
    for line in contents:
        line = line.replace("\n","")
        line = line.split("|")
        tbls = line[0].replace("The field list in ","")
        tbls = tbls.split(" is identical to fields in ")
        if line[1] in list(identicalToMatrix.keys()):
            for i in tbls:
                identicalToMatrix[line[1]].add(i)
        else:
            identicalToMatrix[line[1]] = set(tbls)

    for id in list(identicalToMatrix.keys()):
        for i in identicalToMatrix[id]:
            tablesReferenced.add(i)
    print(len(tablesReferenced))

    
    contents = open(exportedTableListFileName).readlines()
    ## Removing the headderrow
    contents = contents[1:]
    sifBackupFullTableMatrix = {}
    for line in contents:
        line = line.replace("\n","")
        line = line.split("|")
        sifBackupFullTableMatrix[line[0]] = line[1]

    print(len(sifBackupFullTableMatrix.keys()))
    

    tablesNotReferenced = []
    for t in (sifBackupFullTableMatrix.keys()):
        if sifBackupFullTableMatrix[t] == "0":
            print("%s table dropped because it has zero rows."%t)
        else:
            if t in tablesReferenced:
                pass
            else:
                tablesNotReferenced.append(t)

    print(len(tablesNotReferenced))


    tablesReferencedPosMatrix = {}
    for t in tablesReferenced:
        tablesReferencedPosMatrix[t] = 1

    print(len(tablesReferencedPosMatrix.keys()))


    contents = open(subSetOfFileName).readlines()
    for line in contents:
        line = line.replace("\n","")
        line = line.replace("The field list in ","")
        line = line.split(" is a subset of ")
        tablesReferencedPosMatrix[line[0]] +=1

    for id in list(identicalToMatrix.keys()):
        dupTabs = list(identicalToMatrix[id])
        dupTabs = dupTabs[1:]
        for i in dupTabs:
            tablesReferencedPosMatrix[i] +=1


    referencedNotSubset = []
    for i in list(tablesReferencedPosMatrix.keys()):
        if tablesReferencedPosMatrix[i] == 1:
            referencedNotSubset.append(i)

    print("There are %s tables not referenced.\nThere are %s referenced but are not a subset.\nThat leaves %s subsets that will go on right of break tab."% (len(tablesNotReferenced),len(referencedNotSubset),(len(tablesReferencedPosMatrix.keys())-len(referencedNotSubset))))
    #print("There are %s tables not referenced.\nThere are %s referenced but are not a subset."% (len(tablesNotReferenced),len(referencedNotSubset)))

    ##  Prioritized List created so now write that out to excel as tabs only.
    
    posList = []
    for i in list(tablesReferencedPosMatrix.keys()):
        posList.append((tablesReferencedPosMatrix[i],i))

    posList.sort()
    #print(posList)
    print("\nPlace the separator between these two tabs.")
    print(posList[len(referencedNotSubset)-1])
    print(posList[len(referencedNotSubset)])
    print("\n")


    xlApp = Dispatch('Excel.Application')
    xlBook = xlApp.Workbooks.Add()

    for t in tablesNotReferenced:
        sh = xlBook.Worksheets.Add(Before = None, After = xlBook.Worksheets(xlBook.Worksheets.count))
        #print(t)
        if len(t) > 31:
            t = t[:31]
            print("%s is too long. Fixing"%t)
        sh.Name = t
        
    for t in posList:
        sh = xlBook.Worksheets.Add(Before = None, After = xlBook.Worksheets(xlBook.Worksheets.count))
        nm = t[1]
        if len(nm) > 31:
            nm = nm[:31]
            print("%s is too long. Fixing"%t[1])
        sh.Name = nm

    xlBook.SaveAs(finalSpreadsheetReportName)
    xlBook.Close()

    ##  Tabs Created.  Now populate the tabs with the csv data, making sure to first convert out of UTF-16 if they are AND pipe delimited
##    for f in os.listdir(csvStartDir):
##        contents = open(os.path.join(csvStartDir,f),encoding="UTF-16").readlines()
##        outputFile = open(os.path.join(csvOutputDir,f),'w', encoding='UTF-8')
##        for line in contents:
##            line = line.replace('","','|')
##            line = line.replace('"','')
##            nl = outputFile.write(line)
##        outputFile.close()


    xlBook = xlApp.Workbooks.Open(finalSpreadsheetReportName)
    for f in os.listdir(csvOutputDir):
        tempxlBook = xlApp.Workbooks.Open(os.path.join(csvOutputDir,f))
        tblName = f.replace(csvNamePreString,"")
        tblName = tblName.replace(csvNamePostString,"")
        if len(tblName) >31:
            tblName = tblName[:31]
            print("%s is too long. Fixing."%tblName)
        print ("now copying %s"%tblName)
        try:
            xlBook.Worksheets(tblName).Range("A1:ZZ100").Value = tempxlBook.Worksheets(1).Range("A1:ZZ100").Value
        except:
            print("%s FAILED TO COPY!!!"%tblName)
        tempxlBook.Close()
    xlBook.Close(SaveChanges=True)

    ##  Tabs populated.  Now pre-color fields based on their pre-selections.
    preSelectedFieldsMatrix = {}
    contents = open(apFieldsRequested).readlines()
    for line in contents:
        line = line.replace("\n","")
        line = line.split("|")
        preSelectedFieldsMatrix[line[0]] = []
        itms = line[1].split("; ")
        for i in itms:
            preSelectedFieldsMatrix[line[0]].append(i)

    print(len(preSelectedFieldsMatrix.keys()))


    xlBook = xlApp.Workbooks.Open(finalSpreadsheetReportName)
    for tbl in list(preSelectedFieldsMatrix.keys()):
        try:
            sht = xlBook.Worksheets(tbl)
            count = 1
            while sht.Cells(2,count).value:
                if sht.Cells(2,count).value.lower() in preSelectedFieldsMatrix[tbl]:
                    sht.Cells(2,count).Interior.ColorIndex = 50
                count +=1
        except:
            print("%s not found."%tbl)
Revision:	781
Committed:	Fri Aug 5 02:20:22 2022 UTC (3 years, 7 months ago) by nino.borges
Content type:	text/x-python
File size:	8622 byte(s)
Log Message:	general updates
#	User	Rev	Content
1	nino.borges	764	"""
2
3			EndoFieldsFlaggedForProdReport
4
5			Created by:
6			Emanuel Borges
7			07.30.2021
8
9			This program will take the results from EndoMultipleTableFieldCompare and will generate a
10			spreadsheet report so that specific fields can be selected for production. It will use the
11			results in the table-field compare and prioritize the tables that are not subsumed to the left
12			of the workbook and the ones that are to the right, with the idea that you will have a separator
13			tab between them. It will then highlight in green the rows that were already selected for prod.
14			Be sure to convert the multiple csvs to pipe delimited and NOT utf-16
15
16			"""
17
18			import os
19			from datetime import datetime
20			from win32com.client import Dispatch
21
22
23
24			if __name__ == '__main__':
25	nino.borges	781	subSetOfFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sif_MannyAnalysis(subsetOf)b_e.txt"
26			identicalToFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sif_MannyAnalysis(identicalTo)c_e.txt"
27			exportedTableListFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\SIFTableList_e.csv"
28			finalSpreadsheetReportName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210731_SIF_TableFieldReport.xlsx"
29	nino.borges	764	## This is the file you created based on the preselected list by A&P
30			apFieldsRequested = r"C:\Users\eborges\Documents\Cases\Endo\20210722_DatabaseSearches\20210722_DatabaseTablesFields_AP_Requested.txt"
31			## This is the original directory of the csvs
32	nino.borges	781	csvStartDir = r"C:\Users\eborges\Downloads\all_csv_test\csv\EndoPharm"
33	nino.borges	764	## This is where it saves the new copy of the CSVs. MAKE SURE THIS DIR IS EMPTY
34	nino.borges	781	csvOutputDir = r"C:\Users\eborges\Downloads\all_csv_test\csv\EndoPharm-MANNY"
35	nino.borges	764
36	nino.borges	781	csvNamePreString = "EndoPharm_"
37			csvNamePostString = "_top_50.csv"
38	nino.borges	764
39			## Create the prioritized list
40			compareMatrix = {}
41			contents = open(subSetOfFileName).readlines()
42			for line in contents:
43			line = line.replace("\n","")
44			line = line.replace("The field list in ","")
45			line = line.split(" is a subset of ")
46			if line[0] in list(compareMatrix.keys()):
47			compareMatrix[line[0]].append(line[1])
48			else:
49			compareMatrix[line[0]] = [line[1],]
50
51			print(len(compareMatrix.keys()))
52
53
54			tablesReferenced = set()
55			for i in list(compareMatrix.keys()):
56			tablesReferenced.add(i)
57			for x in compareMatrix[i]:
58			tablesReferenced.add(x)
59
60			print(len(tablesReferenced))
61
62
63	nino.borges	781	identicalToMatrix = {}
64			contents = open(identicalToFileName).readlines()
65			for line in contents:
66			line = line.replace("\n","")
67			line = line.split("\|")
68			tbls = line[0].replace("The field list in ","")
69			tbls = tbls.split(" is identical to fields in ")
70			if line[1] in list(identicalToMatrix.keys()):
71			for i in tbls:
72			identicalToMatrix[line[1]].add(i)
73			else:
74			identicalToMatrix[line[1]] = set(tbls)
75
76			for id in list(identicalToMatrix.keys()):
77			for i in identicalToMatrix[id]:
78			tablesReferenced.add(i)
79			print(len(tablesReferenced))
80
81	nino.borges	764
82			contents = open(exportedTableListFileName).readlines()
83			## Removing the headderrow
84			contents = contents[1:]
85			sifBackupFullTableMatrix = {}
86			for line in contents:
87			line = line.replace("\n","")
88			line = line.split("\|")
89			sifBackupFullTableMatrix[line[0]] = line[1]
90
91			print(len(sifBackupFullTableMatrix.keys()))
92
93
94			tablesNotReferenced = []
95			for t in (sifBackupFullTableMatrix.keys()):
96			if sifBackupFullTableMatrix[t] == "0":
97			print("%s table dropped because it has zero rows."%t)
98			else:
99			if t in tablesReferenced:
100			pass
101			else:
102			tablesNotReferenced.append(t)
103
104			print(len(tablesNotReferenced))
105
106
107			tablesReferencedPosMatrix = {}
108			for t in tablesReferenced:
109			tablesReferencedPosMatrix[t] = 1
110
111			print(len(tablesReferencedPosMatrix.keys()))
112
113
114			contents = open(subSetOfFileName).readlines()
115			for line in contents:
116			line = line.replace("\n","")
117			line = line.replace("The field list in ","")
118			line = line.split(" is a subset of ")
119			tablesReferencedPosMatrix[line[0]] +=1
120	nino.borges	781
121			for id in list(identicalToMatrix.keys()):
122			dupTabs = list(identicalToMatrix[id])
123			dupTabs = dupTabs[1:]
124			for i in dupTabs:
125			tablesReferencedPosMatrix[i] +=1
126
127
128
129	nino.borges	764	referencedNotSubset = []
130			for i in list(tablesReferencedPosMatrix.keys()):
131			if tablesReferencedPosMatrix[i] == 1:
132			referencedNotSubset.append(i)
133
134			print("There are %s tables not referenced.\nThere are %s referenced but are not a subset.\nThat leaves %s subsets that will go on right of break tab."% (len(tablesNotReferenced),len(referencedNotSubset),(len(tablesReferencedPosMatrix.keys())-len(referencedNotSubset))))
135			#print("There are %s tables not referenced.\nThere are %s referenced but are not a subset."% (len(tablesNotReferenced),len(referencedNotSubset)))
136
137			## Prioritized List created so now write that out to excel as tabs only.
138
139			posList = []
140			for i in list(tablesReferencedPosMatrix.keys()):
141			posList.append((tablesReferencedPosMatrix[i],i))
142
143			posList.sort()
144			#print(posList)
145			print("\nPlace the separator between these two tabs.")
146			print(posList[len(referencedNotSubset)-1])
147			print(posList[len(referencedNotSubset)])
148			print("\n")
149
150
151			xlApp = Dispatch('Excel.Application')
152			xlBook = xlApp.Workbooks.Add()
153
154			for t in tablesNotReferenced:
155			sh = xlBook.Worksheets.Add(Before = None, After = xlBook.Worksheets(xlBook.Worksheets.count))
156	nino.borges	781	#print(t)
157			if len(t) > 31:
158			t = t[:31]
159			print("%s is too long. Fixing"%t)
160	nino.borges	764	sh.Name = t
161
162			for t in posList:
163			sh = xlBook.Worksheets.Add(Before = None, After = xlBook.Worksheets(xlBook.Worksheets.count))
164			nm = t[1]
165			if len(nm) > 31:
166	nino.borges	781	nm = nm[:31]
167	nino.borges	764	print("%s is too long. Fixing"%t[1])
168			sh.Name = nm
169
170			xlBook.SaveAs(finalSpreadsheetReportName)
171			xlBook.Close()
172
173			## Tabs Created. Now populate the tabs with the csv data, making sure to first convert out of UTF-16 if they are AND pipe delimited
174	nino.borges	781	## for f in os.listdir(csvStartDir):
175			## contents = open(os.path.join(csvStartDir,f),encoding="UTF-16").readlines()
176			## outputFile = open(os.path.join(csvOutputDir,f),'w', encoding='UTF-8')
177			## for line in contents:
178			## line = line.replace('","','\|')
179			## line = line.replace('"','')
180			## nl = outputFile.write(line)
181			## outputFile.close()
182	nino.borges	764
183
184			xlBook = xlApp.Workbooks.Open(finalSpreadsheetReportName)
185			for f in os.listdir(csvOutputDir):
186			tempxlBook = xlApp.Workbooks.Open(os.path.join(csvOutputDir,f))
187	nino.borges	781	tblName = f.replace(csvNamePreString,"")
188			tblName = tblName.replace(csvNamePostString,"")
189	nino.borges	764	if len(tblName) >31:
190	nino.borges	781	tblName = tblName[:31]
191			print("%s is too long. Fixing."%tblName)
192			print ("now copying %s"%tblName)
193			try:
194			xlBook.Worksheets(tblName).Range("A1:ZZ100").Value = tempxlBook.Worksheets(1).Range("A1:ZZ100").Value
195			except:
196			print("%s FAILED TO COPY!!!"%tblName)
197	nino.borges	764	tempxlBook.Close()
198			xlBook.Close(SaveChanges=True)
199
200			## Tabs populated. Now pre-color fields based on their pre-selections.
201			preSelectedFieldsMatrix = {}
202			contents = open(apFieldsRequested).readlines()
203			for line in contents:
204			line = line.replace("\n","")
205			line = line.split("\|")
206			preSelectedFieldsMatrix[line[0]] = []
207			itms = line[1].split("; ")
208			for i in itms:
209			preSelectedFieldsMatrix[line[0]].append(i)
210
211			print(len(preSelectedFieldsMatrix.keys()))
212
213
214			xlBook = xlApp.Workbooks.Open(finalSpreadsheetReportName)
215			for tbl in list(preSelectedFieldsMatrix.keys()):
216			try:
217			sht = xlBook.Worksheets(tbl)
218			count = 1
219			while sht.Cells(2,count).value:
220			if sht.Cells(2,count).value.lower() in preSelectedFieldsMatrix[tbl]:
221			sht.Cells(2,count).Interior.ColorIndex = 50
222			count +=1
223			except:
224			print("%s not found."%tbl)