Active_prgs/Redgrave/EndoXlsFieldDataExtractor.py

"""

EndoXlsFieldDataExtractor

Created by:
Emanuel Borges
09.15.2021

Very simple program that will read an xls and extract field data, using an external matrix, a set text files for me.

"""

import os
from win32com.client import Dispatch


if __name__ == '__main__':
    xlFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210727_Rows_Data\SFA_AllNonEmptyTables_top_50.xlsx"
    tablesOfInterestFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\2021.09.10 Aspect Delivery - Multi Systems DB Tables and Fields Reports\SFA_tablesOfInterest.txt"
    outputDir = r"C:\Users\eborges\Documents\Cases\Endo\Temp\2021.08.18 IQVIA - Navigator data dictionary\_Suppl\SFA2"


    xlApp = Dispatch('Excel.Application')
    xlBook = xlApp.Workbooks.Open(xlFileName)
    print("there are %s sheets in this workbook"% xlBook.Sheets.Count)
    sheetMatrix = {}

    count = 1
    for i in range(1,xlBook.Sheets.Count +1):
        sheetName = xlBook.Worksheets(i).name
        sheetName = sheetName.rsplit("_",1)[0]
        sheetName = sheetName.upper()
        sheetMatrix[sheetName] = count
        count +=1
    print("There are %s sheets in this spreadsheet."% len(sheetMatrix.keys()))


    ##  Read tables and fields of interest and make a matrix
    contents = open(tablesOfInterestFileName).readlines()
    tablesOfInterestList = []
    for line in contents:
        line = line.replace("\n","")
        line = line.split("|")
        tablesOfInterestList.append(line)

    print("There are %s tables-fields in this report."% len(tablesOfInterestList))


    ##  Read the xls and first test that all tables are finable (occ need to modify the table names since some are truncated)
    for tf in tablesOfInterestList:
        sht = xlBook.Worksheets(sheetMatrix[tf[0]])


    ##  Re-Read and export the text files.
    foundList = []
    for tf in tablesOfInterestList:
        sht = xlBook.Worksheets(sheetMatrix[tf[0]])
        count = 1
        while sht.Cells(2,count).value:
            if tf[1] == sht.Cells(2,count).value.upper():
                foundList.append(tf[1])
            count +=1

    print("Sheets found count %s."%len(foundList))
    

    for tf in tablesOfInterestList:
        sht = xlBook.Worksheets(sheetMatrix[tf[0]])
        count = 1
        while sht.Cells(2,count).value:
            if tf[1] == sht.Cells(2,count).value.upper():
                outputFile = open(os.path.join(outputDir,"%s-%s.txt"%(tf[0],tf[1])),'w')
                for i in range(1,55):
                    outputFile.write("%s\n"%sht.Cells(i,count).value)
                outputFile.close()
            count +=1
Revision:	767
Committed:	Wed Sep 15 18:54:57 2021 UTC (4 years, 6 months ago) by nino.borges
Content type:	text/x-python
File size:	2720 byte(s)
Log Message:	Very simple program that will read an xls and extract field data, using an external matrix, a set text files for me.
#	User	Rev	Content
1	nino.borges	767	"""
2
3			EndoXlsFieldDataExtractor
4
5			Created by:
6			Emanuel Borges
7			09.15.2021
8
9			Very simple program that will read an xls and extract field data, using an external matrix, a set text files for me.
10
11			"""
12
13			import os
14			from win32com.client import Dispatch
15
16
17			if __name__ == '__main__':
18			xlFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210727_Rows_Data\SFA_AllNonEmptyTables_top_50.xlsx"
19			tablesOfInterestFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\2021.09.10 Aspect Delivery - Multi Systems DB Tables and Fields Reports\SFA_tablesOfInterest.txt"
20			outputDir = r"C:\Users\eborges\Documents\Cases\Endo\Temp\2021.08.18 IQVIA - Navigator data dictionary\_Suppl\SFA2"
21
22
23
24			xlApp = Dispatch('Excel.Application')
25			xlBook = xlApp.Workbooks.Open(xlFileName)
26			print("there are %s sheets in this workbook"% xlBook.Sheets.Count)
27			sheetMatrix = {}
28
29			count = 1
30			for i in range(1,xlBook.Sheets.Count +1):
31			sheetName = xlBook.Worksheets(i).name
32			sheetName = sheetName.rsplit("_",1)[0]
33			sheetName = sheetName.upper()
34			sheetMatrix[sheetName] = count
35			count +=1
36			print("There are %s sheets in this spreadsheet."% len(sheetMatrix.keys()))
37
38
39			## Read tables and fields of interest and make a matrix
40			contents = open(tablesOfInterestFileName).readlines()
41			tablesOfInterestList = []
42			for line in contents:
43			line = line.replace("\n","")
44			line = line.split("\|")
45			tablesOfInterestList.append(line)
46
47			print("There are %s tables-fields in this report."% len(tablesOfInterestList))
48
49
50			## Read the xls and first test that all tables are finable (occ need to modify the table names since some are truncated)
51			for tf in tablesOfInterestList:
52			sht = xlBook.Worksheets(sheetMatrix[tf[0]])
53
54
55
56
57			## Re-Read and export the text files.
58			foundList = []
59			for tf in tablesOfInterestList:
60			sht = xlBook.Worksheets(sheetMatrix[tf[0]])
61			count = 1
62			while sht.Cells(2,count).value:
63			if tf[1] == sht.Cells(2,count).value.upper():
64			foundList.append(tf[1])
65			count +=1
66
67			print("Sheets found count %s."%len(foundList))
68
69
70			for tf in tablesOfInterestList:
71			sht = xlBook.Worksheets(sheetMatrix[tf[0]])
72			count = 1
73			while sht.Cells(2,count).value:
74			if tf[1] == sht.Cells(2,count).value.upper():
75			outputFile = open(os.path.join(outputDir,"%s-%s.txt"%(tf[0],tf[1])),'w')
76			for i in range(1,55):
77			outputFile.write("%s\n"%sht.Cells(i,count).value)
78			outputFile.close()
79			count +=1