ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/Lilly-DataFlowSearch.py
Revision: 824
Committed: Mon Apr 22 21:43:18 2024 UTC (23 months ago) by nino.borges
Content type: text/x-python
File size: 5252 byte(s)
Log Message:
This is the completed version that was used for the final export of the systems.  This includes support for writing to an Excel file.

File Contents

# User Rev Content
1 nino.borges 820 """
2    
3     Lilly-DataFlowSearch
4    
5     Created by:
6     Emanuel Borges
7     02.13.2024
8    
9     Very simple program that will read an XLS file and search for references to specific systems. If found, that entire row will be copied.
10    
11     """
12    
13 nino.borges 822 import os, re
14 nino.borges 820 from datetime import datetime
15     from win32com.client import Dispatch
16    
17     class DataFlowFileSearcher(object):
18 nino.borges 824 version = "0.05"
19 nino.borges 820
20     def __init__(self):
21     self.fileToSearch = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Eli Lilly\Clinical Data Flow Analysis Req\_working\cmdb_ci_business_app_02022024_EB.xlsx"
22    
23    
24    
25 nino.borges 821 self.searchTermsList = ["Vault EDC","Vault Coder","Vault CDB","CDTS","Argus","LSS","Information Hub","CLUWE","IMPACT","eCTS","CTMS","Adjudication","IQVIA-ECG","Central Laboratory","Covance","Q2 Ithica","Bioagilytix","Immunogenicity","Vault Clinical-CTMS","LabCorp","WuXi","IQVia","ABPM","Biotel","REESE"]
26     #self.searchTermsList = ["LSS","Information Hub","CLUWE","IMPACT","eCTS","CTMS","Adjudication","IQVIA-ECG","Central Laboratory","Covance","Q2 Ithica","Bioagilytix","Immunogenicity","Vault Clinical-CTMS","LabCorp","WuXi","IQVia","ABPM","Biotel","REESE"]
27 nino.borges 820 self.workingDir = r"C:\Test_Dir\lili_dataflow"
28 nino.borges 821 self.searchTermsMatrix = {}
29 nino.borges 824 self.rawRowMatrix = {}
30 nino.borges 820
31    
32     self.xlApp = Dispatch('Excel.Application')
33    
34     def PerformDataFlowSearch(self):
35     wb = self.xlApp.Workbooks.Open(self.fileToSearch)
36     sht = wb.Worksheets(1)
37 nino.borges 821
38     for rowNumb in range(1,13840):
39     fullRow = sht.Range(f"A{rowNumb}:W{rowNumb}")
40 nino.borges 824 fullRowList = []
41     for row, cell in enumerate(fullRow):
42     fullRowList.append(str(cell))
43     fullRowCombined = " ".join(fullRowList)
44     for keyTerm in self.searchTermsList:
45     if re.search(r'\b%s\b'%keyTerm.upper(), fullRowCombined.upper()):
46 nino.borges 823 try:
47 nino.borges 824 self.searchTermsMatrix[keyTerm].append(fullRowList)
48 nino.borges 823 except:
49 nino.borges 824 self.searchTermsMatrix[keyTerm] = [fullRowList,]
50    
51 nino.borges 821
52 nino.borges 824 ## termFoundInRow = False
53     ## for colNumb in range(1,23):
54     ## cellToSearchValue = sht.Cells(rowNumb,colNumb).Value
55     ## for keyTerm in self.searchTermsList:
56     ## ## If I cant change it to uppercase, it's not a string that I want to search.
57     ## try:
58     ## cellToSearchValue = cellToSearchValue.upper()
59     ## except:
60     ## cellToSearchValue = " "
61     ##
62     ## if re.search(r'\b%s\b'%keyTerm.upper(), cellToSearchValue):
63     ## termFoundInRow = True
64     ## #if keyTerm.upper() in cellToSearchValue.upper():
65     ## try:
66     ## self.searchTermsMatrix[keyTerm].add(str(fullRow))
67     ## except:
68     ## self.searchTermsMatrix[keyTerm] = set()
69     ## self.searchTermsMatrix[keyTerm].add(str(fullRow))
70     ## if termFoundInRow:
71     ## self.rawRowMatrix[sht.Cells(rowNumb,1).Value] = fullRow
72    
73 nino.borges 821 wb.Close()
74    
75     def WriteValuesToFiles(self):
76     for keyTerm in self.searchTermsMatrix:
77     outputFile = open(os.path.join(self.workingDir,f"{keyTerm}.TXT"),'w', encoding="UTF8")
78     for fullRow in self.searchTermsMatrix[keyTerm]:
79     outputFile.write(f"{fullRow}\n")
80 nino.borges 820 outputFile.close()
81 nino.borges 824
82    
83     def WriteValuesToSpreadsheet(self):
84     wb = self.xlApp.Workbooks.Add()
85     shtCount = 1
86    
87     for keyTerm in self.searchTermsMatrix:
88     rowNumb = 0
89     wb.Worksheets(shtCount).Name = keyTerm
90     sht = wb.Worksheets(shtCount)
91     for rowHits in self.searchTermsMatrix[keyTerm]:
92     rowNumb +=1
93     colNumb = 0
94     for value in rowHits:
95     colNumb +=1
96     sht.Cells(rowNumb, colNumb).Value = value
97     shtCount +=1
98     wb.Sheets.Add(Before = None, After = wb.Sheets(wb.Sheets.Count))
99    
100     wb.SaveAs(r"C:\Test_Dir\lili_dataflow\test.xlsx")
101     wb.Close()
102 nino.borges 820
103    
104 nino.borges 821
105     ## for keyTerm in self.searchTermsList:
106     ## print(f"Now searching for {keyTerm}...")
107     ## outputFile = open(os.path.join(self.workingDir,f"{keyTerm}.TXT"),'w')
108     ## for rowNumb in range(1,13840):
109     ## termFound = False
110     ## for colNumb in range(1,4):
111     ## cellToSearchValue = sht.Cells(rowNumb,colNumb).Value
112     ## if keyTerm.upper() in cellToSearchValue.upper():
113     ## termFound = True
114     ## if termFound:
115     ## fullRow = sht.Range(f"A{rowNumb}:W{rowNumb}")
116     ## outputFile.write(f"{fullRow}\n")
117     ## outputFile.close()
118     ## wb.Close()
119    
120    
121    
122 nino.borges 820 if __name__ == '__main__':
123     dfs = DataFlowFileSearcher()
124 nino.borges 821 dfs.PerformDataFlowSearch()
125 nino.borges 824 dfs.WriteValuesToFiles()
126     dfs.WriteValuesToSpreadsheet()