| 1 |
nino.borges |
820 |
"""
|
| 2 |
|
|
|
| 3 |
|
|
Lilly-DataFlowSearch
|
| 4 |
|
|
|
| 5 |
|
|
Created by:
|
| 6 |
|
|
Emanuel Borges
|
| 7 |
|
|
02.13.2024
|
| 8 |
|
|
|
| 9 |
|
|
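Very simple program that reads an Excel workbook (.xlsx) and searches the first three columns of every row for references to specific systems. When a term is found, that entire row (columns A-W) is captured and later written to a text file named after the term.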
|
| 10 |
|
|
|
| 11 |
|
|
"""
|
| 12 |
|
|
|
| 13 |
nino.borges |
822 |
import os, re
|
| 14 |
nino.borges |
820 |
from datetime import datetime
|
| 15 |
|
|
from win32com.client import Dispatch
|
| 16 |
|
|
|
| 17 |
|
|
class DataFlowFileSearcher(object):
    """Search an Excel worksheet for system names and export matching rows.

    The first worksheet of ``fileToSearch`` is scanned through the Excel COM
    automation object held in ``xlApp``.  For every search term, the rows in
    which the term occurs (as a whole word, case-insensitively) are collected
    in ``searchTermsMatrix`` and can then be written to one text file per term
    inside ``workingDir``.
    """

    version = "0.03"

    def __init__(self):
        """Set the input workbook, search terms, output folder and Excel app."""
        # Workbook whose first worksheet is searched.
        self.fileToSearch = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Eli Lilly\Clinical Data Flow Analysis Req\_working\cmdb_ci_business_app_02022024_EB.xlsx"

        # Terms to look for; each one becomes a key of searchTermsMatrix and
        # the stem of an output file name once it has at least one match.
        self.searchTermsList = ["Vault EDC","Vault Coder","Vault CDB","CDTS","Argus","LSS","Information Hub","CLUWE","IMPACT","eCTS","CTMS","Adjudication","IQVIA-ECG","Central Laboratory","Covance","Q2 Ithica","Bioagilytix","Immunogenicity","Vault Clinical-CTMS","LabCorp","WuXi","IQVia","ABPM","Biotel","REESE"]

        # Folder that receives the "<term>.TXT" result files.
        self.workingDir = r"C:\Test_Dir\lili_dataflow"

        # Maps search term -> set of stringified matching rows.
        self.searchTermsMatrix = {}

        self.xlApp = Dispatch('Excel.Application')

    def PerformDataFlowSearch(self, firstRow=1, lastRow=13839):
        """Scan the worksheet and record every row that mentions a search term.

        Columns 1-3 of each row are inspected.  When a cell contains a search
        term as a whole word (compared in upper case), the string form of the
        row's ``A:W`` range is added to ``searchTermsMatrix[term]``.

        Args:
            firstRow: First worksheet row to inspect (1-based, inclusive).
            lastRow: Last worksheet row to inspect (inclusive).  The default
                matches the row span that was previously hard-coded.
        """
        # Compile each term once instead of once per cell.  re.escape keeps
        # terms containing regex metacharacters from changing the pattern.
        compiledTerms = [
            (keyTerm, re.compile(r'\b%s\b' % re.escape(keyTerm.upper())))
            for keyTerm in self.searchTermsList
        ]

        wb = self.xlApp.Workbooks.Open(self.fileToSearch)
        try:
            sht = wb.Worksheets(1)
            for rowNumb in range(firstRow, lastRow + 1):
                # Stringified row, produced lazily on the first match so rows
                # without any hit cost no extra COM round trip.
                rowText = None
                for colNumb in range(1, 4):
                    cellToSearchValue = sht.Cells(rowNumb, colNumb).Value
                    # Cell values that are not text (e.g. None or numbers)
                    # have no .upper() and cannot contain a term; skip them
                    # instead of crashing.
                    if not isinstance(cellToSearchValue, str):
                        continue
                    upperValue = cellToSearchValue.upper()
                    for keyTerm, pattern in compiledTerms:
                        if pattern.search(upperValue) is None:
                            continue
                        if rowText is None:
                            rowText = str(sht.Range(f"A{rowNumb}:W{rowNumb}"))
                        self.searchTermsMatrix.setdefault(keyTerm, set()).add(rowText)
        finally:
            # Always release the workbook, even if the scan fails part-way.
            wb.Close()

    def WriteValuesToFiles(self):
        """Write the collected rows to ``<term>.TXT`` files in ``workingDir``.

        One UTF-8 file is produced per term present in ``searchTermsMatrix``,
        with one matching row per line.  Rows are written in sorted order so
        repeated runs yield identical files.
        """
        # Create the output folder on demand rather than failing on open().
        os.makedirs(self.workingDir, exist_ok=True)
        for keyTerm, matchedRows in self.searchTermsMatrix.items():
            outputPath = os.path.join(self.workingDir, f"{keyTerm}.TXT")
            with open(outputPath, 'w', encoding="UTF8") as outputFile:
                outputFile.writelines(f"{fullRow}\n" for fullRow in sorted(matchedRows))
|
| 58 |
|
|
|
| 59 |
|
|
|
| 60 |
|
|
|
| 61 |
nino.borges |
821 |
|
| 62 |
|
|
## for keyTerm in self.searchTermsList:
|
| 63 |
|
|
## print(f"Now searching for {keyTerm}...")
|
| 64 |
|
|
## outputFile = open(os.path.join(self.workingDir,f"{keyTerm}.TXT"),'w')
|
| 65 |
|
|
## for rowNumb in range(1,13840):
|
| 66 |
|
|
## termFound = False
|
| 67 |
|
|
## for colNumb in range(1,4):
|
| 68 |
|
|
## cellToSearchValue = sht.Cells(rowNumb,colNumb).Value
|
| 69 |
|
|
## if keyTerm.upper() in cellToSearchValue.upper():
|
| 70 |
|
|
## termFound = True
|
| 71 |
|
|
## if termFound:
|
| 72 |
|
|
## fullRow = sht.Range(f"A{rowNumb}:W{rowNumb}")
|
| 73 |
|
|
## outputFile.write(f"{fullRow}\n")
|
| 74 |
|
|
## outputFile.close()
|
| 75 |
|
|
## wb.Close()
|
| 76 |
|
|
|
| 77 |
|
|
|
| 78 |
|
|
|
| 79 |
nino.borges |
820 |
if __name__ == "__main__":
    # Script entry point: collect the matching rows first, then write the
    # per-term result files.
    searcher = DataFlowFileSearcher()
    searcher.PerformDataFlowSearch()
    searcher.WriteValuesToFiles()