| 1 |
nino.borges |
820 |
"""
|
| 2 |
|
|
|
| 3 |
|
|
Lilly-DataFlowSearch
|
| 4 |
|
|
|
| 5 |
|
|
Created by:
|
| 6 |
|
|
Emanuel Borges
|
| 7 |
|
|
02.13.2024
|
| 8 |
|
|
|
| 9 |
|
|
Very simple program that will read an XLS file and search for references to specific systems. If found, that entire row will be copied.
|
| 10 |
nino.borges |
825 |
As of version .06, also used this for searching for other key terms, like for mobile apps.
|
| 11 |
nino.borges |
820 |
|
| 12 |
|
|
"""
|
| 13 |
|
|
|
| 14 |
nino.borges |
822 |
import os, re
|
| 15 |
nino.borges |
820 |
from datetime import datetime
|
| 16 |
|
|
from win32com.client import Dispatch
|
| 17 |
|
|
|
| 18 |
|
|
class DataFlowFileSearcher(object):
|
| 19 |
nino.borges |
825 |
version = "0.06"
|
| 20 |
nino.borges |
820 |
|
| 21 |
|
|
def __init__(self):
|
| 22 |
nino.borges |
825 |
self.fileToSearch = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Eli Lilly\Clinical Data Flow Analysis Req\_working\cmdb_ci_business_app_03272024_EB.xlsx"
|
| 23 |
nino.borges |
820 |
|
| 24 |
|
|
|
| 25 |
|
|
|
| 26 |
nino.borges |
825 |
#self.searchTermsList = ["Vault EDC","Vault Coder","Vault CDB","CDTS","Argus","LSS","Information Hub","CLUWE","IMPACT","eCTS","CTMS","Adjudication","IQVIA-ECG","Central Laboratory","Covance","Q2 Ithica","Bioagilytix","Immunogenicity","Vault Clinical-CTMS","LabCorp","WuXi","IQVia","ABPM","Biotel","REESE"]
|
| 27 |
nino.borges |
821 |
#self.searchTermsList = ["LSS","Information Hub","CLUWE","IMPACT","eCTS","CTMS","Adjudication","IQVIA-ECG","Central Laboratory","Covance","Q2 Ithica","Bioagilytix","Immunogenicity","Vault Clinical-CTMS","LabCorp","WuXi","IQVia","ABPM","Biotel","REESE"]
|
| 28 |
nino.borges |
825 |
#self.searchTermsList = ["ios","mobile","cloud","chat", "bot"]
|
| 29 |
|
|
self.searchTermsList = ["iphone", "ipad", "android"]
|
| 30 |
nino.borges |
820 |
self.workingDir = r"C:\Test_Dir\lili_dataflow"
|
| 31 |
nino.borges |
821 |
self.searchTermsMatrix = {}
|
| 32 |
nino.borges |
824 |
self.rawRowMatrix = {}
|
| 33 |
nino.borges |
820 |
|
| 34 |
|
|
|
| 35 |
|
|
self.xlApp = Dispatch('Excel.Application')
|
| 36 |
|
|
|
| 37 |
|
|
def PerformDataFlowSearch(self):
|
| 38 |
|
|
wb = self.xlApp.Workbooks.Open(self.fileToSearch)
|
| 39 |
|
|
sht = wb.Worksheets(1)
|
| 40 |
nino.borges |
821 |
|
| 41 |
nino.borges |
825 |
for rowNumb in range(1,14134):
|
| 42 |
nino.borges |
821 |
fullRow = sht.Range(f"A{rowNumb}:W{rowNumb}")
|
| 43 |
nino.borges |
824 |
fullRowList = []
|
| 44 |
|
|
for row, cell in enumerate(fullRow):
|
| 45 |
|
|
fullRowList.append(str(cell))
|
| 46 |
|
|
fullRowCombined = " ".join(fullRowList)
|
| 47 |
|
|
for keyTerm in self.searchTermsList:
|
| 48 |
|
|
if re.search(r'\b%s\b'%keyTerm.upper(), fullRowCombined.upper()):
|
| 49 |
nino.borges |
823 |
try:
|
| 50 |
nino.borges |
824 |
self.searchTermsMatrix[keyTerm].append(fullRowList)
|
| 51 |
nino.borges |
823 |
except:
|
| 52 |
nino.borges |
824 |
self.searchTermsMatrix[keyTerm] = [fullRowList,]
|
| 53 |
|
|
|
| 54 |
nino.borges |
821 |
|
| 55 |
nino.borges |
824 |
## termFoundInRow = False
|
| 56 |
|
|
## for colNumb in range(1,23):
|
| 57 |
|
|
## cellToSearchValue = sht.Cells(rowNumb,colNumb).Value
|
| 58 |
|
|
## for keyTerm in self.searchTermsList:
|
| 59 |
|
|
## ## If I cant change it to uppercase, it's not a string that I want to search.
|
| 60 |
|
|
## try:
|
| 61 |
|
|
## cellToSearchValue = cellToSearchValue.upper()
|
| 62 |
|
|
## except:
|
| 63 |
|
|
## cellToSearchValue = " "
|
| 64 |
|
|
##
|
| 65 |
|
|
## if re.search(r'\b%s\b'%keyTerm.upper(), cellToSearchValue):
|
| 66 |
|
|
## termFoundInRow = True
|
| 67 |
|
|
## #if keyTerm.upper() in cellToSearchValue.upper():
|
| 68 |
|
|
## try:
|
| 69 |
|
|
## self.searchTermsMatrix[keyTerm].add(str(fullRow))
|
| 70 |
|
|
## except:
|
| 71 |
|
|
## self.searchTermsMatrix[keyTerm] = set()
|
| 72 |
|
|
## self.searchTermsMatrix[keyTerm].add(str(fullRow))
|
| 73 |
|
|
## if termFoundInRow:
|
| 74 |
|
|
## self.rawRowMatrix[sht.Cells(rowNumb,1).Value] = fullRow
|
| 75 |
|
|
|
| 76 |
nino.borges |
821 |
wb.Close()
|
| 77 |
|
|
|
| 78 |
|
|
def WriteValuesToFiles(self):
|
| 79 |
|
|
for keyTerm in self.searchTermsMatrix:
|
| 80 |
|
|
outputFile = open(os.path.join(self.workingDir,f"{keyTerm}.TXT"),'w', encoding="UTF8")
|
| 81 |
|
|
for fullRow in self.searchTermsMatrix[keyTerm]:
|
| 82 |
|
|
outputFile.write(f"{fullRow}\n")
|
| 83 |
nino.borges |
820 |
outputFile.close()
|
| 84 |
nino.borges |
824 |
|
| 85 |
|
|
|
| 86 |
|
|
def WriteValuesToSpreadsheet(self):
|
| 87 |
|
|
wb = self.xlApp.Workbooks.Add()
|
| 88 |
|
|
shtCount = 1
|
| 89 |
|
|
|
| 90 |
|
|
for keyTerm in self.searchTermsMatrix:
|
| 91 |
|
|
rowNumb = 0
|
| 92 |
|
|
wb.Worksheets(shtCount).Name = keyTerm
|
| 93 |
|
|
sht = wb.Worksheets(shtCount)
|
| 94 |
|
|
for rowHits in self.searchTermsMatrix[keyTerm]:
|
| 95 |
|
|
rowNumb +=1
|
| 96 |
|
|
colNumb = 0
|
| 97 |
|
|
for value in rowHits:
|
| 98 |
|
|
colNumb +=1
|
| 99 |
|
|
sht.Cells(rowNumb, colNumb).Value = value
|
| 100 |
|
|
shtCount +=1
|
| 101 |
|
|
wb.Sheets.Add(Before = None, After = wb.Sheets(wb.Sheets.Count))
|
| 102 |
|
|
|
| 103 |
nino.borges |
825 |
wb.SaveAs(r"C:\Test_Dir\lili_dataflow\iphoneipadandroid.xlsx")
|
| 104 |
nino.borges |
824 |
wb.Close()
|
| 105 |
nino.borges |
820 |
|
| 106 |
|
|
|
| 107 |
nino.borges |
821 |
|
| 108 |
|
|
## for keyTerm in self.searchTermsList:
|
| 109 |
|
|
## print(f"Now searching for {keyTerm}...")
|
| 110 |
|
|
## outputFile = open(os.path.join(self.workingDir,f"{keyTerm}.TXT"),'w')
|
| 111 |
|
|
## for rowNumb in range(1,13840):
|
| 112 |
|
|
## termFound = False
|
| 113 |
|
|
## for colNumb in range(1,4):
|
| 114 |
|
|
## cellToSearchValue = sht.Cells(rowNumb,colNumb).Value
|
| 115 |
|
|
## if keyTerm.upper() in cellToSearchValue.upper():
|
| 116 |
|
|
## termFound = True
|
| 117 |
|
|
## if termFound:
|
| 118 |
|
|
## fullRow = sht.Range(f"A{rowNumb}:W{rowNumb}")
|
| 119 |
|
|
## outputFile.write(f"{fullRow}\n")
|
| 120 |
|
|
## outputFile.close()
|
| 121 |
|
|
## wb.Close()
|
| 122 |
|
|
|
| 123 |
|
|
|
| 124 |
|
|
|
| 125 |
nino.borges |
820 |
if __name__ == '__main__':
|
| 126 |
|
|
dfs = DataFlowFileSearcher()
|
| 127 |
nino.borges |
821 |
dfs.PerformDataFlowSearch()
|
| 128 |
nino.borges |
824 |
dfs.WriteValuesToFiles()
|
| 129 |
|
|
dfs.WriteValuesToSpreadsheet() |