ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/Lilly-DataFlowSearch.py
Revision: 822
Committed: Wed Feb 14 19:23:56 2024 UTC (2 years, 1 month ago) by nino.borges
Content type: text/x-python
File size: 3408 byte(s)
Log Message:
This version uses a RegEx and is working great.  Need to increase the column count and decide how I want to deal with the output.

File Contents

# Content
1 """
2
3 Lilly-DataFlowSearch
4
5 Created by:
6 Emanuel Borges
7 02.13.2024
8
9 Very simple program that reads an Excel workbook (.xlsx) and searches each row for references to specific systems. If a reference is found, that entire row is copied.
10
11 """
12
13 import os, re
14 from datetime import datetime
15 from win32com.client import Dispatch
16
class DataFlowFileSearcher(object):
    """Search an Excel worksheet for system names and save the matching rows.

    PerformDataFlowSearch scans the first worksheet of ``fileToSearch`` and
    records, per search term, the text of every row that mentions the term.
    WriteValuesToFiles then writes one ``<term>.TXT`` file per matched term
    into ``workingDir``.
    """

    version = "0.03"

    def __init__(self):
        """Set the input workbook, search terms and output folder, and attach to Excel."""
        self.fileToSearch = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Eli Lilly\Clinical Data Flow Analysis Req\_working\cmdb_ci_business_app_02022024_EB.xlsx"

        self.searchTermsList = ["Vault EDC","Vault Coder","Vault CDB","CDTS","Argus","LSS","Information Hub","CLUWE","IMPACT","eCTS","CTMS","Adjudication","IQVIA-ECG","Central Laboratory","Covance","Q2 Ithica","Bioagilytix","Immunogenicity","Vault Clinical-CTMS","LabCorp","WuXi","IQVia","ABPM","Biotel","REESE"]
        self.workingDir = r"C:\Test_Dir\lili_dataflow"
        # Maps each matched search term to the set of row texts that contain it.
        self.searchTermsMatrix = {}

        self.xlApp = Dispatch('Excel.Application')

    def PerformDataFlowSearch(self, firstRow=1, lastRow=13839, searchColumnCount=3):
        """Scan the first worksheet and collect the rows that mention a search term.

        Each searched cell is tested against every term in ``searchTermsList``
        as a whole-word, case-insensitive match. On a match, the row's
        ``A:W`` range is converted with ``str()`` and added to
        ``searchTermsMatrix[term]``.

        Args:
            firstRow: First worksheet row to scan (1-based).
            lastRow: Last worksheet row to scan, inclusive.
            searchColumnCount: Number of leading columns (starting at column
                1) whose cells are searched.
        """
        # Compile each term once; re.escape keeps any regex metacharacters in
        # a term from being interpreted as pattern syntax.
        termPatterns = [
            (keyTerm, re.compile(r'\b%s\b' % re.escape(keyTerm), re.IGNORECASE))
            for keyTerm in self.searchTermsList
        ]

        wb = self.xlApp.Workbooks.Open(self.fileToSearch)
        try:
            sht = wb.Worksheets(1)
            for rowNumb in range(firstRow, lastRow + 1):
                # Fetched lazily: the range is only read for rows that match.
                rowText = None
                for colNumb in range(1, searchColumnCount + 1):
                    cellValue = sht.Cells(rowNumb, colNumb).Value
                    if cellValue is None:
                        # Empty cell: nothing to search.
                        continue
                    # Cells may hold non-string values (e.g. numbers).
                    cellText = str(cellValue)
                    for keyTerm, pattern in termPatterns:
                        if pattern.search(cellText):
                            if rowText is None:
                                rowText = str(sht.Range(f"A{rowNumb}:W{rowNumb}"))
                            self.searchTermsMatrix.setdefault(keyTerm, set()).add(rowText)
        finally:
            # Close the workbook even if the scan fails part-way through.
            wb.Close()

    def WriteValuesToFiles(self):
        """Write the collected rows to ``<term>.TXT`` files in ``workingDir``.

        One UTF-8 file is written per term present in ``searchTermsMatrix``,
        one row per line. Rows are written in sorted order so repeated runs
        produce identical files. ``workingDir`` is created if it is missing.
        """
        os.makedirs(self.workingDir, exist_ok=True)
        for keyTerm, matchedRows in self.searchTermsMatrix.items():
            outputPath = os.path.join(self.workingDir, f"{keyTerm}.TXT")
            with open(outputPath, 'w', encoding="UTF8") as outputFile:
                outputFile.writelines(f"{fullRow}\n" for fullRow in sorted(matchedRows))
58
59
60
61
62 ## for keyTerm in self.searchTermsList:
63 ## print(f"Now searching for {keyTerm}...")
64 ## outputFile = open(os.path.join(self.workingDir,f"{keyTerm}.TXT"),'w')
65 ## for rowNumb in range(1,13840):
66 ## termFound = False
67 ## for colNumb in range(1,4):
68 ## cellToSearchValue = sht.Cells(rowNumb,colNumb).Value
69 ## if keyTerm.upper() in cellToSearchValue.upper():
70 ## termFound = True
71 ## if termFound:
72 ## fullRow = sht.Range(f"A{rowNumb}:W{rowNumb}")
73 ## outputFile.write(f"{fullRow}\n")
74 ## outputFile.close()
75 ## wb.Close()
76
77
78
if __name__ == '__main__':
    # Script entry point: collect the matching rows, then write them out per term.
    searcher = DataFlowFileSearcher()
    searcher.PerformDataFlowSearch()
    searcher.WriteValuesToFiles()