ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NTRS_ICC_CountsAnalysis.py
Revision: 816
Committed: Tue Jan 16 20:12:09 2024 UTC (2 years, 2 months ago) by nino.borges
Content type: text/x-python
File size: 5640 byte(s)
Log Message:
This program will open multiple xlsx files, using the Excel COM API, and will perform a simple combining of the data into a single xlsx report.  This version is the first delivered report.

File Contents

# Content
1 """
2
3 NTRS-ICC_CountsAnalysis
4
5 Created by:
6 Emanuel Borges
7 01.11.2024
8
9 This program will open multiple xlsx files, using the Excel COM API, and will perform a simple combining of the data into a single xlsx report.
10
11 """
12
13 import os, re
14 #import MyCode.Tool_Box.ExcelLib
15 from win32com.client import Dispatch
16
17
18 class ICC_CountsAnalyzer(object):
19 version = "0.01"
20
21 def __init__(self):
22 #self.startDir = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req\_LocalVersion"
23 ## Excel is having issues with the super long path names. not a fan of having to do this...
24 ## Before running, do net use M: "\\localhost\c$\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
25 self.startDir = r"m:\_LocalVersion"
26 self.outputFilePath = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
27 self.outputFileName = r"ExportReport.xlsx"
28
29 ## Setting the rowStart number to 2 here, to avoid the header row.
30 self.rowStartNumber = 2
31 ## Setting the Column number where the dated columns will start. This is for the outputHeaderMatrix
32 self.datedColumnsStartNumber = 4
33
34 ## the header matrix with column name to column number
35 self.outputHeaderMatrix = {}
36 ## The headder list with an expanding number of column names
37 self.outputHeaderList = []
38
39 self.icc_CountMatrix = {}
40 self.xlApp = Dispatch('Excel.Application')
41
42 ## Simple match to pull out the date as recorded in the path
43 self.dateInPathRegExPattern = '2023-[0-9]{2}-[0-9]{2}'
44
45 def _CompileHeaderMatrix(self):
46 """Internal method to process the icc_CountMatrix and populate the outputHeaderMatrix"""
47 sendersList = list(self.icc_CountMatrix.keys())
48 for sender in sendersList:
49 valuePairsList = self.icc_CountMatrix[sender]
50 for valuePair in valuePairsList:
51 fName,mCount = valuePair
52 if fName in self.outputHeaderList:
53 pass
54 else:
55 self.outputHeaderList.append(fName)
56 self.outputHeaderList.sort()
57 print(f"There are {len(self.outputHeaderList)} dated columns")
58 count = self.datedColumnsStartNumber
59 for i in self.outputHeaderList:
60 self.outputHeaderMatrix[i] = count
61 count +=1
62
63
64 def _ConvertFileNameToDate(self, fileName):
65 """Converts a file name to a date, which will be used as the column name"""
66 pass
67
68 def _CreateOutputSpreadsheet(self):
69 """This method creates the final spreadsheet report, based on the informaton gathered"""
70 print("Writing the final report...")
71 currentRow = 2
72 self.xlBook = self.xlApp.Workbooks.Add()
73 sht = self.xlBook.Worksheets(1)
74 ## First create the header row
75 sht.Cells(1,1).Value = "Rank"
76 sht.Cells(1,2).Value = "Row Labels"
77 sht.Cells(1,3).Value = "Batch"
78 for colName in list(self.outputHeaderMatrix.keys()):
79 sht.Cells(1,self.outputHeaderMatrix[colName]).Value = colName
80 ## Next populate all of the values
81 for senderValue in list(self.icc_CountMatrix.keys()):
82 sht.Cells(currentRow,2).Value = senderValue
83 valuePairsList = self.icc_CountMatrix[senderValue]
84 for valuePair in valuePairsList:
85 colName,mCount = valuePair
86 sht.Cells(currentRow,self.outputHeaderMatrix[colName]).Value = mCount
87 currentRow += 1
88 ## Finally save the spreadsheet
89 self.xlBook.SaveAs(os.path.join(self.outputFilePath, self.outputFileName))
90 self.xlBook.Close()
91
92
93 def AnalyzeICC_Counts(self):
94 """Main Method in this program"""
95 for (root,dirs,files) in os.walk(self.startDir):
96 for fl in files:
97 ## Note here that I'm assuming these are excel files. Might need to add some logic here in future.
98 print(f"now testing {os.path.join(root,fl)}")
99 self.xlBook = self.xlApp.Workbooks.Open(os.path.join(root,fl))
100 sht = self.xlBook.Worksheets(1)
101
102 ## Determine the last row with data
103 rowEndNumber = self.rowStartNumber
104 while sht.Cells(rowEndNumber + 1, 1).Value not in [None, '']:
105 rowEndNumber += 1
106
107 ## For each row, gather the upper of the text field and then the count
108 for row in range(self.rowStartNumber, rowEndNumber +1):
109 senderValue = sht.Cells(row,2).Value
110 senderValue = senderValue.upper()
111 mCountValue = sht.Cells(row,1).Value
112 if senderValue in list(self.icc_CountMatrix.keys()):
113 self.icc_CountMatrix[senderValue].append((fl,mCountValue))
114 else:
115 self.icc_CountMatrix[senderValue] = [(fl,mCountValue),]
116 #testValue = sht.Cells(rowEndNumber,1).Value
117 #print(testValue)
118 self.xlBook.Close()
119 #del self.xlApp
120
121 self._CompileHeaderMatrix()
122 self._CreateOutputSpreadsheet()
123
124
125
126
127
128 if __name__ == '__main__':
129
130 ica = ICC_CountsAnalyzer()
131 ica.AnalyzeICC_Counts()
132