| 1 |
"""
|
| 2 |
|
| 3 |
NTRS-ICC_CountsAnalysis
|
| 4 |
|
| 5 |
Created by:
|
| 6 |
Emanuel Borges
|
| 7 |
01.11.2024
|
| 8 |
|
| 9 |
This program will open multiple xlsx files, using the Excel COM API, and will perform a simple combining of the data into a single xlsx report.
|
| 10 |
|
| 11 |
"""
|
| 12 |
|
| 13 |
import os, re
|
| 14 |
#import MyCode.Tool_Box.ExcelLib
|
| 15 |
from win32com.client import Dispatch
|
| 16 |
|
| 17 |
|
| 18 |
class ICC_CountsAnalyzer(object):
|
| 19 |
version = "0.01"
|
| 20 |
|
| 21 |
def __init__(self):
|
| 22 |
#self.startDir = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req\_LocalVersion"
|
| 23 |
## Excel is having issues with the super long path names. not a fan of having to do this...
|
| 24 |
## Before running, do net use M: "\\localhost\c$\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
|
| 25 |
self.startDir = r"m:\_LocalVersion"
|
| 26 |
self.outputFilePath = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
|
| 27 |
self.outputFileName = r"ExportReport.xlsx"
|
| 28 |
|
| 29 |
## Setting the rowStart number to 2 here, to avoid the header row.
|
| 30 |
self.rowStartNumber = 2
|
| 31 |
## Setting the Column number where the dated columns will start. This is for the outputHeaderMatrix
|
| 32 |
self.datedColumnsStartNumber = 4
|
| 33 |
|
| 34 |
## the header matrix with column name to column number
|
| 35 |
self.outputHeaderMatrix = {}
|
| 36 |
## The headder list with an expanding number of column names
|
| 37 |
self.outputHeaderList = []
|
| 38 |
|
| 39 |
self.icc_CountMatrix = {}
|
| 40 |
self.xlApp = Dispatch('Excel.Application')
|
| 41 |
|
| 42 |
## Simple match to pull out the date as recorded in the path
|
| 43 |
self.dateInPathRegExPattern = '2023-[0-9]{2}-[0-9]{2}'
|
| 44 |
|
| 45 |
def _CompileHeaderMatrix(self):
|
| 46 |
"""Internal method to process the icc_CountMatrix and populate the outputHeaderMatrix"""
|
| 47 |
sendersList = list(self.icc_CountMatrix.keys())
|
| 48 |
for sender in sendersList:
|
| 49 |
valuePairsList = self.icc_CountMatrix[sender]
|
| 50 |
for valuePair in valuePairsList:
|
| 51 |
fName,mCount = valuePair
|
| 52 |
if fName in self.outputHeaderList:
|
| 53 |
pass
|
| 54 |
else:
|
| 55 |
self.outputHeaderList.append(fName)
|
| 56 |
self.outputHeaderList.sort()
|
| 57 |
print(f"There are {len(self.outputHeaderList)} dated columns")
|
| 58 |
count = self.datedColumnsStartNumber
|
| 59 |
for i in self.outputHeaderList:
|
| 60 |
self.outputHeaderMatrix[i] = count
|
| 61 |
count +=1
|
| 62 |
|
| 63 |
|
| 64 |
def _ConvertFileNameToDate(self, fileName):
|
| 65 |
"""Converts a file name to a date, which will be used as the column name"""
|
| 66 |
pass
|
| 67 |
|
| 68 |
def _CreateOutputSpreadsheet(self):
|
| 69 |
"""This method creates the final spreadsheet report, based on the informaton gathered"""
|
| 70 |
print("Writing the final report...")
|
| 71 |
currentRow = 2
|
| 72 |
self.xlBook = self.xlApp.Workbooks.Add()
|
| 73 |
sht = self.xlBook.Worksheets(1)
|
| 74 |
## First create the header row
|
| 75 |
sht.Cells(1,1).Value = "Rank"
|
| 76 |
sht.Cells(1,2).Value = "Row Labels"
|
| 77 |
sht.Cells(1,3).Value = "Batch"
|
| 78 |
for colName in list(self.outputHeaderMatrix.keys()):
|
| 79 |
sht.Cells(1,self.outputHeaderMatrix[colName]).Value = colName
|
| 80 |
## Next populate all of the values
|
| 81 |
for senderValue in list(self.icc_CountMatrix.keys()):
|
| 82 |
sht.Cells(currentRow,2).Value = senderValue
|
| 83 |
valuePairsList = self.icc_CountMatrix[senderValue]
|
| 84 |
for valuePair in valuePairsList:
|
| 85 |
colName,mCount = valuePair
|
| 86 |
sht.Cells(currentRow,self.outputHeaderMatrix[colName]).Value = mCount
|
| 87 |
currentRow += 1
|
| 88 |
## Finally save the spreadsheet
|
| 89 |
self.xlBook.SaveAs(os.path.join(self.outputFilePath, self.outputFileName))
|
| 90 |
self.xlBook.Close()
|
| 91 |
|
| 92 |
|
| 93 |
def AnalyzeICC_Counts(self):
|
| 94 |
"""Main Method in this program"""
|
| 95 |
for (root,dirs,files) in os.walk(self.startDir):
|
| 96 |
for fl in files:
|
| 97 |
## Note here that I'm assuming these are excel files. Might need to add some logic here in future.
|
| 98 |
print(f"now testing {os.path.join(root,fl)}")
|
| 99 |
self.xlBook = self.xlApp.Workbooks.Open(os.path.join(root,fl))
|
| 100 |
sht = self.xlBook.Worksheets(1)
|
| 101 |
|
| 102 |
## Determine the last row with data
|
| 103 |
rowEndNumber = self.rowStartNumber
|
| 104 |
while sht.Cells(rowEndNumber + 1, 1).Value not in [None, '']:
|
| 105 |
rowEndNumber += 1
|
| 106 |
|
| 107 |
## For each row, gather the upper of the text field and then the count
|
| 108 |
for row in range(self.rowStartNumber, rowEndNumber +1):
|
| 109 |
senderValue = sht.Cells(row,2).Value
|
| 110 |
senderValue = senderValue.upper()
|
| 111 |
mCountValue = sht.Cells(row,1).Value
|
| 112 |
if senderValue in list(self.icc_CountMatrix.keys()):
|
| 113 |
self.icc_CountMatrix[senderValue].append((fl,mCountValue))
|
| 114 |
else:
|
| 115 |
self.icc_CountMatrix[senderValue] = [(fl,mCountValue),]
|
| 116 |
#testValue = sht.Cells(rowEndNumber,1).Value
|
| 117 |
#print(testValue)
|
| 118 |
self.xlBook.Close()
|
| 119 |
#del self.xlApp
|
| 120 |
|
| 121 |
self._CompileHeaderMatrix()
|
| 122 |
self._CreateOutputSpreadsheet()
|
| 123 |
|
| 124 |
|
| 125 |
|
| 126 |
|
| 127 |
|
| 128 |
if __name__ == '__main__':
|
| 129 |
|
| 130 |
ica = ICC_CountsAnalyzer()
|
| 131 |
ica.AnalyzeICC_Counts()
|
| 132 |
|