| 1 |
nino.borges |
816 |
"""
|
| 2 |
|
|
|
| 3 |
|
|
NTRS-ICC_CountsAnalysis
|
| 4 |
|
|
|
| 5 |
|
|
Created by:
|
| 6 |
|
|
Emanuel Borges
|
| 7 |
|
|
01.11.2024
|
| 8 |
|
|
|
| 9 |
|
|
This program will open multiple xlsx files, using the Excel COM API, and will perform a simple combining of the data into a single xlsx report.
|
| 10 |
|
|
|
| 11 |
|
|
"""
|
| 12 |
|
|
|
| 13 |
|
|
import os, re
|
| 14 |
|
|
#import MyCode.Tool_Box.ExcelLib
|
| 15 |
|
|
from win32com.client import Dispatch
|
| 16 |
|
|
|
| 17 |
|
|
|
| 18 |
|
|
class ICC_CountsAnalyzer(object):
|
| 19 |
|
|
version = "0.01"
|
| 20 |
|
|
|
| 21 |
|
|
def __init__(self):
|
| 22 |
|
|
#self.startDir = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req\_LocalVersion"
|
| 23 |
|
|
## Excel is having issues with the super long path names. not a fan of having to do this...
|
| 24 |
|
|
## Before running, do net use M: "\\localhost\c$\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
|
| 25 |
|
|
self.startDir = r"m:\_LocalVersion"
|
| 26 |
|
|
self.outputFilePath = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
|
| 27 |
|
|
self.outputFileName = r"ExportReport.xlsx"
|
| 28 |
|
|
|
| 29 |
|
|
## Setting the rowStart number to 2 here, to avoid the header row.
|
| 30 |
|
|
self.rowStartNumber = 2
|
| 31 |
|
|
## Setting the Column number where the dated columns will start. This is for the outputHeaderMatrix
|
| 32 |
|
|
self.datedColumnsStartNumber = 4
|
| 33 |
|
|
|
| 34 |
|
|
## the header matrix with column name to column number
|
| 35 |
|
|
self.outputHeaderMatrix = {}
|
| 36 |
|
|
## The headder list with an expanding number of column names
|
| 37 |
|
|
self.outputHeaderList = []
|
| 38 |
|
|
|
| 39 |
|
|
self.icc_CountMatrix = {}
|
| 40 |
|
|
self.xlApp = Dispatch('Excel.Application')
|
| 41 |
|
|
|
| 42 |
|
|
## Simple match to pull out the date as recorded in the path
|
| 43 |
|
|
self.dateInPathRegExPattern = '2023-[0-9]{2}-[0-9]{2}'
|
| 44 |
|
|
|
| 45 |
|
|
def _CompileHeaderMatrix(self):
|
| 46 |
|
|
"""Internal method to process the icc_CountMatrix and populate the outputHeaderMatrix"""
|
| 47 |
|
|
sendersList = list(self.icc_CountMatrix.keys())
|
| 48 |
|
|
for sender in sendersList:
|
| 49 |
|
|
valuePairsList = self.icc_CountMatrix[sender]
|
| 50 |
|
|
for valuePair in valuePairsList:
|
| 51 |
|
|
fName,mCount = valuePair
|
| 52 |
|
|
if fName in self.outputHeaderList:
|
| 53 |
|
|
pass
|
| 54 |
|
|
else:
|
| 55 |
|
|
self.outputHeaderList.append(fName)
|
| 56 |
|
|
self.outputHeaderList.sort()
|
| 57 |
|
|
print(f"There are {len(self.outputHeaderList)} dated columns")
|
| 58 |
|
|
count = self.datedColumnsStartNumber
|
| 59 |
|
|
for i in self.outputHeaderList:
|
| 60 |
|
|
self.outputHeaderMatrix[i] = count
|
| 61 |
|
|
count +=1
|
| 62 |
|
|
|
| 63 |
|
|
|
| 64 |
|
|
def _ConvertFileNameToDate(self, fileName):
|
| 65 |
|
|
"""Converts a file name to a date, which will be used as the column name"""
|
| 66 |
|
|
pass
|
| 67 |
|
|
|
| 68 |
|
|
def _CreateOutputSpreadsheet(self):
|
| 69 |
|
|
"""This method creates the final spreadsheet report, based on the informaton gathered"""
|
| 70 |
|
|
print("Writing the final report...")
|
| 71 |
|
|
currentRow = 2
|
| 72 |
|
|
self.xlBook = self.xlApp.Workbooks.Add()
|
| 73 |
|
|
sht = self.xlBook.Worksheets(1)
|
| 74 |
|
|
## First create the header row
|
| 75 |
|
|
sht.Cells(1,1).Value = "Rank"
|
| 76 |
|
|
sht.Cells(1,2).Value = "Row Labels"
|
| 77 |
|
|
sht.Cells(1,3).Value = "Batch"
|
| 78 |
|
|
for colName in list(self.outputHeaderMatrix.keys()):
|
| 79 |
|
|
sht.Cells(1,self.outputHeaderMatrix[colName]).Value = colName
|
| 80 |
|
|
## Next populate all of the values
|
| 81 |
|
|
for senderValue in list(self.icc_CountMatrix.keys()):
|
| 82 |
|
|
sht.Cells(currentRow,2).Value = senderValue
|
| 83 |
|
|
valuePairsList = self.icc_CountMatrix[senderValue]
|
| 84 |
|
|
for valuePair in valuePairsList:
|
| 85 |
|
|
colName,mCount = valuePair
|
| 86 |
|
|
sht.Cells(currentRow,self.outputHeaderMatrix[colName]).Value = mCount
|
| 87 |
|
|
currentRow += 1
|
| 88 |
|
|
## Finally save the spreadsheet
|
| 89 |
|
|
self.xlBook.SaveAs(os.path.join(self.outputFilePath, self.outputFileName))
|
| 90 |
|
|
self.xlBook.Close()
|
| 91 |
|
|
|
| 92 |
|
|
|
| 93 |
|
|
def AnalyzeICC_Counts(self):
|
| 94 |
|
|
"""Main Method in this program"""
|
| 95 |
|
|
for (root,dirs,files) in os.walk(self.startDir):
|
| 96 |
|
|
for fl in files:
|
| 97 |
|
|
## Note here that I'm assuming these are excel files. Might need to add some logic here in future.
|
| 98 |
|
|
print(f"now testing {os.path.join(root,fl)}")
|
| 99 |
|
|
self.xlBook = self.xlApp.Workbooks.Open(os.path.join(root,fl))
|
| 100 |
|
|
sht = self.xlBook.Worksheets(1)
|
| 101 |
|
|
|
| 102 |
|
|
## Determine the last row with data
|
| 103 |
|
|
rowEndNumber = self.rowStartNumber
|
| 104 |
|
|
while sht.Cells(rowEndNumber + 1, 1).Value not in [None, '']:
|
| 105 |
|
|
rowEndNumber += 1
|
| 106 |
|
|
|
| 107 |
|
|
## For each row, gather the upper of the text field and then the count
|
| 108 |
|
|
for row in range(self.rowStartNumber, rowEndNumber +1):
|
| 109 |
|
|
senderValue = sht.Cells(row,2).Value
|
| 110 |
|
|
senderValue = senderValue.upper()
|
| 111 |
|
|
mCountValue = sht.Cells(row,1).Value
|
| 112 |
|
|
if senderValue in list(self.icc_CountMatrix.keys()):
|
| 113 |
|
|
self.icc_CountMatrix[senderValue].append((fl,mCountValue))
|
| 114 |
|
|
else:
|
| 115 |
|
|
self.icc_CountMatrix[senderValue] = [(fl,mCountValue),]
|
| 116 |
|
|
#testValue = sht.Cells(rowEndNumber,1).Value
|
| 117 |
|
|
#print(testValue)
|
| 118 |
|
|
self.xlBook.Close()
|
| 119 |
|
|
#del self.xlApp
|
| 120 |
|
|
|
| 121 |
|
|
self._CompileHeaderMatrix()
|
| 122 |
|
|
self._CreateOutputSpreadsheet()
|
| 123 |
|
|
|
| 124 |
|
|
|
| 125 |
|
|
|
| 126 |
|
|
|
| 127 |
|
|
|
| 128 |
|
|
if __name__ == '__main__':
|
| 129 |
|
|
|
| 130 |
|
|
ica = ICC_CountsAnalyzer()
|
| 131 |
|
|
ica.AnalyzeICC_Counts()
|
| 132 |
|
|
|