Active_prgs/Redgrave/NTRS_ICC_CountsAnalysis.py

"""

NTRS-ICC_CountsAnalysis

Created by:
Emanuel Borges
01.11.2024

This program will open multiple xlsx files, using the Excel COM API, and will perform a simple combining of the data into a single xlsx report.

"""

import os, re
#import MyCode.Tool_Box.ExcelLib
from win32com.client import Dispatch


class ICC_CountsAnalyzer(object):
    version = "0.01"

    def __init__(self):
        #self.startDir = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req\_LocalVersion"
        ##  Excel is having issues with the super long path names.  not a fan of having to do this...
        ##  Before running, do  net use M: "\\localhost\c$\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
        self.startDir = r"m:\_LocalVersion"
        self.outputFilePath = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
        self.outputFileName = r"ExportReport.xlsx"
        
        ##  Setting the rowStart number to 2 here, to avoid the header row.
        self.rowStartNumber = 2
        ##  Setting the Column number where the dated columns will start. This is for the outputHeaderMatrix
        self.datedColumnsStartNumber = 4
        
        ##  the header matrix with column name to column number
        self.outputHeaderMatrix = {}
        ##  The headder list with an expanding number of column names
        self.outputHeaderList = []

        self.icc_CountMatrix = {}
        self.xlApp = Dispatch('Excel.Application')

        ##  Simple match to pull out the date as recorded in the path
        self.dateInPathRegExPattern = '2023-[0-9]{2}-[0-9]{2}'

    def _CompileHeaderMatrix(self):
        """Internal method to process the icc_CountMatrix and populate the outputHeaderMatrix"""
        sendersList = list(self.icc_CountMatrix.keys())
        for sender in sendersList:
            valuePairsList = self.icc_CountMatrix[sender]
            for valuePair in valuePairsList:
                fName,mCount = valuePair
                if fName in self.outputHeaderList:
                    pass
                else:
                    self.outputHeaderList.append(fName)
        self.outputHeaderList.sort()
        print(f"There are {len(self.outputHeaderList)} dated columns")
        count = self.datedColumnsStartNumber
        for i in self.outputHeaderList:
            self.outputHeaderMatrix[i] = count
            count +=1
            
    
    def _ConvertFileNameToDate(self, fileName):
        """Converts a file name to a date, which will be used as the column name"""
        pass

    def _CreateOutputSpreadsheet(self):
        """This method creates the final spreadsheet report, based on the informaton gathered"""
        print("Writing the final report...")
        currentRow = 2
        self.xlBook = self.xlApp.Workbooks.Add()
        sht = self.xlBook.Worksheets(1)
        ##  First create the header row
        sht.Cells(1,1).Value = "Rank"
        sht.Cells(1,2).Value = "Row Labels"
        sht.Cells(1,3).Value = "Batch"
        for colName in list(self.outputHeaderMatrix.keys()):
            sht.Cells(1,self.outputHeaderMatrix[colName]).Value = colName
        ##  Next populate all of the values
        for senderValue in list(self.icc_CountMatrix.keys()):
            sht.Cells(currentRow,2).Value = senderValue
            valuePairsList = self.icc_CountMatrix[senderValue]
            for valuePair in valuePairsList:
                colName,mCount = valuePair
                sht.Cells(currentRow,self.outputHeaderMatrix[colName]).Value = mCount
            currentRow += 1
        ##  Finally save the spreadsheet
        self.xlBook.SaveAs(os.path.join(self.outputFilePath, self.outputFileName))
        self.xlBook.Close()
        

    def AnalyzeICC_Counts(self):
        """Main Method in this program"""
        for (root,dirs,files) in os.walk(self.startDir):
            for fl in files:
                ##  Note here that I'm assuming these are excel files.  Might need to add some logic here in future.
                print(f"now testing {os.path.join(root,fl)}")
                self.xlBook = self.xlApp.Workbooks.Open(os.path.join(root,fl))
                sht = self.xlBook.Worksheets(1)
                
                ##  Determine the last row with data
                rowEndNumber = self.rowStartNumber
                while sht.Cells(rowEndNumber + 1, 1).Value not in [None, '']:
                    rowEndNumber += 1

                ##  For each row, gather the upper of the text field and then the count
                for row in range(self.rowStartNumber, rowEndNumber +1):
                    senderValue = sht.Cells(row,2).Value
                    senderValue = senderValue.upper()
                    mCountValue = sht.Cells(row,1).Value
                    if senderValue in list(self.icc_CountMatrix.keys()):
                        self.icc_CountMatrix[senderValue].append((fl,mCountValue))
                    else:
                        self.icc_CountMatrix[senderValue] = [(fl,mCountValue),]
                #testValue = sht.Cells(rowEndNumber,1).Value
                #print(testValue)
                self.xlBook.Close()
                #del self.xlApp
                
        self._CompileHeaderMatrix()
        self._CreateOutputSpreadsheet()


if __name__ == '__main__':

    ica = ICC_CountsAnalyzer()
    ica.AnalyzeICC_Counts()
    
Revision:	816
Committed:	Tue Jan 16 20:12:09 2024 UTC (2 years, 2 months ago) by nino.borges
Content type:	text/x-python
File size:	5640 byte(s)
Log Message:	This program will open multiple xlsx files, using the Excel COM API, and will perform a simple combining of the data into a single xlsx report. This version is the first delivered report.
#	Content
1	"""
2
3	NTRS-ICC_CountsAnalysis
4
5	Created by:
6	Emanuel Borges
7	01.11.2024
8
9	This program will open multiple xlsx files, using the Excel COM API, and will perform a simple combining of the data into a single xlsx report.
10
11	"""
12
13	import os, re
14	#import MyCode.Tool_Box.ExcelLib
15	from win32com.client import Dispatch
16
17
18	class ICC_CountsAnalyzer(object):
19	version = "0.01"
20
21	def __init__(self):
22	#self.startDir = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req\_LocalVersion"
23	## Excel is having issues with the super long path names. not a fan of having to do this...
24	## Before running, do net use M: "\\localhost\c$\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
25	self.startDir = r"m:\_LocalVersion"
26	self.outputFilePath = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
27	self.outputFileName = r"ExportReport.xlsx"
28
29	## Setting the rowStart number to 2 here, to avoid the header row.
30	self.rowStartNumber = 2
31	## Setting the Column number where the dated columns will start. This is for the outputHeaderMatrix
32	self.datedColumnsStartNumber = 4
33
34	## the header matrix with column name to column number
35	self.outputHeaderMatrix = {}
36	## The headder list with an expanding number of column names
37	self.outputHeaderList = []
38
39	self.icc_CountMatrix = {}
40	self.xlApp = Dispatch('Excel.Application')
41
42	## Simple match to pull out the date as recorded in the path
43	self.dateInPathRegExPattern = '2023-[0-9]{2}-[0-9]{2}'
44
45	def _CompileHeaderMatrix(self):
46	"""Internal method to process the icc_CountMatrix and populate the outputHeaderMatrix"""
47	sendersList = list(self.icc_CountMatrix.keys())
48	for sender in sendersList:
49	valuePairsList = self.icc_CountMatrix[sender]
50	for valuePair in valuePairsList:
51	fName,mCount = valuePair
52	if fName in self.outputHeaderList:
53	pass
54	else:
55	self.outputHeaderList.append(fName)
56	self.outputHeaderList.sort()
57	print(f"There are {len(self.outputHeaderList)} dated columns")
58	count = self.datedColumnsStartNumber
59	for i in self.outputHeaderList:
60	self.outputHeaderMatrix[i] = count
61	count +=1
62
63
64	def _ConvertFileNameToDate(self, fileName):
65	"""Converts a file name to a date, which will be used as the column name"""
66	pass
67
68	def _CreateOutputSpreadsheet(self):
69	"""This method creates the final spreadsheet report, based on the informaton gathered"""
70	print("Writing the final report...")
71	currentRow = 2
72	self.xlBook = self.xlApp.Workbooks.Add()
73	sht = self.xlBook.Worksheets(1)
74	## First create the header row
75	sht.Cells(1,1).Value = "Rank"
76	sht.Cells(1,2).Value = "Row Labels"
77	sht.Cells(1,3).Value = "Batch"
78	for colName in list(self.outputHeaderMatrix.keys()):
79	sht.Cells(1,self.outputHeaderMatrix[colName]).Value = colName
80	## Next populate all of the values
81	for senderValue in list(self.icc_CountMatrix.keys()):
82	sht.Cells(currentRow,2).Value = senderValue
83	valuePairsList = self.icc_CountMatrix[senderValue]
84	for valuePair in valuePairsList:
85	colName,mCount = valuePair
86	sht.Cells(currentRow,self.outputHeaderMatrix[colName]).Value = mCount
87	currentRow += 1
88	## Finally save the spreadsheet
89	self.xlBook.SaveAs(os.path.join(self.outputFilePath, self.outputFileName))
90	self.xlBook.Close()
91
92
93	def AnalyzeICC_Counts(self):
94	"""Main Method in this program"""
95	for (root,dirs,files) in os.walk(self.startDir):
96	for fl in files:
97	## Note here that I'm assuming these are excel files. Might need to add some logic here in future.
98	print(f"now testing {os.path.join(root,fl)}")
99	self.xlBook = self.xlApp.Workbooks.Open(os.path.join(root,fl))
100	sht = self.xlBook.Worksheets(1)
101
102	## Determine the last row with data
103	rowEndNumber = self.rowStartNumber
104	while sht.Cells(rowEndNumber + 1, 1).Value not in [None, '']:
105	rowEndNumber += 1
106
107	## For each row, gather the upper of the text field and then the count
108	for row in range(self.rowStartNumber, rowEndNumber +1):
109	senderValue = sht.Cells(row,2).Value
110	senderValue = senderValue.upper()
111	mCountValue = sht.Cells(row,1).Value
112	if senderValue in list(self.icc_CountMatrix.keys()):
113	self.icc_CountMatrix[senderValue].append((fl,mCountValue))
114	else:
115	self.icc_CountMatrix[senderValue] = [(fl,mCountValue),]
116	#testValue = sht.Cells(rowEndNumber,1).Value
117	#print(testValue)
118	self.xlBook.Close()
119	#del self.xlApp
120
121	self._CompileHeaderMatrix()
122	self._CreateOutputSpreadsheet()
123
124
125
126
127
128	if __name__ == '__main__':
129
130	ica = ICC_CountsAnalyzer()
131	ica.AnalyzeICC_Counts()
132