ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NTRS_ICC_CountsAnalysis.py
Revision: 816
Committed: Tue Jan 16 20:12:09 2024 UTC (2 years, 2 months ago) by nino.borges
Content type: text/x-python
File size: 5640 byte(s)
Log Message:
This program will open multiple xlsx files, using the Excel COM API, and will perform a simple combining of the data into a single xlsx report.  This version is the first delivered report.

File Contents

# User Rev Content
1 nino.borges 816 """
2    
3     NTRS-ICC_CountsAnalysis
4    
5     Created by:
6     Emanuel Borges
7     01.11.2024
8    
9     This program will open multiple xlsx files, using the Excel COM API, and will perform a simple combining of the data into a single xlsx report.
10    
11     """
12    
13     import os, re
14     #import MyCode.Tool_Box.ExcelLib
15     from win32com.client import Dispatch
16    
17    
18     class ICC_CountsAnalyzer(object):
19     version = "0.01"
20    
21     def __init__(self):
22     #self.startDir = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req\_LocalVersion"
23     ## Excel is having issues with the super long path names. not a fan of having to do this...
24     ## Before running, do net use M: "\\localhost\c$\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
25     self.startDir = r"m:\_LocalVersion"
26     self.outputFilePath = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
27     self.outputFileName = r"ExportReport.xlsx"
28    
29     ## Setting the rowStart number to 2 here, to avoid the header row.
30     self.rowStartNumber = 2
31     ## Setting the Column number where the dated columns will start. This is for the outputHeaderMatrix
32     self.datedColumnsStartNumber = 4
33    
34     ## the header matrix with column name to column number
35     self.outputHeaderMatrix = {}
36     ## The headder list with an expanding number of column names
37     self.outputHeaderList = []
38    
39     self.icc_CountMatrix = {}
40     self.xlApp = Dispatch('Excel.Application')
41    
42     ## Simple match to pull out the date as recorded in the path
43     self.dateInPathRegExPattern = '2023-[0-9]{2}-[0-9]{2}'
44    
45     def _CompileHeaderMatrix(self):
46     """Internal method to process the icc_CountMatrix and populate the outputHeaderMatrix"""
47     sendersList = list(self.icc_CountMatrix.keys())
48     for sender in sendersList:
49     valuePairsList = self.icc_CountMatrix[sender]
50     for valuePair in valuePairsList:
51     fName,mCount = valuePair
52     if fName in self.outputHeaderList:
53     pass
54     else:
55     self.outputHeaderList.append(fName)
56     self.outputHeaderList.sort()
57     print(f"There are {len(self.outputHeaderList)} dated columns")
58     count = self.datedColumnsStartNumber
59     for i in self.outputHeaderList:
60     self.outputHeaderMatrix[i] = count
61     count +=1
62    
63    
64     def _ConvertFileNameToDate(self, fileName):
65     """Converts a file name to a date, which will be used as the column name"""
66     pass
67    
68     def _CreateOutputSpreadsheet(self):
69     """This method creates the final spreadsheet report, based on the informaton gathered"""
70     print("Writing the final report...")
71     currentRow = 2
72     self.xlBook = self.xlApp.Workbooks.Add()
73     sht = self.xlBook.Worksheets(1)
74     ## First create the header row
75     sht.Cells(1,1).Value = "Rank"
76     sht.Cells(1,2).Value = "Row Labels"
77     sht.Cells(1,3).Value = "Batch"
78     for colName in list(self.outputHeaderMatrix.keys()):
79     sht.Cells(1,self.outputHeaderMatrix[colName]).Value = colName
80     ## Next populate all of the values
81     for senderValue in list(self.icc_CountMatrix.keys()):
82     sht.Cells(currentRow,2).Value = senderValue
83     valuePairsList = self.icc_CountMatrix[senderValue]
84     for valuePair in valuePairsList:
85     colName,mCount = valuePair
86     sht.Cells(currentRow,self.outputHeaderMatrix[colName]).Value = mCount
87     currentRow += 1
88     ## Finally save the spreadsheet
89     self.xlBook.SaveAs(os.path.join(self.outputFilePath, self.outputFileName))
90     self.xlBook.Close()
91    
92    
93     def AnalyzeICC_Counts(self):
94     """Main Method in this program"""
95     for (root,dirs,files) in os.walk(self.startDir):
96     for fl in files:
97     ## Note here that I'm assuming these are excel files. Might need to add some logic here in future.
98     print(f"now testing {os.path.join(root,fl)}")
99     self.xlBook = self.xlApp.Workbooks.Open(os.path.join(root,fl))
100     sht = self.xlBook.Worksheets(1)
101    
102     ## Determine the last row with data
103     rowEndNumber = self.rowStartNumber
104     while sht.Cells(rowEndNumber + 1, 1).Value not in [None, '']:
105     rowEndNumber += 1
106    
107     ## For each row, gather the upper of the text field and then the count
108     for row in range(self.rowStartNumber, rowEndNumber +1):
109     senderValue = sht.Cells(row,2).Value
110     senderValue = senderValue.upper()
111     mCountValue = sht.Cells(row,1).Value
112     if senderValue in list(self.icc_CountMatrix.keys()):
113     self.icc_CountMatrix[senderValue].append((fl,mCountValue))
114     else:
115     self.icc_CountMatrix[senderValue] = [(fl,mCountValue),]
116     #testValue = sht.Cells(rowEndNumber,1).Value
117     #print(testValue)
118     self.xlBook.Close()
119     #del self.xlApp
120    
121     self._CompileHeaderMatrix()
122     self._CreateOutputSpreadsheet()
123    
124    
125    
126    
127    
128     if __name__ == '__main__':
129    
130     ica = ICC_CountsAnalyzer()
131     ica.AnalyzeICC_Counts()
132