ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NTRS_ICC_CountsAnalysis.py
Revision: 817
Committed: Tue Jan 16 21:37:56 2024 UTC (2 years, 2 months ago) by nino.borges
Content type: text/x-python
File size: 5924 byte(s)
Log Message:
This version added code to name the dated header columns correctly.  QCd this by selecting 5 rows at random and comparing them to Tom's table.

File Contents

# User Rev Content
1 nino.borges 816 """
2    
3     NTRS-ICC_CountsAnalysis
4    
5     Created by:
6     Emanuel Borges
7     01.11.2024
8    
9     This program will open multiple xlsx files, using the Excel COM API, and will perform a simple combining of the data into a single xlsx report.
10    
11     """
12    
13     import os, re
14     #import MyCode.Tool_Box.ExcelLib
15 nino.borges 817 from datetime import datetime
16 nino.borges 816 from win32com.client import Dispatch
17    
18    
19     class ICC_CountsAnalyzer(object):
20 nino.borges 817 version = "0.02"
21 nino.borges 816
22     def __init__(self):
23     #self.startDir = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req\_LocalVersion"
24     ## Excel is having issues with the super long path names. not a fan of having to do this...
25     ## Before running, do net use M: "\\localhost\c$\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
26     self.startDir = r"m:\_LocalVersion"
27     self.outputFilePath = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Northern Trust\20240110 - ICC_CountsAnalysis-req"
28     self.outputFileName = r"ExportReport.xlsx"
29    
30     ## Setting the rowStart number to 2 here, to avoid the header row.
31     self.rowStartNumber = 2
32     ## Setting the Column number where the dated columns will start. This is for the outputHeaderMatrix
33     self.datedColumnsStartNumber = 4
34    
35     ## the header matrix with column name to column number
36     self.outputHeaderMatrix = {}
37     ## The headder list with an expanding number of column names
38     self.outputHeaderList = []
39    
40     self.icc_CountMatrix = {}
41     self.xlApp = Dispatch('Excel.Application')
42    
43     ## Simple match to pull out the date as recorded in the path
44     self.dateInPathRegExPattern = '2023-[0-9]{2}-[0-9]{2}'
45    
46     def _CompileHeaderMatrix(self):
47     """Internal method to process the icc_CountMatrix and populate the outputHeaderMatrix"""
48     sendersList = list(self.icc_CountMatrix.keys())
49     for sender in sendersList:
50     valuePairsList = self.icc_CountMatrix[sender]
51     for valuePair in valuePairsList:
52     fName,mCount = valuePair
53     if fName in self.outputHeaderList:
54     pass
55     else:
56     self.outputHeaderList.append(fName)
57     self.outputHeaderList.sort()
58     print(f"There are {len(self.outputHeaderList)} dated columns")
59     count = self.datedColumnsStartNumber
60     for i in self.outputHeaderList:
61     self.outputHeaderMatrix[i] = count
62     count +=1
63    
64    
65     def _ConvertFileNameToDate(self, fileName):
66 nino.borges 817 """Converts a file name to a date, which will be used as the column name. Returns a string of that extracted date."""
67     fileName = os.path.splitext(fileName)[0]
68     tempDtObj = datetime.strptime(fileName,'%d-%b-%Y')
69     return datetime.strftime(tempDtObj,'%m/%d/%Y')
70 nino.borges 816
71     def _CreateOutputSpreadsheet(self):
72     """This method creates the final spreadsheet report, based on the informaton gathered"""
73     print("Writing the final report...")
74     currentRow = 2
75     self.xlBook = self.xlApp.Workbooks.Add()
76     sht = self.xlBook.Worksheets(1)
77     ## First create the header row
78     sht.Cells(1,1).Value = "Rank"
79     sht.Cells(1,2).Value = "Row Labels"
80     sht.Cells(1,3).Value = "Batch"
81     for colName in list(self.outputHeaderMatrix.keys()):
82     sht.Cells(1,self.outputHeaderMatrix[colName]).Value = colName
83     ## Next populate all of the values
84     for senderValue in list(self.icc_CountMatrix.keys()):
85     sht.Cells(currentRow,2).Value = senderValue
86     valuePairsList = self.icc_CountMatrix[senderValue]
87     for valuePair in valuePairsList:
88     colName,mCount = valuePair
89     sht.Cells(currentRow,self.outputHeaderMatrix[colName]).Value = mCount
90     currentRow += 1
91     ## Finally save the spreadsheet
92     self.xlBook.SaveAs(os.path.join(self.outputFilePath, self.outputFileName))
93     self.xlBook.Close()
94    
95    
96     def AnalyzeICC_Counts(self):
97     """Main Method in this program"""
98     for (root,dirs,files) in os.walk(self.startDir):
99     for fl in files:
100     ## Note here that I'm assuming these are excel files. Might need to add some logic here in future.
101     print(f"now testing {os.path.join(root,fl)}")
102     self.xlBook = self.xlApp.Workbooks.Open(os.path.join(root,fl))
103     sht = self.xlBook.Worksheets(1)
104    
105     ## Determine the last row with data
106     rowEndNumber = self.rowStartNumber
107     while sht.Cells(rowEndNumber + 1, 1).Value not in [None, '']:
108     rowEndNumber += 1
109    
110     ## For each row, gather the upper of the text field and then the count
111     for row in range(self.rowStartNumber, rowEndNumber +1):
112     senderValue = sht.Cells(row,2).Value
113     senderValue = senderValue.upper()
114     mCountValue = sht.Cells(row,1).Value
115     if senderValue in list(self.icc_CountMatrix.keys()):
116 nino.borges 817 self.icc_CountMatrix[senderValue].append((self._ConvertFileNameToDate(fl),mCountValue))
117 nino.borges 816 else:
118 nino.borges 817 self.icc_CountMatrix[senderValue] = [(self._ConvertFileNameToDate(fl),mCountValue),]
119 nino.borges 816 #testValue = sht.Cells(rowEndNumber,1).Value
120     #print(testValue)
121     self.xlBook.Close()
122     #del self.xlApp
123    
124     self._CompileHeaderMatrix()
125     self._CreateOutputSpreadsheet()
126    
127    
128    
129    
130    
131     if __name__ == '__main__':
132    
133     ica = ICC_CountsAnalyzer()
134     ica.AnalyzeICC_Counts()
135