Active_prgs/Redgrave/NTRS-TopSenderAnalysis.py

"""

NTRS-TopSenderAnalysis

Created by:
Emanuel Borges
09.20.2023

Very simple program that will read multiple CSV files and export a report based on LanID and other general information.
To-Do: Add a QC method to detect parsing errors.  There should be the same number of fields across all of the CSVs.

"""

import csv, os, re

# LAN-ID style address: two letters, two or three digits, then the NTRS domain.
# The dot is escaped so that it only matches a literal '.' (an unescaped dot
# would also accept text such as "AB12@NTRSXCOM").
LAN_ADDRESS_PATTERN = r'[A-Za-z]{2}[0-9]{2,3}@NTRS\.COM'


def extract_lan_addresses(to_value, pattern=LAN_ADDRESS_PATTERN):
    """Return the LAN-ID style NTRS addresses found in a 'To' field value.

    The value is upper-cased before matching, so the returned addresses are
    always upper case.

    Args:
        to_value: Raw text of the 'To' column for one row.
        pattern: Regular expression applied to the upper-cased value.

    Returns:
        A list of the matching addresses, in order of appearance (duplicates
        are kept).  Empty when the value holds no "@NTRS.COM" text at all.
    """
    upperValue = to_value.upper()
    # Cheap substring test first so the regex only runs on candidate rows.
    if "@NTRS.COM" not in upperValue:
        return []
    return re.findall(pattern, upperValue)


def summarize_csv(csv_path, address_set, encoding='ANSI'):
    """Collect 'To'-column statistics for one CSV file.

    Every LAN-ID style address found in the 'To' column is added to
    ``address_set`` (which is mutated in place).

    Args:
        csv_path: Path of the CSV file to read.  Its header must contain a
            'To' column.
        address_set: Set that receives the extracted addresses.
        encoding: Text encoding used to open the file.  The default 'ANSI'
            is an alias of the Windows-only 'mbcs' codec.

    Returns:
        A tuple ``(row_count, eighty_char_rows, non_eighty_char_rows,
        any_over_eighty)`` describing the length of the 'To' values.

    Raises:
        KeyError: If the file has no 'To' column.
    """
    fileRowCount = 0
    eightyCharRowCount = 0
    nonEightyCharRowCount = 0
    charsOverEighty = False
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields containing embedded newlines to parse right.
    with open(csv_path, mode='r', encoding=encoding, newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            fileRowCount += 1
            # A row with fewer fields than the header yields None for the
            # missing columns; treat that as an empty 'To' value rather than
            # crashing on len(None).
            toValue = row['To'] or ''
            toLength = len(toValue)
            if toLength == 80:
                eightyCharRowCount += 1
            else:
                nonEightyCharRowCount += 1
            if toLength > 80:
                charsOverEighty = True
            address_set.update(extract_lan_addresses(toValue))
    return fileRowCount, eightyCharRowCount, nonEightyCharRowCount, charsOverEighty


if __name__ == '__main__':
    startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"
    outputPath = r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt"
    allToLanAddressesSet = set()

    # Every file found under startDir is read as a CSV; one pipe-delimited
    # summary line is printed per file.
    for (root, dirs, files) in os.walk(startDir):
        for fl in files:
            fileRowCount, eightyCharRowCount, nonEightyCharRowCount, charsOverEighty = summarize_csv(
                os.path.join(root, fl), allToLanAddressesSet)
            print(f"{fl}|{fileRowCount}|{eightyCharRowCount}|{nonEightyCharRowCount}|{charsOverEighty}")

    print(f"There are {len(allToLanAddressesSet)} unique LAN ID addresses.")

    # Write the unique addresses, sorted, one per line.  The context manager
    # guarantees the file is closed even if a write fails.
    with open(outputPath, 'w') as outputFile:
        for address in sorted(allToLanAddressesSet):
            outputFile.write(f"{address}\n")

    ## Initially gathering some very basic information across the CSV files, not using csv lib
#    for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
#        for fl in files:
#            contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
#            print(f"{fl}|{len(contents)-1}")
Revision:	798
Committed:	Thu Sep 21 19:18:39 2023 UTC (2 years, 6 months ago) by nino.borges
Content type:	text/x-python
File size:	2517 byte(s)
Log Message:	This version created the beginnings of the report. It generates the counts and uses the regEx to pull out all of the ntrs values from the TO line, writing these to a file so that I can look for instances where it's pulling an ntrs but it's not a lan ID one.
#	User	Rev	Content
1	nino.borges	797	"""
2
3			NTRS-TopSenderAnalysis
4
5			Created by:
6			Emanuel Borges
7			09.20.2023
8
9			Very simple program that will read multiple CSV files and export a report based on LanID and other general information.
10	nino.borges	798	To-Do: Method to QC for any parsing errors. There should be the same number of fields across all of the CSVs.;
11	nino.borges	797
12			"""
13
14	nino.borges	798	import csv, os, re
15	nino.borges	797
16			if __name__ == '__main__':
17	nino.borges	798	startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"
18			allToLanAddressesSet = set()
19			regExPattern = '[A-Za-z]{2}[0-9]{2,3}@NTRS.COM'
20
21	nino.borges	797
22
23			for (root,dirs,files) in os.walk(startDir):
24			for fl in files:
25	nino.borges	798	fileRowCount = 0
26			eightyCharRowCount = 0
27			nonEightyCharRowCount = 0
28			charsOverEighty = False
29	nino.borges	797	with open(os.path.join(root,fl),mode='r',encoding='ANSI') as csv_file:
30	nino.borges	798	csv_reader = csv.DictReader(csv_file)
31	nino.borges	797	for row in csv_reader:
32	nino.borges	798	fileRowCount += 1
33			if len(row['To']) == 80:
34			eightyCharRowCount +=1
35			else:
36			nonEightyCharRowCount +=1
37			if len(row['To']) > 80:
38			charsOverEighty = True
39			toValue = row['To']
40			toValue = toValue.upper()
41			if "@NTRS.COM" in toValue:
42			ntrsLanAddressesList = re.findall(regExPattern, toValue)
43			if ntrsLanAddressesList:
44			for a in ntrsLanAddressesList:
45			allToLanAddressesSet.add(a)
46			print(f"{fl}\|{fileRowCount}\|{eightyCharRowCount}\|{nonEightyCharRowCount}\|{charsOverEighty}")
47			csv_file.close()
48			print(f"There are {len(allToLanAddressesSet)} unique LAN ID addresses.")
49			outputFile = open(r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt",'w')
50			allToLanAddressesList = list(allToLanAddressesSet)
51			allToLanAddressesList.sort()
52			for i in allToLanAddressesList:
53			outputFile.write(f"{i}\n")
54			outputFile.close()
55	nino.borges	797
56			## Initially gathering some very basic information across the CSV files, not using csv lib
57			# for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
58			# for fl in files:
59			# contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
60			# print(f"{fl}\|{len(contents)-1}")