Active_prgs/Redgrave/NTRS-TopSenderAnalysis.py

"""

NTRS-TopSenderAnalysis

Created by:
Emanuel Borges
09.20.2023

Very simple program that will read multiple CSV files and export a report based on LanID and other general information.
To-Do: Add a method to QC for parsing errors; every CSV should have the same number of fields.

"""

import csv, os, re

# Folder that is walked recursively; every file found is read as a CSV.
START_DIR = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"

# Text file that receives the sorted, de-duplicated LAN ID addresses.
OUTPUT_PATH = r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt"

# 'ANSI' is Python's alias for the Windows-only 'mbcs' codec (system code page).
CSV_ENCODING = 'ANSI'

# An NTRS LAN ID address: two letters, then two or three digits, then the domain.
#  - The look-behind rejects matches that are merely the tail of a longer
#    local part, so "JSMITH12@NTRS.COM" no longer yields "TH12@NTRS.COM".
#  - The dot is escaped so it matches a literal "." instead of any character.
# Compiled once here because it is applied to every row of every file.
LAN_ADDRESS_PATTERN = re.compile(
    r'(?<![A-Za-z0-9._%+-])[A-Za-z]{2}[0-9]{2,3}@NTRS\.COM'
)


def extract_lan_addresses(to_value):
    """Return the NTRS LAN ID addresses found in *to_value*.

    The value is upper-cased before matching, so the returned addresses are
    always upper case. Matches are returned in order of appearance and may
    contain duplicates.
    """
    return LAN_ADDRESS_PATTERN.findall(to_value.upper())


def summarize_rows(rows):
    """Tally the 'To' column of an iterable of CSV row dicts.

    Args:
        rows: iterable of mappings that each contain a 'To' key, such as a
            ``csv.DictReader``. A 'To' value of ``None`` (which DictReader
            produces for rows shorter than the header) is treated as empty.

    Returns:
        A tuple ``(row_count, eighty_char_rows, non_eighty_char_rows,
        any_over_eighty, lan_addresses)`` where ``lan_addresses`` is the set
        of LAN ID addresses found across all rows.

    Raises:
        KeyError: if a row has no 'To' column at all, which indicates a file
            with an unexpected header rather than a merely short row.
    """
    row_count = 0
    eighty_char_rows = 0
    any_over_eighty = False
    lan_addresses = set()

    for row in rows:
        row_count += 1
        to_value = row['To'] or ''
        to_length = len(to_value)
        if to_length == 80:
            eighty_char_rows += 1
        elif to_length > 80:
            any_over_eighty = True
        lan_addresses.update(extract_lan_addresses(to_value))

    return (
        row_count,
        eighty_char_rows,
        row_count - eighty_char_rows,
        any_over_eighty,
        lan_addresses,
    )


def main():
    """Print per-file 'To' statistics and write all unique LAN ID addresses.

    For each file under START_DIR one pipe-delimited line is printed:
    file name, row count, rows whose 'To' is exactly 80 characters, rows
    whose 'To' is any other length, and whether any 'To' exceeded 80.
    """
    all_lan_addresses = set()

    for root, _dirs, files in os.walk(START_DIR):
        for fl in files:
            csv_path = os.path.join(root, fl)
            # newline='' lets the csv module handle line endings itself, as
            # its documentation requires for quoted fields with line breaks.
            with open(csv_path, mode='r', encoding=CSV_ENCODING, newline='') as csv_file:
                (row_count, eighty_char_rows, non_eighty_char_rows,
                 any_over_eighty, lan_addresses) = summarize_rows(csv.DictReader(csv_file))
            all_lan_addresses |= lan_addresses
            print(f"{fl}|{row_count}|{eighty_char_rows}|{non_eighty_char_rows}|{any_over_eighty}")

    print(f"There are {len(all_lan_addresses)} unique LAN ID addresses.")

    # The context manager closes the report even if a write fails.
    with open(OUTPUT_PATH, 'w') as output_file:
        output_file.writelines(f"{address}\n" for address in sorted(all_lan_addresses))


if __name__ == '__main__':
    main()

    ## Earlier approach: gathered basic per-file line counts across the CSV files without using the csv library.
#    for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
#        for fl in files:
#            contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
#            print(f"{fl}|{len(contents)-1}")
Revision:	798
Committed:	Thu Sep 21 19:18:39 2023 UTC (2 years, 6 months ago) by nino.borges
Content type:	text/x-python
File size:	2517 byte(s)
Log Message:	This version created the beginnings of the report. It generates the counts and uses the regEx to pull out all of the ntrs values from the TO line, writing these to a file so that I can look for instances where it's pulling an ntrs but it's not a lan ID one.
#	Content
1	"""
2
3	NTRS-TopSenderAnalysis
4
5	Created by:
6	Emanuel Borges
7	09.20.2023
8
9	Very simple program that will read multiple CSV files and export a report based on LanID and other general information.
10	To-Do: Method to QC for any parsing errors. There should be the same number of fields across all of the CSVs.;
11
12	"""
13
14	import csv, os, re
15
16	if __name__ == '__main__':
17	startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"
18	allToLanAddressesSet = set()
19	regExPattern = '[A-Za-z]{2}[0-9]{2,3}@NTRS.COM'
20
21
22
23	for (root,dirs,files) in os.walk(startDir):
24	for fl in files:
25	fileRowCount = 0
26	eightyCharRowCount = 0
27	nonEightyCharRowCount = 0
28	charsOverEighty = False
29	with open(os.path.join(root,fl),mode='r',encoding='ANSI') as csv_file:
30	csv_reader = csv.DictReader(csv_file)
31	for row in csv_reader:
32	fileRowCount += 1
33	if len(row['To']) == 80:
34	eightyCharRowCount +=1
35	else:
36	nonEightyCharRowCount +=1
37	if len(row['To']) > 80:
38	charsOverEighty = True
39	toValue = row['To']
40	toValue = toValue.upper()
41	if "@NTRS.COM" in toValue:
42	ntrsLanAddressesList = re.findall(regExPattern, toValue)
43	if ntrsLanAddressesList:
44	for a in ntrsLanAddressesList:
45	allToLanAddressesSet.add(a)
46	print(f"{fl}\|{fileRowCount}\|{eightyCharRowCount}\|{nonEightyCharRowCount}\|{charsOverEighty}")
47	csv_file.close()
48	print(f"There are {len(allToLanAddressesSet)} unique LAN ID addresses.")
49	outputFile = open(r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt",'w')
50	allToLanAddressesList = list(allToLanAddressesSet)
51	allToLanAddressesList.sort()
52	for i in allToLanAddressesList:
53	outputFile.write(f"{i}\n")
54	outputFile.close()
55
56	## Initially gathering some very basic information across the CSV files, not using csv lib
57	# for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
58	# for fl in files:
59	# contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
60	# print(f"{fl}\|{len(contents)-1}")