ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NTRS-TopSenderAnalysis.py
Revision: 798
Committed: Thu Sep 21 19:18:39 2023 UTC (2 years, 6 months ago) by nino.borges
Content type: text/x-python
File size: 2517 byte(s)
Log Message:
This version created the beginnings of the report.  It generates the counts and uses the regEx to pull out all of the ntrs values from the TO line, writing these to a file so that I can look for instances where it's pulling an ntrs but it's not a lan ID one.

File Contents

# Content
1 """
2
3 NTRS-TopSenderAnalysis
4
5 Created by:
6 Emanuel Borges
7 09.20.2023
8
9 Simple program that reads multiple CSV files and exports a report based on LAN ID and other general information.
10 To-Do: Add a method to QC for parsing errors; there should be the same number of fields across all of the CSVs.
11
12 """
13
14 import csv, os, re
15
# LAN-ID style address: two letters, two or three digits, then the NTRS domain.
# The dot is escaped so that only a literal "." separates NTRS and COM.
# NOTE: the pattern is not anchored on the left, so the tail of a longer local
# part is captured as well (e.g. "JSMITH12@NTRS.COM" yields "TH12@NTRS.COM").
# The extracted list should therefore be reviewed for non-LAN-ID hits.
regExPattern = r'[A-Za-z]{2}[0-9]{2,3}@NTRS\.COM'
lanAddressRegEx = re.compile(regExPattern)


def extractLanAddresses(toValue):
    """Return the LAN-ID style NTRS addresses found in one TO value.

    The value is upper-cased before matching, so every returned address is
    upper case.  Values that do not mention "@NTRS.COM" at all return an
    empty list without running the regular expression.
    """
    upperValue = toValue.upper()
    if "@NTRS.COM" not in upperValue:
        return []
    return lanAddressRegEx.findall(upperValue)


def summarizeToColumn(rows, lanAddresses):
    """Count rows by the length of their 'To' field and collect LAN addresses.

    rows         -- iterable of mappings that each have a 'To' key
                    (for example a csv.DictReader).
    lanAddresses -- set that is updated in place with every address found.

    Returns a tuple (rowCount, eightyCharRows, nonEightyCharRows, anyOverEighty)
    where anyOverEighty is True when at least one 'To' value is longer than
    80 characters.
    """
    rowCount = 0
    eightyCharRows = 0
    nonEightyCharRows = 0
    anyOverEighty = False
    for row in rows:
        rowCount += 1
        toValue = row['To']
        toLength = len(toValue)
        if toLength == 80:
            eightyCharRows += 1
        else:
            nonEightyCharRows += 1
            if toLength > 80:
                anyOverEighty = True
        lanAddresses.update(extractLanAddresses(toValue))
    return rowCount, eightyCharRows, nonEightyCharRows, anyOverEighty


if __name__ == '__main__':
    startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"
    allToLanAddressesSet = set()

    # Every file found under startDir is read as a CSV with a 'To' column.
    for (root, _dirs, files) in os.walk(startDir):
        for fl in files:
            # 'ANSI' is the Windows-only alias of the mbcs codec.
            # newline='' lets the csv module handle line endings itself, so
            # line breaks embedded in quoted fields are not rewritten before
            # the field lengths are measured.
            with open(os.path.join(root, fl), mode='r', encoding='ANSI', newline='') as csv_file:
                (fileRowCount,
                 eightyCharRowCount,
                 nonEightyCharRowCount,
                 charsOverEighty) = summarizeToColumn(csv.DictReader(csv_file), allToLanAddressesSet)
            # One pipe-delimited summary line per input file.
            print(f"{fl}|{fileRowCount}|{eightyCharRowCount}|{nonEightyCharRowCount}|{charsOverEighty}")

    print(f"There are {len(allToLanAddressesSet)} unique LAN ID addresses.")

    # Write the unique addresses, sorted, one per line; the context manager
    # closes the file even if a write fails.
    with open(r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt", 'w') as outputFile:
        outputFile.writelines(f"{address}\n" for address in sorted(allToLanAddressesSet))
55
56 ## Initially gathering some very basic information across the CSV files, not using csv lib
57 # for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
58 # for fl in files:
59 # contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
60 # print(f"{fl}|{len(contents)-1}")