ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NTRS-TopSenderAnalysis.py
Revision: 798
Committed: Thu Sep 21 19:18:39 2023 UTC (2 years, 6 months ago) by nino.borges
Content type: text/x-python
File size: 2517 byte(s)
Log Message:
This version creates the beginnings of the report. It generates the per-file counts and uses the regex to pull every NTRS address out of the To line, writing them to a file so that I can look for instances where it pulls an NTRS address that is not a LAN ID.

File Contents

# User Rev Content
1 nino.borges 797 """
2    
3     NTRS-TopSenderAnalysis
4    
5     Created by:
6     Emanuel Borges
7     09.20.2023
8    
9     Very simple program that will read multiple CSV files and export a report based on LanID and other general information.
10 nino.borges 798 To-Do: Add a method to QC for parsing errors; there should be the same number of fields across all of the CSVs.
11 nino.borges 797
12     """
13    
14 nino.borges 798 import csv, os, re
15 nino.borges 797
# Default locations used when the script is run directly.
START_DIR = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"
OUTPUT_PATH = r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt"

# NOTE(review): 'ANSI' is documented as an alias of the Windows-only mbcs
# codec; on other platforms pass a different ``encoding`` to main().
CSV_ENCODING = 'ANSI'

# Length of the "To" value that the per-file counts are measured against.
TO_FIELD_WIDTH = 80

# Two letters, two or three digits, then the NTRS domain.  The dot is escaped
# so that only a literal "@NTRS.COM" matches (an unescaped "." would also
# accept e.g. "@NTRSXCOM").  Compiled once because it is used for every row.
LAN_ADDRESS_RE = re.compile(r"[A-Za-z]{2}[0-9]{2,3}@NTRS\.COM")


def _scan_csv(path, encoding, lan_addresses):
    """Read one CSV file, tally its "To" column and collect LAN addresses.

    Args:
        path: Full path of the CSV file to read.
        encoding: Text encoding used to open the file.
        lan_addresses: Set that every address matched in this file is added
            to (mutated in place).

    Returns:
        A tuple ``(row_count, eighty_count, non_eighty_count, over_eighty)``:
        the number of data rows, how many "To" values are exactly
        ``TO_FIELD_WIDTH`` characters long, how many are not, and whether any
        "To" value is longer than ``TO_FIELD_WIDTH``.

    Raises:
        KeyError: If the file has rows but no "To" column.
    """
    row_count = 0
    eighty_count = 0
    over_eighty = False
    # newline='' is what the csv module asks for so that line breaks inside
    # quoted fields are passed through to the parser untouched.
    with open(path, mode='r', encoding=encoding, newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            row_count += 1
            # A row with fewer fields than the header yields None for the
            # missing columns; treat that as an empty "To" value.
            to_value = row['To'] or ''
            if len(to_value) == TO_FIELD_WIDTH:
                eighty_count += 1
            elif len(to_value) > TO_FIELD_WIDTH:
                over_eighty = True
            # Upper-case first so matching is case-insensitive and the
            # collected addresses are normalised.
            to_value = to_value.upper()
            if "@NTRS.COM" in to_value:
                lan_addresses.update(LAN_ADDRESS_RE.findall(to_value))
    return row_count, eighty_count, row_count - eighty_count, over_eighty


def main(start_dir=START_DIR, output_path=OUTPUT_PATH, encoding=CSV_ENCODING):
    """Walk ``start_dir``, report per-file counts and export LAN addresses.

    For every file found under ``start_dir`` one pipe-delimited line is
    printed: file name, row count, rows whose "To" value is exactly 80
    characters, rows whose "To" value is not, and whether any "To" value
    exceeds 80 characters.  After all files are read, the number of unique
    addresses is printed and the addresses are written to ``output_path``,
    sorted, one per line.

    Args:
        start_dir: Directory tree to search; every file in it is read as CSV.
        output_path: Text file that receives the sorted unique addresses.
        encoding: Text encoding used to read the CSV files.

    Returns:
        The sorted list of unique addresses that was written to
        ``output_path``.
    """
    lan_addresses = set()
    for root, _dirs, files in os.walk(start_dir):
        for fl in files:
            row_count, eighty_count, non_eighty_count, over_eighty = _scan_csv(
                os.path.join(root, fl), encoding, lan_addresses)
            print(f"{fl}|{row_count}|{eighty_count}|{non_eighty_count}|{over_eighty}")
    print(f"There are {len(lan_addresses)} unique LAN ID addresses.")

    sorted_addresses = sorted(lan_addresses)
    # The context manager closes the file even if a write fails.  Matched
    # addresses are pure ASCII, so the explicit encoding does not change the
    # bytes written.
    with open(output_path, 'w', encoding='utf-8') as output_file:
        output_file.writelines(f"{address}\n" for address in sorted_addresses)
    return sorted_addresses


if __name__ == '__main__':
    main()
55 nino.borges 797
56     ## Initially gathering some very basic information across the CSV files, not using csv lib
57     # for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
58     # for fl in files:
59     # contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
60     # print(f"{fl}|{len(contents)-1}")