ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NTRS-TopSenderAnalysis.py
Revision: 799
Committed: Mon Sep 25 15:44:23 2023 UTC (2 years, 6 months ago) by nino.borges
Content type: text/x-python
File size: 3060 byte(s)
Log Message:
Version of the program from before I found many other examples that could also be LAN IDs; I plan on adding extra verification.

File Contents

# User Rev Content
1 nino.borges 797 """
2    
3     NTRS-TopSenderAnalysis
4    
5     Created by:
6     Emanuel Borges
7     09.20.2023
8    
9     Very simple program that will read multiple CSV files and export a report based on LanID and other general information.
10 nino.borges 798 To-Do: Add a method to QC for any parsing errors. There should be the same number of fields across all of the CSVs.
11 nino.borges 797
12     """
13    
14 nino.borges 798 import csv, os, re
15 nino.borges 797
# Directory tree that is walked for input CSV files.
START_DIR = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"

# Text file that receives the sorted, de-duplicated LAN ID addresses.
OUTPUT_PATH = r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt"

# Domain marker searched for in the upper-cased "To" value.
NTRS_DOMAIN = "@NTRS.COM"

# LAN-ID style address: 1-15 letters, then 1-3 digits, then the NTRS domain.
# The dot is escaped so that it only matches a literal "." (unescaped, it
# matched any character, e.g. "AB12@NTRSXCOM").
# An earlier, narrower variant of this pattern was [A-Za-z]{2}[0-9]{2,3}.
LAN_ADDRESS_RE = re.compile(r'[A-Za-z]{1,15}[0-9]{1,3}@NTRS\.COM')


def extract_lan_addresses(to_value):
    """Return all LAN-ID style @NTRS.COM addresses found in *to_value*.

    The value is upper-cased before matching, so the returned addresses are
    upper-case. Matches are returned in order of appearance and may repeat.
    """
    return LAN_ADDRESS_RE.findall(to_value.upper())


def analyze_to_values(to_values):
    """Tally per-file statistics over an iterable of "To" field values.

    A value of None (which csv.DictReader yields for a row that is shorter
    than the header) is treated as an empty string instead of raising.

    Returns a ``(stats, lan_addresses)`` tuple where *stats* is a dict with:
        rows             - number of values seen
        eighty           - values that are exactly 80 characters long
        non_eighty       - values that are not exactly 80 characters long
        over_eighty      - True if any value is longer than 80 characters
        no_domain        - values that do not contain @NTRS.COM
        domain_and_lan   - values with @NTRS.COM and at least one LAN ID match
        domain_no_lan    - values with @NTRS.COM but no LAN ID match
    and *lan_addresses* is the set of upper-cased LAN ID addresses found.
    """
    stats = {
        "rows": 0,
        "eighty": 0,
        "non_eighty": 0,
        "over_eighty": False,
        "no_domain": 0,
        "domain_and_lan": 0,
        "domain_no_lan": 0,
    }
    lan_addresses = set()

    for raw_value in to_values:
        value = raw_value or ""
        stats["rows"] += 1

        length = len(value)
        if length == 80:
            stats["eighty"] += 1
        else:
            stats["non_eighty"] += 1
            if length > 80:
                stats["over_eighty"] = True

        upper_value = value.upper()
        if NTRS_DOMAIN not in upper_value:
            # No NTRS address in this value at all.
            stats["no_domain"] += 1
            continue

        found = extract_lan_addresses(upper_value)
        if found:
            stats["domain_and_lan"] += 1
            lan_addresses.update(found)
        else:
            # NTRS address(es) present, but none that looks like a LAN ID.
            stats["domain_no_lan"] += 1

    return stats, lan_addresses


def main(start_dir=START_DIR, output_path=OUTPUT_PATH):
    """Print one pipe-delimited summary line per file and export LAN IDs.

    Every file under *start_dir* is read as a CSV with a "To" column
    (a missing "To" column raises KeyError). After all files are processed
    the unique LAN ID addresses are written, sorted, one per line, to
    *output_path*.
    """
    all_lan_addresses = set()

    for root, _dirs, files in os.walk(start_dir):
        for fl in files:
            # NOTE: 'ANSI' is an alias of the Windows-only 'mbcs' codec, so
            # this script has to run on Windows.
            # newline='' lets the csv module handle line endings itself, so
            # quoted fields with embedded newlines keep their real length.
            with open(os.path.join(root, fl), mode='r', encoding='ANSI', newline='') as csv_file:
                to_values = (row['To'] for row in csv.DictReader(csv_file))
                stats, lan_addresses = analyze_to_values(to_values)

            all_lan_addresses.update(lan_addresses)
            print(
                f"{fl}|{stats['rows']}|{stats['eighty']}|{stats['non_eighty']}"
                f"|{stats['over_eighty']}|{stats['no_domain']}"
                f"|{stats['domain_and_lan']}|{stats['domain_no_lan']}"
            )

    print(f"There are {len(all_lan_addresses)} unique LAN ID addresses.")
    with open(output_path, 'w') as output_file:
        for address in sorted(all_lan_addresses):
            output_file.write(f"{address}\n")


if __name__ == '__main__':
    main()
66 nino.borges 797
67     ## Initially gathering some very basic information across the CSV files, not using csv lib
68     # for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
69     # for fl in files:
70     # contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
71     # print(f"{fl}|{len(contents)-1}")