ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NTRS-TopSenderAnalysis.py
Revision: 800
Committed: Mon Sep 25 18:17:43 2023 UTC (2 years, 6 months ago) by nino.borges
Content type: text/x-python
File size: 4079 byte(s)
Log Message:
This version with a working TrueTest method which goes one step further than the RegEx.

File Contents

# User Rev Content
1 nino.borges 797 """
2    
3     NTRS-TopSenderAnalysis
4    
5     Created by:
6     Emanuel Borges
7     09.20.2023
8    
9     Very simple program that will read multiple CSV files and export a report based on LanID and other general information.
10 nino.borges 798 To-Do: Add a method to QC for parsing errors; there should be the same number of fields across all of the CSVs.
11 nino.borges 797
12     """
13    
14 nino.borges 798 import csv, os, re
15 nino.borges 797
def LanIDTrueTest(listOfIds, maxAlphaChars=10):
    """Return True if at least one entry in listOfIds passes the LAN ID length test.

    An entry passes when it contains at most maxAlphaChars alphabetic
    characters.  Every letter in the entry is counted, so when the entry is a
    full address such as 'AB12@NTRS.COM' the letters of the domain part count
    toward the limit as well.

    Entries that fail the test are printed to stdout so they can be reviewed.
    The whole list is always scanned (no early exit) so that every failing
    entry gets reported.

    listOfIds     -- iterable of strings to test.
    maxAlphaChars -- largest number of letters an entry may contain and still
                     be treated as a true LAN ID (default 10).

    Returns False for an empty list.
    """
    lanIDTestResult = False
    for lanID in listOfIds:
        alphaCount = sum(1 for ch in lanID if ch.isalpha())
        if alphaCount > maxAlphaChars:
            ## I'm too big to be a true LAN ID
            print(lanID)
        else:
            lanIDTestResult = True
    return lanIDTestResult
27    
28    
if __name__ == '__main__':
    ## Walks every file under startDir, reads each one as a CSV that has a 'To'
    ## column, and prints one pipe-delimited summary line per file:
    ##   file name | row count | rows where len(To) == 80 | rows where len(To) != 80 |
    ##   True if any To is longer than 80 | rows without @NTRS.COM |
    ##   rows with at least 1 true LAN ID | rows with @NTRS.COM but no true LAN ID
    startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"
    #startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req\_LocalVersion\2023-07-11 FileNet Messages Delete Project - Top sender analysis\documents\ERM_Notifications"
    allToLanAddressesSet = set()
    #regExPattern = '[A-Za-z]{2}[0-9]{2,3}@NTRS.COM'
    ## Raw string with the dot escaped so that only a literal "@NTRS.COM" matches
    ## (an unescaped '.' would match any character).  Compiled once, outside the loops.
    regExPattern = re.compile(r'[A-Za-z]{1,15}[0-9]{1,3}@NTRS\.COM')

    for (root, _dirs, files) in os.walk(startDir):
        for fl in files:
            ## Per-file counters; reset for every file.
            fileRowCount = 0
            eightyCharRowCount = 0
            nonEightyCharRowCount = 0
            charsOverEighty = False
            noNtrsDomainBucketCount = 0
            ntrsAndLanBucketCount = 0
            ntrsNoLanBucketCount = 0

            ## newline='' is what the csv module asks for so that newlines embedded in
            ## quoted fields are handed to the parser untouched (keeps len(To) accurate).
            ## NOTE(review): 'ANSI' is believed to be a Windows-only alias of the mbcs codec - confirm before running elsewhere.
            with open(os.path.join(root, fl), mode='r', encoding='ANSI', newline='') as csv_file:
                csv_reader = csv.DictReader(csv_file)
                for row in csv_reader:
                    fileRowCount += 1
                    rawToValue = row['To']
                    toLength = len(rawToValue)
                    if toLength == 80:
                        eightyCharRowCount += 1
                    else:
                        nonEightyCharRowCount += 1
                        if toLength > 80:
                            charsOverEighty = True
                    ## Upper-case once so both the domain check and the regex are case-insensitive.
                    toValue = rawToValue.upper()
                    if "@NTRS.COM" in toValue:
                        ## The domain was found. Apply next test.
                        ntrsLanAddressesList = regExPattern.findall(toValue)
                        ntrsLanIDTrueTestResult = LanIDTrueTest(ntrsLanAddressesList)
                        if ntrsLanIDTrueTestResult:
                            ## At least 1 LAN ID was found, using the True Test
                            ntrsAndLanBucketCount += 1
                            #for a in ntrsLanAddressesList:
                            #    allToLanAddressesSet.add(a)
                        else:
                            ## Not 1 true LAN ID was found, using the True Test
                            ntrsNoLanBucketCount += 1
                    else:
                        ## No ntrs addresses found at all,
                        noNtrsDomainBucketCount += 1
            ## The with-block has already closed the file; no explicit close() is needed.
            print(f"{fl}|{fileRowCount}|{eightyCharRowCount}|{nonEightyCharRowCount}|{charsOverEighty}|{noNtrsDomainBucketCount}|{ntrsAndLanBucketCount}|{ntrsNoLanBucketCount}")
77 nino.borges 800 #print(f"There are {len(allToLanAddressesSet)} unique LAN ID addresses.")
78     #outputFile = open(r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt",'w')
79     #allToLanAddressesList = list(allToLanAddressesSet)
80     #allToLanAddressesList.sort()
81     #for i in allToLanAddressesList:
82     # outputFile.write(f"{i}\n")
83     #outputFile.close()
84 nino.borges 797
85     ## Initially gathering some very basic information across the CSV files, not using csv lib
86     # for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
87     # for fl in files:
88     # contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
89     # print(f"{fl}|{len(contents)-1}")