ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NTRS-TopSenderAnalysis.py
Revision: 800
Committed: Mon Sep 25 18:17:43 2023 UTC (2 years, 6 months ago) by nino.borges
Content type: text/x-python
File size: 4079 byte(s)
Log Message:
This version with a working TrueTest method which goes one step further than the RegEx.

File Contents

# Content
1 """
2
NTRS-TopSenderAnalysis

Created by:
Emanuel Borges
09.20.2023

Very simple program that reads multiple CSV files and exports a report based on LAN ID and other general information.
To-Do: Add a method to QC for parsing errors. There should be the same number of fields across all of the CSVs.
11
12 """
13
14 import csv, os, re
15
def LanIDTrueTest(listOfIds, maxAlphaChars=10):
    """Return True if at least one candidate in listOfIds passes the LAN ID test.

    A candidate passes when it contains no more than maxAlphaChars alphabetic
    characters. Every alphabetic character in the string is counted, including
    the letters of any domain suffix: "AB12@NTRS.COM" counts as 9 (2 for "AB"
    plus 7 for "NTRSCOM").

    Args:
        listOfIds: Iterable of candidate address strings.
        maxAlphaChars: Largest number of alphabetic characters a candidate may
            contain and still be accepted. Defaults to 10.

    Returns:
        True if at least one candidate is accepted; False otherwise, including
        when listOfIds is empty.

    Side effects:
        Every rejected candidate is printed to stdout.
    """
    lanIDTestResult = False
    for lanID in listOfIds:
        # Letter case has no effect on the count, so no case folding is done.
        alphaCount = sum(1 for ch in lanID if ch.isalpha())
        if alphaCount > maxAlphaChars:
            ## I'm too big to be a true LAN ID
            print(lanID)
        else:
            lanIDTestResult = True
    return lanIDTestResult
27
28
if __name__ == '__main__':
    # Walk startDir, read every file found as a CSV with a 'To' column, and
    # print one pipe-delimited summary line per file:
    #   file name | row count | rows whose 'To' is exactly 80 chars |
    #   rows whose 'To' is not 80 chars | whether any 'To' exceeds 80 chars |
    #   rows without @NTRS.COM | rows with @NTRS.COM and a true LAN ID |
    #   rows with @NTRS.COM but no true LAN ID
    startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"
    #startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req\_LocalVersion\2023-07-11 FileNet Messages Delete Project - Top sender analysis\documents\ERM_Notifications"
    allToLanAddressesSet = set()
    #regExPattern = '[A-Za-z]{2}[0-9]{2,3}@NTRS.COM'
    # Raw string with the dot escaped so that only a literal ".COM" matches;
    # an unescaped "." would match any character in that position.
    regExPattern = r'[A-Za-z]{1,15}[0-9]{1,3}@NTRS\.COM'
    # Compile once here rather than on every row.
    lanAddressRegEx = re.compile(regExPattern)

    for (root, dirs, files) in os.walk(startDir):
        for fl in files:
            fileRowCount = 0
            eightyCharRowCount = 0
            nonEightyCharRowCount = 0
            charsOverEighty = False
            noNtrsDomainBucketCount = 0
            ntrsAndLanBucketCount = 0
            ntrsNoLanBucketCount = 0

            # newline='' lets the csv module handle line endings itself, so
            # newlines embedded in quoted fields are parsed correctly.
            # NOTE(review): 'ANSI' is an alias of the Windows-only 'mbcs'
            # codec, so this script is expected to run on Windows - confirm.
            with open(os.path.join(root, fl), mode='r', encoding='ANSI', newline='') as csv_file:
                csv_reader = csv.DictReader(csv_file)
                for row in csv_reader:
                    fileRowCount += 1
                    # DictReader fills fields missing from a short row with
                    # None; treat that as an empty 'To' instead of crashing.
                    toValue = row['To'] or ''
                    toLength = len(toValue)
                    if toLength == 80:
                        eightyCharRowCount += 1
                    else:
                        nonEightyCharRowCount += 1
                        if toLength > 80:
                            charsOverEighty = True
                    toValue = toValue.upper()
                    if "@NTRS.COM" in toValue:
                        ## The domain was found. Apply next test.
                        ntrsLanAddressesList = lanAddressRegEx.findall(toValue)
                        ntrsLanIDTrueTestResult = LanIDTrueTest(ntrsLanAddressesList)
                        if ntrsLanIDTrueTestResult:
                            ## At least 1 LAN ID was found, using the True Test
                            ntrsAndLanBucketCount += 1
                            #for a in ntrsLanAddressesList:
                            #    allToLanAddressesSet.add(a)
                        else:
                            ## Not 1 true LAN ID was found, using the True Test
                            ntrsNoLanBucketCount += 1
                    else:
                        ## No ntrs addresses found at all,
                        noNtrsDomainBucketCount += 1
            # The with-block has already closed the file; no explicit close is needed.
            print(f"{fl}|{fileRowCount}|{eightyCharRowCount}|{nonEightyCharRowCount}|{charsOverEighty}|{noNtrsDomainBucketCount}|{ntrsAndLanBucketCount}|{ntrsNoLanBucketCount}")
    #print(f"There are {len(allToLanAddressesSet)} unique LAN ID addresses.")
    #outputFile = open(r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt",'w')
    #allToLanAddressesList = list(allToLanAddressesSet)
    #allToLanAddressesList.sort()
    #for i in allToLanAddressesList:
    #    outputFile.write(f"{i}\n")
    #outputFile.close()

## Initially gathering some very basic information across the CSV files, not using csv lib
# for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
#     for fl in files:
#         contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
#         print(f"{fl}|{len(contents)-1}")
89 # print(f"{fl}|{len(contents)-1}")