ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/NTRS-TopSenderAnalysis.py
(Generate patch)

Comparing Python/NinoCode/Active_prgs/Redgrave/NTRS-TopSenderAnalysis.py (file contents):
Revision 809 by nino.borges, Thu Jan 4 21:03:18 2024 UTC vs.
Revision 810 by nino.borges, Fri Jan 5 19:24:53 2024 UTC

# Line 14 | Line 14 | To-Do: Method to QC for any parsing erro
14   import csv, os, re
15  
16   class TopSendersAnalyzer(object):
17 <    version = "0.06"
17 >    version = "0.07"
18  
19      def __init__(self):
20          self.startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20231227 - FileNetTopSenderAnalysis-Req"
# Line 40 | Line 40 | class TopSendersAnalyzer(object):
40          ##  False positive NTRS LAN ID matches, per specification provided to me.  Close but just outside of specification. (for analysis)
41          self.falsePositiveLanIdAddressesSet = set()
42          self.falsePositiveLanIdAddressesOutputFileName = r"C:\Users\eborges\Documents\Cases\Northern Trust\Extracted_FALSE-POSTIVE_LanAddresses.txt"
43 +
44 +        ##  Sender values, deduplicated within each CSV only, where the TO field either has no NTRS domain or has one that is not a LAN ID. Matrix of fileName:valuesSet.
45 +        self.senderEmailAddressesMatrix = {}
46 +        self.senderEmailAddressesFileName = r"C:\Users\eborges\Documents\Cases\Northern Trust\SendersListSpecial.txt"
47          
48          #self.lanIdRegExPattern = '[A-Za-z]{2}[0-9]{2,3}@NTRS.COM'
49          self.lanIdRegExPattern = '[A-Za-z]{1,15}[0-9]{1,3}@NTRS.COM'
# Line 56 | Line 60 | class TopSendersAnalyzer(object):
60          #self.allPossibleEmailAddressesRegExPattern = r"[\w\.-]+@[\w\.-]+\.\w+"
61          self.allPossibleEmailAddressesRegExPattern = r"[\w.+-]+@[\w-]+\.[\w.-]+"
62  
63 <    def AnalyzeTopSenders(self, writeTrueLanIDLogFile = False, writeFalsePositiveLanIDLogFile = False, writeAllPossibleEmailAddressesLogFile = False):
63 >    def AnalyzeTopSenders(self, writeTrueLanIDLogFile = False, writeFalsePositiveLanIDLogFile = False, writeAllPossibleEmailAddressesLogFile = False, writeSpecialSendersListLogFile = False):
64          """Main Method in this program"""
65          print("FileName|CSV Date|Count no Header|80 Char TO Row Count|# Rows with NO EMAIL ADDRESSES|non-80 Char TO Row Count|TO over 80 Char?|Unique Email Addresses Count|Average # of Recipients|# Rows No NTRS Domains|% No NTRS Domains|# Rows NTRS and LAN IDs|% NTRS and LAN IDs|# Rows NTRS No LAN IDs|% NTRS No LAN IDs|Scenario|Scenario Short Description")
66          for (root,dirs,files) in os.walk(self.startDir):
# Line 72 | Line 76 | class TopSendersAnalyzer(object):
76                  dateInPath = re.findall(self.dateInPathRegExPattern, root)
77                  allEmailAddressesInCSVSet = set()
78  
79 +                ##  This is the list of unique Sender Addresses, per CSV, ONLY IF THE EMAIL IS NOT TO A LANID.  Diana asked for this list.
80 +                ##  Since there is only a single value in sender, I'm grabbing the entire value and not just the email address.
81 +                senderEmailAddressesInCSVSet = set()
82 +
83                  ##  This is the full count of recipient addresses found in the CSV, not unique addresses
84                  toFieldAddressesInCSVCount = 0
85  
# Line 111 | Line 119 | class TopSendersAnalyzer(object):
119                              else:
120                                  ## Not 1 true LAN ID was found, using the True Test
121                                  ntrsNoLanBucketCount +=1
122 +                                ## Also since no LAN ID was found in the TO field, add to the unique senders list.
123 +                                senderValue = toValue = row['Sender']
124 +                                senderEmailAddressesInCSVSet.add(senderValue.upper())
125                          else:
126                              ## No ntrs addresses found at all,
127                              noNtrsDomainBucketCount +=1
128 +                            ## Also since no NTRS address was found in the TO field at all, add to the unique senders list.
129 +                            senderValue = toValue = row['Sender']
130 +                            senderEmailAddressesInCSVSet.add(senderValue.upper())
131                  scenario = self.CalculateScenario(ntrsAndLanBucketCount/fileRowCount, toFieldAddressesInCSVCount/fileRowCount)
132                  print(f"{fl}|{dateInPath}|{fileRowCount}|{eightyCharRowCount}|{noAddressesInToFieldCount}|{nonEightyCharRowCount}|{charsOverEighty}|{len(allEmailAddressesInCSVSet)}|{toFieldAddressesInCSVCount/fileRowCount}|{noNtrsDomainBucketCount}|{noNtrsDomainBucketCount/fileRowCount}|{ntrsAndLanBucketCount}|{ntrsAndLanBucketCount/fileRowCount}|{ntrsNoLanBucketCount}|{ntrsNoLanBucketCount/fileRowCount}|{scenario}|{self.scenarioDescriptionMatrix[scenario]}")
133                  csv_file.close()
134                  ##  Update the global all email addresses set, if they selected this option
135                  if writeAllPossibleEmailAddressesLogFile:
136                      self.allPossibleEmailAddressesSet.update(allEmailAddressesInCSVSet)
137 +
138 +                ##  Update the global special senders matrix, if they selected this option.
139 +                if writeSpecialSendersListLogFile:
140 +                    self.senderEmailAddressesMatrix[fl] = senderEmailAddressesInCSVSet
141 +                    
142          if writeTrueLanIDLogFile:
143              print("Writing the True LAN ID log file...")
144              self.WriteLogFile(self.trueLanIdAddressesSet, self.trueLanIdAddressesOutputFileName)
# Line 132 | Line 151 | class TopSendersAnalyzer(object):
151              print("Writing the All Possible Email Addresses Across All CSV Files log file...")
152              self.WriteLogFile(self.allPossibleEmailAddressesSet, self.allPossibleEmailAddressesOutputFileName)
153              print("Done.\n")
154 +        if writeSpecialSendersListLogFile:
155 +            print("Writing the deduplicated special senders data to log file....")
156 +            
157 +            
158 +            print("Done.\n")
159          
160  
161      def LanIDTrueTest(self, listOfIds):

Diff Legend

Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)