Active_prgs/Redgrave/NTRS-TopSenderAnalysis.py

"""

NTRS-TopSenderAnalysis

Created by:
Emanuel Borges
09.20.2023

Very simple program that will read multiple CSV files and export a report based on LanID and other general information.
To-Do: Add a method to QC for parsing errors.  There should be the same number of fields across all of the CSVs.

"""

import csv, os, re

class TopSendersAnalyzer(object):
    """Report on the recipients found in the 'To' column of a tree of CSV files.

    Every file found under ``startDir`` is read with ``csv.DictReader``.  For
    each file one pipe-delimited row is printed to stdout holding row counts,
    recipient counts, the share of rows falling into each NTRS / LAN ID bucket
    and the scenario code derived from those shares.  The LAN ID matches seen
    along the way are collected on the instance and can optionally be written
    to log files at the end of the run.
    """
    version = "0.06"

    ##  Encoding used to read the CSV files.  Python only registers the 'ANSI'
    ##  codec (an alias of 'mbcs') on Windows, so override this attribute on an
    ##  instance when the files have to be read on another platform.
    csvEncoding = "ANSI"

    def __init__(self, startDir = None):
        """Set up the patterns, the output locations and the result sets.

        startDir -- directory tree to scan for CSV files.  When omitted, the
                    path of the most recent top sender request is used.
        """
        if startDir is None:
            startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20231227 - FileNetTopSenderAnalysis-Req"
            ##  Earlier requests, kept for reference:
            #startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"
            #startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req\_LocalVersion\2023-08-14 FileNet Messages Delete Project - Top sender analysis\8_14_2023\xact_report_Dec_2014_CSV"
        self.startDir = startDir

        ##  Matrix containing the scenarios to scenario descriptions
        self.scenarioDescriptionMatrix = {"A or B":"Sends to few recipients; evaluate subjects re: single or multi-purpose", "C":">75% of messages are to LAN IDs", "D":"<33% of messages are to LAN IDs","Uncategorized":"Remainders"}

        ##  All possible email addresses across all CSV files
        self.allPossibleEmailAddressesSet = set()
        self.allPossibleEmailAddressesOutputFileName = r"C:\Users\eborges\Documents\Cases\Northern Trust\Extracted_AllEmailAddresses_All-CSVs.txt"

        ##  All email addresses with an @NTRS.COM domain. Currently unsupported.
        #self.allToNtrsAddressesSet = set()
        #self.allToNtrsAddressesOutputFileName = r""

        ##  All true NTRS LAN ID matches, per specification provided to me.
        self.trueLanIdAddressesSet = set()
        self.trueLanIdAddressesOutputFileName = r"C:\Users\eborges\Documents\Cases\Northern Trust\Extracted_TRUE_LanAddresses.txt"

        ##  False positive NTRS LAN ID matches, per specification provided to me.  Close but just outside of specification. (for analysis)
        self.falsePositiveLanIdAddressesSet = set()
        self.falsePositiveLanIdAddressesOutputFileName = r"C:\Users\eborges\Documents\Cases\Northern Trust\Extracted_FALSE-POSTIVE_LanAddresses.txt"

        ##  Candidate LAN ID match: letters, then 1-3 digits, then the NTRS domain.
        ##  NOTE(review): the '.' in the domain is not escaped, so it matches any
        ##  character.  Left as-is so results stay comparable with earlier runs.
        #self.lanIdRegExPattern = '[A-Za-z]{2}[0-9]{2,3}@NTRS.COM'
        self.lanIdRegExPattern = '[A-Za-z]{1,15}[0-9]{1,3}@NTRS.COM'

        ##  Simple match to pull out the date as recorded in the path
        self.dateInPathRegExPattern = '2023-[0-9]{2}-[0-9]{2}'

        ##  Match for pulling out all email addresses, regardless of domain.
        ##  (Several stricter patterns were tried before settling on this one.)
        self.allPossibleEmailAddressesRegExPattern = r"[\w.+-]+@[\w-]+\.[\w.-]+"

    def AnalyzeTopSenders(self, writeTrueLanIDLogFile = False, writeFalsePositiveLanIDLogFile = False, writeAllPossibleEmailAddressesLogFile = False):
        """Main Method in this program.

        Walks self.startDir, prints the report header and then one report row
        per file found.  Each of the three flags, when True, writes the
        matching accumulated address set to its log file once every file has
        been processed.

        Raises KeyError when a file has no 'To' column.
        """
        print("FileName|CSV Date|Count no Header|80 Char TO Row Count|# Rows with NO EMAIL ADDRESSES|non-80 Char TO Row Count|TO over 80 Char?|Unique Email Addresses Count|Average # of Recipients|# Rows No NTRS Domains|% No NTRS Domains|# Rows NTRS and LAN IDs|% NTRS and LAN IDs|# Rows NTRS No LAN IDs|% NTRS No LAN IDs|Scenario|Scenario Short Description")
        dateInPathRegEx = re.compile(self.dateInPathRegExPattern)
        for (root, _dirs, files) in os.walk(self.startDir):
            ##  The date comes from the folder, so it is the same for every file in it.
            dateInPath = dateInPathRegEx.findall(root)
            for fl in files:
                stats = self._AnalyzeCsvFile(os.path.join(root, fl))
                print(self._FormatReportRow(fl, dateInPath, stats))
                ##  Update the global all email addresses set, if they selected this option
                if writeAllPossibleEmailAddressesLogFile:
                    self.allPossibleEmailAddressesSet.update(stats["allEmailAddressesInCSVSet"])
        if writeTrueLanIDLogFile:
            print("Writing the True LAN ID log file...")
            self.WriteLogFile(self.trueLanIdAddressesSet, self.trueLanIdAddressesOutputFileName)
            print("Done.\n")
        if writeFalsePositiveLanIDLogFile:
            print("Writing the False-Positive LAN ID log file...")
            self.WriteLogFile(self.falsePositiveLanIdAddressesSet, self.falsePositiveLanIdAddressesOutputFileName)
            print("Done.\n")
        if writeAllPossibleEmailAddressesLogFile:
            print("Writing the All Possible Email Addresses Across All CSV Files log file...")
            self.WriteLogFile(self.allPossibleEmailAddressesSet, self.allPossibleEmailAddressesOutputFileName)
            print("Done.\n")

    def _AnalyzeCsvFile(self, csvPath):
        """Read one CSV file and return a dictionary holding its raw counts."""
        ##  Compile once per file rather than handing the pattern text to re on every row.
        allAddressesRegEx = re.compile(self.allPossibleEmailAddressesRegExPattern)
        lanIdRegEx = re.compile(self.lanIdRegExPattern)

        fileRowCount = 0
        noAddressesInToFieldCount = 0
        eightyCharRowCount = 0
        nonEightyCharRowCount = 0
        charsOverEighty = False
        noNtrsDomainBucketCount = 0
        ntrsAndLanBucketCount = 0
        ntrsNoLanBucketCount = 0
        allEmailAddressesInCSVSet = set()
        ##  This is the full count of recipient addresses found in the CSV, not unique addresses
        toFieldAddressesInCSVCount = 0

        with open(csvPath, mode='r', encoding=self.csvEncoding) as csv_file:
            for row in csv.DictReader(csv_file):
                fileRowCount += 1
                ##  DictReader fills the missing fields of a short row with None;
                ##  treat that as an empty TO value.  A file without a 'To'
                ##  column at all still raises KeyError here.
                rawToValue = row['To'] or ""
                toLength = len(rawToValue)
                if toLength == 80:
                    eightyCharRowCount += 1
                else:
                    nonEightyCharRowCount += 1
                if toLength > 80:
                    charsOverEighty = True
                ##  All matching below is done against the upper-cased value.
                toValue = rawToValue.upper()

                ##  Match and gather all possible email addresses, adding it to the per CSV set.
                allEmailAddresses = allAddressesRegEx.findall(toValue)
                allEmailAddressesInCSVSet.update(allEmailAddresses)

                ## If there are no email addresses in the TO field at all, increment that count. Else, add the number to the full count of email addresses for average calculation
                if allEmailAddresses:
                    toFieldAddressesInCSVCount += len(allEmailAddresses)
                else:
                    noAddressesInToFieldCount += 1

                ##  Perform the main logic tests
                if "NTRS.COM" not in toValue:
                    ## No ntrs addresses found at all
                    noNtrsDomainBucketCount += 1
                elif self.LanIDTrueTest(lanIdRegEx.findall(toValue)):
                    ## At least 1 LAN ID was found, using the True Test
                    ntrsAndLanBucketCount += 1
                else:
                    ## Not 1 true LAN ID was found, using the True Test
                    ntrsNoLanBucketCount += 1

        return {
            "fileRowCount": fileRowCount,
            "eightyCharRowCount": eightyCharRowCount,
            "noAddressesInToFieldCount": noAddressesInToFieldCount,
            "nonEightyCharRowCount": nonEightyCharRowCount,
            "charsOverEighty": charsOverEighty,
            "allEmailAddressesInCSVSet": allEmailAddressesInCSVSet,
            "toFieldAddressesInCSVCount": toFieldAddressesInCSVCount,
            "noNtrsDomainBucketCount": noNtrsDomainBucketCount,
            "ntrsAndLanBucketCount": ntrsAndLanBucketCount,
            "ntrsNoLanBucketCount": ntrsNoLanBucketCount,
        }

    def _FormatReportRow(self, fl, dateInPath, stats):
        """Turn the counts of one CSV file into its pipe-delimited report row."""
        fileRowCount = stats["fileRowCount"]
        if fileRowCount:
            averageRecipients = stats["toFieldAddressesInCSVCount"] / fileRowCount
            noNtrsShare = stats["noNtrsDomainBucketCount"] / fileRowCount
            ntrsAndLanShare = stats["ntrsAndLanBucketCount"] / fileRowCount
            ntrsNoLanShare = stats["ntrsNoLanBucketCount"] / fileRowCount
            scenario = self.CalculateScenario(ntrsAndLanShare, averageRecipients)
        else:
            ##  A file without data rows has nothing to divide by and nothing to
            ##  classify: report zero shares and leave it uncategorized rather
            ##  than failing with ZeroDivisionError.
            averageRecipients = noNtrsShare = ntrsAndLanShare = ntrsNoLanShare = 0.0
            scenario = "Uncategorized"
        uniqueAddressCount = len(stats["allEmailAddressesInCSVSet"])
        return f"{fl}|{dateInPath}|{fileRowCount}|{stats['eightyCharRowCount']}|{stats['noAddressesInToFieldCount']}|{stats['nonEightyCharRowCount']}|{stats['charsOverEighty']}|{uniqueAddressCount}|{averageRecipients}|{stats['noNtrsDomainBucketCount']}|{noNtrsShare}|{stats['ntrsAndLanBucketCount']}|{ntrsAndLanShare}|{stats['ntrsNoLanBucketCount']}|{ntrsNoLanShare}|{scenario}|{self.scenarioDescriptionMatrix[scenario]}"

    def LanIDTrueTest(self, listOfIds):
        """A need for a more complicated LAN ID test was needed. Returns True if at least 1 true LAN ID is found in the list.

        Every entry of listOfIds is also filed into either
        self.trueLanIdAddressesSet or self.falsePositiveLanIdAddressesSet.
        An empty list returns False.
        """
        lanIDTestResult = False
        for lanID in listOfIds:
            ##  NOTE(review): every letter of the match is counted, and a match
            ##  from self.lanIdRegExPattern includes the letters of the NTRS
            ##  domain, so the limit on the part before the '@' is well below
            ##  10 letters -- confirm against the LAN ID specification.
            alphaCount = sum(1 for character in lanID if character.isalpha())
            if alphaCount > 10:
                ## I'm too big to be a true LAN ID
                self.falsePositiveLanIdAddressesSet.add(lanID)
            else:
                self.trueLanIdAddressesSet.add(lanID)
                lanIDTestResult = True
        return lanIDTestResult

    def WriteLogFile(self, setOfValues, outputFilePath):
        """Takes a Set containing values, sorts these, and then writes them to the given outputFilePath, one value per line.

        An existing file is never overwritten.  When outputFilePath is taken,
        a counter is inserted before the extension (name1.ext, name2.ext, ...)
        until an unused name is found.
        """
        ##  Split once so that every candidate is built from the original name;
        ##  re-splitting the previous candidate would stack the counters up
        ##  (name1, name12, name123, ...).
        baseName, extension = os.path.splitext(outputFilePath)
        candidatePath = outputFilePath
        fileNameInc = 0
        while os.path.isfile(candidatePath):
            fileNameInc += 1
            candidatePath = baseName + str(fileNameInc) + extension
        ##  The with block closes the file even if a write fails.
        with open(candidatePath, 'w') as outFl:
            outFl.writelines(f"{i}\n" for i in sorted(setOfValues))

    def CalculateScenario(self,rawNumber, averageNumbRecip):
        """This method takes the raw number, which should be a decimal calculation of the percent, and returns the scenario code.

        rawNumber        -- share of rows with at least one true LAN ID, as a fraction (0.5 for 50%).
        averageNumbRecip -- average number of recipient addresses per row.

        The tests are applied in order and the first match wins; anything
        that matches none of them is "Uncategorized".
        """
        ##  NOTE(review): these cut-offs are not the same as the 75% / 33%
        ##  quoted in scenarioDescriptionMatrix -- confirm which is intended.
        if rawNumber > .76:
            return "C"
        if averageNumbRecip < 15 and rawNumber < .019:
            return "A or B"
        if .009 < rawNumber < .34:
            return "D"
        return "Uncategorized"

if __name__ == '__main__':
    ##  Script entry point: run the analysis and write all three optional log files.
    logFileOptions = {
        "writeTrueLanIDLogFile": True,
        "writeFalsePositiveLanIDLogFile": True,
        "writeAllPossibleEmailAddressesLogFile": True,
    }
    TopSendersAnalyzer().AnalyzeTopSenders(**logFileOptions)


    #print(f"There are {len(allToLanAddressesSet)} unique LAN ID addresses.")
    #outputFile = open(r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt",'w')
    #allToLanAddressesList = list(allToLanAddressesSet)
    #allToLanAddressesList.sort()
    #for i in allToLanAddressesList:
    #    outputFile.write(f"{i}\n")
    #outputFile.close()

    ## Initially gathering some very basic information across the CSV files, not using csv lib
#    for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
#        for fl in files:
#            contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
#            print(f"{fl}|{len(contents)-1}")
Revision:	809
Committed:	Thu Jan 4 21:03:18 2024 UTC (2 years, 2 months ago) by nino.borges
Content type:	text/x-python
File size:	11628 byte(s)
Log Message:	This was the final version that was used, which included the scenario calculations, and was sent to Tom and Diana. I then changed the start path to the second topsender request and re-ran this. This is why it points to the second top sender request.
#	Content
1	"""
2
3	NTRS-TopSenderAnalysis
4
5	Created by:
6	Emanuel Borges
7	09.20.2023
8
9	Very simple program that will read multiple CSV files and export a report based on LanID and other general information.
10	To-Do: Method to QC for any parsing errors. There should be the same number of fields across all of the CSVs.;
11
12	"""
13
14	import csv, os, re
15
16	class TopSendersAnalyzer(object):
17	version = "0.06"
18
19	def __init__(self):
20	self.startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20231227 - FileNetTopSenderAnalysis-Req"
21	#self.startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req"
22	#self.startDir = r"C:\Users\eborges\Documents\Cases\Northern Trust\20230919 - FileNetTopSenderAnalysis-Req\_LocalVersion\2023-08-14 FileNet Messages Delete Project - Top sender analysis\8_14_2023\xact_report_Dec_2014_CSV"
23
24
25	## Matrix containing the scenarios to scenario descriptions
26	self.scenarioDescriptionMatrix = {"A or B":"Sends to few recipients; evaluate subjects re: single or multi-purpose", "C":">75% of messages are to LAN IDs", "D":"<33% of messages are to LAN IDs","Uncategorized":"Remainders"}
27
28	## All possible email addresses across all CSV files
29	self.allPossibleEmailAddressesSet = set()
30	self.allPossibleEmailAddressesOutputFileName = r"C:\Users\eborges\Documents\Cases\Northern Trust\Extracted_AllEmailAddresses_All-CSVs.txt"
31
32	## All email addresses with an @NTRS.COM domain. Currently unsupported.
33	#self.allToNtrsAddressesSet = set()
34	#self.allToNtrsAddressesOutputFileName = r""
35
36	## All true NTRS LAN ID matches, per specification provided to me.
37	self.trueLanIdAddressesSet = set()
38	self.trueLanIdAddressesOutputFileName = r"C:\Users\eborges\Documents\Cases\Northern Trust\Extracted_TRUE_LanAddresses.txt"
39
40	## False positive NTRS LAN ID matches, per specification provided to me. Close but just outside of specification. (for analysis)
41	self.falsePositiveLanIdAddressesSet = set()
42	self.falsePositiveLanIdAddressesOutputFileName = r"C:\Users\eborges\Documents\Cases\Northern Trust\Extracted_FALSE-POSTIVE_LanAddresses.txt"
43
44	#self.lanIdRegExPattern = '[A-Za-z]{2}[0-9]{2,3}@NTRS.COM'
45	self.lanIdRegExPattern = '[A-Za-z]{1,15}[0-9]{1,3}@NTRS.COM'
46
47
48	## Simple match to pull out the date as recorded in the path
49	self.dateInPathRegExPattern = '2023-[0-9]{2}-[0-9]{2}'
50
51
52	## Match for pulling out all email addresses, regardless of domain.
53	#self.allPossibleEmailAddressesRegExPattern = '([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z\|a-z]{2,})+'
54	#self.allPossibleEmailAddressesRegExPattern = """(?:[a-z0-9!#$%&'+/=?^_`{\|}~-]+(?:\.[a-z0-9!#$%&'+/=^_`{\|}~-]+)\|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]\|\\[\x01-\x09\x0b\x0c\x0e-\x7f])")@(?:(?:[a-z0-9](?:[a-z0-9-][a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-][a-z0-9])?\|\[(?:(?:(2(5[0-5]\|[0-4][0-9])\|1[0-9][0-9]\|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]\|[0-4][0-9])\|1[0-9][0-9]\|[1-9]?[0-9])\|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]\|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])"""
55	#self.allPossibleEmailAddressesRegExPattern = r"([-!#-'+/-9=?A-Z^-~]+(\.[-!#-'+/-9=?A-Z^-~]+)\|\"([]!#-[^-~ \t]\|(\\[\t -~]))+\")@([-!#-'+/-9=?A-Z^-~]+(\.[-!#-'+/-9=?A-Z^-~]+)\|\[[\t -Z^-~]*])"
56	#self.allPossibleEmailAddressesRegExPattern = r"[\w\.-]+@[\w\.-]+\.\w+"
57	self.allPossibleEmailAddressesRegExPattern = r"[\w.+-]+@[\w-]+\.[\w.-]+"
58
59	def AnalyzeTopSenders(self, writeTrueLanIDLogFile = False, writeFalsePositiveLanIDLogFile = False, writeAllPossibleEmailAddressesLogFile = False):
60	"""Main Method in this program"""
61	print("FileName\|CSV Date\|Count no Header\|80 Char TO Row Count\|# Rows with NO EMAIL ADDRESSES\|non-80 Char TO Row Count\|TO over 80 Char?\|Unique Email Addresses Count\|Average # of Recipients\|# Rows No NTRS Domains\|% No NTRS Domains\|# Rows NTRS and LAN IDs\|% NTRS and LAN IDs\|# Rows NTRS No LAN IDs\|% NTRS No LAN IDs\|Scenario\|Scenario Short Description")
62	for (root,dirs,files) in os.walk(self.startDir):
63	for fl in files:
64	fileRowCount = 0
65	noAddressesInToFieldCount = 0
66	eightyCharRowCount = 0
67	nonEightyCharRowCount = 0
68	charsOverEighty = False
69	noNtrsDomainBucketCount = 0
70	ntrsAndLanBucketCount = 0
71	ntrsNoLanBucketCount = 0
72	dateInPath = re.findall(self.dateInPathRegExPattern, root)
73	allEmailAddressesInCSVSet = set()
74
75	## This is the full count of receipient addresses found in the CSV, not unique addresses
76	toFieldAddressesInCSVCount = 0
77
78	with open(os.path.join(root,fl),mode='r',encoding='ANSI') as csv_file:
79	csv_reader = csv.DictReader(csv_file)
80	for row in csv_reader:
81	fileRowCount += 1
82	if len(row['To']) == 80:
83	eightyCharRowCount +=1
84	else:
85	nonEightyCharRowCount +=1
86	if len(row['To']) > 80:
87	charsOverEighty = True
88	toValue = row['To']
89	toValue = toValue.upper()
90
91	## Match and gather all possible email addresses, adding it to the per CSV set.
92	allEmailAddresses = re.findall(self.allPossibleEmailAddressesRegExPattern, toValue)
93	for eAddress in allEmailAddresses:
94	allEmailAddressesInCSVSet.add(eAddress)
95
96	## If there are no email addresses in the TO field at all, increment that count. Else, add the number to the full count of email addresses for average calculation
97	if len(allEmailAddresses) == 0:
98	noAddressesInToFieldCount +=1
99	else:
100	toFieldAddressesInCSVCount += len(allEmailAddresses)
101	## Perform the main logic tests
102	if "NTRS.COM" in toValue:
103	## The domain was found. Apply next test.
104	ntrsLanAddressesList = re.findall(self.lanIdRegExPattern, toValue)
105	ntrsLanIDTrueTestResult = self.LanIDTrueTest(ntrsLanAddressesList)
106	if ntrsLanIDTrueTestResult:
107	## At least 1 LAN ID was found, using the True Test
108	ntrsAndLanBucketCount +=1
109	#for a in ntrsLanAddressesList:
110	# allToLanAddressesSet.add(a)
111	else:
112	## Not 1 true LAN ID was found, using the True Test
113	ntrsNoLanBucketCount +=1
114	else:
115	## No ntrs addresses found at all,
116	noNtrsDomainBucketCount +=1
117	scenario = self.CalculateScenario(ntrsAndLanBucketCount/fileRowCount, toFieldAddressesInCSVCount/fileRowCount)
118	print(f"{fl}\|{dateInPath}\|{fileRowCount}\|{eightyCharRowCount}\|{noAddressesInToFieldCount}\|{nonEightyCharRowCount}\|{charsOverEighty}\|{len(allEmailAddressesInCSVSet)}\|{toFieldAddressesInCSVCount/fileRowCount}\|{noNtrsDomainBucketCount}\|{noNtrsDomainBucketCount/fileRowCount}\|{ntrsAndLanBucketCount}\|{ntrsAndLanBucketCount/fileRowCount}\|{ntrsNoLanBucketCount}\|{ntrsNoLanBucketCount/fileRowCount}\|{scenario}\|{self.scenarioDescriptionMatrix[scenario]}")
119	csv_file.close()
120	## Update the global all email addresses set, if they selected this option
121	if writeAllPossibleEmailAddressesLogFile:
122	self.allPossibleEmailAddressesSet.update(allEmailAddressesInCSVSet)
123	if writeTrueLanIDLogFile:
124	print("Writing the True LAN ID log file...")
125	self.WriteLogFile(self.trueLanIdAddressesSet, self.trueLanIdAddressesOutputFileName)
126	print("Done.\n")
127	if writeFalsePositiveLanIDLogFile:
128	print("Writing the False-Positive LAN ID log file...")
129	self.WriteLogFile(self.falsePositiveLanIdAddressesSet, self.falsePositiveLanIdAddressesOutputFileName)
130	print("Done.\n")
131	if writeAllPossibleEmailAddressesLogFile:
132	print("Writing the All Possible Email Addresses Across All CSV Files log file...")
133	self.WriteLogFile(self.allPossibleEmailAddressesSet, self.allPossibleEmailAddressesOutputFileName)
134	print("Done.\n")
135
136
137	def LanIDTrueTest(self, listOfIds):
138	"""A need for a more complicated LAN ID test was needed. Returns True if at least 1 true LAN ID is found in the list."""
139	lanIDTestResult = False
140	for lanID in listOfIds:
141	alphaOnly = [x.lower() for x in lanID if x.isalpha()]
142	if len(alphaOnly) > 10:
143	## I'm too big to be a true LAN ID
144	self.falsePositiveLanIdAddressesSet.add(lanID)
145	else:
146	self.trueLanIdAddressesSet.add(lanID)
147	lanIDTestResult = True
148	return lanIDTestResult
149
150
151	def WriteLogFile(self, setOfValues, outputFilePath):
152	"""Takes a Set containing values, sorts these, and then writes them to the given outputFilePath)"""
153	fileNameInc = 0
154	while os.path.isfile(outputFilePath):
155	fileNameInc +=1
156	outputFile, extension = os.path.splitext(outputFilePath)
157	outputFilePath = outputFile + str(fileNameInc) + extension
158	outFl = open(outputFilePath,'w')
159	tempList = list(setOfValues)
160	tempList.sort()
161	for i in tempList:
162	outFl.write(f"{i}\n")
163	outFl.close()
164
165	def CalculateScenario(self,rawNumber, averageNumbRecip):
166	"""This method takes the raw number, which should be a decimal calculation of the percent, and returns the scenario code"""
167	scenario = "Uncategorized"
168	if rawNumber > .76:
169	scenario = "C"
170	elif averageNumbRecip < 15 and rawNumber < .019:
171	scenario = "A or B"
172	elif rawNumber > .009 and rawNumber < .34:
173	scenario = "D"
174
175	return scenario
176
177	if __name__ == '__main__':
178
179	tsa = TopSendersAnalyzer()
180	tsa.AnalyzeTopSenders(writeTrueLanIDLogFile = True, writeFalsePositiveLanIDLogFile = True, writeAllPossibleEmailAddressesLogFile = True)
181
182
183	#print(f"There are {len(allToLanAddressesSet)} unique LAN ID addresses.")
184	#outputFile = open(r"C:\Users\eborges\Documents\Cases\Northern Trust\ExtractedLanAddresses.txt",'w')
185	#allToLanAddressesList = list(allToLanAddressesSet)
186	#allToLanAddressesList.sort()
187	#for i in allToLanAddressesList:
188	# outputFile.write(f"{i}\n")
189	#outputFile.close()
190
191	## Initially gathering some very basic information across the CSV files, not using csv lib
192	# for (root,dirs,files) in os.walk(r"C:\Users\eborges\Documents\Cases\Northern Trust"):
193	# for fl in files:
194	# contents = open(os.path.join(root,fl), encoding='ANSI').readlines()
195	# print(f"{fl}\|{len(contents)-1}")