ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/EmailDomainExtractor.py
Revision: 621
Committed: Fri Nov 4 15:00:05 2016 UTC (9 years, 4 months ago) by nino.borges
Content type: text/x-python
File size: 2711 byte(s)
Log Message:
a program for extracting email domains and outputting the unique values.

File Contents

# Content
1 """
2
3 EmailDomainExtractor
4
5 Created by
6 Emanuel Borges
7 11.14.2014
8
9 This program will take a delimited file with fields containing email address data and will parse out the domains,
10 outputting a list of unique values.
11
12 """
13
14 import os
15
class ExtractDomains():
    """Collect the unique email domains found in a pipe-delimited text file.

    Typical use: set ``delimFile`` and ``outputFile``, call
    ``ExtractDomains()`` to populate ``domainList``, then call
    ``WriteValuesToFile()`` to write the list out.

    Attributes:
        delimFile: Path of the pipe-delimited input file to read.
        outputFile: Path of the text file the domain list is written to.
        domainMatrix: Dict keyed by domain; used only to de-duplicate.
        domainList: Unique domains gathered by the last ExtractDomains() call.
    """
    version = "0.1.0"

    def __init__(self):
        self.delimFile = ""
        self.outputFile = ""
        self.domainMatrix = {}
        self.domainList = []

    def TestValues(self):
        """Check that delimFile and outputFile have been set and are usable.

        Returns:
            True when delimFile is non-empty and exists on disk and
            outputFile is non-empty; False otherwise.
        """
        # The input file has to be named and has to exist already.
        if not self.delimFile or not os.path.exists(self.delimFile):
            return False
        # The output file only has to be named; it is created on write.
        if not self.outputFile:
            return False
        return True

    def ParseDomain(self, emailAddress):
        """Parse one email address and return only its domain.

        Args:
            emailAddress: A single address string such as "bob@example.com".

        Returns:
            The text between the first and second "@" prefixed with "@"
            (for example "@example.com"), or None when the value contains
            no "@" at all.
        """
        if "@" not in emailAddress:
            return None
        return "@" + emailAddress.split("@")[1]

    def ExtractDomains(self):
        """Read delimFile and gather every unique domain into domainList.

        Each line is split into fields on "|", and each field is split into
        individual addresses on ";". Values without an "@" are ignored.

        Returns:
            True when the file was processed, False when TestValues() failed
            (in which case nothing is read and domainList is left untouched).
        """
        if not self.TestValues():
            return False

        with open(self.delimFile) as delimHandle:
            for line in delimHandle:
                # Drop the line terminator, including the "\r" of CRLF files,
                # so it cannot end up as part of the last domain on the line.
                line = line.rstrip("\r\n")
                for field in line.split("|"):
                    # split() yields [field] unchanged when there is no ";".
                    for emailAddress in field.split(";"):
                        domain = self.ParseDomain(emailAddress)
                        if domain:
                            self.domainMatrix[domain] = 1

        # list() gives a real list on both Python 2 and Python 3.
        self.domainList = list(self.domainMatrix)
        return True

    def WriteValuesToFile(self):
        """Write the contents of domainList to outputFile, one per line.

        A "List of Domains" header line is written first. When domainList is
        empty the output file is still created (or truncated) but left empty.

        Returns:
            True when the file was written, False when TestValues() failed.
        """
        if not self.TestValues():
            return False

        with open(self.outputFile, 'w') as outputHandle:
            if self.domainList:
                outputHandle.write("List of Domains\n")
                for domain in self.domainList:
                    outputHandle.write("%s\n" % domain)
        return True
87
88
89