ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/EmailDomainExtractor.py
Revision: 621
Committed: Fri Nov 4 15:00:05 2016 UTC (9 years, 4 months ago) by nino.borges
Content type: text/x-python
File size: 2711 byte(s)
Log Message:
a program for extracting email domains and outputting the unique values.

File Contents

# User Rev Content
1 nino.borges 621 """
2    
3     EmailDomainExtractor
4    
5     Created by
6     Emanuel Borges
7     11.14.2014
8    
9     This program will take a delimited file with fields containing email address data and will parse out the domains,
10     outputting a list of unique values.
11    
12     """
13    
14     import os
15    
class ExtractDomains():
    """Collect the unique email domains found in a pipe-delimited text file.

    Typical use: set ``delimFile`` (input path) and ``outputFile`` (output
    path), call ``ExtractDomains()`` to populate ``domainList``, then call
    ``WriteValuesToFile()`` to write the list out.

    Attributes:
        delimFile: path of the delimited input file; must exist.
        outputFile: path of the report file to write.
        domainMatrix: dict used as a set; keys are domains such as "@x.com".
        domainList: unique domains, in order of first appearance.
    """
    version = "0.1.0"

    def __init__(self):
        self.delimFile = ""
        self.outputFile = ""
        self.domainMatrix = {}
        self.domainList = []

    def TestValues(self):
        """An internal method to first test that delimFile and outputFile have been selected and are allowed.

        Returns:
            True when ``delimFile`` is set and exists on disk and
            ``outputFile`` is set; False otherwise.
        """
        # The input must be both selected and present on disk.
        if not self.delimFile or not os.path.exists(self.delimFile):
            return False
        # The output only has to be selected; it is created when written.
        if not self.outputFile:
            return False
        return True

    def ParseDomain(self, emailAddress):
        """Internal method that will parse 1 email address, returning only the domain.

        Args:
            emailAddress: a single address string.

        Returns:
            The text between the first and second "@" (or the end of the
            string), prefixed with "@"; None when there is no "@" at all.
        """
        if "@" not in emailAddress:
            return None
        return "@" + emailAddress.split("@")[1]

    def ExtractDomains(self):
        """Main method that goes through the file and does the parsing.

        Each line is split into fields on "|", and each field into addresses
        on ";". Every address containing "@" contributes its domain to
        ``domainMatrix``; ``domainList`` is then rebuilt from those keys.

        Returns:
            True when the input was processed, False when ``TestValues()``
            rejected the current settings.
        """
        if not self.TestValues():
            return False

        # "with" guarantees the input handle is closed even if parsing fails.
        with open(self.delimFile) as delimHandle:
            for line in delimHandle:
                # Drop the line terminator, including a stray "\r" from
                # Windows-style files, so it never ends up inside a domain.
                line = line.rstrip("\r\n")
                for field in line.split("|"):
                    # split() on a field without ";" yields the field itself.
                    for emailAddress in field.split(";"):
                        # Surrounding blanks would otherwise make the same
                        # domain show up as several "unique" values.
                        domain = self.ParseDomain(emailAddress.strip())
                        if domain:
                            self.domainMatrix[domain] = 1

        # Materialize as a real list so it behaves the same on Python 2 and 3.
        self.domainList = list(self.domainMatrix)
        return True

    def WriteValuesToFile(self):
        """This method unpacks the domainList to the outputFile.

        The output file is always created or truncated; the header line and
        the domains are written only when ``domainList`` is non-empty.

        Returns:
            True when the file was written, False when ``TestValues()``
            rejected the current settings.
        """
        if not self.TestValues():
            return False

        # "with" closes the output even if a write raises part-way through.
        with open(self.outputFile, 'w') as outputFile:
            if self.domainList:
                outputFile.write("List of Domains\n")
                for domain in self.domainList:
                    outputFile.write("%s\n" % domain)
        return True
87    
88    
89