ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/comicGrab.py
Revision: 8
Committed: Sat May 5 04:21:19 2012 UTC (13 years, 10 months ago) by ninoborges
Content type: text/x-python
File size: 3103 byte(s)
Log Message:
Initial Import

File Contents

# User Rev Content
1 ninoborges 8 #!/usr/bin/env python
2    
3     import urllib, urllib2
4    
5    
def GrabComic(mainLink, keyWord, finalImageName, overRide='None', imgServerRef = 'None'):
    r"""Locate a comic image on a web page and save it under c:\test_dir.

    The page at ``mainLink`` is read line by line.  Every line containing
    ``keyWord`` is split on '<'; for each fragment that also contains
    ``keyWord`` the first double-quoted value in that fragment is taken as
    the image address, turned into a full link and downloaded to
    ``c:\test_dir\<finalImageName>``.

    Every matching tag is processed, so when several tags match, later
    downloads overwrite earlier ones (they share the same target file).

    Args:
        mainLink: URL of the page to scan.  Also used as the prefix for
            image addresses that are not absolute.
        keyWord: Text that identifies the tag holding the comic image.
        finalImageName: File name to write inside c:\test_dir.
        overRide: When set, this prefix is put in front of the quoted value
            (whether or not that value is absolute) instead of ``mainLink``.
            The string 'None' (the default) or a real ``None`` means unset.
        imgServerRef: When set, the image is fetched through
            ``hackImageServer`` with this value as the Referer header;
            otherwise ``urllib.urlretrieve`` is used.  The string 'None'
            (the default) or a real ``None`` means unset.

    Returns:
        None.  The result is the file written to disk.
    """
    # The string 'None' is this script's historical "not set" marker; a real
    # None is accepted too so it is never concatenated into a link by mistake.
    unset = (None, 'None')
    destPath = r'c:\test_dir\%s' % finalImageName

    siteNode = urllib.urlopen(mainLink)
    try:
        siteContents = siteNode.readlines()
        siteHeader = siteNode.info()
        for line in siteContents:
            if keyWord not in line:
                continue
            print('Found the line that contains the comic for %s' % mainLink)
            for tag in line.split('<'):
                if keyWord not in tag:
                    continue
                print('Found the exact tag that contains the comic for %s' % mainLink)
                parts = ('<' + tag).split('"')
                # A tag without any double-quoted value has no address to
                # extract; skip it instead of failing with IndexError.
                if len(parts) < 2:
                    print('No double-quoted value in the tag for %s, skipping it' % mainLink)
                    continue
                target = parts[1]
                if overRide not in unset:
                    link = overRide + target
                elif target.startswith(('http://', 'https://')):
                    # Only a real scheme prefix marks an absolute address; a
                    # relative path that merely contains "http" does not.
                    link = target
                else:
                    link = mainLink + target
                print('Here is the link Im going to try to copy %s' % link)
                if imgServerRef in unset:
                    urllib.urlretrieve(link, destPath)
                else:
                    # The image server wants a Referer, so fetch the bytes
                    # ourselves and write them out.
                    finalFile = hackImageServer(link, imgServerRef, siteHeader)
                    with open(destPath, 'wb') as out:
                        out.write(finalFile)
    finally:
        # Release the page connection even when a download above fails.
        siteNode.close()
37    
38    
def hackImageServer(link, imgServerRef, siteHeader):
    """Download ``link`` while presenting ``imgServerRef`` as the Referer.

    Args:
        link: URL of the image to download.
        imgServerRef: Value sent in the ``Referer`` request header.
        siteHeader: Header object exposing a ``dict`` attribute (its keys are
            iterated) and item access by header name.  Every header it holds
            is copied into the outgoing request.

    Returns:
        The raw body returned for ``link``.
    """
    # NOTE(review): GrabComic passes the headers *received* with the comic
    # page, so response headers are replayed as request headers here.  That
    # behaviour is kept as-is; confirm it is still what the image server needs.
    newHeader = dict((name, siteHeader[name]) for name in siteHeader.dict)
    newHeader['Referer'] = imgServerRef
    print(newHeader)
    # No request body is sent (data is None).
    req = urllib2.Request(link, None, newHeader)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
50    
51    
52    
if __name__ == '__main__':
    # Each entry is (positional arguments, keyword arguments) for one
    # GrabComic call; the strips are fetched in the order listed.
    comicJobs = [
        # Zits (image server needs a Referer)
        (('http://seattlepi.nwsource.com/fun/zits.asp', 'BORDER=0 SRC="http://pst.rbma.com/content', 'zits.gif'),
         {'imgServerRef': 'http://seattlepi.nwsource.com'}),
        # Dilbert
        (('http://www.dilbert.com', "Today's Comic", 'dilbert.gif'), {}),
        # FoxTrot
        (('http://www.foxtrot.com', 'width="600" height="189" border="0"', 'FoxTrot.gif'), {}),
        # Calvin and Hobbes
        (('http://www.gocomics.com/calvinandhobbes', 'width="600" height="190" border="0"', 'CalvinAndHobbs.gif'), {}),
        # Close to Home
        (('http://www.gocomics.com/closetohome', 'width="300" height="397" border="0"', 'CloseToHome.gif'), {}),
        # Garfield
        (('http://www.gocomics.com/garfield', 'width="600" height="177" border="0"', 'Garfield.gif'), {}),
        # Non Sequitur
        (('http://www.gocomics.com/nonsequitur', 'width="600" height="', 'NonSequitur.gif'), {}),
        # HelpDesk: no entry yet (original note: "Hmnnnnn....")
        # Shoe
        (('http://www.gocomics.com/shoe', 'width="600" height="', 'Shoe.gif'), {}),
        # Get Fuzzy
        (('http://www.comics.com/comics/getfuzzy', "Today's Comic", 'GetFuzzy.gif'),
         {'overRide': 'http://www.comics.com'}),
        # Snoopy
        (('http://www.snoopy.com', "Today's Strip", 'Snoopy.gif'), {}),
        # Frazz
        (('http://www.comics.com/comics/frazz', "Today's Comic", 'Frazz.gif'),
         {'overRide': 'http://www.comics.com'}),
    ]

    for jobArgs, jobOptions in comicJobs:
        GrabComic(*jobArgs, **jobOptions)