| 1 |
ninoborges |
8 |
#!/usr/bin/env python
|
| 2 |
|
|
|
| 3 |
|
|
import urllib, urllib2
|
| 4 |
|
|
|
| 5 |
|
|
|
| 6 |
|
|
def _extractComicLink(tag, mainLink, overRide='None'):
    """Return the image URL held in one HTML tag fragment, or None.

    The URL is taken to be the first double-quoted value in the tag.

    tag      -- tag text without its leading '<' (one piece of line.split('<')).
    mainLink -- page URL; prefixed to the value when it does not contain 'http'.
    overRide -- when given (anything other than None / 'None'), this prefix is
                always used instead, even if the value already contains 'http'.

    Returns None when the tag has no double-quoted value at all.
    """
    parts = ('<' + tag).split('"')
    if len(parts) < 2:
        # No quoted attribute: indexing parts[1] would raise IndexError.
        return None
    target = parts[1]
    if overRide not in (None, 'None'):
        return overRide + target
    if 'http' in target:
        return target
    return mainLink + target


def GrabComic(mainLink, keyWord, finalImageName, overRide='None', imgServerRef='None'):
    """Download the comic image referenced on a web page.

    The page at mainLink is fetched and scanned line by line.  For every tag
    that contains keyWord, the first double-quoted value of that tag is used
    as the image address and the image is saved as
    c:\\test_dir\\<finalImageName>.  Every matching tag triggers a download,
    so a later match overwrites an earlier one.

    mainLink       -- URL of the page that embeds the comic.
    keyWord        -- text identifying the tag that holds the image.
    finalImageName -- file name to save the image under.
    overRide       -- optional prefix that replaces mainLink when building the
                      image URL.  The string 'None' (the default) or None
                      means "not set".
    imgServerRef   -- optional Referer value.  When set, the image is fetched
                      through hackImageServer() instead of urllib.urlretrieve.
                      The string 'None' (the default) or None means "not set".

    Uses the Python 2 urllib API (urllib.urlopen / urllib.urlretrieve).
    """
    siteNode = urllib.urlopen(mainLink)
    try:
        siteContents = siteNode.readlines()
        siteHeader = siteNode.info()
    finally:
        # Release the connection even when reading the page fails.
        siteNode.close()

    destination = r'c:\test_dir\%s' % finalImageName

    for line in siteContents:
        if keyWord not in line:
            continue
        print('Found the line that contains the comic for %s' % mainLink)
        for tag in line.split('<'):
            if keyWord not in tag:
                continue
            print('Found the exact tag that contains the comic for %s' % mainLink)
            link = _extractComicLink(tag, mainLink, overRide)
            if link is None:
                # Skip the tag instead of aborting the whole run.
                print('Skipping a matching tag without a quoted value for %s' % mainLink)
                continue
            print('Here is the link Im going to try to copy %s' % link)
            if imgServerRef in (None, 'None'):
                urllib.urlretrieve(link, destination)
            else:
                # The image server wants a Referer, so fetch the bytes by hand.
                finalFile = hackImageServer(link, imgServerRef, siteHeader)
                out = open(destination, 'wb')
                try:
                    out.write(finalFile)
                finally:
                    out.close()
|
| 37 |
|
|
|
| 38 |
|
|
|
| 39 |
|
|
def hackImageServer(link, imgServerRef, siteHeader):
    """Fetch link with a forged Referer header and return the response body.

    link         -- URL of the image to download.
    imgServerRef -- value sent as the 'Referer' request header.
    siteHeader   -- header object exposing a `.dict` mapping and lookup by
                    key; GrabComic passes the result of urlopen(...).info().
                    All of its entries are copied into the request headers.

    Returns the raw data read from the response.

    Uses the Python 2 urllib2 API.
    """
    # NOTE(review): siteHeader appears to hold the *response* headers of the
    # comic page, which are replayed here as request headers -- confirm that
    # the image server really needs anything beyond Referer.
    newHeader = dict((k, siteHeader[k]) for k in siteHeader.dict)
    newHeader['Referer'] = imgServerRef
    print(newHeader)

    # Second argument is the POST body; None keeps this a plain GET.
    req = urllib2.Request(link, None, newHeader)
    u = urllib2.urlopen(req)
    try:
        return u.read()
    finally:
        # Close the response even when reading fails.
        u.close()
|
| 50 |
|
|
|
| 51 |
|
|
|
| 52 |
|
|
|
| 53 |
|
|
if __name__ == '__main__':
    # One entry per comic: (positional arguments, keyword arguments) for a
    # single GrabComic call.  Entries are processed in the order listed.
    comics = [
        # Zits
        (('http://seattlepi.nwsource.com/fun/zits.asp',
          'BORDER=0 SRC="http://pst.rbma.com/content',
          'zits.gif'),
         {'imgServerRef': 'http://seattlepi.nwsource.com'}),

        # Dilbert
        (('http://www.dilbert.com', "Today's Comic", 'dilbert.gif'), {}),

        # FoxTrot
        (('http://www.foxtrot.com',
          'width="600" height="189" border="0"',
          'FoxTrot.gif'),
         {}),

        # Calvin and Hobbes
        (('http://www.gocomics.com/calvinandhobbes',
          'width="600" height="190" border="0"',
          'CalvinAndHobbs.gif'),
         {}),

        # Close to Home
        (('http://www.gocomics.com/closetohome',
          'width="300" height="397" border="0"',
          'CloseToHome.gif'),
         {}),

        # Garfield
        (('http://www.gocomics.com/garfield',
          'width="600" height="177" border="0"',
          'Garfield.gif'),
         {}),

        # NonSequitur
        (('http://www.gocomics.com/nonsequitur',
          'width="600" height="',
          'NonSequitur.gif'),
         {}),

        # HelpDesk -- no entry yet; still to be worked out.

        # Shoe
        (('http://www.gocomics.com/shoe',
          'width="600" height="',
          'Shoe.gif'),
         {}),

        # GetFuzzy
        (('http://www.comics.com/comics/getfuzzy',
          "Today's Comic",
          'GetFuzzy.gif'),
         {'overRide': 'http://www.comics.com'}),

        # Snoopy
        (('http://www.snoopy.com', "Today's Strip", 'Snoopy.gif'), {}),

        # Frazz
        (('http://www.comics.com/comics/frazz',
          "Today's Comic",
          'Frazz.gif'),
         {'overRide': 'http://www.comics.com'}),
    ]

    for comicArgs, comicOptions in comics:
        GrabComic(*comicArgs, **comicOptions)