Python/learning/ImageExtractor.py

import sys, os, string

# Right now only GIF and JPEG images are handled.
# The first three lists are parallel: index i of each one describes
# the same signature.

# Image type recorded for each signature.
imgtypes = [
        'JPG',
        'GIF',
        'GIF',
]

# Signature text found near the beginning of an image file.
imgsigs = [
        'JFIF',
        'GIF87a',
        'GIF89a',
]

# Offset of each signature from the beginning of its image file.
imgsigoffs = [
        6,
        0,
        0,
]

# Marker list, filled by main() with (position, image type) tuples.
imgmarker = []

def _find_signatures(infile, chunksize=65536):
        """Return every signature occurrence in *infile* as sorted (offset, index) pairs.

        *offset* is the file position of the first byte of the signature and
        *index* is the position of the matching entry in ``imgsigs``.  The
        file is read in chunks of *chunksize* bytes from its current
        position; on return the file position is at end of file.
        """
        # The file is opened in binary mode, so compare against byte strings.
        # On Python 2 ``str`` already is ``bytes`` and nothing is converted.
        sigs = []
        for sig in imgsigs:
                if not isinstance(sig, bytes):
                        sig = sig.encode('ascii')
                sigs.append(sig)

        # Bytes carried over between chunks so that a signature straddling
        # a chunk boundary is still seen in one piece.
        overlap = max([len(sig) for sig in sigs] or [1]) - 1

        hits = []
        tail = b''
        base = 0        # file offset of the first byte of ``tail``
        while True:
                chunk = infile.read(chunksize)
                if not chunk:
                        break
                window = tail + chunk
                for idx, sig in enumerate(sigs):
                        at = window.find(sig)
                        while at != -1:
                                # A hit lying entirely inside the carried-over
                                # tail was already reported by the previous
                                # window; only accept hits that use new bytes.
                                if at + len(sig) > len(tail):
                                        hits.append((base + at, idx))
                                at = window.find(sig, at + 1)
                keep = min(overlap, len(window))
                tail = window[len(window) - keep:]
                base += len(window) - keep

        hits.sort()
        return hits


def _dump_images(infile):
        """Write one ``image<N>.<type>`` file per marker; return how many were written.

        Markers are taken from the module-level ``imgmarker`` list and are
        processed from last to first, so ``image1`` is the data nearest the
        end of the input file.  Output files are created in the current
        working directory.
        """
        written = 0
        seq = 0
        # A for loop always advances to the next marker, including after a
        # failed open (the old ``continue`` skipped the decrement and hung).
        for x in range(len(imgmarker) - 1, 0, -1):
                seq += 1

                # NOTE(review): as in the original code, each dump spans two
                # marker intervals (it starts at the marker *before* the
                # previous one); the original comment says single intervals
                # gave junk or low-resolution images.  The last image is
                # therefore never dumped on its own -- confirm this is
                # wanted.  The start index is clamped so that x == 1 no
                # longer wraps around to the end-of-file marker, which used
                # to produce an empty file.
                startidx = max(x - 2, 0)

                # The dumped data begins with the signature recorded at
                # ``startidx``, so that marker's type names the file.
                startpos, imgtype = imgmarker[startidx]
                endpos = imgmarker[x][0]

                infile.seek(startpos)
                # Never pass a negative size: read() would then slurp the
                # rest of the file.
                data = infile.read(max(endpos - startpos, 0))

                imgname = "image" + str(seq) + '.' + imgtype.lower()
                try:
                        ofile = open(imgname, 'wb')
                except (IOError, OSError):
                        print('Could not open file  %s  for writing...\n' % imgname)
                        continue

                try:
                        print('Dumping image file  %s ...\n' % imgname)
                        ofile.write(data)
                finally:
                        ofile.close()
                written += 1

        return written


def main():
        """Extract embedded GIF/JPEG images from the file named in ``sys.argv[1]``.

        The input file is scanned for the signatures listed in ``imgsigs``.
        Each hit is recorded in the module-level ``imgmarker`` list as an
        ``(image start position, image type)`` tuple, followed by a final
        ``(end-of-file position, last matched type)`` entry.  The marker
        list is printed and the image data is then written to
        ``image1.<type>``, ``image2.<type>``, ... in the current directory.

        Exits with status 1 when no file argument is given, 2 when the
        argument is not an existing file and 3 when it cannot be opened.

        The messages are printed through single-argument ``print(...)``
        calls so the function runs unchanged on Python 2 and Python 3.
        """
        if len(sys.argv) < 2:
                print('Usage: picdumper <file>\n')
                sys.exit(1)

        filename = os.path.abspath(sys.argv[1])

        if not os.path.isfile(filename):
                print('Error: No such file  %s' % filename)
                sys.exit(2)

        # open file in binary mode
        try:
                infile = open(filename, 'rb')
        except (IOError, OSError):
                print('Could not open file to read ! %s' % filename)
                sys.exit(3)

        try:
                # Start from a clean list so a second call does not inherit
                # markers from an earlier run (same list object is kept).
                del imgmarker[:]

                lastmatch = ""
                for sigpos, idx in _find_signatures(infile):
                        # The image starts ``imgsigoffs[idx]`` bytes before
                        # its signature; clamp so a signature found too close
                        # to the file start cannot yield a negative offset.
                        imgpos = max(0, sigpos - imgsigoffs[idx])
                        imgmarker.append((imgpos, imgtypes[idx]))
                        lastmatch = imgtypes[idx]

                # closing marker: end of file
                imgmarker.append((int(infile.tell()), lastmatch))

                print(imgmarker)

                imgcount = _dump_images(infile)
        finally:
                infile.close()

        print('Dumped  %d  images\n' % imgcount)
        
# Run the extractor only when this file is executed as a script.
if __name__ == '__main__':
        main()
Revision:	591
Committed:	Tue Nov 3 22:45:12 2015 UTC (10 years, 4 months ago) by nino.borges
Content type:	text/x-python
File size:	3083 byte(s)
Log Message:	Moved dir out of main dir.
#	Content
1	import sys, os, string
2
3	#write now write only GIF/JPEG
4
5	#image types
6	imgtypes=['JPG', 'GIF','GIF']
7
8	#signature at beginning of file
9	imgsigs=['JFIF', 'GIF87a', 'GIF89a']
10
11	#offset of signatures from
12	#file beginning
13	imgsigoffs=[6, 0, 0]
14
15	#our marker array
16	imgmarker=[]
17
18	def main():
19
20	if len(sys.argv) < 2:
21	print 'Usage: picdumper <file>\n'
22	sys.exit(1)
23
24	filename = os.path.abspath(sys.argv[1])
25
26	if not os.path.isfile(filename):
27	print 'Error: No such file ', filename
28	sys.exit(2)
29
30	#open file in binary mode
31	try:
32	infile = open(filename, 'rb')
33	#dont bother about specific exceptions
34	except:
35	print 'Could not open file to read !', filename
36	sys.exit(3)
37
38	if infile is None:
39	print 'Error opening file ', filename
40	sys.exit(3)
41
42	c = infile.read(1)
43
44	lastmatch=""
45	while c != '':
46
47	#look for image sig
48	for x in range(0, len(imgsigs)):
49
50	#find if c is first character of imgsig
51	sig=imgsigs[x]
52
53	if c == sig[0]:
54	#find if the rest of imgsig match
55	lentoread=len(sig) - 1
56	chunk=c + infile.read(lentoread)
57	#print chunk
58	#matches
59	if chunk==sig:
60	fpos=int(infile.tell())
61
62	#now we are at end of sig, for getting image
63	#pos we need to subtract length of sig and offset
64	sigpos=fpos - len(sig)
65	imgpos=sigpos - imgsigoffs[x]
66
67	#write position and image type to marker
68	imgmarker.append((imgpos, imgtypes[x]))
69	lastmatch=imgtypes[x]
70	else:
71	#bug, we need to reset file position
72	#to match other sigs correctly if this
73	#one does not.
74	currpos=int(infile.tell())
75	prevpos=currpos-lentoread
76	#seek to previous position
77	infile.seek(prevpos)
78
79
80	#read next char
81	c=infile.read(1)
82
83	posn=int(infile.tell())
84	imgmarker.append((posn, lastmatch))
85
86	print imgmarker
87	#write images
88
89	#rewind file
90	infile.seek(0)
91
92	imgcount=0
93
94	#most collections store image in reverse
95	#order that was appended
96	x=len(imgmarker)-1
97
98	while x>=1:
99
100	imgcount += 1
101	imginfo=imgmarker[x]
102
103	imgposn=imginfo[0]
104	imgtype=imginfo[1]
105
106	#this is the tricky part, to get the correct image
107	#we need the file posn before previous one!, that
108	#is we need to jump a position. Otherwise all images
109	#will be junk or of small resolution.
110	imginfoprev=imgmarker[x-2]
111	imgposnprev=imginfoprev[0]
112
113	#get length in chars
114	imglen= imgposn - imgposnprev
115
116	#seek to file position
117	infile.seek(imgposnprev)
118	#read so many chars
119	data=infile.read(imglen)
120
121	#create file name
122	imgname="image" + str(imgcount) + '.' + string.lower(imgtype)
123	try:
124	ofile=open(imgname, 'wb')
125	except:
126	print 'Could not open file ', imgname, ' for writing...\n'
127	continue
128
129	if ofile is None:
130	print 'Error while trying to create file ', imgname, '!\n'
131	continue
132	else:
133	print 'Dumping image file ', imgname, '...\n'
134	ofile.write(data)
135	ofile.close()
136
137	#previous marker
138	x-=1
139
140
141	print 'Dumped ', imgcount, ' images\n'
142
143	if __name__=="__main__":
144	main()