ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Evidox/LoadFilesMissing.py
Revision: 675
Committed: Thu Mar 26 14:37:00 2020 UTC (6 years ago) by nino.borges
Content type: text/x-python
File size: 5223 byte(s)
Log Message:
Added some error checking at the end to make sure all found images are accounted for in the LFP.

File Contents

# User Rev Content
1 nino.borges 674 """
2    
3     LoadFilesMissing
4    
5     Created by
6     Emanuel Borges
7     2020.03.25
8    
9     A simple program that will attempt to make load files for an incoming prod where they only gave us text files and images.
10     This makes lots of assumptions:
11     - Images are Tiff or JPG
12     - Images are single page
13     - Text is multipage
14     - There is a text file for every document, and there are no images that are not represented in the text files
15     - everything is named for the bates
16    
17     """
18    
19     import os
20     import BatesRangeFunctions
21    
def LfpWriteLine(openFileObj, bates, filePath, firstPage = True):
    """
    Write a single image ("IM") record to an open LFP load file.

    openFileObj -- writable file-like object that receives the record.
    bates       -- bates number written as the record's image key.
    filePath    -- path of the image file; its extension selects docType.
    firstPage   -- True when this page starts a document, in which case the
                   record carries the "D" marker; otherwise that field is
                   left empty.
    """
    ## JPEG images get docType "4"; every other extension gets "2".
    ## The comparison is case-insensitive and accepts both .JPG and .JPEG so
    ## a four-letter JPEG extension is not written with the non-JPEG code.
    extension = os.path.splitext(filePath)[1].upper()
    if extension in (".JPG", ".JPEG"):
        docType = "4"
    else:
        docType = "2"

    if firstPage:
        firstPageMark = "D"
    else:
        firstPageMark = ""

    openFileObj.write("IM,%s,%s,0,@;%s;%s\n"% (bates, firstPageMark, filePath, docType))
34    
def _WriteDocument(datFileObj, lfpFileObj, pages, textFilePath, imageFileMatrix):
    """
    Write one document: a DAT row plus one LFP record per page.

    datFileObj      -- open DAT file; receives "begno|endno|textPath".
    lfpFileObj      -- open LFP file; receives one record per page.
    pages           -- sequence of bates numbers that make up the document.
    textFilePath    -- path of the document's text file.
    imageFileMatrix -- dict mapping bates number to image file path.

    Raises KeyError if a page has no entry in imageFileMatrix, and
    IndexError if pages is empty.
    """
    datFileObj.write("%s|%s|%s\n"%(pages[0], pages[-1], textFilePath))
    for pageIndex, page in enumerate(pages):
        ## only the first page of the document carries the document marker
        LfpWriteLine(lfpFileObj, page, imageFileMatrix[page], firstPage = (pageIndex == 0))


if __name__ == '__main__':
    textFilesStartDir = r"\\sas40\sas40\33444\Inbound\2\092779\PNC - Response to Subpoena\PNC Subpoena Response (rec'd 9.27.19)\TEXT"
    imageFilesStartDir = r"\\sas40\sas40\33444\Inbound\2\092779\PNC - Response to Subpoena\PNC Subpoena Response (rec'd 9.27.19)\IMAGES"
    volumeName = "PNC003"

    outputDirectory = r"C:\Temp"

    approvedImageExtensions = ['.TIFF','.JPG','.TIF']
    textFileMatrix = {}
    imageFileMatrix = {}

    ## scan the text files folder and make a matrix of text files
    ## (keyed by file name without extension, which is taken as the bates)
    for root, dirs, files in os.walk(textFilesStartDir):
        for f in files:
            baseName, extension = os.path.splitext(f)
            if extension.upper() == ".TXT":
                textFileMatrix[baseName] = os.path.join(root,f)

    ## scan the images folder and make a matrix of image files
    for root, dirs, files in os.walk(imageFilesStartDir):
        for f in files:
            baseName, extension = os.path.splitext(f)
            if extension.upper() in approvedImageExtensions:
                imageFileMatrix[baseName] = os.path.join(root,f)

    ## sorted() works on both Python 2 and 3; keys().sort() is Python 2 only
    batesList = sorted(textFileMatrix)
    imageList = sorted(imageFileMatrix)

    ## stop before creating any output when there is nothing to build from,
    ## rather than failing later on an unrelated lookup
    if not batesList:
        raise SystemExit("No text files found under %s"% textFilesStartDir)
    if not imageList:
        raise SystemExit("No image files found under %s"% imageFilesStartDir)

    datPath = os.path.join(outputDirectory,volumeName+".DAT")
    lfpPath = os.path.join(outputDirectory,volumeName+".LFP")
    errPath = os.path.join(outputDirectory,volumeName+".ERR")

    ## make the dat and populate it with begno, endno, textPath
    ## make the LFP and populate with paths.
    with open(datPath,'w') as outputFile, open(lfpPath,'w') as outputImagesFile:
        outputFile.write("ProdBegBates|ProdEndBates|TextFilePath\n")

        ## every document except the last runs from its own text file's bates
        ## up to the page just before the next text file's bates.
        ## NOTE(review): presumably EnumerateBates returns every bates from
        ## the first argument through the second, inclusive, which is why the
        ## final element (the next document's first page) is dropped - confirm
        ## against BatesRangeFunctions.
        for begBates, nextBegBates in zip(batesList, batesList[1:]):
            batesEnum = BatesRangeFunctions.EnumerateBates(begBates, nextBegBates)
            _WriteDocument(outputFile, outputImagesFile, batesEnum[:-1], textFileMatrix[begBates], imageFileMatrix)

        ## the last document runs through the last image found
        lastBates = batesList[-1]
        batesEnum = BatesRangeFunctions.EnumerateBates(lastBates, imageList[-1])
        _WriteDocument(outputFile, outputImagesFile, batesEnum, textFileMatrix[lastBates], imageFileMatrix)

    ## run test to see if there are images missing from LFP
    ## (second comma-separated field of each LFP line is the bates)
    with open(lfpPath) as lfpFile:
        testImageSet = set(line.replace("\n","").split(",")[1] for line in lfpFile)

    with open(errPath,'w') as errLog:
        for bates in imageList:
            if bates not in testImageSet:
                errLog.write("%s not accounted for.\n"% bates)
132 nino.borges 674