ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Evidox/LoadFilesMissing.py
Revision: 676
Committed: Thu Mar 26 16:23:22 2020 UTC (6 years ago) by nino.borges
Content type: text/x-python
File size: 4585 byte(s)
Log Message:
Cleaned things up a bit more by removing unneeded logic, now that we have a function here, and also added a try except statement.

File Contents

# User Rev Content
1 nino.borges 674 """
2    
3     LoadFilesMissing
4    
5     Created by
6     Emanuel Borges
7     2020.03.25
8    
9     A simple program that attempts to build load files for an incoming production where we were only given text files and images.
10     This makes lots of assumptions:
11     - Images are TIFF or JPG
12     - Images are single page
13     - Text is multipage
14     - There is a text file for every document, and there are no images that are not covered by a text file
15     - Everything is named for its bates number
16    
17     """
18    
19     import os
20     import BatesRangeFunctions
21    
def LfpWriteLine(openFileObj, bates, filePath, firstPage = True):
    """Write one image ("IM") record to an open LFP load file.

    openFileObj -- writable file-like object; only its write() method is used.
    bates       -- identifier written as the second field of the record.
    filePath    -- path of the image file. Its extension selects the trailing
                   type code: ".JPG" (compared case-insensitively) gives "4",
                   any other extension gives "2".
    firstPage   -- when true, the record carries the "D" marker in its third
                   field; otherwise that field is left empty.

    Returns None. The record written has the form
        IM,<bates>,<D or empty>,0,@;<filePath>;<type code>
    """
    # NOTE(review): "4" / "2" are presumably the LFP image-type codes for
    # JPEG and TIFF respectively -- confirm against the load file spec.
    extension = os.path.splitext(filePath)[1].upper()
    docType = "4" if extension == ".JPG" else "2"

    firstPageMark = "D" if firstPage else ""

    openFileObj.write("IM,%s,%s,0,@;%s;%s\n" % (bates, firstPageMark, filePath, docType))
34    
if __name__ == '__main__':
    # Script entry point: scans a TEXT folder and an IMAGES folder, then writes
    # three files into outputDirectory:
    #   <volume>_XDD.DAT  - one row per document (begin bates, end bates, text path)
    #   <volume>_XDD.LFP  - one IM record per page image
    #   <volume>_XDD.ERR  - pages with no image file, and images never written to the LFP
    #
    # The paths and volume name below are hard-coded for one specific job; edit
    # them before running against a different production.
    textFilesStartDir = r"\\sas40\sas40\33444\Inbound\2\092779\PNC - Response to Subpoena\PNC Subpoena Response (rec'd 11.15.19)\VOL0001\TEXT"
    imageFilesStartDir = r"\\sas40\sas40\33444\Inbound\2\092779\PNC - Response to Subpoena\PNC Subpoena Response (rec'd 11.15.19)\VOL0001\IMAGES"
    volumeName = "PNC007"

    outputDirectory = r"C:\Temp"
    errLogPath = os.path.join(outputDirectory,volumeName+"_XDD"+".ERR")
    datPath = os.path.join(outputDirectory,volumeName+"_XDD"+".DAT")
    lfpPath = os.path.join(outputDirectory,volumeName+"_XDD"+".LFP")

    approvedImageExtensions = ['.TIFF','.JPG','.TIF']
    textFileMatrix = {}
    imageFileMatrix = {}

    # "with" guarantees every handle is closed even if the run fails part way.
    with open(errLogPath,'w') as errLog:

        ## scan the text files folder and make a matrix of text files
        ## (file name without extension -> full path)
        for root, dirs, files in os.walk(textFilesStartDir):
            for f in files:
                stem, ext = os.path.splitext(f)
                if ext.upper() == ".TXT":
                    textFileMatrix[stem] = os.path.join(root,f)

        ## scan the images folder and make a matrix of image files
        for root, dirs, files in os.walk(imageFilesStartDir):
            for f in files:
                stem, ext = os.path.splitext(f)
                if ext.upper() in approvedImageExtensions:
                    imageFileMatrix[stem] = os.path.join(root,f)

        # sorted() works on both Python 2 lists and Python 3 dict views;
        # calling .sort() on dict.keys() only works on Python 2.
        batesList = sorted(textFileMatrix)
        imageList = sorted(imageFileMatrix)

        ## make the dat and populate it with begno, endno, textPath
        ## make the LFP and populate with paths.
        with open(datPath,'w') as outputFile, open(lfpPath,'w') as outputImagesFile:
            outputFile.write("ProdBegBates|ProdEndBates|TextFilePath\n")

            if not batesList:
                # Without text files there are no document boundaries to work from.
                errLog.write("No text files found under %s; no documents written.\n"% textFilesStartDir)
            elif not imageList:
                # The last document's end is taken from the last image, so
                # nothing sensible can be written without images.
                errLog.write("No image files found under %s; no documents written.\n"% imageFilesStartDir)
            else:
                lastIndex = len(batesList) - 1
                for index, begBates in enumerate(batesList):
                    # NOTE(review): assumes EnumerateBates(start, end) returns an
                    # ordered, inclusive list of bates numbers -- inferred from how
                    # the result is sliced here; confirm in BatesRangeFunctions.
                    if index < lastIndex:
                        # A document runs up to, but not including, the first
                        # page of the next document.
                        pages = BatesRangeFunctions.EnumerateBates(begBates,batesList[index + 1])[:-1]
                    else:
                        # The final document runs through the last image found.
                        pages = BatesRangeFunctions.EnumerateBates(begBates,imageList[-1])

                    outputFile.write("%s|%s|%s\n"%(pages[0],pages[-1], textFileMatrix[begBates]))

                    firstImage = True
                    for page in pages:
                        try:
                            imagePath = imageFileMatrix[page]
                        except KeyError:
                            # Keep the page in the LFP with a placeholder path
                            # and record the gap in the error log.
                            imagePath = "ERROR FILE NOT FOUND.TIFF"
                            errLog.write("%s Image FILE NOT FOUND.\n"% page)
                        LfpWriteLine(outputImagesFile, page, imagePath, firstImage)
                        firstImage = False

        ## run test to see if there are images missing from LFP
        ## (re-read the LFP that was just written and closed above)
        with open(lfpPath) as lfpFile:
            # Second comma-separated field of every IM record is the bates number;
            # a set keeps the membership checks below cheap.
            testImageSet = set(line.replace("\n","").split(",")[1] for line in lfpFile)

        for imageBates in imageList:
            if imageBates not in testImageSet:
                errLog.write("%s not accounted for.\n"% imageBates)
125 nino.borges 674