ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Evidox/LoadFilesMissing.py
Revision: 746
Committed: Thu Apr 15 20:11:16 2021 UTC (4 years, 11 months ago) by nino.borges
Content type: text/x-python
File size: 4597 byte(s)
Log Message:
Updated to be compatible with python3, which made the bak files.

File Contents

# Content
1 """
2
3 LoadFilesMissing
4
5 Created by
6 Emanuel Borges
7 2020.03.25
8
9 A simple program that will attempt to make load files for an incoming prod where they only gave us text files and images.
10 This makes lots of assumptions:
11 - Images are Tiff or JPG
12 - Images are single page
13 - Text is multipage
14 - There is a text file for every document, and there are no images that are not covered by one of the text files
15 - Every file is named for its Bates number
16
17 """
18
19 import os
20 import BatesRangeFunctions
21
def LfpWriteLine(openFileObj, bates, filePath, firstPage = True):
    """Write one "IM" image record to an already-open LFP file object.

    openFileObj -- writable text file object receiving the record.
    bates       -- identifier written as the record's second field.
    filePath    -- image path written into the record; its extension
                   selects the trailing type code.
    firstPage   -- when true the third field is "D", otherwise it is
                   left empty.

    The trailing type code is "4" when the path ends in .JPG (compared
    case-insensitively) and "2" for every other extension.
    """
    extension = os.path.splitext(filePath)[1]
    docType = "4" if extension.upper() == ".JPG" else "2"
    firstPageMark = "D" if firstPage else ""

    record = "IM,%s,%s,0,@;%s;%s\n" % (bates, firstPageMark, filePath, docType)
    openFileObj.write(record)
34
def _IndexFilesByBates(startDir, extensions):
    """Walk startDir and map each matching file's base name to its full path.

    extensions is a collection of upper-case extensions (dot included).
    Each file's own extension is upper-cased before the comparison, so
    matching is case-insensitive.  When the same base name is found more
    than once, the file visited last wins.
    """
    matrix = {}
    for root, dirs, files in os.walk(startDir):
        for f in files:
            base, ext = os.path.splitext(f)
            if ext.upper() in extensions:
                matrix[base] = os.path.join(root, f)
    return matrix


def _WriteDocumentPages(lfpFile, errLog, imageFileMatrix, pages):
    """Write one LFP record per page of a single document.

    The first page is flagged as the document start.  A page with no
    entry in imageFileMatrix is still written, using a placeholder path,
    and is reported in errLog.
    """
    firstImage = True
    for page in pages:
        try:
            imagePath = imageFileMatrix[page]
        except KeyError:
            # Only a missing image is expected here; anything else should
            # surface instead of being silently swallowed.
            imagePath = "ERROR FILE NOT FOUND.TIFF"
            errLog.write("%s Image FILE NOT FOUND.\n"% page)
        LfpWriteLine(lfpFile, page, imagePath, firstImage)
        firstImage = False


if __name__ == '__main__':
    # Builds <volume>_XDD.DAT, .LFP and .ERR in outputDirectory from a
    # folder of text files and a folder of page images.
    textFilesStartDir = r"\\sas40\sas40\33444\Inbound\2\092779\PNC - Response to Subpoena\PNC Subpoena Response (rec'd 11.15.19)\VOL0001\TEXT"
    imageFilesStartDir = r"\\sas40\sas40\33444\Inbound\2\092779\PNC - Response to Subpoena\PNC Subpoena Response (rec'd 11.15.19)\VOL0001\IMAGES"
    volumeName = "PNC007"

    outputDirectory = r"C:\Temp"
    outputBase = os.path.join(outputDirectory, volumeName+"_XDD")
    lfpPath = outputBase + ".LFP"

    approvedImageExtensions = ['.TIFF','.JPG','.TIF']

    # Every file is opened in a with-block so handles are released even
    # when something fails part-way through.
    with open(outputBase + ".ERR", 'w') as errLog:
        ## scan the text and image folders and index the files by base name
        textFileMatrix = _IndexFilesByBates(textFilesStartDir, ('.TXT',))
        imageFileMatrix = _IndexFilesByBates(imageFilesStartDir, approvedImageExtensions)

        batesList = sorted(textFileMatrix)
        imageList = sorted(imageFileMatrix)

        # The last document's range ends at the last image, and every
        # document starts at a text file, so both lists must be non-empty.
        if not batesList or not imageList:
            problem = "Nothing to build: found %d text file(s) and %d image file(s)." % (len(batesList), len(imageList))
            errLog.write(problem + "\n")
            raise SystemExit(problem)

        ## make the dat and populate it with begno, endno, textPath
        ## make the LFP and populate with paths.
        with open(outputBase + ".DAT", 'w') as outputFile:
            with open(lfpPath, 'w') as outputImagesFile:
                outputFile.write("ProdBegBates|ProdEndBates|TextFilePath\n")

                # Each document runs from its own text file's name up to,
                # but not including, the next text file's name.
                # NOTE(review): EnumerateBates presumably returns the ordered,
                # inclusive list of Bates numbers between its two arguments --
                # confirm against BatesRangeFunctions.
                for docBates, nextDocBates in zip(batesList, batesList[1:]):
                    batesEnum = BatesRangeFunctions.EnumerateBates(docBates, nextDocBates)
                    # batesEnum[-1] is the next document's first page, so
                    # this document ends one entry earlier.
                    outputFile.write("%s|%s|%s\n"%(batesEnum[0],batesEnum[-2], textFileMatrix[docBates]))
                    _WriteDocumentPages(outputImagesFile, errLog, imageFileMatrix, batesEnum[:-1])

                # The final document has no successor; it is assumed to run
                # through the last image found.
                lastDocBates = batesList[-1]
                batesEnum = BatesRangeFunctions.EnumerateBates(lastDocBates, imageList[-1])
                outputFile.write("%s|%s|%s\n"%(batesEnum[0],batesEnum[-1], textFileMatrix[lastDocBates]))
                _WriteDocumentPages(outputImagesFile, errLog, imageFileMatrix, batesEnum)

        ## run test to see if there are images missing from LFP
        # Read the LFP back from disk so the check covers what was really
        # written.  The second comma-separated field is the page identifier;
        # a set keeps the membership test below cheap.
        with open(lfpPath) as lfpFile:
            writtenPages = {line.split(",")[1] for line in lfpFile}

        for imageBates in imageList:
            if imageBates not in writtenPages:
                errLog.write("%s not accounted for.\n"% imageBates)
125