ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/RandomCodeRequests/HoneyWell_reqs.txt
Revision: 580
Committed: Thu Mar 26 13:50:30 2015 UTC (11 years ago) by nino.borges
Content type: text/plain
File size: 25287 byte(s)
Log Message:
update before leaving

File Contents

# User Rev Content
1 nino.borges 580 === Here I was asked to group dup documents using a field where they are putting in the dup values ===
2     == DO NOT USE. Moved updated code to standalone prog called PerligoDupeGroup.py ==
3 nino.borges 566 == First I made the matrix, flattening the data into “groups” ==
4     >>> matrix = {}
5 nino.borges 580 >>> contents = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\20150109_dedupeProj\NearDupesExport.dat").readlines()
6 nino.borges 566 >>> matrix2 = {}
7     >>> count = 1
8     >>> for line in contents:
9     line = line.replace("\n","")
10     line = line.replace("\r","")
11     line = line.split("|")
12     test = None
13     for docID in line:
14     if docID in matrix.keys():
15     test = matrix[docID]
16     if test:
17     pass
18     else:
19     test = count
20     count = count + 1
21     for docID in line:
22     matrix[docID] = test
23    
24     >>> len(matrix.keys())
25     >>> for begNo in matrix.keys():
26     group = matrix[begNo]
27     if group in matrix2.keys():
28     if begNo in matrix2[group]:
29     pass
30     else:
31     matrix2[group].append(begNo)
32     else:
33     matrix2[group] = [begNo,]
34    
35     >>> outputFile = open(r"/Users/ninoborges/Dropbox/Misc/359_export.txt",'w')
36     >>> for i in matrix2.keys():
37     outputFile.write(str(matrix2[i])+"\n")
38    
39    
40     >>> outputFile.close()
41     >>> len(matrix2.keys())
42     == Now use a list as the preferred “original” list ==
43     >>> contents = open(r"/Users/ninoborges/Dropbox/Misc/originals_list.dat").readlines()
44     >>> origList = []
45     >>> for line in contents:
46     line = line.replace("\n","")
47     origList.append(line)
48    
49    
50     >>> len(origList)
51    
52     == Now unpack and if it’s in the orig list, use that; if not, use the smaller one. ==
53     >>> outputFile = open(r"/Users/ninoborges/Dropbox/Misc/finalReport.txt",'w')
54     >>> for k in matrix2.keys():
55     group = matrix2[k]
56     group.sort()
57     parent = None
58     for orig in origList:
59     if orig in group:
60     parent = orig
61     if parent:
62     outputFile.write(parent + "|")
63     for i in group:
64     if i == parent:
65     pass
66     else:
67     outputFile.write(i+",")
68     else:
69     outputFile.write(group[0]+"|")
70     for i in group[1:]:
71     outputFile.write(i+",")
72     outputFile.write('\n')
73    
74    
75     >>> outputFile.close()
76     ===end===
77    
78 nino.borges 559 === Here I was asked to make a report of the relates to but using priority tags ===
79     == First I take the priority scale that they gave me into a dict ==
80     contents = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo_Priority\Group A\GroupA_pri.txt").readlines()
81     >>> priMatrix = {}
82 nino.borges 558 >>> for line in contents:
83     ... line = line.replace("\n","")
84 nino.borges 559 ... line = line.split("*")
85     ... pri = line[0]
86     ... tag = line[2]
87     ... tag = tag.replace(" ","")
88     ... tag = tag.upper()
89     ... if pri in priMatrix.keys():
90     ... priMatrix[pri].append(tag)
91     ... else:
92     ... priMatrix[pri] = [tag,]
93 nino.borges 558 ...
94 nino.borges 559 == Then I parse the export from the db ==
95     >>> contents = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo_Priority\Group A\export_20140926_163727.dat").readlines()
96     >>> head = contents[0]
97     >>> head = head.replace("\n","")
98     >>> contents = contents[1:]
99     >>> head = head.split("|")
100     >>> head = head[1:]
101     >>> dbMatrix = {}
102     >>> for line in contents:
103     ... line = line.replace("\n","")
104     ... line = line.split("|")
105     ... entries = line[1:]
106     ... count = 0
107     ... finalList = []
108     ... for i in entries:
109     ... if i:
110     ... if ";" in i:
111     ... i = i.split(";")
112     ... else:
113     ... i = [i,]
114     ... for x in i:
115     ... x = head[count] + "|" + x
116     ... x = x.replace(" ","")
117     ... x = x.upper()
118     ... finalList.append(x)
119     ... count = count +1
120     ... dbMatrix[line[0]] = list(finalList)
121     ...
122     == Now unpack ==
123     >>> tallyMatrix = {}
124     >>> for i in priMatrix.keys():
125     ... tallyMatrix[i] = []
126    
127     for bates in dbMatrix.keys():
128     ... for entry in dbMatrix[bates]:
129     ... if entry in priMatrix['A1']:
130     ... if entry in tallyMatrix['A1']:
131     ... pass
132     ... else:
133     ... tallyMatrix['A1'].append(bates)
134     ... elif entry in priMatrix['A2']:
135     ... if entry in tallyMatrix['A2']:
136     ... pass
137     ... else:
138     ... tallyMatrix['A2'].append(bates)
139     ... elif entry in priMatrix['A3']:
140     ... if entry in tallyMatrix['A3']:
141     ... pass
142     ... else:
143     ... tallyMatrix['A3'].append(bates)
144     ... elif entry in priMatrix['A4']:
145     ... if entry in tallyMatrix['A4']:
146     ... pass
147     ... else:
148     ... tallyMatrix['A4'].append(bates)
149     ... elif entry in priMatrix['A5']:
150     ... if entry in tallyMatrix['A5']:
151     ... pass
152     ... else:
153     ... tallyMatrix['A5'].append(bates)
154     ... elif entry in priMatrix['A6']:
155     ... if entry in tallyMatrix['A6']:
156     ... pass
157     ... else:
158     ... tallyMatrix['A6'].append(bates)
159     ... elif entry in priMatrix['A7']:
160     ... if entry in tallyMatrix['A7']:
161     ... pass
162     ... else:
163     ... tallyMatrix['A7'].append(bates)
164     ... elif entry in priMatrix['A8']:
165     ... if entry in tallyMatrix['A8']:
166     ... pass
167     ... else:
168     ... tallyMatrix['A8'].append(bates)
169     ... elif entry in priMatrix['A9']:
170     ... if entry in tallyMatrix['A9']:
171     ... pass
172     ... else:
173     ... tallyMatrix['A9'].append(bates)
174    
175     ## But remember there will be dups. 1 document will exist in multiple A's
176    
177     >>> len(tallyMatrix['A1'])
178     103
179     >>> len(tallyMatrix['A2'])
180     51
181     >>> len(tallyMatrix['A3'])
182     126
183     >>> len(tallyMatrix['A4'])
184     230
185     >>> len(tallyMatrix['A5'])
186     6020
187     >>> len(tallyMatrix['A6'])
188     9458
189     >>> len(tallyMatrix['A7'])
190     3555
191     >>>
192    
193     == So now we write to file but removing dups ==
194     >>> outputFile = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo_Priority\Group A\overlay.dat",'w')
195     >>> outputFile.write("Bates|Level 1 Review Coding Consolidated\n")
196     >>> a1List = []
197     >>> a2List = []
198     >>> a3List = []
199     >>> a4List = []
200     >>> a5List = []
201     >>> a6List = []
202     >>> a7List = []
203     >>> a8List = []
204     >>> a9List = []
205     >>> totList = []
206     >>> sets = priMatrix.keys()
207     >>> sets.sort()
208     >>> aLists = [a1List,a2List,a3List,a4List,a5List,a6List,a7List,a8List,a9List]
209     >>> count = 0
210     ## This only works because I go in order.
211     >>> for a in sets:
212     ... for bates in tallyMatrix[a]:
213     ... if bates in totList:
214     ... pass
215     ... else:
216     ... outputFile.write("%s|%s\n"%(bates,a))
217     ... totList.append(bates)
218     ... aLists[count].append(bates)
219     ... count = count +1
220     ...
221 nino.borges 558 >>> outputFile.close()
222 nino.borges 559 >>> len(a1List)
223     103
224     >>> len(a2List)
225     51
226     >>> len(a3List)
227     126
228     >>> len(a4List)
229     230
230     >>> len(a5List)
231     6020
232     >>> len(a6List)
233     8863
234     >>> len(a7List)
235     3115
236     >>> len(totList)
237     18508
238     >>>
239    
240    
241 nino.borges 558 === END ===
242 nino.borges 559 === Here I was asked to get a report of documents that contain X tags ===
243     >>> contents = open(r"\\BSTDD967DTW1\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo\export_2.dat").readlines()
244     >>> contents = contents[1:]
245     >>> contents[0]
246     'HWHC000000001||||Livonia, MI|Invoice/Purchase order|||||Document retention (policies/compliance)|\n'
247     >>> matrix = {}
248     >>> for line in contents:
249     ... line = line.replace("\n","")
250     ... line = line.split("|")
251     ... bates = line[0]
252     ... parts = line[1:]
253     ... countList = []
254     ... for i in parts:
255     ... if i:
256     ... i = i.split(";")
257     ... for x in i:
258     ... countList.append(x)
259     ... if len(countList) in matrix.keys():
260     ... matrix[len(countList)].append(bates)
261     ... else:
262     ... matrix[len(countList)] = [bates,]
263     ...
264     >>> outputFile = open(r"\\BSTDD967DTW1\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo\relatesToReport.txt",'w')
265     >>> for i in matrix.keys():
266     ... outputFile.write("%s documents have %s tags.\n"% (len(matrix[i]),i))
267     ...
268     >>> outputFile.close()
269     === END ===
270    
271     === Here I was asked to do a 5 before and 5 after field and at first only for docs without families ===
272     == first I removed documents with families from the entire universe ==
273     >>> contents = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\export.csv").readlines()
274     >>> outputFile = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\StandAloneOnly.dat",'w')
275     >>> for line in contents:
276     ... newline = line.replace("\n","")
277     ... newline = newline.split(",")
278     ... err = False
279     ... if newline[0] == newline [2]:
280     ... err = True
281     ... if newline[1] == newline[3]:
282     ... err = True
283     ... if newline[2] == "":
284     ... err = True
285     ... if err == True:
286     ... outputFile.write(line)
287    
288     == then to make a packed matrix ==
289     =first back buffer=
290     >>> contents = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\StandAloneOnly.dat").readlines()
291     >>> outputFile = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\ShiftedOverlay.dat",'w')
292     >>> befBuffer = []
293     >>> befMatrix = {}
294     >>> for line in contents:
295     ... line = line.replace("\n","")
296     ... begNo = line.split(",")[0]
297     ... befBuffer.append(begNo)
298     ... if len(befBuffer) > 6:
299     ... nul = befBuffer.pop(0)
300     ... befMatrix[begNo] = list(befBuffer)
301    
302     = then reversing for the after buffer=
303     >>> contents.reverse()
304     >>> aftMatrix = {}
305     >>> aftBuffer = []
306     >>> for line in contents:
307     ... line = line.replace("\n","")
308     ... begNo = line.split(",")[0]
309     ... aftBuffer.append(begNo)
310     ... if len(aftBuffer) > 6:
311     ... nul = aftBuffer.pop(0)
312     ... aftMatrix[begNo] = list(aftBuffer)
313    
314     == now to unpack and make the final load file ==
315     >>> outputFile = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\ShiftedOverlay.dat",'w')
316     >>> for line in contents:
317     ... begno = line.split(",")[0]
318     ... outputFile.write("%s|"%begno)
319     ... for i in befMatrix[begno]:
320     ... outputFile.write("%s;"%i)
321     ... count = 1
322     ... aftList = aftMatrix[begno]
323     ... aftList.remove(begno)
324     ... aftList.sort()
325     ... for i in aftList:
326     ... outputFile.write("%s"%i)
327     ... if count < len(aftList):
328     ... outputFile.write(";")
329     ... count = count +1
330     ... outputFile.write("\n")
331     ...
332     >>> outputFile.close()
333     === END ===
334     === Here I was asked, in proj 199, to compare the page counts of prod vs review and report ===
335     for f in os.listdir(startDir):
336     ... outputFile = open(os.path.join(outputDir,os.path.splitext(f)[0]+".txt"),'w')
337     ... outputErrFile = open(os.path.join(outputDir,os.path.splitext(f)[0]+".err"),'w')
338     ... contents = open(os.path.join(startDir,f)).readlines()
339     ... headder = contents[0]
340     ... outputFile.write(headder)
341     ... outputErrFile.write(headder)
342     ... contents = contents[1:]
343     ... for line in contents:
344     ... line = line.replace("\n","")
345     ... prodBeg,prodEnd,beg,end,concordBeg = line.split(",")
346     ... if prodBeg:
347     ... pass
348     ... else:
349     ... prodBeg = beg
350     ... err = False
351     ... if ";" in prodBeg:
352     ... err = True
353     ... if ";" in prodEnd:
354     ... testList = prodEnd.split(";")
355     ... if len(testList) == 2:
356     ... test1,test2 = prodEnd.split(";")
357     ... test1 = test1.strip()
358     ... test2 = test2.strip()
359     ... if test1:
360     ... if test2:
361     ... err = True
362     ... else:
363     ... prodEnd = test1
364     ... elif test2:
365     ... prodEnd = test2
366     ... else:
367     ... err = True
368     ... else:
369     ... err = True
370     ... if ";" in concordBeg:
371     ... err = True
372     ... if ";" in end:
373     ... err = True
374     ... if prodBeg[-5:] == "LOKEY":
375     ... err = True
376     ... if concordBeg == "":
377     ... err = True
378     ... if err == False:
379     ... try:
380     ... prodList = FixBatesRange_func.EnumerateBates(prodBeg,prodEnd)
381     ... revList = FixBatesRange_func.EnumerateBates(concordBeg,end)
382     ... if len(prodList) == len(revList):
383     ... outputFile.write(line+"\n")
384     ... else:
385     ... outputErrFile.write(line+"\n")
386     ... except:
387     ... outputErrFile.write(line+"\n")
388     ... else:
389     ... outputErrFile.write(line+"\n")
390     ... outputFile.close()
391     ... outputErrFile.close()
392    
393     == Then make a dat file for KPMG ==
394     for f in os.listdir(startDir):
395     ... outputFile = open(os.path.join(outputDir,os.path.splitext(f)[0]+".DAT"),'w')
396     ... outputFile.write("BegImage|%s BatesBeg|%s BatesEnd|Prod Production History Multi Choice\n"%(os.path.splitext(f)[0],os.path.splitext(f)[0]))
397     ... contents = open(os.path.join(startDir,f)).readlines()
398     ... contents = contents[1:]
399     ... for line in contents:
400     ... line = line.replace("\n","")
401     ... prodBeg,prodEnd,beg,end,concordBeg = line.split(",")
402     ... if prodBeg:
403     ... pass
404     ... else:
405     ... prodBeg = beg
406     ... err = False
407     ... if ";" in prodBeg:
408     ... err = True
409     ... if ";" in prodEnd:
410     ... testList = prodEnd.split(";")
411     ... if len(testList) == 2:
412     ... test1,test2 = prodEnd.split(";")
413     ... test1 = test1.strip()
414     ... test2 = test2.strip()
415     ... if test1:
416     ... if test2:
417     ... err = True
418     ... else:
419     ... prodEnd = test1
420     ... elif test2:
421     ... prodEnd = test2
422     ... else:
423     ... err = True
424     ... else:
425     ... err = True
426     ... if ";" in concordBeg:
427     ... err = True
428     ... if ";" in end:
429     ... err = True
430     ... if prodBeg[-5:] == "LOKEY":
431     ... err = True
432     ... if concordBeg == "":
433     ... err = True
434     ... if err == False:
435     ... outputFile.write("%s|%s|%s|%s\n"%(concordBeg,prodBeg,prodEnd,os.path.splitext(f)[0]))
436     ... outputFile.close()
437     === End ===
438    
439     === Here I was asked to group and code documents by doc title ===
440     ## First take her huge xls file and make a smaller pipe-delimited file containing only records that have values in the key fields
441     >>> contents = open(r"W:\Manny\Client\honeywell\DocTitleBatchCode_Proj\Document Title Tally.csv").readlines()
442     >>> outputFile = open(r"W:\Manny\Client\honeywell\DocTitleBatchCode_Proj\Document Title Tally_small.csv",'w')
443     >>> contents = contents[1:]
444     >>> for line in contents:
445     ... wr = False
446     ... line = line.split("|")
447     ... if line[2]:
448     ... wr = True
449     ... if line[4]:
450     ... wr = True
451     ... if line[5]:
452     ... wr = True
453     ... if line[6]:
454     ... wr = True
455     ... if line[7]:
456     ... wr = True
457     ... if line[8]:
458     ... wr = True
459     ... if line[9]:
460     ... wr = True
461     ... if wr:
462     ... outputFile.write("%s|%s|%s|%s|%s|%s|%s|%s\n"% (line[0],line[2],line[4],line[5],line[6],line[7],line[8],line[9]))
463     ...
464     >>> outputFile.close()
465    
466     ## Create the matrix, if you don't have a pickle. Here I write to a pickle, which can be reused, since the doctitle shouldn't change
467     >>> contents = open(r"W:\Manny\Client\honeywell\DocTitleTally.dat").readlines()
468     >>> matrix = {}
469     >>> for line in contents:
470     ... line = line.replace("\n","")
471     ... line = line.split("|")
472     ... bates = line[0]
473     ... title = line[2]
474     ... if title in matrix.keys():
475     ... matrix[title].append(bates)
476     ... else:
477     ... matrix[title] = [bates,]
478     ...
479     >>> len(matrix.keys())
480     239844
481     ## saving to a pickle
482     import pickle
483     >>> with open(r"C:\McDermott-Discovery\Client\docTitle_proj.pkl",'wb') as f:
484     ... pickle.dump(matrix,f,pickle.HIGHEST_PROTOCOL)
485     ...
486     ##
487     >>> outputFile = open(r"C:\McDermott-Discovery\Client\docTitle_proj.dat",'w')
488     >>> contents = open(r"\\nykads01\data\CLI\Honeywell-17th Floor\Manny\20140630_DocTitleTally_MannyCopy_small.dat").readlines()
489     >>> headder = contents[0]
490     >>> headder = "Bates|" + headder
491     >>> outputFile.write(headder)
492     >>> for line in contents[1:]:
493     ... line = line.replace("\n","")
494     ... line = line.split("|")
495     ... for b in matrix[line[0]]:
496     ... outputFile.write("%s|%s|%s|%s|%s|%s|%s\n"%(b,line[1],line[2],line[3],line[4],line[5],line[6]))
497     ...
498     >>> outputFile.close()
499    
500    
501     ## family groups
502     >>> contents = open(r"C:\McDermott-Discovery\Client\RL_BegImage_BegAttach.csv").readlines()
503     >>> contents = contents[1:]
504     >>> familyLookupMatrix = {}
505     >>> familyMatrix = {}
506     >>> for line in contents:
507     ... line = line.replace('"','')
508     ... line = line.replace("\n","")
509     ... bates,begAttach = line.split(",")
510     ... familyLookupMatrix[bates] = begAttach
511     ... if begAttach in familyMatrix.keys():
512     ... familyMatrix[begAttach].append(bates)
513     ... else:
514     ... familyMatrix[begAttach] = [bates,]
515     ...
516     ## Now the meat of the proj
517     >>> contents = open(r"C:\McDermott-Discovery\Client\docTitle_proj.dat").readlines()
518     >>> contents = contents[1:]
519     >>> fbc = {}
520     >>> rbg1 = {}
521     >>> rbg2 = {}
522     >>> rbg3 = {}
523     >>> rbg4 = {}
524     >>> rbg5 = {}
525     >>> for line in contents:
526     ... line = line.replace("\n","")
527     ... line = line.split("|")
528     ... parent = familyLookupMatrix[line[0]]
529     ... fullFam = familyMatrix[parent]
530     ... for x in fullFam:
531     ... if line[1]:
532     ... if x in fbc.keys():
533     ... if line[1] in fbc[x]:
534     ... pass
535     ... else:
536     ... fbc[x].append(line[1])
537     ... else:
538     ... fbc[x] = [line[1],]
539     ... if line[2]:
540     ... if x in rbg1.keys():
541     ... if line[2] in rbg1[x]:
542     ... pass
543     ... else:
544     ... rbg1[x].append(line[2])
545     ... else:
546     ... rbg1[x] = [line[2],]
547     ... if line[3]:
548     ... if x in rbg2.keys():
549     ... if line[3] in rbg2[x]:
550     ... pass
551     ... else:
552     ... rbg2[x].append(line[3])
553     ... else:
554     ... rbg2[x] = [line[3],]
555     ... if line[4]:
556     ... if x in rbg3.keys():
557     ... if line[4] in rbg3[x]:
558     ... pass
559     ... else:
560     ... rbg3[x].append(line[4])
561     ... else:
562     ... rbg3[x] = [line[4],]
563     ... if line[5]:
564     ... if x in rbg4.keys():
565     ... if line[5] in rbg4[x]:
566     ... pass
567     ... else:
568     ... rbg4[x].append(line[5])
569     ... else:
570     ... rbg4[x] = [line[5],]
571     ... if line[6]:
572     ... if x in rbg5.keys():
573     ... if line[6] in rbg5[x]:
574     ... pass
575     ... else:
576     ... rbg5[x].append(line[6])
577     ... else:
578     ... rbg5[x] = [line[6],]
579     ...
580     >>> mainList = []
581     >>> for i in fbc.keys():
582     ... if i in mainList:
583     ... pass
584     ... else:
585     ... mainList.append(i)
586     ...
587     >>> len(mainList)
588     80490
589     >>> for i in rbg1.keys():
590     ... if i in mainList:
591     ... pass
592     ... else:
593     ... mainList.append(i)
594     ...
595     >>> len(mainList)
596     80490
597     >>> for i in rbg2.keys():
598     ... if i in mainList:
599     ... pass
600     ... else:
601     ... mainList.append(i)
602     ...
603     >>> for i in rbg3.keys():
604     ... if i in mainList:
605     ... pass
606     ... else:
607     ... mainList.append(i)
608     ...
609     >>> for i in rbg4.keys():
610     ... if i in mainList:
611     ... pass
612     ... else:
613     ... mainList.append(i)
614     ...
615     >>> for i in rbg5.keys():
616     ... if i in mainList:
617     ... pass
618     ... else:
619     ... mainList.append(i)
620     >>> mainList.sort()
621     >>> outputFile = open(r"C:\McDermott-Discovery\Client\output.txt",'w')
622     >>> for bates in mainList:
623     ... if bates in fbc.keys():
624     ... if fbc[bates]:
625     ... fbcVal = str(fbc[bates])
626     ... else:
627     ... fbcVal = ""
628     ... else:
629     ... fbcVal = ""
630     ... if bates in rbg1.keys():
631     ... if rbg1[bates]:
632     ... rbg1Val = str(rbg1[bates])
633     ... else:
634     ... rbg1Val = ""
635     ... else:
636     ... rbg1Val = ""
637     ... if bates in rbg2.keys():
638     ... if rbg2[bates]:
639     ... rbg2Val = str(rbg2[bates])
640     ... else:
641     ... rbg2Val = ""
642     ... else:
643     ... rbg2Val = ""
644     ... if bates in rbg3.keys():
645     ... if rbg3[bates]:
646     ... rbg3Val = str(rbg3[bates])
647     ... else:
648     ... rbg3Val = ""
649     ... else:
650     ... rbg3Val = ""
651     ... if bates in rbg4.keys():
652     ... if rbg4[bates]:
653     ... rbg4Val = str(rbg4[bates])
654     ... else:
655     ... rbg4Val = ""
656     ... else:
657     ... rbg4Val = ""
658     ... if bates in rbg5.keys():
659     ... if rbg5[bates]:
660     ... rbg5Val = str(rbg5[bates])
661     ... else:
662     ... rbg5Val = ""
663     ... else:
664     ... rbg5Val = ""
665     ... outputFile.write("%s|%s|%s|%s|%s|%s|%s\n"%(bates,fbcVal,rbg1Val,rbg2Val,rbg3Val,rbg4Val,rbg5Val))
666     ...
667     >>> outputFile.close()
668    
669     ## Cleanup the output and make the final overlay
670     >>> contents = open(r"C:\McDermott-Discovery\Client\output.txt").readlines()
671     >>> outputFile = open(r"C:\McDermott-Discovery\Client\overlay.dat",'w')
672     >>> for line in contents:
673     ... line = line.split("|")
674     ... for i in line:
675     ... i = i.replace("'","")
676     ... i = i.replace("[","")
677     ... i = i.replace("]","")
678     ... i = i.replace(",",";")
679     ... if "\n" in i:
680     ... outputFile.write(i)
681     ... else:
682     ... outputFile.write(i + "|")
683     ...
684     >>> outputFile.close()
685    
686    
687     === Here I was asked to copy from long text date to real date field but there were errors because some of the dates were missing info, i.e., 00/00/1991. I fixed the entries based on their criteria.===
688     >>> contents = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Document_Delivery\Vol003\GroupA\export_20140929_192612.csv").readlines()
689     >>> contents = contents[1:]
690     >>> outputFile = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Document_Delivery\Vol003\GroupA\FIXED.dat",'w')
691     >>> outputFile.write("BegDoc|DocDate_New\n")
692     >>> for line in contents:
693     ... line = line.replace("\n","")
694     ... bates,date,nul = line.split("|")
695     ... if date:
696     ... mm,dd,yy = date.split("/")
697     ... if mm == '00':
698     ... mm = '01'
699     ... if dd == '00':
700     ... dd = '01'
701     ... if yy == '0000':
702     ... yy = '1800'
703     ... date = '%s/%s/%s'%(mm,dd,yy)
704     ... outputFile.write("%s|%s\n"%(bates,date))
705     >>> outputFile.close()
706     === END ===
707 nino.borges 558 === Here I was asked to make a tally report from the prefixes in HOBX ===
708     >>> contents = open(r"W:\Manny\Client\honeywell\PrefixTallyProj\20140717 search for manny_export\20140717 search for manny_export.csv").readlines()
709     >>> contents = contents[1:]
710     >>> import FixBatesRange_func
711     >>> outputFile = open(r"W:\Manny\Client\honeywell\PrefixTallyProj\20140717 search for manny_export\output.txt",'w')
712     >>> errorFile = open(r"W:\Manny\Client\honeywell\PrefixTallyProj\20140717 search for manny_export\errFile.txt",'w')
713     >>> matrix = {}
714     >>> cnt = NinoGenTools.Counter()
715     >>> for line in contents:
716     ... line = line.replace("\n","")
717     ... line = line.replace('"',"")
718     ... try:
719     ... alph = FixBatesRange_func.SeperateAlpha3(line)[0]
720     ... except:
721     ... alph = "00"
722     ... errorFile.write("%s\n"%line)
723     ... if alph[:2] == "00":
724     ... alph = ""
725     ... elif alph[-1] == "0":
726     ... while alph[-1] == "0":
727     ... alph = alph[:-1]
728     ... if alph in matrix.keys():
729     ... matrix[alph] = matrix[alph] + 1
730     ... else:
731     ... matrix[alph] = 1
732     ... cnt.inc()
733     ...
734     >>> outputFile.close()
735     >>> errorFile.close()
736     === End ===
737 nino.borges 475
738 nino.borges 558 ==== Here I was asked to find documents with family problems. Note that the values have to be enumerated first.
739     import FixBatesRange_func
740     >>> content = open(r"W:\Manny\Client\Perligo\Family_probs_export.csv").readlines()
741     >>> batesValues = []
742     >>> attachValues = {}
743     >>> for line in content:
744     ... line = line.replace("\n","")
745     ... batesEnum = line.split("|")[6]
746     ... batesEnum = batesEnum.split(';')
747     ... if batesEnum >1:
748     ... for i in batesEnum:
749     ... batesValues.append(i)
750     ... else:
751     ... batesValues.append(batesEnum)
752     ...
753     >>> content = content[1:]
754     >>> for line in content:
755     ... line = line.replace("\n","")
756     ... batesBeg = line.split("|")[4]
757     ... batesEnd = line.split("|")[5]
758     ... batesEnum = FixBatesRange_func.EnumerateBates(batesBeg,batesEnd)
759     ... clump = "%s,%s"%(batesBeg,batesEnd)
760     ... attachValues[clump] = batesEnum
761     ...
762     >>> outputFile = open(r"W:\Manny\Client\Perligo\export_output.txt",'w')
763     >>> for clump in attachValues.keys():
764     ... errList = []
765     ... for i in attachValues[clump]:
766     ... if i in batesValues:
767     ... pass
768     ... else:
769     ... errList.append(i)
770     ... if errList:
771     ... outputFile.write("%s family is an issue because it's missing pages: "%clump)
772     ... for x in errList:
773     ... outputFile.write("%s;"%x)
774     ... outputFile.write("\n")
775     ...
776     >>> outputFile.close()
777     >>> begAttachList = []
778     >>> contents = open(r"W:\Manny\Client\Perligo\export_output.txt").readlines()
779     >>> for i in contents:
780     ... i = i.split(",")
781     ... begAttachList.append(i[0])
782     ...
783     >>> contents = open(r"W:\Manny\Client\Perligo\Family_probs_export.csv").readlines()
784     >>> outputFile = open(r"W:\Manny\Client\Perligo\overlay.dat",'w')
785     >>> for line in contents:
786     ... line = line.split("|")
787     ... if line[4] in begAttachList:
788     ... outputFile.write("%s|x\n"%line[0])
789     ...
790     >>> outputFile.close()
791     >>>
792    
793     ==== END =======
794    
795    
796 nino.borges 475 ==== Here I was asked to both test the kpmg enumerated values field and to also verify that
797     all of the bendix ranges in rl were in my LFP.
798     -------------------------------
799     >>> contents = open(r"\\bstads01\app\Manny\Client\honeywell\LFP Export\LFP Export\Non_microfiche\373540873_REVIEW_JUSTBENDIX.lfp").readlines()
800     >>> MasterList = []
801     >>> for line in contents:
802     ... bates = line.split(",")[1]
803     ... MasterList.append(bates)
804     ...
805     >>> len(MasterList)
806     357525
807    
808     -------------------------------
809     >>> outputFile2 = open(r"C:\Client\honeywell\BENDIX-Beg_End_Enum_enumErr.csv",'w')
810     >>> outputFile = open(r"C:\Client\honeywell\BENDIX-Beg_End_Enum_output.csv",'w')
811     >>> for line in contents:
812     ... line = line.replace("\n","")
813     ... begNo,endNo,enumValues = line.split(",")
814     ... begTest = begNo.split(" ")[-1]
815     ... endTest = endNo.split(" ")[-1]
816     ... rangeTest = FixBatesRange_func.EnumerateBates(begTest,endTest)
817     ... enumCount = len(rangeTest)
818     ... if ";" in enumValues:
819     ... kpmgEnumCount = len(enumValues.split(";"))
820     ... else:
821     ... kpmgEnumCount = 1
822     ... if enumCount == kpmgEnumCount:
823     ... pass
824     ... else:
825     ... outputFile2.write(line+"\n")
826     ... if begNo in MasterList:
827     ... pass
828     ... elif begNo.replace(" "," ") in MasterList:
829     ... pass
830     ... elif begNo.replace(" "," ") in MasterList:
831     ... pass
832     ... elif begNo.replace(" "," ") in MasterList:
833     ... pass
834     ... elif begNo.replace(" ","") in MasterList:
835     ... pass
836     ... elif begNo.replace(" ","") in MasterList:
837     ... pass
838     ... else:
839     ... outputFile.write(line+"\n")
840     ...
841     >>> outputFile.close()
842 nino.borges 558 >>> outputFile2.close()
843    
844     ==== END ====