ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/RandomCodeRequests/HoneyWell_reqs.txt
Revision: 580
Committed: Thu Mar 26 13:50:30 2015 UTC (11 years ago) by nino.borges
Content type: text/plain
File size: 25287 byte(s)
Log Message:
update before leaving

File Contents

# User Rev Content
1 nino.borges 580 === Here I was asked to group dup documents using a field where they are putting in the dup values ===
2     == DO NOT USE. Moved updated code to standalone prog called PerligoDupeGroup.py ==
3 nino.borges 566 == First I made the matrix, flattening the data into “groups” ==
4     >>> matrix = {}
5 nino.borges 580 >>> contents = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\20150109_dedupeProj\NearDupesExport.dat").readlines()
6 nino.borges 566 >>> matrix2 = {}
7     >>> count = 1
8     >>> for line in contents:
9     line = line.replace("\n","")
10     line = line.replace("\r","")
11     line = line.split("|")
12     test = None
13     for docID in line:
14     if docID in matrix.keys():
15     test = matrix[docID]
16     if test:
17     pass
18     else:
19     test = count
20     count = count + 1
21     for docID in line:
22     matrix[docID] = test
23    
24     >>> len(matrix.keys())
25     >>> for begNo in matrix.keys():
26     group = matrix[begNo]
27     if group in matrix2.keys():
28     if begNo in matrix2[group]:
29     pass
30     else:
31     matrix2[group].append(begNo)
32     else:
33     matrix2[group] = [begNo,]
34    
35     >>> outputFile = open(r"/Users/ninoborges/Dropbox/Misc/359_export.txt",'w')
36     >>> for i in matrix2.keys():
37     outputFile.write(str(matrix2[i])+"\n")
38    
39    
40     >>> outputFile.close()
41     >>> len(matrix2.keys())
42     == Now use a list as the preferred “original” list ==
43     >>> contents = open(r"/Users/ninoborges/Dropbox/Misc/originals_list.dat").readlines()
44     >>> origList = []
45     >>> for line in contents:
46     line = line.replace("\n","")
47     origList.append(line)
48    
49    
50     >>> len(origList)
51    
52     == Now unpack and if it’s in the orig list, use that; if not, use the smaller one. ==
53     >>> outputFile = open(r"/Users/ninoborges/Dropbox/Misc/finalReport.txt",'w')
54     >>> for k in matrix2.keys():
55     group = matrix2[k]
56     group.sort()
57     parent = None
58     for orig in origList:
59     if orig in group:
60     parent = orig
61     if parent:
62     outputFile.write(parent + "|")
63     for i in group:
64     if i == parent:
65     pass
66     else:
67     outputFile.write(i+",")
68     else:
69     outputFile.write(group[0]+"|")
70     for i in group[1:]:
71     outputFile.write(i+",")
72     outputFile.write('\n')
73    
74    
75     >>> outputFile.close()
76     ===end===
77    
78 nino.borges 559 === Here I was asked to make a report of the relates to but using priority tags ===
79     == First I take the priority scale that they gave me into a dict ==
80     contents = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo_Priority\Group A\GroupA_pri.txt").readlines()
81     >>> priMatrix = {}
82 nino.borges 558 >>> for line in contents:
83     ... line = line.replace("\n","")
84 nino.borges 559 ... line = line.split("*")
85     ... pri = line[0]
86     ... tag = line[2]
87     ... tag = tag.replace(" ","")
88     ... tag = tag.upper()
89     ... if pri in priMatrix.keys():
90     ... priMatrix[pri].append(tag)
91     ... else:
92     ... priMatrix[pri] = [tag,]
93 nino.borges 558 ...
94 nino.borges 559 == Then I parse the export from the db ==
95     >>> contents = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo_Priority\Group A\export_20140926_163727.dat").readlines()
96     >>> head = contents[0]
97     >>> head = head.replace("\n","")
98     >>> contents = contents[1:]
99     >>> head = head.split("|")
100     >>> head = head[1:]
101     >>> dbMatrix = {}
102     >>> for line in contents:
103     ... line = line.replace("\n","")
104     ... line = line.split("|")
105     ... entries = line[1:]
106     ... count = 0
107     ... finalList = []
108     ... for i in entries:
109     ... if i:
110     ... if ";" in i:
111     ... i = i.split(";")
112     ... else:
113     ... i = [i,]
114     ... for x in i:
115     ... x = head[count] + "|" + x
116     ... x = x.replace(" ","")
117     ... x = x.upper()
118     ... finalList.append(x)
119     ... count = count +1
120     ... dbMatrix[line[0]] = list(finalList)
121     ...
122     == Now unpack ==
123     >>> tallyMatrix = {}
124     >>> for i in priMatrix.keys():
125     ... tallyMatrix[i] = []
126    
127     for bates in dbMatrix.keys():
128     ... for entry in dbMatrix[bates]:
129     ... if entry in priMatrix['A1']:
130     ... if entry in tallyMatrix['A1']:
131     ... pass
132     ... else:
133     ... tallyMatrix['A1'].append(bates)
134     ... elif entry in priMatrix['A2']:
135     ... if entry in tallyMatrix['A2']:
136     ... pass
137     ... else:
138     ... tallyMatrix['A2'].append(bates)
139     ... elif entry in priMatrix['A3']:
140     ... if entry in tallyMatrix['A3']:
141     ... pass
142     ... else:
143     ... tallyMatrix['A3'].append(bates)
144     ... elif entry in priMatrix['A4']:
145     ... if entry in tallyMatrix['A4']:
146     ... pass
147     ... else:
148     ... tallyMatrix['A4'].append(bates)
149     ... elif entry in priMatrix['A5']:
150     ... if entry in tallyMatrix['A5']:
151     ... pass
152     ... else:
153     ... tallyMatrix['A5'].append(bates)
154     ... elif entry in priMatrix['A6']:
155     ... if entry in tallyMatrix['A6']:
156     ... pass
157     ... else:
158     ... tallyMatrix['A6'].append(bates)
159     ... elif entry in priMatrix['A7']:
160     ... if entry in tallyMatrix['A7']:
161     ... pass
162     ... else:
163     ... tallyMatrix['A7'].append(bates)
164     ... elif entry in priMatrix['A8']:
165     ... if entry in tallyMatrix['A8']:
166     ... pass
167     ... else:
168     ... tallyMatrix['A8'].append(bates)
169     ... elif entry in priMatrix['A9']:
170     ... if entry in tallyMatrix['A9']:
171     ... pass
172     ... else:
173     ... tallyMatrix['A9'].append(bates)
174    
175     ## But remember there will be dups. 1 document will exist in multiple A's
176    
177     >>> len(tallyMatrix['A1'])
178     103
179     >>> len(tallyMatrix['A2'])
180     51
181     >>> len(tallyMatrix['A3'])
182     126
183     >>> len(tallyMatrix['A4'])
184     230
185     >>> len(tallyMatrix['A5'])
186     6020
187     >>> len(tallyMatrix['A6'])
188     9458
189     >>> len(tallyMatrix['A7'])
190     3555
191     >>>
192    
193     == So now we write to file but removing dups ==
194     >>> outputFile = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo_Priority\Group A\overlay.dat",'w')
195     >>> outputFile.write("Bates|Level 1 Review Coding Consolidated\n")
196     >>> a1List = []
197     >>> a2List = []
198     >>> a3List = []
199     >>> a4List = []
200     >>> a5List = []
201     >>> a6List = []
202     >>> a7List = []
203     >>> a8List = []
204     >>> a9List = []
205     >>> totList = []
206     >>> sets = priMatrix.keys()
207     >>> sets.sort()
208     >>> aLists = [a1List,a2List,a3List,a4List,a5List,a6List,a7List,a8List,a9List]
209     >>> count = 0
210     ## This only works because I go in order.
211     >>> for a in sets:
212     ... for bates in tallyMatrix[a]:
213     ... if bates in totList:
214     ... pass
215     ... else:
216     ... outputFile.write("%s|%s\n"%(bates,a))
217     ... totList.append(bates)
218     ... aLists[count].append(bates)
219     ... count = count +1
220     ...
221 nino.borges 558 >>> outputFile.close()
222 nino.borges 559 >>> len(a1List)
223     103
224     >>> len(a2List)
225     51
226     >>> len(a3List)
227     126
228     >>> len(a4List)
229     230
230     >>> len(a5List)
231     6020
232     >>> len(a6List)
233     8863
234     >>> len(a7List)
235     3115
236     >>> len(totList)
237     18508
238     >>>
239    
240    
241 nino.borges 558 === END ===
242 nino.borges 559 === Here I was asked to get a report of documents that contain X tags ===
243     >>> contents = open(r"\\BSTDD967DTW1\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo\export_2.dat").readlines()
244     >>> contents = contents[1:]
245     >>> contents[0]
246     'HWHC000000001||||Livonia, MI|Invoice/Purchase order|||||Document retention (policies/compliance)|\n'
247     >>> matrix = {}
248     >>> for line in contents:
249     ... line = line.replace("\n","")
250     ... line = line.split("|")
251     ... bates = line[0]
252     ... parts = line[1:]
253     ... countList = []
254     ... for i in parts:
255     ... if i:
256     ... i = i.split(";")
257     ... for x in i:
258     ... countList.append(x)
259     ... if len(countList) in matrix.keys():
260     ... matrix[len(countList)].append(bates)
261     ... else:
262     ... matrix[len(countList)] = [bates,]
263     ...
264     >>> outputFile = open(r"\\BSTDD967DTW1\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo\relatesToReport.txt",'w')
265     >>> for i in matrix.keys():
266     ... outputFile.write("%s documents have %s tags.\n"% (len(matrix[i]),i))
267     ...
268     >>> outputFile.close()
269     === END ===
270    
271     === Here I was asked to do a 5 before and 5 after field and at first only for docs without families ===
272     == first I removed documents with families from the entire universe ==
273     >>> contents = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\export.csv").readlines()
274     >>> outputFile = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\StandAloneOnly.dat",'w')
275     >>> for line in contents:
276     ... newline = line.replace("\n","")
277     ... newline = newline.split(",")
278     ... err = False
279     ... if newline[0] == newline [2]:
280     ... err = True
281     ... if newline[1] == newline[3]:
282     ... err = True
283     ... if newline[2] == "":
284     ... err = True
285     ... if err == True:
286     ... outputFile.write(line)
287    
288     == then to make a packed matrix ==
289     =first back buffer=
290     >>> contents = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\StandAloneOnly.dat").readlines()
291     >>> outputFile = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\ShiftedOverlay.dat",'w')
292     >>> befBuffer = []
293     >>> befMatrix = {}
294     >>> for line in contents:
295     ... line = line.replace("\n","")
296     ... begNo = line.split(",")[0]
297     ... befBuffer.append(begNo)
298     ... if len(befBuffer) > 6:
299     ... nul = befBuffer.pop(0)
300     ... befMatrix[begNo] = list(befBuffer)
301    
302     = then reversing for the after buffer=
303     >>> contents.reverse()
304     >>> aftMatrix = {}
305     >>> aftBuffer = []
306     >>> for line in contents:
307     ... line = line.replace("\n","")
308     ... begNo = line.split(",")[0]
309     ... aftBuffer.append(begNo)
310     ... if len(aftBuffer) > 6:
311     ... nul = aftBuffer.pop(0)
312     ... aftMatrix[begNo] = list(aftBuffer)
313    
314     == now to unpack and make the final load file ==
315     >>> outputFile = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\ShiftedOverlay.dat",'w')
316     >>> for line in contents:
317     ... begno = line.split(",")[0]
318     ... outputFile.write("%s|"%begno)
319     ... for i in befMatrix[begno]:
320     ... outputFile.write("%s;"%i)
321     ... count = 1
322     ... aftList = aftMatrix[begno]
323     ... aftList.remove(begno)
324     ... aftList.sort()
325     ... for i in aftList:
326     ... outputFile.write("%s"%i)
327     ... if count < len(aftList):
328     ... outputFile.write(";")
329     ... count = count +1
330     ... outputFile.write("\n")
331     ...
332     >>> outputFile.close()
333     === END ===
334     === Here I was asked, in proj 199, to compare the page counts of prod vs review and report ===
335     for f in os.listdir(startDir):
336     ... outputFile = open(os.path.join(outputDir,os.path.splitext(f)[0]+".txt"),'w')
337     ... outputErrFile = open(os.path.join(outputDir,os.path.splitext(f)[0]+".err"),'w')
338     ... contents = open(os.path.join(startDir,f)).readlines()
339     ... headder = contents[0]
340     ... outputFile.write(headder)
341     ... outputErrFile.write(headder)
342     ... contents = contents[1:]
343     ... for line in contents:
344     ... line = line.replace("\n","")
345     ... prodBeg,prodEnd,beg,end,concordBeg = line.split(",")
346     ... if prodBeg:
347     ... pass
348     ... else:
349     ... prodBeg = beg
350     ... err = False
351     ... if ";" in prodBeg:
352     ... err = True
353     ... if ";" in prodEnd:
354     ... testList = prodEnd.split(";")
355     ... if len(testList) == 2:
356     ... test1,test2 = prodEnd.split(";")
357     ... test1 = test1.strip()
358     ... test2 = test2.strip()
359     ... if test1:
360     ... if test2:
361     ... err = True
362     ... else:
363     ... prodEnd = test1
364     ... elif test2:
365     ... prodEnd = test2
366     ... else:
367     ... err = True
368     ... else:
369     ... err = True
370     ... if ";" in concordBeg:
371     ... err = True
372     ... if ";" in end:
373     ... err = True
374     ... if prodBeg[-5:] == "LOKEY":
375     ... err = True
376     ... if concordBeg == "":
377     ... err = True
378     ... if err == False:
379     ... try:
380     ... prodList = FixBatesRange_func.EnumerateBates(prodBeg,prodEnd)
381     ... revList = FixBatesRange_func.EnumerateBates(concordBeg,end)
382     ... if len(prodList) == len(revList):
383     ... outputFile.write(line+"\n")
384     ... else:
385     ... outputErrFile.write(line+"\n")
386     ... except:
387     ... outputErrFile.write(line+"\n")
388     ... else:
389     ... outputErrFile.write(line+"\n")
390     ... outputFile.close()
391     ... outputErrFile.close()
392    
393     == Then make a dat file for KPMG ==
394     for f in os.listdir(startDir):
395     ... outputFile = open(os.path.join(outputDir,os.path.splitext(f)[0]+".DAT"),'w')
396     ... outputFile.write("BegImage|%s BatesBeg|%s BatesEnd|Prod Production History Multi Choice\n"%(os.path.splitext(f)[0],os.path.splitext(f)[0]))
397     ... contents = open(os.path.join(startDir,f)).readlines()
398     ... contents = contents[1:]
399     ... for line in contents:
400     ... line = line.replace("\n","")
401     ... prodBeg,prodEnd,beg,end,concordBeg = line.split(",")
402     ... if prodBeg:
403     ... pass
404     ... else:
405     ... prodBeg = beg
406     ... err = False
407     ... if ";" in prodBeg:
408     ... err = True
409     ... if ";" in prodEnd:
410     ... testList = prodEnd.split(";")
411     ... if len(testList) == 2:
412     ... test1,test2 = prodEnd.split(";")
413     ... test1 = test1.strip()
414     ... test2 = test2.strip()
415     ... if test1:
416     ... if test2:
417     ... err = True
418     ... else:
419     ... prodEnd = test1
420     ... elif test2:
421     ... prodEnd = test2
422     ... else:
423     ... err = True
424     ... else:
425     ... err = True
426     ... if ";" in concordBeg:
427     ... err = True
428     ... if ";" in end:
429     ... err = True
430     ... if prodBeg[-5:] == "LOKEY":
431     ... err = True
432     ... if concordBeg == "":
433     ... err = True
434     ... if err == False:
435     ... outputFile.write("%s|%s|%s|%s\n"%(concordBeg,prodBeg,prodEnd,os.path.splitext(f)[0]))
436     ... outputFile.close()
437     === End ===
438    
439     === Here I was asked to group and code documents by doc title ===
440     ## First take her huge xls file and make a smaller pipe-delimited file containing only records that have values in the key fields
441     >>> contents = open(r"W:\Manny\Client\honeywell\DocTitleBatchCode_Proj\Document Title Tally.csv").readlines()
442     >>> outputFile = open(r"W:\Manny\Client\honeywell\DocTitleBatchCode_Proj\Document Title Tally_small.csv",'w')
443     >>> contents = contents[1:]
444     >>> for line in contents:
445     ... wr = False
446     ... line = line.split("|")
447     ... if line[2]:
448     ... wr = True
449     ... if line[4]:
450     ... wr = True
451     ... if line[5]:
452     ... wr = True
453     ... if line[6]:
454     ... wr = True
455     ... if line[7]:
456     ... wr = True
457     ... if line[8]:
458     ... wr = True
459     ... if line[9]:
460     ... wr = True
461     ... if wr:
462     ... outputFile.write("%s|%s|%s|%s|%s|%s|%s|%s\n"% (line[0],line[2],line[4],line[5],line[6],line[7],line[8],line[9]))
463     ...
464     >>> outputFile.close()
465    
466     ## Create the matrix, if you don't have a pickle. Here I write to a pickle, which can be reused, since the doctitle shouldn't change
467     >>> contents = open(r"W:\Manny\Client\honeywell\DocTitleTally.dat").readlines()
468     >>> matrix = {}
469     >>> for line in contents:
470     ... line = line.replace("\n","")
471     ... line = line.split("|")
472     ... bates = line[0]
473     ... title = line[2]
474     ... if title in matrix.keys():
475     ... matrix[title].append(bates)
476     ... else:
477     ... matrix[title] = [bates,]
478     ...
479     >>> len(matrix.keys())
480     239844
481     ## saving to a pickle
482     import pickle
483     >>> with open(r"C:\McDermott-Discovery\Client\docTitle_proj.pkl",'wb') as f:
484     ... pickle.dump(matrix,f,pickle.HIGHEST_PROTOCOL)
485     ...
486     ##
487     >>> outputFile = open(r"C:\McDermott-Discovery\Client\docTitle_proj.dat",'w')
488     >>> contents = open(r"\\nykads01\data\CLI\Honeywell-17th Floor\Manny\20140630_DocTitleTally_MannyCopy_small.dat").readlines()
489     >>> headder = contents[0]
490     >>> headder = "Bates|" + headder
491     >>> outputFile.write(headder)
492     >>> for line in contents[1:]:
493     ... line = line.replace("\n","")
494     ... line = line.split("|")
495     ... for b in matrix[line[0]]:
496     ... outputFile.write("%s|%s|%s|%s|%s|%s|%s\n"%(b,line[1],line[2],line[3],line[4],line[5],line[6]))
497     ...
498     >>> outputFile.close()
499    
500    
501     ## family groups
502     >>> contents = open(r"C:\McDermott-Discovery\Client\RL_BegImage_BegAttach.csv").readlines()
503     >>> contents = contents[1:]
504     >>> familyLookupMatrix = {}
505     >>> familyMatrix = {}
506     >>> for line in contents:
507     ... line = line.replace('"','')
508     ... line = line.replace("\n","")
509     ... bates,begAttach = line.split(",")
510     ... familyLookupMatrix[bates] = begAttach
511     ... if begAttach in familyMatrix.keys():
512     ... familyMatrix[begAttach].append(bates)
513     ... else:
514     ... familyMatrix[begAttach] = [bates,]
515     ...
516     ## Now the meat of the proj
517     >>> contents = open(r"C:\McDermott-Discovery\Client\docTitle_proj.dat").readlines()
518     >>> contents = contents[1:]
519     >>> fbc = {}
520     >>> rbg1 = {}
521     >>> rbg2 = {}
522     >>> rbg3 = {}
523     >>> rbg4 = {}
524     >>> rbg5 = {}
525     >>> for line in contents:
526     ... line = line.replace("\n","")
527     ... line = line.split("|")
528     ... parent = familyLookupMatrix[line[0]]
529     ... fullFam = familyMatrix[parent]
530     ... for x in fullFam:
531     ... if line[1]:
532     ... if x in fbc.keys():
533     ... if line[1] in fbc[x]:
534     ... pass
535     ... else:
536     ... fbc[x].append(line[1])
537     ... else:
538     ... fbc[x] = [line[1],]
539     ... if line[2]:
540     ... if x in rbg1.keys():
541     ... if line[2] in rbg1[x]:
542     ... pass
543     ... else:
544     ... rbg1[x].append(line[2])
545     ... else:
546     ... rbg1[x] = [line[2],]
547     ... if line[3]:
548     ... if x in rbg2.keys():
549     ... if line[3] in rbg2[x]:
550     ... pass
551     ... else:
552     ... rbg2[x].append(line[3])
553     ... else:
554     ... rbg2[x] = [line[3],]
555     ... if line[4]:
556     ... if x in rbg3.keys():
557     ... if line[4] in rbg3[x]:
558     ... pass
559     ... else:
560     ... rbg3[x].append(line[4])
561     ... else:
562     ... rbg3[x] = [line[4],]
563     ... if line[5]:
564     ... if x in rbg4.keys():
565     ... if line[5] in rbg4[x]:
566     ... pass
567     ... else:
568     ... rbg4[x].append(line[5])
569     ... else:
570     ... rbg4[x] = [line[5],]
571     ... if line[6]:
572     ... if x in rbg5.keys():
573     ... if line[6] in rbg5[x]:
574     ... pass
575     ... else:
576     ... rbg5[x].append(line[6])
577     ... else:
578     ... rbg5[x] = [line[6],]
579     ...
580     >>> mainList = []
581     >>> for i in fbc.keys():
582     ... if i in mainList:
583     ... pass
584     ... else:
585     ... mainList.append(i)
586     ...
587     >>> len(mainList)
588     80490
589     >>> for i in rbg1.keys():
590     ... if i in mainList:
591     ... pass
592     ... else:
593     ... mainList.append(i)
594     ...
595     >>> len(mainList)
596     80490
597     >>> for i in rbg2.keys():
598     ... if i in mainList:
599     ... pass
600     ... else:
601     ... mainList.append(i)
602     ...
603     >>> for i in rbg3.keys():
604     ... if i in mainList:
605     ... pass
606     ... else:
607     ... mainList.append(i)
608     ...
609     >>> for i in rbg4.keys():
610     ... if i in mainList:
611     ... pass
612     ... else:
613     ... mainList.append(i)
614     ...
615     >>> for i in rbg5.keys():
616     ... if i in mainList:
617     ... pass
618     ... else:
619     ... mainList.append(i)
620     >>> mainList.sort()
621     >>> outputFile = open(r"C:\McDermott-Discovery\Client\output.txt",'w')
622     >>> for bates in mainList:
623     ... if bates in fbc.keys():
624     ... if fbc[bates]:
625     ... fbcVal = str(fbc[bates])
626     ... else:
627     ... fbcVal = ""
628     ... else:
629     ... fbcVal = ""
630     ... if bates in rbg1.keys():
631     ... if rbg1[bates]:
632     ... rbg1Val = str(rbg1[bates])
633     ... else:
634     ... rbg1Val = ""
635     ... else:
636     ... rbg1Val = ""
637     ... if bates in rbg2.keys():
638     ... if rbg2[bates]:
639     ... rbg2Val = str(rbg2[bates])
640     ... else:
641     ... rbg2Val = ""
642     ... else:
643     ... rbg2Val = ""
644     ... if bates in rbg3.keys():
645     ... if rbg3[bates]:
646     ... rbg3Val = str(rbg3[bates])
647     ... else:
648     ... rbg3Val = ""
649     ... else:
650     ... rbg3Val = ""
651     ... if bates in rbg4.keys():
652     ... if rbg4[bates]:
653     ... rbg4Val = str(rbg4[bates])
654     ... else:
655     ... rbg4Val = ""
656     ... else:
657     ... rbg4Val = ""
658     ... if bates in rbg5.keys():
659     ... if rbg5[bates]:
660     ... rbg5Val = str(rbg5[bates])
661     ... else:
662     ... rbg5Val = ""
663     ... else:
664     ... rbg5Val = ""
665     ... outputFile.write("%s|%s|%s|%s|%s|%s|%s\n"%(bates,fbcVal,rbg1Val,rbg2Val,rbg3Val,rbg4Val,rbg5Val))
666     ...
667     >>> outputFile.close()
668    
669     ## Cleanup the output and make the final overlay
670     >>> contents = open(r"C:\McDermott-Discovery\Client\output.txt").readlines()
671     >>> outputFile = open(r"C:\McDermott-Discovery\Client\overlay.dat",'w')
672     >>> for line in contents:
673     ... line = line.split("|")
674     ... for i in line:
675     ... i = i.replace("'","")
676     ... i = i.replace("[","")
677     ... i = i.replace("]","")
678     ... i = i.replace(",",";")
679     ... if "\n" in i:
680     ... outputFile.write(i)
681     ... else:
682     ... outputFile.write(i + "|")
683     ...
684     >>> outputFile.close()
685    
686    
687     === Here I was asked to copy from long text date to real date field but there were errors because some of the dates were missing info, i.e., 00/00/1991. I fixed the entries based on their criteria.===
688     >>> contents = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Document_Delivery\Vol003\GroupA\export_20140929_192612.csv").readlines()
689     >>> contents = contents[1:]
690     >>> outputFile = open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Document_Delivery\Vol003\GroupA\FIXED.dat",'w')
691     >>> outputFile.write("BegDoc|DocDate_New\n")
692     >>> for line in contents:
693     ... line = line.replace("\n","")
694     ... bates,date,nul = line.split("|")
695     ... if date:
696     ... mm,dd,yy = date.split("/")
697     ... if mm == '00':
698     ... mm = '01'
699     ... if dd == '00':
700     ... dd = '01'
701     ... if yy == '0000':
702     ... yy = '1800'
703     ... date = '%s/%s/%s'%(mm,dd,yy)
704     ... outputFile.write("%s|%s\n"%(bates,date))
705     >>> outputFile.close()
706     === END ===
707 nino.borges 558 === Here I was asked to make a tally report from the prefixes in HOBX ===
708     >>> contents = open(r"W:\Manny\Client\honeywell\PrefixTallyProj\20140717 search for manny_export\20140717 search for manny_export.csv").readlines()
709     >>> contents = contents[1:]
710     >>> import FixBatesRange_func
711     >>> outputFile = open(r"W:\Manny\Client\honeywell\PrefixTallyProj\20140717 search for manny_export\output.txt",'w')
712     >>> errorFile = open(r"W:\Manny\Client\honeywell\PrefixTallyProj\20140717 search for manny_export\errFile.txt",'w')
713     >>> matrix = {}
714     >>> cnt = NinoGenTools.Counter()
715     >>> for line in contents:
716     ... line = line.replace("\n","")
717     ... line = line.replace('"',"")
718     ... try:
719     ... alph = FixBatesRange_func.SeperateAlpha3(line)[0]
720     ... except:
721     ... alph = "00"
722     ... errorFile.write("%s\n"%line)
723     ... if alph[:2] == "00":
724     ... alph = ""
725     ... elif alph[-1] == "0":
726     ... while alph[-1] == "0":
727     ... alph = alph[:-1]
728     ... if alph in matrix.keys():
729     ... matrix[alph] = matrix[alph] + 1
730     ... else:
731     ... matrix[alph] = 1
732     ... cnt.inc()
733     ...
734     >>> outputFile.close()
735     >>> errorFile.close()
736     === End ===
737 nino.borges 475
738 nino.borges 558 ==== Here I was asked to find documents with family problems. Note that the values have to be enumerated first.
739     import FixBatesRange_func
740     >>> content = open(r"W:\Manny\Client\Perligo\Family_probs_export.csv").readlines()
741     >>> batesValues = []
742     >>> attachValues = {}
743     >>> for line in content:
744     ... line = line.replace("\n","")
745     ... batesEnum = line.split("|")[6]
746     ... batesEnum = batesEnum.split(';')
747     ... if batesEnum >1:
748     ... for i in batesEnum:
749     ... batesValues.append(i)
750     ... else:
751     ... batesValues.append(batesEnum)
752     ...
753     >>> content = content[1:]
754     >>> for line in content:
755     ... line = line.replace("\n","")
756     ... batesBeg = line.split("|")[4]
757     ... batesEnd = line.split("|")[5]
758     ... batesEnum = FixBatesRange_func.EnumerateBates(batesBeg,batesEnd)
759     ... clump = "%s,%s"%(batesBeg,batesEnd)
760     ... attachValues[clump] = batesEnum
761     ...
762     >>> outputFile = open(r"W:\Manny\Client\Perligo\export_output.txt",'w')
763     >>> for clump in attachValues.keys():
764     ... errList = []
765     ... for i in attachValues[clump]:
766     ... if i in batesValues:
767     ... pass
768     ... else:
769     ... errList.append(i)
770     ... if errList:
771     ... outputFile.write("%s family is an issue because it's missing pages: "%clump)
772     ... for x in errList:
773     ... outputFile.write("%s;"%x)
774     ... outputFile.write("\n")
775     ...
776     >>> outputFile.close()
777     >>> begAttachList = []
778     >>> contents = open(r"W:\Manny\Client\Perligo\export_output.txt").readlines()
779     >>> for i in contents:
780     ... i = i.split(",")
781     ... begAttachList.append(i[0])
782     ...
783     >>> contents = open(r"W:\Manny\Client\Perligo\Family_probs_export.csv").readlines()
784     >>> outputFile = open(r"W:\Manny\Client\Perligo\overlay.dat",'w')
785     >>> for line in contents:
786     ... line = line.split("|")
787     ... if line[4] in begAttachList:
788     ... outputFile.write("%s|x\n"%line[0])
789     ...
790     >>> outputFile.close()
791     >>>
792    
793     ==== END =======
794    
795    
796 nino.borges 475 ==== Here I was asked to both test the kpmg enumerated values field and to also verify that
797     all of the bendix ranges in rl were in my LFP.
798     -------------------------------
799     >>> contents = open(r"\\bstads01\app\Manny\Client\honeywell\LFP Export\LFP Export\Non_microfiche\373540873_REVIEW_JUSTBENDIX.lfp").readlines()
800     >>> MasterList = []
801     >>> for line in contents:
802     ... bates = line.split(",")[1]
803     ... MasterList.append(bates)
804     ...
805     >>> len(MasterList)
806     357525
807    
808     -------------------------------
809     >>> outputFile2 = open(r"C:\Client\honeywell\BENDIX-Beg_End_Enum_enumErr.csv",'w')
810     >>> outputFile = open(r"C:\Client\honeywell\BENDIX-Beg_End_Enum_output.csv",'w')
811     >>> for line in contents:
812     ... line = line.replace("\n","")
813     ... begNo,endNo,enumValues = line.split(",")
814     ... begTest = begNo.split(" ")[-1]
815     ... endTest = endNo.split(" ")[-1]
816     ... rangeTest = FixBatesRange_func.EnumerateBates(begTest,endTest)
817     ... enumCount = len(rangeTest)
818     ... if ";" in enumValues:
819     ... kpmgEnumCount = len(enumValues.split(";"))
820     ... else:
821     ... kpmgEnumCount = 1
822     ... if enumCount == kpmgEnumCount:
823     ... pass
824     ... else:
825     ... outputFile2.write(line+"\n")
826     ... if begNo in MasterList:
827     ... pass
828     ... elif begNo.replace(" "," ") in MasterList:
829     ... pass
830     ... elif begNo.replace(" "," ") in MasterList:
831     ... pass
832     ... elif begNo.replace(" "," ") in MasterList:
833     ... pass
834     ... elif begNo.replace(" ","") in MasterList:
835     ... pass
836     ... elif begNo.replace(" ","") in MasterList:
837     ... pass
838     ... else:
839     ... outputFile.write(line+"\n")
840     ...
841     >>> outputFile.close()
842 nino.borges 558 >>> outputFile2.close()
843    
844     ==== END ====