| 1 |
< |
=== Here I was asked to copy from a long-text date field to a real date field, but there were errors because some of the dates were missing info, i.e. 00/00/1991. I fixed the entries based on their criteria. === |
| 2 |
< |
contents = open(r"W:\Manny\Client\honeywell\20140718_ErrorLines_635412948936590376.dat").readlines() |
| 3 |
< |
>>> outputFile = open(r"W:\Manny\Client\honeywell\20140718_ErrorLines_FIXED.dat",'w') |
| 1 |
> |
=== Here I was asked to make a report of the relates to but using priority tags === |
| 2 |
> |
== First I take the priority scale that they gave me into a dict == |
# Load the priority scale into priMatrix: {priority code: [normalised tags]}.
# Each row is "*"-delimited; field 0 is the priority code and field 2 the tag.
# Tags are normalised (spaces removed, upper-cased) before being stored.
with open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo_Priority\Group A\GroupA_pri.txt") as priFile:
    contents = priFile.readlines()

priMatrix = {}
for line in contents:
    line = line.replace("\n", "").split("*")
    pri = line[0]
    tag = line[2].replace(" ", "").upper()
    # setdefault avoids the O(n) "pri in priMatrix.keys()" scan of Python 2.
    priMatrix.setdefault(pri, []).append(tag)
| 17 |
> |
== Then I parse the export from the db == |
# Parse the pipe-delimited database export into dbMatrix:
# {first column of the row: ["<HEADER>|<VALUE>", ...]}.
# Multi-value cells are split on ";" and every entry is normalised
# (spaces removed, upper-cased).
with open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo_Priority\Group A\export_20140926_163727.dat") as exportFile:
    contents = exportFile.readlines()

# The first row is the header; its first column labels the row key, so drop it.
head = contents[0].replace("\n", "").split("|")[1:]
contents = contents[1:]

dbMatrix = {}
for line in contents:
    line = line.replace("\n", "").split("|")
    finalList = []
    # NOTE(review): the column counter is assumed to advance for every cell,
    # empty or not, so that head[count] stays aligned with the cell's column.
    for count, cell in enumerate(line[1:]):
        if not cell:
            continue
        # split(";") on a cell without ";" yields the single value unchanged.
        for value in cell.split(";"):
            entry = (head[count] + "|" + value).replace(" ", "").upper()
            finalList.append(entry)
    dbMatrix[line[0]] = finalList
| 45 |
> |
== Now unpack == |
# Unpack: build tallyMatrix {priority code: [bates, ...]}.
# Each entry of a document is assigned to the first priority (in sorted key
# order) whose tag list contains it, and the document's bates number is
# recorded under that priority.
tallyMatrix = {}
for i in priMatrix.keys():
    tallyMatrix[i] = []

# Iterating the priorities that actually exist replaces the hard-coded
# 'A1'..'A9' elif chain, which raised KeyError when a priority was missing.
priOrder = sorted(priMatrix.keys())
# Per-priority set of bates already recorded.  The original tested
# "entry in tallyMatrix[...]" (a tag against a list of bates numbers), which
# never matched, so a document could be appended to the same priority
# several times.
seenBates = dict((pri, set()) for pri in priOrder)

for bates in dbMatrix.keys():
    for entry in dbMatrix[bates]:
        for pri in priOrder:
            if entry in priMatrix[pri]:
                if bates not in seenBates[pri]:
                    seenBates[pri].add(bates)
                    tallyMatrix[pri].append(bates)
                # First matching priority wins, as in the elif chain.
                break
| 97 |
> |
|
| 98 |
> |
## But remember there will be dups. 1 document will exist in multiple A's |
| 99 |
> |
|
| 100 |
> |
>>> len(tallyMatrix['A1']) |
| 101 |
> |
103 |
| 102 |
> |
>>> len(tallyMatrix['A2']) |
| 103 |
> |
51 |
| 104 |
> |
>>> len(tallyMatrix['A3']) |
| 105 |
> |
126 |
| 106 |
> |
>>> len(tallyMatrix['A4']) |
| 107 |
> |
230 |
| 108 |
> |
>>> len(tallyMatrix['A5']) |
| 109 |
> |
6020 |
| 110 |
> |
>>> len(tallyMatrix['A6']) |
| 111 |
> |
9458 |
| 112 |
> |
>>> len(tallyMatrix['A7']) |
| 113 |
> |
3555 |
| 114 |
> |
>>> |
| 115 |
> |
|
| 116 |
> |
== So now we write to file but removing dups == |
# Write the overlay file, keeping only the first (highest) priority under
# which each bates number appears.
a1List = []
a2List = []
a3List = []
a4List = []
a5List = []
a6List = []
a7List = []
a8List = []
a9List = []
totList = []
aLists = [a1List, a2List, a3List, a4List, a5List, a6List, a7List, a8List, a9List]

# sorted() works on both Python 2 lists and Python 3 dict views, unlike
# calling .sort() on the result of keys().
sets = sorted(priMatrix.keys())
# Set used only for the membership test; totList keeps the write order.
writtenBates = set()

with open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo_Priority\Group A\overlay.dat", 'w') as outputFile:
    outputFile.write("Bates|Level 1 Review Coding Consolidated\n")
    ## This only works because I go in order.
    # count indexes aLists in step with the sorted priority codes; more than
    # nine priority codes would raise IndexError, as in the original.
    for count, a in enumerate(sets):
        for bates in tallyMatrix[a]:
            if bates in writtenBates:
                continue
            outputFile.write("%s|%s\n" % (bates, a))
            writtenBates.add(bates)
            totList.append(bates)
            aLists[count].append(bates)
| 145 |
> |
>>> len(a1List) |
| 146 |
> |
103 |
| 147 |
> |
>>> len(a2List) |
| 148 |
> |
51 |
| 149 |
> |
>>> len(a3List) |
| 150 |
> |
126 |
| 151 |
> |
>>> len(a4List) |
| 152 |
> |
230 |
| 153 |
> |
>>> len(a5List) |
| 154 |
> |
6020 |
| 155 |
> |
>>> len(a6List) |
| 156 |
> |
8863 |
| 157 |
> |
>>> len(a7List) |
| 158 |
> |
3115 |
| 159 |
> |
>>> len(totList) |
| 160 |
> |
18508 |
| 161 |
> |
>>> |
| 162 |
> |
|
| 163 |
> |
|
| 164 |
> |
=== END === |
| 165 |
> |
=== Here I was asked to get a report of documents that contain X tags === |
# Report how many documents carry N tags.
# matrix maps a tag count to the list of bates numbers having that many tags.
with open(r"\\BSTDD967DTW1\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo\export_2.dat") as exportFile:
    contents = exportFile.readlines()
contents = contents[1:]
# Sample of contents[0] from the session:
# 'HWHC000000001||||Livonia, MI|Invoice/Purchase order|||||Document retention (policies/compliance)|\n'

matrix = {}
for line in contents:
    line = line.replace("\n", "").split("|")
    bates = line[0]
    parts = line[1:]
    countList = []
    for i in parts:
        if i:
            # A cell may hold several ";"-separated tags.
            countList.extend(i.split(";"))
    matrix.setdefault(len(countList), []).append(bates)

with open(r"\\BSTDD967DTW1\Users\eborges\Box Sync\Client\Honeywell\Perligo\Project_RelatesTo\relatesToReport.txt", 'w') as outputFile:
    # Sorted so the report order does not depend on dict ordering.
    for i in sorted(matrix):
        outputFile.write("%s documents have %s tags.\n" % (len(matrix[i]), i))
| 192 |
> |
=== END === |
| 193 |
> |
|
| 194 |
> |
=== Here I was asked to do a 5 before and 5 after field, and at first only for docs without families ===
| 195 |
> |
== first I removed documents with families from the entire uni == |
# Copy to StandAloneOnly.dat every row of export.csv where column 0 equals
# column 2, column 1 equals column 3, or column 2 is empty.
# NOTE(review): presumably columns are BegDoc,EndDoc,BegAttach,EndAttach --
# confirm against the export.
with open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\export.csv") as exportFile:
    contents = exportFile.readlines()

# The with-block guarantees the file is flushed and closed before the next
# step reads it back; the original never closed it.
with open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\StandAloneOnly.dat", 'w') as outputFile:
    for line in contents:
        newline = line.replace("\n", "").split(",")
        if newline[0] == newline[2] or newline[1] == newline[3] or newline[2] == "":
            outputFile.write(line)
| 210 |
> |
|
| 211 |
> |
== then to make a packed matrix == |
| 212 |
> |
=first back buffer= |
# Back buffer: befMatrix maps each document (first CSV column) to a list of
# up to five preceding documents followed by the document itself.
with open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\StandAloneOnly.dat") as standAloneFile:
    contents = standAloneFile.readlines()
# ShiftedOverlay.dat is no longer opened here: the handle was never written to
# or closed in this step, and the unpack step opens the file itself.

befBuffer = []
befMatrix = {}
for line in contents:
    begNo = line.replace("\n", "").split(",")[0]
    befBuffer.append(begNo)
    # Keep a sliding window of six: five predecessors plus the current doc.
    if len(befBuffer) > 6:
        befBuffer.pop(0)
    befMatrix[begNo] = list(befBuffer)
| 224 |
> |
|
| 225 |
> |
= then reversing for the after buffer= |
# After buffer: walk the rows in reverse so the sliding window holds the
# documents that FOLLOW each document in the original order.
contents.reverse()
aftBuffer = []
aftMatrix = {}
for rawLine in contents:
    docId = rawLine.replace("\n", "").split(",")[0]
    aftBuffer.append(docId)
    # Trim to the newest six entries (a no-op while the window is filling).
    del aftBuffer[:-6]
    aftMatrix[docId] = aftBuffer[:]
| 236 |
> |
|
| 237 |
> |
== now to unpack and make the final load file == |
# Unpack both matrices into the final load file, one row per document:
#   <doc>|<before window, each followed by ";"><after docs joined by ";">
# contents is still in reversed order here, so rows are written in reverse.
outputFile = open(r"w:\manny\client\honeywell\perligo\5Before_5After_Proj\ShiftedOverlay.dat", 'w')
for line in contents:
    begno = line.split(",")[0]
    # The after window still contains the document itself; drop it and
    # restore ascending order (both done in place on the aftMatrix entry).
    aftList = aftMatrix[begno]
    aftList.remove(begno)
    aftList.sort()
    beforePart = "".join("%s;" % i for i in befMatrix[begno])
    afterPart = ";".join(aftList)
    outputFile.write("%s|%s%s\n" % (begno, beforePart, afterPart))
outputFile.close()
| 256 |
> |
=== END === |
| 257 |
> |
=== Here I was asked, in prok 199, to compare the page counts of prod vs review and report === |
# For every file in startDir, compare the page count of the production range
# with that of the review range.  Rows whose counts match go to
# <name>.txt in outputDir; everything else goes to <name>.err.
# NOTE(review): FixBatesRange_func.EnumerateBates is defined elsewhere;
# presumably it returns the list of bates numbers in a range -- confirm.
for f in os.listdir(startDir):
    baseName = os.path.splitext(f)[0]
    with open(os.path.join(startDir, f)) as inputFile:
        contents = inputFile.readlines()
    headder = contents[0]
    contents = contents[1:]

    outputFile = open(os.path.join(outputDir, baseName + ".txt"), 'w')
    outputErrFile = open(os.path.join(outputDir, baseName + ".err"), 'w')
    try:
        outputFile.write(headder)
        outputErrFile.write(headder)
        for line in contents:
            line = line.replace("\n", "")
            fields = line.split(",")
            if len(fields) != 5:
                # A malformed row is reported instead of aborting the run.
                outputErrFile.write(line + "\n")
                continue
            prodBeg, prodEnd, beg, end, concordBeg = fields
            if not prodBeg:
                prodBeg = beg

            err = False
            if ";" in prodEnd:
                # Accept exactly two candidates of which exactly one is
                # non-blank; anything else is ambiguous.
                candidates = [part.strip() for part in prodEnd.split(";")]
                nonBlank = [part for part in candidates if part]
                if len(candidates) == 2 and len(nonBlank) == 1:
                    prodEnd = nonBlank[0]
                else:
                    err = True
            if ";" in prodBeg or ";" in concordBeg or ";" in end:
                err = True
            if prodBeg.endswith("LOKEY") or concordBeg == "":
                err = True

            pagesMatch = False
            if not err:
                try:
                    prodList = FixBatesRange_func.EnumerateBates(prodBeg, prodEnd)
                    revList = FixBatesRange_func.EnumerateBates(concordBeg, end)
                    pagesMatch = len(prodList) == len(revList)
                except Exception:
                    # A range that cannot be enumerated is an error row.
                    # Unlike the bare "except:", this lets Ctrl-C through.
                    pagesMatch = False

            if pagesMatch:
                outputFile.write(line + "\n")
            else:
                outputErrFile.write(line + "\n")
    finally:
        outputFile.close()
        outputErrFile.close()
| 315 |
> |
|
| 316 |
> |
== Then make a dat file for KPMG == |
# For every file in startDir write <name>.DAT to outputDir with one row per
# input row that passes the same validation as the page-count comparison.
for f in os.listdir(startDir):
    baseName = os.path.splitext(f)[0]
    with open(os.path.join(startDir, f)) as inputFile:
        contents = inputFile.readlines()
    contents = contents[1:]

    with open(os.path.join(outputDir, baseName + ".DAT"), 'w') as outputFile:
        outputFile.write("BegImage|%s BatesBeg|%s BatesEnd|Prod Production History Multi Choice\n" % (baseName, baseName))
        for line in contents:
            line = line.replace("\n", "")
            fields = line.split(",")
            if len(fields) != 5:
                # Malformed rows are skipped like any other invalid row
                # instead of aborting the run.
                continue
            prodBeg, prodEnd, beg, end, concordBeg = fields
            if not prodBeg:
                prodBeg = beg

            err = False
            if ";" in prodEnd:
                # Accept exactly two candidates of which exactly one is
                # non-blank; anything else is ambiguous.
                candidates = [part.strip() for part in prodEnd.split(";")]
                nonBlank = [part for part in candidates if part]
                if len(candidates) == 2 and len(nonBlank) == 1:
                    prodEnd = nonBlank[0]
                else:
                    err = True
            if ";" in prodBeg or ";" in concordBeg or ";" in end:
                err = True
            if prodBeg.endswith("LOKEY") or concordBeg == "":
                err = True

            if not err:
                outputFile.write("%s|%s|%s|%s\n" % (concordBeg, prodBeg, prodEnd, baseName))
| 360 |
> |
=== End === |
| 361 |
> |
|
| 362 |
> |
=== Here I was asked to group and code documents by doc title === |
| 363 |
> |
## first take her huge xls file and make a smaller pipe delim file with only fields that have stuff in the key fields |
# Reduce the big pipe-delimited tally file to the rows that have a value in
# at least one key field, keeping only column 0 plus the key fields.
# The header row is dropped and not written to the output.
with open(r"W:\Manny\Client\honeywell\DocTitleBatchCode_Proj\Document Title Tally.csv") as tallyFile:
    contents = tallyFile.readlines()
contents = contents[1:]

keyFields = (2, 4, 5, 6, 7, 8, 9)
with open(r"W:\Manny\Client\honeywell\DocTitleBatchCode_Proj\Document Title Tally_small.csv", 'w') as outputFile:
    for line in contents:
        # Strip the newline first: when column 9 is the last column, the
        # unstripped field is always truthy and adds a second line break.
        line = line.replace("\n", "").split("|")
        if any(line[i] for i in keyFields):
            kept = [line[0]] + [line[i] for i in keyFields]
            outputFile.write("|".join(kept) + "\n")
| 388 |
> |
|
| 389 |
> |
## Create the matrix, if you don't have a pickle. Here I write to a pickle, which can be reused, since the doc title shouldn't change
import pickle

# Build matrix {document title: [bates, ...]} from the pipe-delimited tally
# (column 0 = bates, column 2 = title) and save it to a pickle for reuse.
with open(r"W:\Manny\Client\honeywell\DocTitleTally.dat") as tallyFile:
    contents = tallyFile.readlines()

matrix = {}
for line in contents:
    line = line.replace("\n", "").split("|")
    bates = line[0]
    title = line[2]
    # setdefault avoids "title in matrix.keys()", which in Python 2 scans a
    # list of every key on each row (quadratic with ~240k titles).
    matrix.setdefault(title, []).append(bates)

# In the recorded session len(matrix.keys()) was 239844.

## saving to a pickle
with open(r"C:\McDermott-Discovery\Client\docTitle_proj.pkl", 'wb') as f:
    pickle.dump(matrix, f, pickle.HIGHEST_PROTOCOL)

## Old-revision rows 394-403, which the diff view interleaved with the loop
## above. They are the body of the earlier date-fix loop, kept for reference:
##     bates,date = line.split("|")
##     mm,dd,yy = date.split("/")
##     if mm == '00':
##         mm = '01'
##     if dd == '00':
##         dd = '01'
##     if yy == '0000':
##         yy = '1800'
##     date = '%s/%s/%s'%(mm,dd,yy)
##     outputFile.write("%s|%s\n"%(bates,date))
| 409 |
+ |
## |
# Expand the per-title coding file into a per-document file: for every row,
# write one output row per bates number that matrix lists for the title in
# column 0, followed by columns 1-6 of the row.
# The input is read before the output is opened so a missing input file does
# not leave a truncated output behind.
with open(r"\\nykads01\data\CLI\Honeywell-17th Floor\Manny\20140630_DocTitleTally_MannyCopy_small.dat") as smallFile:
    contents = smallFile.readlines()

with open(r"C:\McDermott-Discovery\Client\docTitle_proj.dat", 'w') as outputFile:
    headder = "Bates|" + contents[0]
    outputFile.write(headder)
    for line in contents[1:]:
        line = line.replace("\n", "").split("|")
        # A title missing from matrix raises KeyError, as in the original.
        for b in matrix[line[0]]:
            outputFile.write("%s|%s|%s|%s|%s|%s|%s\n" % (b, line[1], line[2], line[3], line[4], line[5], line[6]))
| 422 |
+ |
|
| 423 |
+ |
|
| 424 |
+ |
## family groups |
# Build two lookups from the two-column CSV (header skipped, quotes removed):
#   familyLookupMatrix: {bates: begAttach}
#   familyMatrix:       {begAttach: [bates, ...]}
with open(r"C:\McDermott-Discovery\Client\RL_BegImage_BegAttach.csv") as familyFile:
    contents = familyFile.readlines()
contents = contents[1:]

familyLookupMatrix = {}
familyMatrix = {}
for line in contents:
    line = line.replace('"', '').replace("\n", "")
    bates, begAttach = line.split(",")
    familyLookupMatrix[bates] = begAttach
    # setdefault replaces the per-row scan of familyMatrix.keys().
    familyMatrix.setdefault(begAttach, []).append(bates)
| 439 |
+ |
## Now the meat of the proj |
# Spread each document's coding to its whole family.  Columns 1-6 of every
# row feed fbc, rbg1, rbg2, rbg3, rbg4 and rbg5 respectively; each maps a
# bates number to the distinct non-empty values seen for its family, in
# first-seen order.
with open(r"C:\McDermott-Discovery\Client\docTitle_proj.dat") as codingFile:
    contents = codingFile.readlines()
contents = contents[1:]

fbc = {}
rbg1 = {}
rbg2 = {}
rbg3 = {}
rbg4 = {}
rbg5 = {}
# Position in this list + 1 is the input column that feeds the dict; one loop
# replaces the six copy-pasted stanzas.
codingMatrices = [fbc, rbg1, rbg2, rbg3, rbg4, rbg5]

for line in contents:
    line = line.replace("\n", "").split("|")
    parent = familyLookupMatrix[line[0]]
    fullFam = familyMatrix[parent]
    for x in fullFam:
        for column, codingMatrix in enumerate(codingMatrices, 1):
            value = line[column]
            if not value:
                continue
            values = codingMatrix.setdefault(x, [])
            if value not in values:
                values.append(value)
# mainList: sorted list of every bates number that appears as a key in any of
# the six coding dicts, without duplicates.
# A set replaces the "if i in mainList" list scans, which were quadratic.
# In the recorded session len(mainList) was 80490 after adding fbc and still
# 80490 after adding rbg1.
uniqueBates = set()
for codingMatrix in (fbc, rbg1, rbg2, rbg3, rbg4, rbg5):
    uniqueBates.update(codingMatrix.keys())
mainList = sorted(uniqueBates)
# Write one row per bates number: the bates followed by the str() of each
# coding list (or an empty field when the dict has no values for it).
# The list repr is written on purpose; the cleanup step strips it afterwards.
outputFile = open(r"C:\McDermott-Discovery\Client\output.txt", 'w')
for bates in mainList:
    rowFields = [bates]
    for codingMatrix in (fbc, rbg1, rbg2, rbg3, rbg4, rbg5):
        found = codingMatrix.get(bates)
        if found:
            rowFields.append(str(found))
        else:
            rowFields.append("")
    outputFile.write("%s|%s|%s|%s|%s|%s|%s\n" % tuple(rowFields))
outputFile.close()
| 591 |
+ |
|
| 592 |
+ |
## Cleanup the output and make the final overlay |
# Clean up output.txt into the final overlay: strip the list-repr characters
# (quotes and brackets) from every field and turn "," into ";".
with open(r"C:\McDermott-Discovery\Client\output.txt") as rawFile:
    contents = rawFile.readlines()

with open(r"C:\McDermott-Discovery\Client\overlay.dat", 'w') as outputFile:
    for line in contents:
        for i in line.split("|"):
            i = i.replace("'", "").replace("[", "").replace("]", "").replace(",", ";")
            # The field carrying the newline ends the row; every other field
            # is followed by the "|" delimiter.
            if "\n" in i:
                outputFile.write(i)
            else:
                outputFile.write(i + "|")
| 608 |
+ |
|
| 609 |
+ |
|
| 610 |
+ |
=== Here I was asked to copy from a long-text date field to a real date field, but there were errors because some of the dates were missing info, i.e. 00/00/1991. I fixed the entries based on their criteria. ===
# Build a BegDoc|DocDate_New overlay from the export, replacing missing date
# parts: month '00' -> '01', day '00' -> '01', year '0000' -> '1800'.
# Each row is expected to have three "|"-separated fields with the date as
# mm/dd/yyyy in the second one.
with open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Document_Delivery\Vol003\GroupA\export_20140929_192612.csv") as exportFile:
    contents = exportFile.readlines()
contents = contents[1:]

with open(r"C:\Users\eborges\Box Sync\Client\Honeywell\Perligo\Document_Delivery\Vol003\GroupA\FIXED.dat", 'w') as outputFile:
    outputFile.write("BegDoc|DocDate_New\n")
    for line in contents:
        line = line.replace("\n", "")
        bates, date, nul = line.split("|")
        if date:
            mm, dd, yy = date.split("/")
            if mm == '00':
                mm = '01'
            if dd == '00':
                dd = '01'
            if yy == '0000':
                yy = '1800'
            date = '%s/%s/%s' % (mm, dd, yy)
        # NOTE(review): the source lost its indentation, so it is unclear
        # whether this write sat inside "if date:".  It is kept at loop level
        # here (rows with an empty date are written with an empty date field)
        # -- confirm.
        outputFile.write("%s|%s\n" % (bates, date))
| 629 |
|
=== END === |
| 630 |
|
=== Here I was asked to make a tally report from the prefixes in HOBX ===