| 1 |
"""
|
| 2 |
|
| 3 |
CustodianFromEmailFields
|
| 4 |
|
| 5 |
Created by:
|
| 6 |
Emanuel Borges
|
| 7 |
02.01.2018
|
| 8 |
|
| 9 |
This program was created for the Shark Ninja case where we had to take an export from Eclipse and create multiple load files, using
|
| 10 |
the to,from,cc,bcc as the custodian. So if X doc had 4 people in those fields, it would result in that same line being included in
|
| 11 |
4 different files.
|
| 12 |
|
| 13 |
The original header was:
|
| 14 |
'EVDXID|EVDXID EndDoc|BegAttach (Include Family)|DocType|Atty Notes|Custodian|Email From|Email BCC|Email CC|Email To|Email Subject|DateSent|TimeSent|FileName|Hidden Properties|Search Hits|Prod BegBates|Prod EndBates|DateCreated|DateLastMod|FileExtension|MD5HASH|MM_MediaExternalID|Source|EDataFolder|EDataSource|Volume|Entity\n'
|
| 15 |
|
| 16 |
What matters most is the columns up to and including the Email To field: they must appear in this same order. Everything beyond that field is ignored.
|
| 17 |
|
| 18 |
THIS IGNORES THE BCC (column index 7, counting from 0), so update the code if you also need that one.
|
| 19 |
|
| 20 |
|
| 21 |
|
| 22 |
"""
|
| 23 |
|
| 24 |
import os
|
| 25 |
|
| 26 |
# Column positions (0-based) in the pipe-delimited export.
COL_DOC_ID = 0       # EVDXID
COL_BEG_ATTACH = 2   # BegAttach (Include Family)
COL_CUSTODIAN = 5    # Custodian
COL_EMAIL_FROM = 6   # Email From
COL_EMAIL_CC = 8     # Email CC
COL_EMAIL_TO = 9     # Email To

# Email BCC (column 7) is intentionally left out; add it here if it is needed.
EMAIL_CUSTODIAN_COLUMNS = (COL_EMAIL_FROM, COL_EMAIL_CC, COL_EMAIL_TO)

# Alternative input kept from the original script (it was commented out there):
# r"L:\__People\Emanuel\MyCases\Beck Reed Riden LLP\SharkNinja - Keurig\File_Cabinet\20180126 - Additional Custodians - DeletionList.csv"
DEFAULT_INPUT_PATH = r"L:\__People\Emanuel\MyCases\Beck Reed Riden LLP\SharkNinja - Keurig\File_Cabinet\temp\sn4.csv"
DEFAULT_OUTPUT_PATH = r"c:\Test-PY\SN4"


def read_export(path):
    """Read the export at *path* and return ``(header_row, data_rows)``.

    Every returned line is newline-terminated, so a final line that lacks a
    trailing newline cannot run into the next row when it is written out.

    Raises:
        ValueError: if the file contains no lines at all.
    """
    with open(path) as handle:
        lines = [
            line if line.endswith("\n") else line + "\n"
            for line in handle
        ]
    if not lines:
        raise ValueError("Export file is empty: %s" % path)
    return lines[0], lines[1:]


def build_family_matrix(rows):
    """Group raw export lines into families keyed by the parent document id.

    A row whose BegAttach equals its own id (a parent email) or whose
    BegAttach is empty (a standalone edoc) starts a new family. Any other row
    is treated as an attachment and appended to the family named by its
    BegAttach. Blank lines are skipped.

    Raises:
        KeyError: if an attachment names a parent that has not appeared yet.
    """
    families = {}
    for row_number, line in enumerate(rows, start=1):
        if not line.strip():
            continue
        fields = line.split("|")
        doc_id = fields[COL_DOC_ID]
        parent_id = fields[COL_BEG_ATTACH]
        if parent_id == doc_id or parent_id == "":
            families[doc_id] = [line]
        elif parent_id in families:
            families[parent_id].append(line)
        else:
            raise KeyError(
                "data row %d: attachment %r references parent %r, which has "
                "not been seen" % (row_number, doc_id, parent_id)
            )
    return families


def build_custodian_matrix(rows):
    """Map each custodian name to the ids of the families it should receive.

    For a parent email the custodians are the ``;``-separated entries of the
    From, CC and To columns; for a standalone edoc it is the Custodian column.
    Attachment rows contribute nothing because they travel with their parent.

    Names are stripped before they are used as keys. Within one row a name is
    recorded only once, so a person listed in several address fields does not
    receive the same family twice. Empty address entries (for example after a
    trailing ``;``) are ignored.
    """
    custodians = {}
    for line in rows:
        if not line.strip():
            continue
        fields = line.split("|")
        doc_id = fields[COL_DOC_ID]
        parent_id = fields[COL_BEG_ATTACH]

        if parent_id == doc_id:
            names = []
            for column in EMAIL_CUSTODIAN_COLUMNS:
                for address in fields[column].split(";"):
                    address = address.strip()
                    if address and address not in names:
                        names.append(address)
        elif parent_id == "":
            # Kept even when empty so that no edoc is silently dropped.
            names = [fields[COL_CUSTODIAN].strip()]
        else:
            continue

        for name in names:
            custodians.setdefault(name, []).append(doc_id)
    return custodians


def custodian_file_name(name):
    """Return the load-file name for *name*, with ``<`` and ``>`` removed."""
    return "%s.txt" % name.replace("<", "").replace(">", "")


def write_custodian_files(output_path, header_row, custodians, families):
    """Write one load file per custodian into *output_path*.

    Each file starts with *header_row*, followed by every line of every
    family assigned to that custodian, in the order the ids were recorded.

    NOTE(review): two custodian names that reduce to the same file name will
    overwrite each other -- confirm whether that can occur in real data.
    """
    for name, doc_ids in custodians.items():
        target = os.path.join(output_path, custodian_file_name(name))
        with open(target, "w") as output_file:
            output_file.write(header_row)
            for doc_id in doc_ids:
                output_file.writelines(families[doc_id])


def main(input_path=DEFAULT_INPUT_PATH, output_path=DEFAULT_OUTPUT_PATH):
    """Split the export at *input_path* into per-custodian load files."""
    header_row, rows = read_export(input_path)
    families = build_family_matrix(rows)
    custodians = build_custodian_matrix(rows)
    write_custodian_files(output_path, header_row, custodians, families)


if __name__ == '__main__':
    main()