ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/RandomCodeRequests/Lee-Mercer2.txt
Revision: 742
Committed: Tue Apr 13 20:36:48 2021 UTC (4 years, 11 months ago) by nino.borges
Content type: text/plain
File size: 6947 byte(s)
Log Message:
added some missing code request txt files

File Contents

# User Rev Content
1 nino.borges 742 ## Lee-Mercer.
2     ## Hash the parent records of the original Q-Mobile delivery and of the new deliverable,
3     ## then compare the two sets of hashes to pull out only the records that are new.
4    
5     >>> contents = open(r"\\sas22\sas22\35081\Inbound\08\103950\load_file.txt").readlines()
6     >>> contents[0]
7     '\xc3\xbeForensic Item Number\xc3\xbe\x14\xc3\xbeCustodian - Single Choice\xc3\xbe\x14\xc3\xbeDocID\xc3\xbe\x14\xc3\xbeGroup Identifier\xc3\xbe\x14\xc3\xbeBegAttach\xc3\xbe\x14\xc3\xbeEndAttach\xc3\xbe\x14\xc3\xbeMessage Thread ID\xc3\xbe\x14\xc3\xbeMobile Duplicate ID\xc3\xbe\x14\xc3\xbeChat Parties\xc3\xbe\x14\xc3\xbeParties_QD\xc3\xbe\x14\xc3\xbeParties by Group_QD\xc3\xbe\x14\xc3\xbeDate/Time Chat Start\xc3\xbe\x14\xc3\xbeDate/Time Chat End\xc3\xbe\x14\xc3\xbeDate/Time Mobile Event\xc3\xbe\x14\xc3\xbeDate/Time Message Delivered\xc3\xbe\x14\xc3\xbeDate/Time Message Read\xc3\xbe\x14\xc3\xbeMobile Folder\xc3\xbe\x14\xc3\xbeMobile To\xc3\xbe\x14\xc3\xbeMobile From\xc3\xbe\x14\xc3\xbeText Message Subject\xc3\xbe\x14\xc3\xbeHas Message Attachment\xc3\xbe\x14\xc3\xbeIs Message Attachment\xc3\xbe\x14\xc3\xbeFile Name\xc3\xbe\x14\xc3\xbeFile Size\xc3\xbe\x14\xc3\xbeDocument Extension\xc3\xbe\x14\xc3\xbeDate/Time Created\xc3\xbe\x14\xc3\xbeDate/Time Last Modified\xc3\xbe\x14\xc3\xbeAttachment MD5\xc3\xbe\x14\xc3\xbeMessage Read Status\xc3\xbe\x14\xc3\xbeMobile Deleted or Intact\xc3\xbe\x14\xc3\xbeChat Deleted\xc3\xbe\x14\xc3\xbeMessage Type\xc3\xbe\x14\xc3\xbeMessage SubType\xc3\xbe\x14\xc3\xbeCall Duration\xc3\xbe\x14\xc3\xbeMobile Country Code\xc3\xbe\x14\xc3\xbeMobile Video Call\xc3\xbe\x14\xc3\xbeQMobile Time Zone Field\xc3\xbe\x14\xc3\xbeExtracted Text\xc3\xbe\x14\xc3\xbeNative File Path\xc3\xbe\x14\xc3\xbeParent Message Type\xc3\xbe\x14\xc3\xbeMobile Source File Name\xc3\xbe\x14\xc3\xbeMobile Extraction Source\xc3\xbe\x14\xc3\xbeMobile Event Direction\xc3\xbe\x14\xc3\xbeException Reason\xc3\xbe\x14\xc3\xbeTime Zone Field\xc3\xbe\x14\xc3\xbeRelativity Native Time Zone Offset\xc3\xbe\n'
8     >>> contents = contents[1:]
9     >>> origMatrix = {}
10     >>> delim = "\x14"
11    
12     >>> import hashlib
13    
14     >>> for line in contents:
15     ... line = line.replace("\n","")
16     ... line = line.split(delim)
17     ... bates = line[2]
18     ... if bates == line[4]:
19     ... ## Parent found
20     ... rawStringContents = "%s%s%s%s%s%s"% (line[6], line[13], line[17], line[18], line[16], line[37])
21     ... hashValue = hashlib.md5(rawStringContents).hexdigest()
22     ... origMatrix[bates] = hashValue
23     ...
24     >>> len(origMatrix.keys())
25     62665
26    
27     >>> colList = []
28     >>> for i in range(6,46):
29     ... colList.append(i)
30     ...
31     >>> colList
32     [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45]
33     >>> colList.remove(38)
34     >>> colList.remove(37)
35     >>> origMatrix = {}
36     >>> for line in contents:
37     ... line = line.replace("\n","")
38     ... line = line.split(delim)
39     ... bates = line[2]
40     ... if bates == line[4]:
41     ... ## Parent found
42     ... textPath2 = line[37].replace("\xc3\xbe","")
43     ... if textPath2:
44     ... textContents = open(os.path.join(textPath,textPath2)).read()
45     ... else:
46     ... textContents = ""
47     ... rawStringContents = ""
48     ... for i in colList:
49     ... rawStringContents = rawStringContents + line[i]
50     ... rawStringContents = rawStringContents + textContents
51     ... hashValue = hashlib.md5(rawStringContents).hexdigest()
52     ... origMatrix[hashValue] = bates
53     ...
54     >>> len(origMatrix.keys())
55     62606
56     >>> newMatrix = {}
57     >>> textPath = r"\\sas22\sas22\35081\Inbound\New\QMobile Chat"
58     >>> contents = open(r"\\sas22\sas22\35081\Inbound\New\QMobile Chat\load_file.txt").readlines()
59     >>> contents[0]
60     '\xc3\xbeForensic Item Number\xc3\xbe\x14\xc3\xbeCustodian - Single Choice\xc3\xbe\x14\xc3\xbeDocID\xc3\xbe\x14\xc3\xbeGroup Identifier\xc3\xbe\x14\xc3\xbeBegAttach\xc3\xbe\x14\xc3\xbeEndAttach\xc3\xbe\x14\xc3\xbeMessage Thread ID\xc3\xbe\x14\xc3\xbeMobile Duplicate ID\xc3\xbe\x14\xc3\xbeChat Parties\xc3\xbe\x14\xc3\xbeParties_QD\xc3\xbe\x14\xc3\xbeParties by Group_QD\xc3\xbe\x14\xc3\xbeDate/Time Chat Start\xc3\xbe\x14\xc3\xbeDate/Time Chat End\xc3\xbe\x14\xc3\xbeDate/Time Mobile Event\xc3\xbe\x14\xc3\xbeDate/Time Message Delivered\xc3\xbe\x14\xc3\xbeDate/Time Message Read\xc3\xbe\x14\xc3\xbeMobile Folder\xc3\xbe\x14\xc3\xbeMobile To\xc3\xbe\x14\xc3\xbeMobile From\xc3\xbe\x14\xc3\xbeText Message Subject\xc3\xbe\x14\xc3\xbeHas Message Attachment\xc3\xbe\x14\xc3\xbeIs Message Attachment\xc3\xbe\x14\xc3\xbeFile Name\xc3\xbe\x14\xc3\xbeFile Size\xc3\xbe\x14\xc3\xbeDocument Extension\xc3\xbe\x14\xc3\xbeDate/Time Created\xc3\xbe\x14\xc3\xbeDate/Time Last Modified\xc3\xbe\x14\xc3\xbeAttachment MD5\xc3\xbe\x14\xc3\xbeMessage Read Status\xc3\xbe\x14\xc3\xbeMobile Deleted or Intact\xc3\xbe\x14\xc3\xbeChat Deleted\xc3\xbe\x14\xc3\xbeMessage Type\xc3\xbe\x14\xc3\xbeMessage SubType\xc3\xbe\x14\xc3\xbeCall Duration\xc3\xbe\x14\xc3\xbeMobile Country Code\xc3\xbe\x14\xc3\xbeMobile Video Call\xc3\xbe\x14\xc3\xbeQMobile Time Zone Field\xc3\xbe\x14\xc3\xbeExtracted Text\xc3\xbe\x14\xc3\xbeNative File Path\xc3\xbe\x14\xc3\xbeParent Message Type\xc3\xbe\x14\xc3\xbeMobile Source File Name\xc3\xbe\x14\xc3\xbeMobile Extraction Source\xc3\xbe\x14\xc3\xbeMobile Event Direction\xc3\xbe\x14\xc3\xbeException Reason\xc3\xbe\x14\xc3\xbeTime Zone Field\xc3\xbe\x14\xc3\xbeRelativity Native Time Zone Offset\xc3\xbe\n'
61     >>> contents = contents[1:]
62     >>> for line in contents:
63     ... line = line.replace("\n","")
64     ... line = line.split(delim)
65     ... bates = line[2]
66     ... if bates == line[4]:
67     ... ## Parent found
68     ... textPath2 = line[37].replace("\xc3\xbe","")
69     ... if textPath2:
70     ... textContents = open(os.path.join(textPath,textPath2)).read()
71     ... else:
72     ... textContents = ""
73     ... rawStringContents = ""
74     ... for i in colList:
75     ... rawStringContents = rawStringContents + line[i]
76     ... rawStringContents = rawStringContents + textContents
77     ... hashValue = hashlib.md5(rawStringContents).hexdigest()
78     ... newMatrix[hashValue] = bates
79    
80    
81     >>> len(newMatrix.keys())
82     91752
83     >>> deltaList = []
84    
85     >>> nonDeltaList = []
86     >>> oldHashList = origMatrix.keys()
87     >>> newHashList = newMatrix.keys()
88     >>> for hashVal in newHashList:
89     ... if hashVal in oldHashList:
90     ... nonDeltaList.append(newMatrix[hashVal])
91     ... else:
92     ... deltaList.append(newMatrix[hashVal])
93     ...
94     >>> len(deltaList)
95     47180
96     >>> len(nonDeltaList)
97     44572
98    
99    
100    
101     >>> deltaOutPutFile = open(r"\\sas22\sas22\35081\Inbound\XFOR\Ira Lee QM\UniqueFromOrig-Load.dat",'w')
102     >>> duplicatesOutPutFile = open(r"\\sas22\sas22\35081\Inbound\XFOR\Ira Lee QM\DuplicatesFromOrig-Load.dat",'w')
103     >>> for line in contents:
104     ... newLine = line.replace("\n","")
105     ... newLine = newLine.split(delim)
106     ... if newLine[3] in deltaList:
107     ... deltaOutPutFile.write(line)
108     ... else:
109     ... duplicatesOutPutFile.write(line)
110     ...
111     >>> deltaOutPutFile.close()
112     >>> duplicatesOutPutFile.close()