NinoCode/RandomCodeRequests/LCHC.txt

===== LCHC request from Dana: take an XLS of DOB and other PII data (read here as a pipe-delimited export) and pivot it by deduplicated name (upper-cased, surrounding whitespace stripped) =====

>>> docIDMatrix = {}
>>> mrnMatrix = {}
>>> ccaidMatrix = {}
>>> dobMatrix = {}
>>> ssnMatrix = {}
>>> otherIDMatrix = {}
>>> otherPIIMatrix = {}
>>> contents = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21.csv").readlines()
>>> contents[0]
'Doc ID:|MRN|CCAID|DOB|Name|SSN|Other Identifiable Info.|Other PII|Multiple Doc IDs (Used for Consolidation Only)\n'
>>> contents = contents[1:]
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     name = name.lstrip()
...     name = name.rstrip()
...     if name in docIDMatrix.keys():
...             if line[0] in docIDMatrix[name]:
...                     pass
...             else:
...                     docIDMatrix[name].append(line[0])
...     else:
...             docIDMatrix[name] = [line[0],]
...             
>>> len(docIDMatrix.keys())
66183
>>> docIDMatrix['ABAD HIRALDO, PASCUAL']
['REV00118844', 'REV00215151']
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     name = name.lstrip()
...     name = name.rstrip()
...     if line[1]:
...             if name in mrnMatrix.keys():
...                     if line[1] in mrnMatrix[name]:
...                             pass
...                     else:
...                             mrnMatrix[name].append(line[1])
...             else:
...                     mrnMatrix[name] = [line[1],]
... 
>>> len(mrnMatrix.keys())
3534
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     name = name.lstrip()
...     name = name.rstrip()
...     if line[2]:
...             if name in ccaidMatrix.keys():
...                     if line[2] in ccaidMatrix[name]:
...                             pass
...                     else:
...                             ccaidMatrix[name].append(line[2])
...             else:
...                     ccaidMatrix[name] = [line[2],]
...                     
>>> len(ccaidMatrix.keys())
3
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     name = name.lstrip()
...     name = name.rstrip()
...     if line[5]:
...             if name in ssnMatrix.keys():
...                     if line[5] in ssnMatrix[name]:
...                             pass
...                     else:
...                             ssnMatrix[name].append(line[5])
...             else:
...                     ssnMatrix[name] = [line[5],]
...                     
>>> len(ssnMatrix.keys())
521
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     name = name.lstrip()
...     name = name.rstrip()
...     if line[6]:
...             if name in otherIDMatrix.keys():
...                     if line[6] in otherIDMatrix[name]:
...                             pass
...                     else:
...                             otherIDMatrix[name].append(line[6])
...             else:
...                     otherIDMatrix[name] = [line[6],]
...                     
>>> len(otherIDMatrix.keys())
24636
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     name = name.lstrip()
...     name = name.rstrip()
...     if line[7]:
...             if name in otherPIIMatrix.keys():
...                     if line[7] in otherPIIMatrix[name]:
...                             pass
...                     else:
...                             otherPIIMatrix[name].append(line[7])
...             else:
...                     otherPIIMatrix[name] = [line[7],]
...                     
>>> len(otherPIIMatrix.keys())
1351
>>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     name = name.lstrip()
...     name = name.rstrip()
...     rawDob = line[3]
...     if len(line[3]) >5:
...             if "/" in rawDob:
...                     rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
...                     dob = rawDob.strftime("%m/%d/%Y")
...             else:
...                     rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
...                     dob = rawDob.strftime("%m/%d/%Y")
...                     effFile.write("%s changed to %s for docID %s\n"% (line[3], dob, line[0]))
...             if name in dobMatrix.keys():
...                     if dob in dobMatrix[name]:
...                             pass
...                     else:
...                             dobMatrix[name].append(dob)
...             else:
...                     dobMatrix[name] = [dob,]
...     else:
...             if line[3]:
...                     effFile.write("ERROR: %s is too small to parse!!!\n"% line[3])
...                     
>>> effFile.close()
>>> len(dobMatrix.keys())
64145
>>> patientList = docIDMatrix.keys()
>>> patientList.sort()
>>> patientList[0]
''
>>> patientList[1]
'A., A.'
>>> patientList[2]
'A., JESSE'
>>> patientList[3]
'A., JOHN'

=== Now we write it out ===
>>> dobPatientList = dobMatrix.keys()
>>> mrnPatientList = mrnMatrix.keys()
>>> ccaidPatientList = ccaidMatrix.keys()
>>> ssnPatientList = ssnMatrix.keys()
>>> otherIDPatientList = otherIDMatrix.keys()
>>> otherPIIPatientList = otherPIIMatrix.keys()

>>> outputFile = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21-OUTPUT.DAT",'w')
>>> outputFile.write("Name|DOB|MRN|CCAID|SSN|OtherIDInfo|OtherPII|DodIDs\n")
>>> for pt in patientList:
...     dob = ""
...     docID = ""
...     mrn = ""
...     ccaid = ""
...     ssn = ""
...     otherID = ""
...     otherPII = ""
...     outputFile.write("%s|"%pt)
...     if pt in dobPatientList:
...             dobs = dobMatrix[pt]
...             if len(dobs) > 1:
...                     for d in dobs:
...                             dob = dob + d + ";"
...             else:
...                     dob = dobs[0]
...     outputFile.write("%s|"%dob)
...     if pt in mrnPatientList:
...             mrns = mrnMatrix[pt]
...             if len(mrns) >1:
...                     for m in mrns:
...                             mrn = mrn + m + ";"
...             else:
...                     mrn = mrns[0]
...     outputFile.write("%s|"%mrn)
...     if pt in ccaidPatientList:
...             ccaids = ccaidMatrix[pt]
...             if len(ccaids) >1:
...                     for c in ccaids:
...                             ccaid = ccaid + c + ";"
...             else:
...                     ccaid = ccaids[0]
...     outputFile.write("%s|"%ccaid)
...     if pt in ssnPatientList:
...             ssns = ssnMatrix[pt]
...             if len(ssns) >1:
...                     for s in ssns:
...                             ssn = ssn + s + ";"
...             else:
...                     ssn = ssns[0]
...     outputFile.write("%s|"%ssn)
...     if pt in otherIDPatientList:
...             otherIDs = otherIDMatrix[pt]
...             if len(otherIDs) >1:
...                     for i in otherIDs:
...                             otherID = otherID + i + ";"
...             else:
...                     otherID = otherIDs[0]
...     outputFile.write("%s|"%otherID)
...     if pt in otherPIIPatientList:
...             otherPIIs = otherPIIMatrix[pt]
...             if len(otherPIIs) >1:
...                     for p in otherPIIs:
...                             otherPII = otherPII + p + ";"
...             else:
...                     otherPII = otherPIIs[0]
...     outputFile.write("%s|"%otherPII)
...     docIDs = docIDMatrix[pt]
...     if len(docIDs) >1:
...             for rev in docIDs:
...                     docID = docID + rev + ";"
...     else:
...             docID = docIDs[0]
...     outputFile.write("%s\n"%docID)
... 
>>> outputFile.close()


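====== OLD BELOW THIS LINE, DO NOT USE (earlier attempt: names were upper-cased but not whitespace-stripped, so keys such as ' CAMPUZANO, JACGP' were not deduplicated) ======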

PythonWin 2.7.17 (v2.7.17:c2f86d86e6, Oct 19 2019, 20:49:36) [MSC v.1500 32 bit (Intel)] on win32.
Portions Copyright 1994-2018 Mark Hammond - see 'Help/About PythonWin' for further copyright information.
>>> docIDMatrix = {}
>>> mrnMatrix = {}
>>> ccaidMatrix = {}
>>> dobMatrix = {}
>>> ssnMatrix = {}
>>> otherIDMatrix = {}
>>> otherPIIMatrix = {}
>>> contents = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21.csv").readlines()
>>> contents[0]
'Doc ID:|MRN|CCAID|DOB|Name|SSN|Other Identifiable Info.|Other PII|Multiple Doc IDs (Used for Consolidation Only)\n'
>>> contents = contents[1:]
>>> import datetime
>>> test = '20210122'
>>> test2 = datetime.datetime.strptime(test, '%Y%m%d")
>>> test = '20210122'
>>> test2 = datetime.datetime.strptime(test, '%Y%m%d')
>>> test2.strftime("%m/%d/%Y")
'01/22/2021'
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     if name in docIDMatrix.keys():
...             if line[0] in docIDMatrix[name]:
...                     pass
...             else:
...                     docIDMatrix[name].append(line[0])
...     else:
...             docIDMatrix[name] = [line[0],]
...             
>>> len(docIDMatrix.keys()
... )
66433

>>> len(docIDMatrix['ABAD HIRALDO, PASCUAL'])
2
>>> docIDMatrix['ABAD HIRALDO, PASCUAL']
['REV00118844', 'REV00215151']

>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     if line[1]:
...             if name in mrnMatrix.keys():
...                     if line[1] in mrnMatrix[name]:
...                             pass
...                     else:
...                             mrnMatrix[name].append(line[1])
...             else:
...                     mrnMatrix[name] = [line[1],]
...                     
>>> len(mrnMatrix.keys())
3535
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     if line[2]:
...             if name in ccaidMatrix.keys():
...                     if line[2] in ccaidMatrix[name]:
...                             pass
...                     else:
...                             ccaidMatrix[name].append(line[2])
...             else:
...                     ccaidMatrix[name] = [line[2],]
...                     
>>> len(ccaidMatrix.keys())
3
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     if line[5]:
...             if name in ssnMatrix.keys():
...                     if line[5] in ssnMatrix[name]:
...                             pass
...                     else:
...                             ssnMatrix[name].append(line[5])
...             else:
...                     ssnMatrix[name] = [line[5],]
...                     
>>> len(ssnMatrix.keys())
522
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     if line[6]:
...             if name in otherIDMatrix.keys():
...                     if line[6] in otherIDMatrix[name]:
...                             pass
...                     else:
...                             otherIDMatrix[name].append(line[6])
...             else:
...                     otherIDMatrix[name] = [line[6],]
...                     
>>> len(otherIDMatrix.keys())
24639
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     if line[7]:
...             if name in otherPIIMatrix.keys():
...                     if line[7] in otherPIIMatrix[name]:
...                             pass
...                     else:
...                             otherPIIMatrix[name].append(line[7])
...             else:
...                     otherPIIMatrix[name] = [line[7],]
...                     
>>> len(otherPIIMatrix.keys())
1352
>>> test = '7/15/1974'
>>> test2 = datetime.datetime.strptime(test, '%m/%d/%Y')
>>> test2.strftime("%m/%d/%Y")
'07/15/1974'
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     if line[3]:
...             if name in dobMatrix.keys():
...                     if line[3] in dobMatrix[name]:
...                             pass
...                     else:
...                             dobMatrix[name].append(line[3])
...             else:
...                     dobMatrix[name] = [line[3],]

>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     if line[3]:
...             if "/" in rawDob:
...                     rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
...                     dob = rawDob.strftime("%m/%d/%Y")
...             else:
...                     rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
...                     dob = rawDob.strftime("%m/%d/%Y")
...             if name in dobMatrix.keys():
...                     if dob in dobMatrix[name]:
...                             pass
...                     else:
...                             dobMatrix[name].append(dob)
...             else:
...                     dobMatrix[name] = [dob,]
...                     
>>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')


>>> dobMatrix = {}
>>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
>>> for line in contents:
...     line = line.replace("\n","")
...     line = line.split("|")
...     name = line[4]
...     name = name.upper()
...     rawDob = line[3]
...     if len(line[3]) >5:
...             if "/" in rawDob:
...                     rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
...                     dob = rawDob.strftime("%m/%d/%Y")
...             else:
...                     rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
...                     dob = rawDob.strftime("%m/%d/%Y")
...                     effFile.write("%s changed to %s for docID %s\n"% (line[3], dob, line[0]))
...             if name in dobMatrix.keys():
...                     if dob in dobMatrix[name]:
...                             pass
...                     else:
...                             dobMatrix[name].append(dob)
...             else:
...                     dobMatrix[name] = [dob,]
...     else:
...             if line[3]:
...                     effFile.write("ERROR: %s is too small to parse!!!\n"% line[3])
...                     
>>> effFile.close()
>>> len(dobMatrix.keys())
64385
>>> outputFile = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21-OUTPUT.DAT",'w')
>>> patientList = docIDMatrix.keys()
>>> patientList.sort()
>>> patientList[0]
''
>>> patientList[1]
' CAMPUZANO, JACGP'
>>> patientList[2]
' LEVESQUE, JENNIFER'
>>> patientList[3]
' OLIVER, CRUZ'
>>> patientList[1].lstrip()
'CAMPUZANO, JACGP'
>>> patientList[1].rstrip()
' CAMPUZANO, JACGP'
>>> 
Revision:	742
Committed:	Tue Apr 13 20:36:48 2021 UTC (4 years, 11 months ago) by nino.borges
Content type:	text/plain
File size:	12595 byte(s)
Log Message:	added some missing code request txt files
#	Content
1	===== LCHC req from Dana to take an XLS of dob and other pii data and pivot by deduplicated user name =====
2
3	>>> docIDMatrix = {}
4	>>> mrnMatrix = {}
5	>>> ccaidMatrix = {}
6	>>> dobMatrix = {}
7	>>> ssnMatrix = {}
8	>>> otherIDMatrix = {}
9	>>> otherPIIMatrix = {}
10	>>> contents = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21.csv").readlines()
11	>>> contents[0]
12	'Doc ID:\|MRN\|CCAID\|DOB\|Name\|SSN\|Other Identifiable Info.\|Other PII\|Multiple Doc IDs (Used for Consolidation Only)\n'
13	>>> contents = contents[1:]
14	>>> for line in contents:
15	... line = line.replace("\n","")
16	... line = line.split("\|")
17	... name = line[4]
18	... name = name.upper()
19	... name = name.lstrip()
20	... name = name.rstrip()
21	... if name in docIDMatrix.keys():
22	... if line[0] in docIDMatrix[name]:
23	... pass
24	... else:
25	... docIDMatrix[name].append(line[0])
26	... else:
27	... docIDMatrix[name] = [line[0],]
28	...
29	>>> len(docIDMatrix.keys())
30	66183
31	>>> docIDMatrix['ABAD HIRALDO, PASCUAL']
32	['REV00118844', 'REV00215151']
33	>>> for line in contents:
34	... line = line.replace("\n","")
35	... line = line.split("\|")
36	... name = line[4]
37	... name = name.upper()
38	... name = name.lstrip()
39	... name = name.rstrip()
40	... if line[1]:
41	... if name in mrnMatrix.keys():
42	... if line[1] in mrnMatrix[name]:
43	... pass
44	... else:
45	... mrnMatrix[name].append(line[1])
46	... else:
47	... mrnMatrix[name] = [line[1],]
48	...
49	>>> len(mrnMatrix.keys())
50	3534
51	>>> for line in contents:
52	... line = line.replace("\n","")
53	... line = line.split("\|")
54	... name = line[4]
55	... name = name.upper()
56	... name = name.lstrip()
57	... name = name.rstrip()
58	... if line[2]:
59	... if name in ccaidMatrix.keys():
60	... if line[2] in ccaidMatrix[name]:
61	... pass
62	... else:
63	... ccaidMatrix[name].append(line[2])
64	... else:
65	... ccaidMatrix[name] = [line[2],]
66	...
67	>>> len(ccaidMatrix.keys())
68	3
69	>>> for line in contents:
70	... line = line.replace("\n","")
71	... line = line.split("\|")
72	... name = line[4]
73	... name = name.upper()
74	... name = name.lstrip()
75	... name = name.rstrip()
76	... if line[5]:
77	... if name in ssnMatrix.keys():
78	... if line[5] in ssnMatrix[name]:
79	... pass
80	... else:
81	... ssnMatrix[name].append(line[5])
82	... else:
83	... ssnMatrix[name] = [line[5],]
84	...
85	>>> len(ssnMatrix.keys())
86	521
87	>>> for line in contents:
88	... line = line.replace("\n","")
89	... line = line.split("\|")
90	... name = line[4]
91	... name = name.upper()
92	... name = name.lstrip()
93	... name = name.rstrip()
94	... if line[6]:
95	... if name in otherIDMatrix.keys():
96	... if line[6] in otherIDMatrix[name]:
97	... pass
98	... else:
99	... otherIDMatrix[name].append(line[6])
100	... else:
101	... otherIDMatrix[name] = [line[6],]
102	...
103	>>> len(otherIDMatrix.keys())
104	24636
105	>>> for line in contents:
106	... line = line.replace("\n","")
107	... line = line.split("\|")
108	... name = line[4]
109	... name = name.upper()
110	... name = name.lstrip()
111	... name = name.rstrip()
112	... if line[7]:
113	... if name in otherPIIMatrix.keys():
114	... if line[7] in otherPIIMatrix[name]:
115	... pass
116	... else:
117	... otherPIIMatrix[name].append(line[7])
118	... else:
119	... otherPIIMatrix[name] = [line[7],]
120	...
121	>>> len(otherPIIMatrix.keys())
122	1351
123	>>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
124	>>> for line in contents:
125	... line = line.replace("\n","")
126	... line = line.split("\|")
127	... name = line[4]
128	... name = name.upper()
129	... name = name.lstrip()
130	... name = name.rstrip()
131	... rawDob = line[3]
132	... if len(line[3]) >5:
133	... if "/" in rawDob:
134	... rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
135	... dob = rawDob.strftime("%m/%d/%Y")
136	... else:
137	... rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
138	... dob = rawDob.strftime("%m/%d/%Y")
139	... effFile.write("%s changed to %s for docID %s\n"% (line[3], dob, line[0]))
140	... if name in dobMatrix.keys():
141	... if dob in dobMatrix[name]:
142	... pass
143	... else:
144	... dobMatrix[name].append(dob)
145	... else:
146	... dobMatrix[name] = [dob,]
147	... else:
148	... if line[3]:
149	... effFile.write("ERROR: %s is too small to parse!!!\n"% line[3])
150	...
151	>>> effFile.close()
152	>>> len(dobMatrix.keys())
153	64145
154	>>> patientList = docIDMatrix.keys()
155	>>> patientList.sort()
156	>>> patientList[0]
157	''
158	>>> patientList[1]
159	'A., A.'
160	>>> patientList[2]
161	'A., JESSE'
162	>>> patientList[3]
163	'A., JOHN'
164
165	=== Now we write it out ===
166	>>> dobPatientList = dobMatrix.keys()
167	>>> mrnPatientList = mrnMatrix.keys()
168	>>> ccaidPatientList = ccaidMatrix.keys()
169	>>> ssnPatientList = ssnMatrix.keys()
170	>>> otherIDPatientList = otherIDMatrix.keys()
171	>>> otherPIIPatientList = otherPIIMatrix.keys()
172
173	>>> outputFile = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21-OUTPUT.DAT",'w')
174	>>> outputFile.write("Name\|DOB\|MRN\|CCAID\|SSN\|OtherIDInfo\|OtherPII\|DodIDs\n")
175	>>> for pt in patientList:
176	... dob = ""
177	... docID = ""
178	... mrn = ""
179	... ccaid = ""
180	... ssn = ""
181	... otherID = ""
182	... otherPII = ""
183	... outputFile.write("%s\|"%pt)
184	... if pt in dobPatientList:
185	... dobs = dobMatrix[pt]
186	... if len(dobs) > 1:
187	... for d in dobs:
188	... dob = dob + d + ";"
189	... else:
190	... dob = dobs[0]
191	... outputFile.write("%s\|"%dob)
192	... if pt in mrnPatientList:
193	... mrns = mrnMatrix[pt]
194	... if len(mrns) >1:
195	... for m in mrns:
196	... mrn = mrn + m + ";"
197	... else:
198	... mrn = mrns[0]
199	... outputFile.write("%s\|"%mrn)
200	... if pt in ccaidPatientList:
201	... ccaids = ccaidMatrix[pt]
202	... if len(ccaids) >1:
203	... for c in ccaids:
204	... ccaid = ccaid + c + ";"
205	... else:
206	... ccaid = ccaids[0]
207	... outputFile.write("%s\|"%ccaid)
208	... if pt in ssnPatientList:
209	... ssns = ssnMatrix[pt]
210	... if len(ssns) >1:
211	... for s in ssns:
212	... ssn = ssn + s + ";"
213	... else:
214	... ssn = ssns[0]
215	... outputFile.write("%s\|"%ssn)
216	... if pt in otherIDPatientList:
217	... otherIDs = otherIDMatrix[pt]
218	... if len(otherIDs) >1:
219	... for i in otherIDs:
220	... otherID = otherID + i + ";"
221	... else:
222	... otherID = otherIDs[0]
223	... outputFile.write("%s\|"%otherID)
224	... if pt in otherPIIPatientList:
225	... otherPIIs = otherPIIMatrix[pt]
226	... if len(otherPIIs) >1:
227	... for p in otherPIIs:
228	... otherPII = otherPII + p + ";"
229	... else:
230	... otherPII = otherPIIs[0]
231	... outputFile.write("%s\|"%otherPII)
232	... docIDs = docIDMatrix[pt]
233	... if len(docIDs) >1:
234	... for rev in docIDs:
235	... docID = docID + rev + ";"
236	... else:
237	... docID = docIDs[0]
238	... outputFile.write("%s\n"%docID)
239	...
240	>>> outputFile.close()
241
242
243
244
245
246
247
248	====== OLD BELOW THIS LINE, DO NOT USE ======
249
250	PythonWin 2.7.17 (v2.7.17:c2f86d86e6, Oct 19 2019, 20:49:36) [MSC v.1500 32 bit (Intel)] on win32.
251	Portions Copyright 1994-2018 Mark Hammond - see 'Help/About PythonWin' for further copyright information.
252	>>> docIDMatrix = {}
253	>>> mrnMatrix = {}
254	>>> ccaidMatrix = {}
255	>>> dobMatrix = {}
256	>>> ssnMatrix = {}
257	>>> otherIDMatrix = {}
258	>>> otherPIIMatrix = {}
259	>>> contents = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21.csv").readlines()
260	>>> contents[0]
261	'Doc ID:\|MRN\|CCAID\|DOB\|Name\|SSN\|Other Identifiable Info.\|Other PII\|Multiple Doc IDs (Used for Consolidation Only)\n'
262	>>> contents = contents[1:]
263	>>> import datetime
264	>>> test = '20210122'
265	>>> test2 = datetime.datetime.strptime(test, '%Y%m%d")
266	>>> test = '20210122'
267	>>> test2 = datetime.datetime.strptime(test, '%Y%m%d')
268	>>> test2.strftime("%m/%d/%Y")
269	'01/22/2021'
270	>>> for line in contents:
271	... line = line.replace("\n","")
272	... line = line.split("\|")
273	... name = line[4]
274	... name = name.upper()
275	... if name in docIDMatrix.keys():
276	... if line[0] in docIDMatrix[name]:
277	... pass
278	... else:
279	... docIDMatrix[name].append(line[0])
280	... else:
281	... docIDMatrix[name] = [line[0],]
282	...
283	>>> len(docIDMatrix.keys()
284	... )
285	66433
286
287	>>> len(docIDMatrix['ABAD HIRALDO, PASCUAL'])
288	2
289	>>> docIDMatrix['ABAD HIRALDO, PASCUAL']
290	['REV00118844', 'REV00215151']
291
292	>>> for line in contents:
293	... line = line.replace("\n","")
294	... line = line.split("\|")
295	... name = line[4]
296	... name = name.upper()
297	... if line[1]:
298	... if name in mrnMatrix.keys():
299	... if line[1] in mrnMatrix[name]:
300	... pass
301	... else:
302	... mrnMatrix[name].append(line[1])
303	... else:
304	... mrnMatrix[name] = [line[1],]
305	...
306	>>> len(mrnMatrix.keys())
307	3535
308	>>> for line in contents:
309	... line = line.replace("\n","")
310	... line = line.split("\|")
311	... name = line[4]
312	... name = name.upper()
313	... if line[2]:
314	... if name in ccaidMatrix.keys():
315	... if line[2] in ccaidMatrix[name]:
316	... pass
317	... else:
318	... ccaidMatrix[name].append(line[2])
319	... else:
320	... ccaidMatrix[name] = [line[2],]
321	...
322	>>> len(ccaidMatrix.keys())
323	3
324	>>> for line in contents:
325	... line = line.replace("\n","")
326	... line = line.split("\|")
327	... name = line[4]
328	... name = name.upper()
329	... if line[5]:
330	... if name in ssnMatrix.keys():
331	... if line[5] in ssnMatrix[name]:
332	... pass
333	... else:
334	... ssnMatrix[name].append(line[5])
335	... else:
336	... ssnMatrix[name] = [line[5],]
337	...
338	>>> len(ssnMatrix.keys())
339	522
340	>>> for line in contents:
341	... line = line.replace("\n","")
342	... line = line.split("\|")
343	... name = line[4]
344	... name = name.upper()
345	... if line[6]:
346	... if name in otherIDMatrix.keys():
347	... if line[6] in otherIDMatrix[name]:
348	... pass
349	... else:
350	... otherIDMatrix[name].append(line[6])
351	... else:
352	... otherIDMatrix[name] = [line[6],]
353	...
354	>>> len(otherIDMatrix.keys())
355	24639
356	>>> for line in contents:
357	... line = line.replace("\n","")
358	... line = line.split("\|")
359	... name = line[4]
360	... name = name.upper()
361	... if line[7]:
362	... if name in otherPIIMatrix.keys():
363	... if line[7] in otherPIIMatrix[name]:
364	... pass
365	... else:
366	... otherPIIMatrix[name].append(line[7])
367	... else:
368	... otherPIIMatrix[name] = [line[7],]
369	...
370	>>> len(otherPIIMatrix.keys())
371	1352
372	>>> test = '7/15/1974'
373	>>> test2 = datetime.datetime.strptime(test, '%m/%d/%Y')
374	>>> test2.strftime("%m/%d/%Y")
375	'07/15/1974'
376	>>> for line in contents:
377	... line = line.replace("\n","")
378	... line = line.split("\|")
379	... name = line[4]
380	... name = name.upper()
381	... if line[3]:
382	... if name in dobMatrix.keys():
383	... if line[3] in dobMatrix[name]:
384	... pass
385	... else:
386	... dobMatrix[name].append(line[3])
387	... else:
388	... dobMatrix[name] = [line[3],]
389
390	>>> for line in contents:
391	... line = line.replace("\n","")
392	... line = line.split("\|")
393	... name = line[4]
394	... name = name.upper()
395	... if line[3]:
396	... if "/" in rawDob:
397	... rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
398	... dob = rawDob.strftime("%m/%d/%Y")
399	... else:
400	... rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
401	... dob = rawDob.strftime("%m/%d/%Y")
402	... if name in dobMatrix.keys():
403	... if dob in dobMatrix[name]:
404	... pass
405	... else:
406	... dobMatrix[name].append(dob)
407	... else:
408	... dobMatrix[name] = [dob,]
409	...
410	>>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
411
412
413	>>> dobMatrix = {}
414	>>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
415	>>> for line in contents:
416	... line = line.replace("\n","")
417	... line = line.split("\|")
418	... name = line[4]
419	... name = name.upper()
420	... rawDob = line[3]
421	... if len(line[3]) >5:
422	... if "/" in rawDob:
423	... rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
424	... dob = rawDob.strftime("%m/%d/%Y")
425	... else:
426	... rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
427	... dob = rawDob.strftime("%m/%d/%Y")
428	... effFile.write("%s changed to %s for docID %s\n"% (line[3], dob, line[0]))
429	... if name in dobMatrix.keys():
430	... if dob in dobMatrix[name]:
431	... pass
432	... else:
433	... dobMatrix[name].append(dob)
434	... else:
435	... dobMatrix[name] = [dob,]
436	... else:
437	... if line[3]:
438	... effFile.write("ERROR: %s is too small to parse!!!\n"% line[3])
439	...
440	>>> effFile.close()
441	>>> len(dobMatrix.keys())
442	64385
443	>>> outputFile = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21-OUTPUT.DAT",'w')
444	>>> patientList = docIDMatrix.keys()
445	>>> patientList.sort()
446	>>> patientList[0]
447	''
448	>>> patientList[1]
449	' CAMPUZANO, JACGP'
450	>>> patientList[2]
451	' LEVESQUE, JENNIFER'
452	>>> patientList[3]
453	' OLIVER, CRUZ'
454	>>> patientList[1].lstrip()
455	'CAMPUZANO, JACGP'
456	>>> patientList[1].rstrip()
457	' CAMPUZANO, JACGP'
458	>>>