ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/RandomCodeRequests/LCHC.txt
Revision: 742
Committed: Tue Apr 13 20:36:48 2021 UTC (4 years, 11 months ago) by nino.borges
Content type: text/plain
File size: 12595 byte(s)
Log Message:
added some missing code request txt files

File Contents

# Content
1 ===== LCHC request from Dana: take an XLS of DOB and other PII data and pivot it by deduplicated user name =====
2
3 >>> docIDMatrix = {}
4 >>> mrnMatrix = {}
5 >>> ccaidMatrix = {}
6 >>> dobMatrix = {}
7 >>> ssnMatrix = {}
8 >>> otherIDMatrix = {}
9 >>> otherPIIMatrix = {}
10 >>> contents = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21.csv").readlines()
11 >>> contents[0]
12 'Doc ID:|MRN|CCAID|DOB|Name|SSN|Other Identifiable Info.|Other PII|Multiple Doc IDs (Used for Consolidation Only)\n'
13 >>> contents = contents[1:]
14 >>> for line in contents:
15 ... line = line.replace("\n","")
16 ... line = line.split("|")
17 ... name = line[4]
18 ... name = name.upper()
19 ... name = name.lstrip()
20 ... name = name.rstrip()
21 ... if name in docIDMatrix.keys():
22 ... if line[0] in docIDMatrix[name]:
23 ... pass
24 ... else:
25 ... docIDMatrix[name].append(line[0])
26 ... else:
27 ... docIDMatrix[name] = [line[0],]
28 ...
29 >>> len(docIDMatrix.keys())
30 66183
31 >>> docIDMatrix['ABAD HIRALDO, PASCUAL']
32 ['REV00118844', 'REV00215151']
33 >>> for line in contents:
34 ... line = line.replace("\n","")
35 ... line = line.split("|")
36 ... name = line[4]
37 ... name = name.upper()
38 ... name = name.lstrip()
39 ... name = name.rstrip()
40 ... if line[1]:
41 ... if name in mrnMatrix.keys():
42 ... if line[1] in mrnMatrix[name]:
43 ... pass
44 ... else:
45 ... mrnMatrix[name].append(line[1])
46 ... else:
47 ... mrnMatrix[name] = [line[1],]
48 ...
49 >>> len(mrnMatrix.keys())
50 3534
51 >>> for line in contents:
52 ... line = line.replace("\n","")
53 ... line = line.split("|")
54 ... name = line[4]
55 ... name = name.upper()
56 ... name = name.lstrip()
57 ... name = name.rstrip()
58 ... if line[2]:
59 ... if name in ccaidMatrix.keys():
60 ... if line[2] in ccaidMatrix[name]:
61 ... pass
62 ... else:
63 ... ccaidMatrix[name].append(line[2])
64 ... else:
65 ... ccaidMatrix[name] = [line[2],]
66 ...
67 >>> len(ccaidMatrix.keys())
68 3
69 >>> for line in contents:
70 ... line = line.replace("\n","")
71 ... line = line.split("|")
72 ... name = line[4]
73 ... name = name.upper()
74 ... name = name.lstrip()
75 ... name = name.rstrip()
76 ... if line[5]:
77 ... if name in ssnMatrix.keys():
78 ... if line[5] in ssnMatrix[name]:
79 ... pass
80 ... else:
81 ... ssnMatrix[name].append(line[5])
82 ... else:
83 ... ssnMatrix[name] = [line[5],]
84 ...
85 >>> len(ssnMatrix.keys())
86 521
87 >>> for line in contents:
88 ... line = line.replace("\n","")
89 ... line = line.split("|")
90 ... name = line[4]
91 ... name = name.upper()
92 ... name = name.lstrip()
93 ... name = name.rstrip()
94 ... if line[6]:
95 ... if name in otherIDMatrix.keys():
96 ... if line[6] in otherIDMatrix[name]:
97 ... pass
98 ... else:
99 ... otherIDMatrix[name].append(line[6])
100 ... else:
101 ... otherIDMatrix[name] = [line[6],]
102 ...
103 >>> len(otherIDMatrix.keys())
104 24636
105 >>> for line in contents:
106 ... line = line.replace("\n","")
107 ... line = line.split("|")
108 ... name = line[4]
109 ... name = name.upper()
110 ... name = name.lstrip()
111 ... name = name.rstrip()
112 ... if line[7]:
113 ... if name in otherPIIMatrix.keys():
114 ... if line[7] in otherPIIMatrix[name]:
115 ... pass
116 ... else:
117 ... otherPIIMatrix[name].append(line[7])
118 ... else:
119 ... otherPIIMatrix[name] = [line[7],]
120 ...
121 >>> len(otherPIIMatrix.keys())
122 1351
123 >>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
124 >>> for line in contents:
125 ... line = line.replace("\n","")
126 ... line = line.split("|")
127 ... name = line[4]
128 ... name = name.upper()
129 ... name = name.lstrip()
130 ... name = name.rstrip()
131 ... rawDob = line[3]
132 ... if len(line[3]) >5:
133 ... if "/" in rawDob:
134 ... rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
135 ... dob = rawDob.strftime("%m/%d/%Y")
136 ... else:
137 ... rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
138 ... dob = rawDob.strftime("%m/%d/%Y")
139 ... effFile.write("%s changed to %s for docID %s\n"% (line[3], dob, line[0]))
140 ... if name in dobMatrix.keys():
141 ... if dob in dobMatrix[name]:
142 ... pass
143 ... else:
144 ... dobMatrix[name].append(dob)
145 ... else:
146 ... dobMatrix[name] = [dob,]
147 ... else:
148 ... if line[3]:
149 ... effFile.write("ERROR: %s is too small to parse!!!\n"% line[3])
150 ...
151 >>> effFile.close()
152 >>> len(dobMatrix.keys())
153 64145
154 >>> patientList = docIDMatrix.keys()
155 >>> patientList.sort()
156 >>> patientList[0]
157 ''
158 >>> patientList[1]
159 'A., A.'
160 >>> patientList[2]
161 'A., JESSE'
162 >>> patientList[3]
163 'A., JOHN'
164
165 === Now we write the consolidated per-name results out to the output file ===
166 >>> dobPatientList = dobMatrix.keys()
167 >>> mrnPatientList = mrnMatrix.keys()
168 >>> ccaidPatientList = ccaidMatrix.keys()
169 >>> ssnPatientList = ssnMatrix.keys()
170 >>> otherIDPatientList = otherIDMatrix.keys()
171 >>> otherPIIPatientList = otherPIIMatrix.keys()
172
173 >>> outputFile = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21-OUTPUT.DAT",'w')
174 >>> outputFile.write("Name|DOB|MRN|CCAID|SSN|OtherIDInfo|OtherPII|DodIDs\n")
175 >>> for pt in patientList:
176 ... dob = ""
177 ... docID = ""
178 ... mrn = ""
179 ... ccaid = ""
180 ... ssn = ""
181 ... otherID = ""
182 ... otherPII = ""
183 ... outputFile.write("%s|"%pt)
184 ... if pt in dobPatientList:
185 ... dobs = dobMatrix[pt]
186 ... if len(dobs) > 1:
187 ... for d in dobs:
188 ... dob = dob + d + ";"
189 ... else:
190 ... dob = dobs[0]
191 ... outputFile.write("%s|"%dob)
192 ... if pt in mrnPatientList:
193 ... mrns = mrnMatrix[pt]
194 ... if len(mrns) >1:
195 ... for m in mrns:
196 ... mrn = mrn + m + ";"
197 ... else:
198 ... mrn = mrns[0]
199 ... outputFile.write("%s|"%mrn)
200 ... if pt in ccaidPatientList:
201 ... ccaids = ccaidMatrix[pt]
202 ... if len(ccaids) >1:
203 ... for c in ccaids:
204 ... ccaid = ccaid + c + ";"
205 ... else:
206 ... ccaid = ccaids[0]
207 ... outputFile.write("%s|"%ccaid)
208 ... if pt in ssnPatientList:
209 ... ssns = ssnMatrix[pt]
210 ... if len(ssns) >1:
211 ... for s in ssns:
212 ... ssn = ssn + s + ";"
213 ... else:
214 ... ssn = ssns[0]
215 ... outputFile.write("%s|"%ssn)
216 ... if pt in otherIDPatientList:
217 ... otherIDs = otherIDMatrix[pt]
218 ... if len(otherIDs) >1:
219 ... for i in otherIDs:
220 ... otherID = otherID + i + ";"
221 ... else:
222 ... otherID = otherIDs[0]
223 ... outputFile.write("%s|"%otherID)
224 ... if pt in otherPIIPatientList:
225 ... otherPIIs = otherPIIMatrix[pt]
226 ... if len(otherPIIs) >1:
227 ... for p in otherPIIs:
228 ... otherPII = otherPII + p + ";"
229 ... else:
230 ... otherPII = otherPIIs[0]
231 ... outputFile.write("%s|"%otherPII)
232 ... docIDs = docIDMatrix[pt]
233 ... if len(docIDs) >1:
234 ... for rev in docIDs:
235 ... docID = docID + rev + ";"
236 ... else:
237 ... docID = docIDs[0]
238 ... outputFile.write("%s\n"%docID)
239 ...
240 >>> outputFile.close()
241
242
243
244
245
246
247
248 ====== OLD BELOW THIS LINE, DO NOT USE (earlier session: names were not stripped of leading/trailing whitespace) ======
249
250 PythonWin 2.7.17 (v2.7.17:c2f86d86e6, Oct 19 2019, 20:49:36) [MSC v.1500 32 bit (Intel)] on win32.
251 Portions Copyright 1994-2018 Mark Hammond - see 'Help/About PythonWin' for further copyright information.
252 >>> docIDMatrix = {}
253 >>> mrnMatrix = {}
254 >>> ccaidMatrix = {}
255 >>> dobMatrix = {}
256 >>> ssnMatrix = {}
257 >>> otherIDMatrix = {}
258 >>> otherPIIMatrix = {}
259 >>> contents = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21.csv").readlines()
260 >>> contents[0]
261 'Doc ID:|MRN|CCAID|DOB|Name|SSN|Other Identifiable Info.|Other PII|Multiple Doc IDs (Used for Consolidation Only)\n'
262 >>> contents = contents[1:]
263 >>> import datetime
264 >>> test = '20210122'
265 >>> test2 = datetime.datetime.strptime(test, '%Y%m%d")
266 >>> test = '20210122'
267 >>> test2 = datetime.datetime.strptime(test, '%Y%m%d')
268 >>> test2.strftime("%m/%d/%Y")
269 '01/22/2021'
270 >>> for line in contents:
271 ... line = line.replace("\n","")
272 ... line = line.split("|")
273 ... name = line[4]
274 ... name = name.upper()
275 ... if name in docIDMatrix.keys():
276 ... if line[0] in docIDMatrix[name]:
277 ... pass
278 ... else:
279 ... docIDMatrix[name].append(line[0])
280 ... else:
281 ... docIDMatrix[name] = [line[0],]
282 ...
283 >>> len(docIDMatrix.keys()
284 ... )
285 66433
286
287 >>> len(docIDMatrix['ABAD HIRALDO, PASCUAL'])
288 2
289 >>> docIDMatrix['ABAD HIRALDO, PASCUAL']
290 ['REV00118844', 'REV00215151']
291
292 >>> for line in contents:
293 ... line = line.replace("\n","")
294 ... line = line.split("|")
295 ... name = line[4]
296 ... name = name.upper()
297 ... if line[1]:
298 ... if name in mrnMatrix.keys():
299 ... if line[1] in mrnMatrix[name]:
300 ... pass
301 ... else:
302 ... mrnMatrix[name].append(line[1])
303 ... else:
304 ... mrnMatrix[name] = [line[1],]
305 ...
306 >>> len(mrnMatrix.keys())
307 3535
308 >>> for line in contents:
309 ... line = line.replace("\n","")
310 ... line = line.split("|")
311 ... name = line[4]
312 ... name = name.upper()
313 ... if line[2]:
314 ... if name in ccaidMatrix.keys():
315 ... if line[2] in ccaidMatrix[name]:
316 ... pass
317 ... else:
318 ... ccaidMatrix[name].append(line[2])
319 ... else:
320 ... ccaidMatrix[name] = [line[2],]
321 ...
322 >>> len(ccaidMatrix.keys())
323 3
324 >>> for line in contents:
325 ... line = line.replace("\n","")
326 ... line = line.split("|")
327 ... name = line[4]
328 ... name = name.upper()
329 ... if line[5]:
330 ... if name in ssnMatrix.keys():
331 ... if line[5] in ssnMatrix[name]:
332 ... pass
333 ... else:
334 ... ssnMatrix[name].append(line[5])
335 ... else:
336 ... ssnMatrix[name] = [line[5],]
337 ...
338 >>> len(ssnMatrix.keys())
339 522
340 >>> for line in contents:
341 ... line = line.replace("\n","")
342 ... line = line.split("|")
343 ... name = line[4]
344 ... name = name.upper()
345 ... if line[6]:
346 ... if name in otherIDMatrix.keys():
347 ... if line[6] in otherIDMatrix[name]:
348 ... pass
349 ... else:
350 ... otherIDMatrix[name].append(line[6])
351 ... else:
352 ... otherIDMatrix[name] = [line[6],]
353 ...
354 >>> len(otherIDMatrix.keys())
355 24639
356 >>> for line in contents:
357 ... line = line.replace("\n","")
358 ... line = line.split("|")
359 ... name = line[4]
360 ... name = name.upper()
361 ... if line[7]:
362 ... if name in otherPIIMatrix.keys():
363 ... if line[7] in otherPIIMatrix[name]:
364 ... pass
365 ... else:
366 ... otherPIIMatrix[name].append(line[7])
367 ... else:
368 ... otherPIIMatrix[name] = [line[7],]
369 ...
370 >>> len(otherPIIMatrix.keys())
371 1352
372 >>> test = '7/15/1974'
373 >>> test2 = datetime.datetime.strptime(test, '%m/%d/%Y')
374 >>> test2.strftime("%m/%d/%Y")
375 '07/15/1974'
376 >>> for line in contents:
377 ... line = line.replace("\n","")
378 ... line = line.split("|")
379 ... name = line[4]
380 ... name = name.upper()
381 ... if line[3]:
382 ... if name in dobMatrix.keys():
383 ... if line[3] in dobMatrix[name]:
384 ... pass
385 ... else:
386 ... dobMatrix[name].append(line[3])
387 ... else:
388 ... dobMatrix[name] = [line[3],]
389
390 >>> for line in contents:
391 ... line = line.replace("\n","")
392 ... line = line.split("|")
393 ... name = line[4]
394 ... name = name.upper()
395 ... if line[3]:
396 ... if "/" in rawDob:
397 ... rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
398 ... dob = rawDob.strftime("%m/%d/%Y")
399 ... else:
400 ... rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
401 ... dob = rawDob.strftime("%m/%d/%Y")
402 ... if name in dobMatrix.keys():
403 ... if dob in dobMatrix[name]:
404 ... pass
405 ... else:
406 ... dobMatrix[name].append(dob)
407 ... else:
408 ... dobMatrix[name] = [dob,]
409 ...
410 >>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
411
412
413 >>> dobMatrix = {}
414 >>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
415 >>> for line in contents:
416 ... line = line.replace("\n","")
417 ... line = line.split("|")
418 ... name = line[4]
419 ... name = name.upper()
420 ... rawDob = line[3]
421 ... if len(line[3]) >5:
422 ... if "/" in rawDob:
423 ... rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
424 ... dob = rawDob.strftime("%m/%d/%Y")
425 ... else:
426 ... rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
427 ... dob = rawDob.strftime("%m/%d/%Y")
428 ... effFile.write("%s changed to %s for docID %s\n"% (line[3], dob, line[0]))
429 ... if name in dobMatrix.keys():
430 ... if dob in dobMatrix[name]:
431 ... pass
432 ... else:
433 ... dobMatrix[name].append(dob)
434 ... else:
435 ... dobMatrix[name] = [dob,]
436 ... else:
437 ... if line[3]:
438 ... effFile.write("ERROR: %s is too small to parse!!!\n"% line[3])
439 ...
440 >>> effFile.close()
441 >>> len(dobMatrix.keys())
442 64385
443 >>> outputFile = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21-OUTPUT.DAT",'w')
444 >>> patientList = docIDMatrix.keys()
445 >>> patientList.sort()
446 >>> patientList[0]
447 ''
448 >>> patientList[1]
449 ' CAMPUZANO, JACGP'
450 >>> patientList[2]
451 ' LEVESQUE, JENNIFER'
452 >>> patientList[3]
453 ' OLIVER, CRUZ'
454 >>> patientList[1].lstrip()
455 'CAMPUZANO, JACGP'
456 >>> patientList[1].rstrip()
457 ' CAMPUZANO, JACGP'
458 >>>