ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/RandomCodeRequests/LCHC.txt
Revision: 742
Committed: Tue Apr 13 20:36:48 2021 UTC (4 years, 11 months ago) by nino.borges
Content type: text/plain
File size: 12595 byte(s)
Log Message:
added some missing code request txt files

File Contents

# User Rev Content
1 nino.borges 742 ===== LCHC request from Dana: take an XLS of DOB and other PII data and pivot it by deduplicated user name =====
2    
3     >>> docIDMatrix = {}
4     >>> mrnMatrix = {}
5     >>> ccaidMatrix = {}
6     >>> dobMatrix = {}
7     >>> ssnMatrix = {}
8     >>> otherIDMatrix = {}
9     >>> otherPIIMatrix = {}
10     >>> contents = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21.csv").readlines()
11     >>> contents[0]
12     'Doc ID:|MRN|CCAID|DOB|Name|SSN|Other Identifiable Info.|Other PII|Multiple Doc IDs (Used for Consolidation Only)\n'
13     >>> contents = contents[1:]
14     >>> for line in contents:
15     ... line = line.replace("\n","")
16     ... line = line.split("|")
17     ... name = line[4]
18     ... name = name.upper()
19     ... name = name.lstrip()
20     ... name = name.rstrip()
21     ... if name in docIDMatrix.keys():
22     ... if line[0] in docIDMatrix[name]:
23     ... pass
24     ... else:
25     ... docIDMatrix[name].append(line[0])
26     ... else:
27     ... docIDMatrix[name] = [line[0],]
28     ...
29     >>> len(docIDMatrix.keys())
30     66183
31     >>> docIDMatrix['ABAD HIRALDO, PASCUAL']
32     ['REV00118844', 'REV00215151']
33     >>> for line in contents:
34     ... line = line.replace("\n","")
35     ... line = line.split("|")
36     ... name = line[4]
37     ... name = name.upper()
38     ... name = name.lstrip()
39     ... name = name.rstrip()
40     ... if line[1]:
41     ... if name in mrnMatrix.keys():
42     ... if line[1] in mrnMatrix[name]:
43     ... pass
44     ... else:
45     ... mrnMatrix[name].append(line[1])
46     ... else:
47     ... mrnMatrix[name] = [line[1],]
48     ...
49     >>> len(mrnMatrix.keys())
50     3534
51     >>> for line in contents:
52     ... line = line.replace("\n","")
53     ... line = line.split("|")
54     ... name = line[4]
55     ... name = name.upper()
56     ... name = name.lstrip()
57     ... name = name.rstrip()
58     ... if line[2]:
59     ... if name in ccaidMatrix.keys():
60     ... if line[2] in ccaidMatrix[name]:
61     ... pass
62     ... else:
63     ... ccaidMatrix[name].append(line[2])
64     ... else:
65     ... ccaidMatrix[name] = [line[2],]
66     ...
67     >>> len(ccaidMatrix.keys())
68     3
69     >>> for line in contents:
70     ... line = line.replace("\n","")
71     ... line = line.split("|")
72     ... name = line[4]
73     ... name = name.upper()
74     ... name = name.lstrip()
75     ... name = name.rstrip()
76     ... if line[5]:
77     ... if name in ssnMatrix.keys():
78     ... if line[5] in ssnMatrix[name]:
79     ... pass
80     ... else:
81     ... ssnMatrix[name].append(line[5])
82     ... else:
83     ... ssnMatrix[name] = [line[5],]
84     ...
85     >>> len(ssnMatrix.keys())
86     521
87     >>> for line in contents:
88     ... line = line.replace("\n","")
89     ... line = line.split("|")
90     ... name = line[4]
91     ... name = name.upper()
92     ... name = name.lstrip()
93     ... name = name.rstrip()
94     ... if line[6]:
95     ... if name in otherIDMatrix.keys():
96     ... if line[6] in otherIDMatrix[name]:
97     ... pass
98     ... else:
99     ... otherIDMatrix[name].append(line[6])
100     ... else:
101     ... otherIDMatrix[name] = [line[6],]
102     ...
103     >>> len(otherIDMatrix.keys())
104     24636
105     >>> for line in contents:
106     ... line = line.replace("\n","")
107     ... line = line.split("|")
108     ... name = line[4]
109     ... name = name.upper()
110     ... name = name.lstrip()
111     ... name = name.rstrip()
112     ... if line[7]:
113     ... if name in otherPIIMatrix.keys():
114     ... if line[7] in otherPIIMatrix[name]:
115     ... pass
116     ... else:
117     ... otherPIIMatrix[name].append(line[7])
118     ... else:
119     ... otherPIIMatrix[name] = [line[7],]
120     ...
121     >>> len(otherPIIMatrix.keys())
122     1351
123     >>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
124     >>> for line in contents:
125     ... line = line.replace("\n","")
126     ... line = line.split("|")
127     ... name = line[4]
128     ... name = name.upper()
129     ... name = name.lstrip()
130     ... name = name.rstrip()
131     ... rawDob = line[3]
132     ... if len(line[3]) >5:
133     ... if "/" in rawDob:
134     ... rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
135     ... dob = rawDob.strftime("%m/%d/%Y")
136     ... else:
137     ... rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
138     ... dob = rawDob.strftime("%m/%d/%Y")
139     ... effFile.write("%s changed to %s for docID %s\n"% (line[3], dob, line[0]))
140     ... if name in dobMatrix.keys():
141     ... if dob in dobMatrix[name]:
142     ... pass
143     ... else:
144     ... dobMatrix[name].append(dob)
145     ... else:
146     ... dobMatrix[name] = [dob,]
147     ... else:
148     ... if line[3]:
149     ... effFile.write("ERROR: %s is too small to parse!!!\n"% line[3])
150     ...
151     >>> effFile.close()
152     >>> len(dobMatrix.keys())
153     64145
154     >>> patientList = docIDMatrix.keys()
155     >>> patientList.sort()
156     >>> patientList[0]
157     ''
158     >>> patientList[1]
159     'A., A.'
160     >>> patientList[2]
161     'A., JESSE'
162     >>> patientList[3]
163     'A., JOHN'
164    
165     === Now we write it out ===
166     >>> dobPatientList = dobMatrix.keys()
167     >>> mrnPatientList = mrnMatrix.keys()
168     >>> ccaidPatientList = ccaidMatrix.keys()
169     >>> ssnPatientList = ssnMatrix.keys()
170     >>> otherIDPatientList = otherIDMatrix.keys()
171     >>> otherPIIPatientList = otherPIIMatrix.keys()
172    
173     >>> outputFile = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21-OUTPUT.DAT",'w')
174     >>> outputFile.write("Name|DOB|MRN|CCAID|SSN|OtherIDInfo|OtherPII|DodIDs\n")
175     >>> for pt in patientList:
176     ... dob = ""
177     ... docID = ""
178     ... mrn = ""
179     ... ccaid = ""
180     ... ssn = ""
181     ... otherID = ""
182     ... otherPII = ""
183     ... outputFile.write("%s|"%pt)
184     ... if pt in dobPatientList:
185     ... dobs = dobMatrix[pt]
186     ... if len(dobs) > 1:
187     ... for d in dobs:
188     ... dob = dob + d + ";"
189     ... else:
190     ... dob = dobs[0]
191     ... outputFile.write("%s|"%dob)
192     ... if pt in mrnPatientList:
193     ... mrns = mrnMatrix[pt]
194     ... if len(mrns) >1:
195     ... for m in mrns:
196     ... mrn = mrn + m + ";"
197     ... else:
198     ... mrn = mrns[0]
199     ... outputFile.write("%s|"%mrn)
200     ... if pt in ccaidPatientList:
201     ... ccaids = ccaidMatrix[pt]
202     ... if len(ccaids) >1:
203     ... for c in ccaids:
204     ... ccaid = ccaid + c + ";"
205     ... else:
206     ... ccaid = ccaids[0]
207     ... outputFile.write("%s|"%ccaid)
208     ... if pt in ssnPatientList:
209     ... ssns = ssnMatrix[pt]
210     ... if len(ssns) >1:
211     ... for s in ssns:
212     ... ssn = ssn + s + ";"
213     ... else:
214     ... ssn = ssns[0]
215     ... outputFile.write("%s|"%ssn)
216     ... if pt in otherIDPatientList:
217     ... otherIDs = otherIDMatrix[pt]
218     ... if len(otherIDs) >1:
219     ... for i in otherIDs:
220     ... otherID = otherID + i + ";"
221     ... else:
222     ... otherID = otherIDs[0]
223     ... outputFile.write("%s|"%otherID)
224     ... if pt in otherPIIPatientList:
225     ... otherPIIs = otherPIIMatrix[pt]
226     ... if len(otherPIIs) >1:
227     ... for p in otherPIIs:
228     ... otherPII = otherPII + p + ";"
229     ... else:
230     ... otherPII = otherPIIs[0]
231     ... outputFile.write("%s|"%otherPII)
232     ... docIDs = docIDMatrix[pt]
233     ... if len(docIDs) >1:
234     ... for rev in docIDs:
235     ... docID = docID + rev + ";"
236     ... else:
237     ... docID = docIDs[0]
238     ... outputFile.write("%s\n"%docID)
239     ...
240     >>> outputFile.close()
241    
242    
243    
244    
245    
246    
247    
248     ====== OLD BELOW THIS LINE, DO NOT USE ======
249    
250     PythonWin 2.7.17 (v2.7.17:c2f86d86e6, Oct 19 2019, 20:49:36) [MSC v.1500 32 bit (Intel)] on win32.
251     Portions Copyright 1994-2018 Mark Hammond - see 'Help/About PythonWin' for further copyright information.
252     >>> docIDMatrix = {}
253     >>> mrnMatrix = {}
254     >>> ccaidMatrix = {}
255     >>> dobMatrix = {}
256     >>> ssnMatrix = {}
257     >>> otherIDMatrix = {}
258     >>> otherPIIMatrix = {}
259     >>> contents = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21.csv").readlines()
260     >>> contents[0]
261     'Doc ID:|MRN|CCAID|DOB|Name|SSN|Other Identifiable Info.|Other PII|Multiple Doc IDs (Used for Consolidation Only)\n'
262     >>> contents = contents[1:]
263     >>> import datetime
264     >>> test = '20210122'
265     >>> test2 = datetime.datetime.strptime(test, '%Y%m%d")
266     >>> test = '20210122'
267     >>> test2 = datetime.datetime.strptime(test, '%Y%m%d')
268     >>> test2.strftime("%m/%d/%Y")
269     '01/22/2021'
270     >>> for line in contents:
271     ... line = line.replace("\n","")
272     ... line = line.split("|")
273     ... name = line[4]
274     ... name = name.upper()
275     ... if name in docIDMatrix.keys():
276     ... if line[0] in docIDMatrix[name]:
277     ... pass
278     ... else:
279     ... docIDMatrix[name].append(line[0])
280     ... else:
281     ... docIDMatrix[name] = [line[0],]
282     ...
283     >>> len(docIDMatrix.keys()
284     ... )
285     66433
286    
287     >>> len(docIDMatrix['ABAD HIRALDO, PASCUAL'])
288     2
289     >>> docIDMatrix['ABAD HIRALDO, PASCUAL']
290     ['REV00118844', 'REV00215151']
291    
292     >>> for line in contents:
293     ... line = line.replace("\n","")
294     ... line = line.split("|")
295     ... name = line[4]
296     ... name = name.upper()
297     ... if line[1]:
298     ... if name in mrnMatrix.keys():
299     ... if line[1] in mrnMatrix[name]:
300     ... pass
301     ... else:
302     ... mrnMatrix[name].append(line[1])
303     ... else:
304     ... mrnMatrix[name] = [line[1],]
305     ...
306     >>> len(mrnMatrix.keys())
307     3535
308     >>> for line in contents:
309     ... line = line.replace("\n","")
310     ... line = line.split("|")
311     ... name = line[4]
312     ... name = name.upper()
313     ... if line[2]:
314     ... if name in ccaidMatrix.keys():
315     ... if line[2] in ccaidMatrix[name]:
316     ... pass
317     ... else:
318     ... ccaidMatrix[name].append(line[2])
319     ... else:
320     ... ccaidMatrix[name] = [line[2],]
321     ...
322     >>> len(ccaidMatrix.keys())
323     3
324     >>> for line in contents:
325     ... line = line.replace("\n","")
326     ... line = line.split("|")
327     ... name = line[4]
328     ... name = name.upper()
329     ... if line[5]:
330     ... if name in ssnMatrix.keys():
331     ... if line[5] in ssnMatrix[name]:
332     ... pass
333     ... else:
334     ... ssnMatrix[name].append(line[5])
335     ... else:
336     ... ssnMatrix[name] = [line[5],]
337     ...
338     >>> len(ssnMatrix.keys())
339     522
340     >>> for line in contents:
341     ... line = line.replace("\n","")
342     ... line = line.split("|")
343     ... name = line[4]
344     ... name = name.upper()
345     ... if line[6]:
346     ... if name in otherIDMatrix.keys():
347     ... if line[6] in otherIDMatrix[name]:
348     ... pass
349     ... else:
350     ... otherIDMatrix[name].append(line[6])
351     ... else:
352     ... otherIDMatrix[name] = [line[6],]
353     ...
354     >>> len(otherIDMatrix.keys())
355     24639
356     >>> for line in contents:
357     ... line = line.replace("\n","")
358     ... line = line.split("|")
359     ... name = line[4]
360     ... name = name.upper()
361     ... if line[7]:
362     ... if name in otherPIIMatrix.keys():
363     ... if line[7] in otherPIIMatrix[name]:
364     ... pass
365     ... else:
366     ... otherPIIMatrix[name].append(line[7])
367     ... else:
368     ... otherPIIMatrix[name] = [line[7],]
369     ...
370     >>> len(otherPIIMatrix.keys())
371     1352
372     >>> test = '7/15/1974'
373     >>> test2 = datetime.datetime.strptime(test, '%m/%d/%Y')
374     >>> test2.strftime("%m/%d/%Y")
375     '07/15/1974'
376     >>> for line in contents:
377     ... line = line.replace("\n","")
378     ... line = line.split("|")
379     ... name = line[4]
380     ... name = name.upper()
381     ... if line[3]:
382     ... if name in dobMatrix.keys():
383     ... if line[3] in dobMatrix[name]:
384     ... pass
385     ... else:
386     ... dobMatrix[name].append(line[3])
387     ... else:
388     ... dobMatrix[name] = [line[3],]
389    
390     >>> for line in contents:
391     ... line = line.replace("\n","")
392     ... line = line.split("|")
393     ... name = line[4]
394     ... name = name.upper()
395     ... if line[3]:
396     ... if "/" in rawDob:
397     ... rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
398     ... dob = rawDob.strftime("%m/%d/%Y")
399     ... else:
400     ... rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
401     ... dob = rawDob.strftime("%m/%d/%Y")
402     ... if name in dobMatrix.keys():
403     ... if dob in dobMatrix[name]:
404     ... pass
405     ... else:
406     ... dobMatrix[name].append(dob)
407     ... else:
408     ... dobMatrix[name] = [dob,]
409     ...
410     >>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
411    
412    
413     >>> dobMatrix = {}
414     >>> effFile = open(r"C:\Temp\LCHC - dobTestLog.txt",'w')
415     >>> for line in contents:
416     ... line = line.replace("\n","")
417     ... line = line.split("|")
418     ... name = line[4]
419     ... name = name.upper()
420     ... rawDob = line[3]
421     ... if len(line[3]) >5:
422     ... if "/" in rawDob:
423     ... rawDob = datetime.datetime.strptime(rawDob, '%m/%d/%Y')
424     ... dob = rawDob.strftime("%m/%d/%Y")
425     ... else:
426     ... rawDob = datetime.datetime.strptime(rawDob, '%Y%m%d')
427     ... dob = rawDob.strftime("%m/%d/%Y")
428     ... effFile.write("%s changed to %s for docID %s\n"% (line[3], dob, line[0]))
429     ... if name in dobMatrix.keys():
430     ... if dob in dobMatrix[name]:
431     ... pass
432     ... else:
433     ... dobMatrix[name].append(dob)
434     ... else:
435     ... dobMatrix[name] = [dob,]
436     ... else:
437     ... if line[3]:
438     ... effFile.write("ERROR: %s is too small to parse!!!\n"% line[3])
439     ...
440     >>> effFile.close()
441     >>> len(dobMatrix.keys())
442     64385
443     >>> outputFile = open(r"C:\Temp\LCHC - Consolidated Master_For Scripting_02.17.21-OUTPUT.DAT",'w')
444     >>> patientList = docIDMatrix.keys()
445     >>> patientList.sort()
446     >>> patientList[0]
447     ''
448     >>> patientList[1]
449     ' CAMPUZANO, JACGP'
450     >>> patientList[2]
451     ' LEVESQUE, JENNIFER'
452     >>> patientList[3]
453     ' OLIVER, CRUZ'
454     >>> patientList[1].lstrip()
455     'CAMPUZANO, JACGP'
456     >>> patientList[1].rstrip()
457     ' CAMPUZANO, JACGP'
458     >>>