ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/john_muir_dataParser.py
Revision: 553
Committed: Wed Jun 18 13:58:04 2014 UTC (11 years, 9 months ago) by nino.borges
Content type: text/x-python
File size: 4731 byte(s)
Log Message:
a simple program that allowed me to parse a pdf and spreadsheet where the rows didnt line up at all.

File Contents

# User Rev Content
1 nino.borges 553 """
2    
3     john_muir_dataParser
4    
5     Created by
6     Emanuel Borges
7     03.26.2014
8    
9     This program parses a very complex Excel export into something that can be imported into RL.
10    
11     """
12    
13     import re,string
14    
# Header row written once at the top of the output file.
_HEADER = "Sch_Date|Sch_Time|ActTx_Time|Status|Comments|Activity|Patient Name|MedRc|P Stf|Loc|MD|Dur|AttndPhy|CptCode|PrimaryDiag\n"

# Record keys in output-column order (position-for-position with _HEADER).
_FIELD_ORDER = ("date", "time", "stTmDxDlvd", "sts", "comments", "actHC",
                "pName", "medRc", "pStr", "loc", "md", "dur", "attndPhy",
                "cptCode", "primaryDiag")


def _blank_record():
    """Return a record dict with every output field set to the empty string."""
    return dict.fromkeys(_FIELD_ORDER, "")


def _write_record(outputFile, rec):
    """Write *rec* to *outputFile* as one pipe-delimited, newline-ended row."""
    outputFile.write("|".join(rec[key] for key in _FIELD_ORDER) + "\n")


def _normalize_time(raw):
    """Return the scheduled-time cell with its am/pm part lower-cased.

    Leading whitespace is dropped.  A cell that is exactly two
    space-separated tokens comes back as "<first> <second-lowercased>".
    Any other shape (e.g. "9:00am" with no space) is returned with only the
    leading whitespace removed, rather than raising on the unpack.
    """
    trimmed = raw.lstrip()
    pieces = trimmed.split(" ")
    if len(pieces) != 2:
        return trimmed
    clock, meridiem = pieces
    return "%s %s" % (clock, meridiem.lower())


def Parse(contents,outputFile):
    """Flatten a multi-line, pipe-delimited export into one row per record.

    A record starts at a line that begins with ``<digits>/<digits>/<digits>``,
    a pipe, optional spaces and ``<digits>:<digits>``.  The lines that follow
    it are detail lines; each is recognised by one of its fields being exactly
    "Activity & (Hos.Code):", "CPT Code:", "Primary Diagnosis:" or
    "Comments:".  Lines matching none of these are ignored.  Detail values are
    attached to the current record, which is written out when the next record
    starts or when the input ends.

    Parameters:
        contents   -- iterable of text lines (e.g. a list from readlines() or
                      an open file).
        outputFile -- object with a write(str) method.  It receives the header
                      row followed by one row per record.  It is not closed
                      here.

    Returns:
        None.

    Raises:
        IndexError -- a recognised line has fewer fields than the fixed
                      positions read from it, or a non-empty attending
                      physician value has no comma / an empty name part.
    """
    # Compiled once; the pattern itself is unchanged.
    record_start = re.compile(r'^\d+/\d+/\d+[|][ ]*\d+:\d+(am|pm)?')

    outputFile.write(_HEADER)

    rec = _blank_record()
    have_record = False  # becomes True at the first record-start line

    for raw in contents:
        is_record_start = record_start.search(raw)
        cells = raw.replace("\n","").split("|")

        if is_record_start:
            if have_record:
                # A new record begins: emit the finished one and start clean.
                _write_record(outputFile, rec)
                rec = _blank_record()
            have_record = True

            rec["date"] = cells[0]
            rec["time"] = _normalize_time(cells[1])
            rec["pName"] = cells[3]
            rec["medRc"] = cells[8]

            # The staff / location / status cells are read from different
            # positions depending on whether the MedRc cell is populated.
            # (The description cell at 13 / 12 was read by the original but
            # never written out, so it is not read here.)
            if rec["medRc"]:
                rec["pStr"] = cells[21]
                rec["loc"] = cells[23]
                rec["sts"] = cells[28]
            else:
                rec["pStr"] = cells[17]
                rec["loc"] = cells[20]
                rec["sts"] = cells[24]

            rec["md"] = cells[25]

        elif "Activity & (Hos.Code):" in cells:
            rec["actHC"] = cells[3]
            rec["dur"] = cells[11]
            rec["attndPhy"] = cells[21]
            if rec["attndPhy"]:
                # Value is split on "," and the initials are built from the
                # second character of the part after the comma plus the first
                # character of the part before it ("Smith, Jane" -> "JS").
                name_parts = rec["attndPhy"].split(",")
                initials = "%s%s" % (name_parts[1][1], name_parts[0][0])
                if rec["md"] != initials:
                    # MD cell does not match the physician's initials: treat
                    # the row's cells as sitting one column too far left and
                    # shift them (md -> sts, loc -> md, pStr -> loc).
                    rec["sts"] = rec["md"]
                    rec["md"] = rec["loc"]
                    rec["loc"] = rec["pStr"]
                    # NOTE(review): the original wrote `if pStr == loc:` before
                    # clearing pStr.  Directly after `loc = pStr` that test is
                    # always true, so it is dropped here.  The source this was
                    # recovered from had lost its indentation -- confirm the
                    # test was nested at this level.
                    rec["pStr"] = ""

        elif "CPT Code:" in cells:
            rec["cptCode"] = cells[4]
            rec["stTmDxDlvd"] = cells[18]

        elif "Primary Diagnosis:" in cells:
            rec["primaryDiag"] = cells[3] + ";" + cells[4]

        elif "Comments:" in cells:
            # Sentence-case the comment.  Slicing with [:1] keeps an empty
            # comment cell from raising IndexError.
            lowered = cells[3].lower()
            rec["comments"] = lowered[:1].upper() + lowered[1:]

    # Emit the last record.  With no record rows at all, only the header is
    # written instead of failing on unset fields.
    if have_record:
        _write_record(outputFile, rec)
102    
103    
104    
105     ##for line in content:
106     ##... match = re.search(r'^\d+/\d+/\d+[| ]\d+:\d+(am|pm)?',line)
107     ##... line = line.replace("\n","")
108     ##... if match:
109     ##... line = line.split("|")
110     ##... date = line[0]
111     ##... time = line[1]
112     ##... pName = line[3]
113     ##... medRc = line[8]
114     ##... desc = line[12]
115     ##... if desc:
116     ##... pass
117     ##... else:
118     ##... desc = line[13]
119     ##... pStr = line[17]
120     ##... if pStr:
121     ##... pass
122     ##... else:
123     ##... pStr = line[21]
124     ##... loc = line[20]
125     ##... if loc:
126     ##... pass
127     ##... else:
128     ##... loc = line[23]
129     ##... md = line[25]
130     ##... sts = line[24]
131     ##... if sts:
132     ##... pass
133     ##... else:
134     ## sts = line[28]
135     ##... outputFile.write("%s|%s|%s|%s|%s|%s|%s|%s|%s\n"%(date,time,pName,medRc,desc,pStr,loc,md,sts))
136    
137    
138    
139    
if __name__=='__main__':
    # Script entry point: convert one exported .csv into a pipe-delimited
    # .dat file next to it.  Paths are hard-coded for the data set being
    # processed; edit them before running.
    #
    # Earlier input/output pair, kept for reference:
    #outputFile = open(r"\\BSTDD967DTW1\Users\eborges\My Documents\Test_dir\wc 2013 (1.1.13-8.26.13) with actual times111.dat",'w')
    #contents = open(r"\\BSTDD967DTW1\Users\eborges\My Documents\Test_dir\wc 2013 (1.1.13-8.26.13) with actual times111.csv").readlines()
    inputPath = r"W:\Manny\Client\018292-0062\MOSAIQ Schedule Review\MOSAIQ Schedule Review\Concord\2012\REV00000430.csv"
    outputPath = r"W:\Manny\Client\018292-0062\MOSAIQ Schedule Review\MOSAIQ Schedule Review\Concord\2012\REV00000430.dat"

    # `with` closes both handles even if Parse raises part-way through; the
    # input handle was previously never closed at all.
    with open(inputPath) as contents:
        with open(outputPath,'w') as outputFile:
            # Parse only iterates over `contents`, so the open file can be
            # streamed line by line instead of loading it with readlines().
            Parse(contents,outputFile)
146     outputFile.close()