ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/RandomCodeRequests/Evidox_Glassman.txt
Revision: 651
Committed: Thu Dec 12 20:45:58 2019 UTC (6 years, 3 months ago) by nino.borges
Content type: text/plain
File size: 7063 byte(s)
Log Message:
small updates

File Contents

# User Rev Content
### Glassman: for every document, count how many times the phrase
### "work product" appears, then report those counts by claim instead of
### by document. ###
import os
import re

startPath = r"\\iadcifs01\iproshares01\PSDU-Glassman-Metropolitan\Eclipse\PSDU-Glassman-Metropolitan-Review\Production Outgoing\WP001\Text\001"

# "Work", then at most two intervening words, then "Product".
# The match is case-sensitive as written.
workProductPattern = re.compile(r'\bWork\W+(?:\w+\W+){0,2}?Product\b')

# Maps each text file's base name (extension stripped) to its match count;
# later steps look these entries up by Bates number.
matchMatrix = {}
for f in os.listdir(startPath):
    # "with" closes each handle as soon as the file has been read.
    with open(os.path.join(startPath, f)) as textFile:
        fContentsString = textFile.read()
    match = workProductPattern.findall(fContentsString)
    matchMatrix[os.path.splitext(f)[0]] = len(match)
11     >>> matchMatrix['MAPFRECFEDP000000252']
12     3
13    
# Second text directory: add its documents to the same matchMatrix.
startPath = r"\\iadcifs01\iproshares01\PSDU-Glassman-Metropolitan\Eclipse\PSDU-Glassman-Metropolitan-Review\Production Outgoing\WP001\Text\002"

for f in os.listdir(startPath):
    # "with" closes each handle as soon as the file has been read.
    with open(os.path.join(startPath, f)) as textFile:
        fContentsString = textFile.read()
    match = re.findall(r'\bWork\W+(?:\w+\W+){0,2}?Product\b', fContentsString)
    matchMatrix[os.path.splitext(f)[0]] = len(match)
21    
# Load the pipe-delimited DAT and group Bates numbers by (lower-cased) claim.
with open(r"\\iadcifs01\iproshares01\PSDU-Glassman-Metropolitan\Eclipse\PSDU-Glassman-Metropolitan-Review\Production Outgoing\WP001\Data\CONCORD.DAT") as datFile:
    contents = datFile.readlines()
contents = contents[1:]  # skip the first line (presumably the header row)

claimMatrix = {}
for line in contents:
    line = line.replace("\n", "")
    # Exactly two fields are expected; anything else raises ValueError.
    bates, claim = line.split("|")
    claim = claim.lower()
    # setdefault avoids rebuilding the key list on every row.
    claimMatrix.setdefault(claim, []).append(bates)
34     >>> len(claimMatrix.keys())
35     845
36     >>> claimMatrix["cann65"]
37     ['MAPFRECFEDP000001418', 'MAPFRECFEDP000001751']
38    
39    
# Write one "claim|count" line per claim, in sorted claim order.
# sorted() replaces keys() + list.sort(), which only works on Python 2.
claimList = sorted(claimMatrix)

# The file name carries ".txt" so it matches the name the later merge step
# reads back (20180918_claimReport.txt).
with open(r"C:\Test-PY\Glassman\20180918_claimReport.txt", 'w') as outputFile:
    for claim in claimList:
        # Total hits across every document that belongs to this claim.
        # A Bates number with no scanned text file raises KeyError here.
        claimCount = sum(matchMatrix[bates] for bates in claimMatrix[claim])
        outputFile.write("%s|%s\n" % (claim, claimCount))
51    
52    
### Then I was asked to grab the earliest date in the same text files and
### add those to the report.
import datetime

# One- or two-digit month and day plus a four-digit year, separated by
# "/" or "-". The two separators are not required to match each other.
datePattern = re.compile(r'\b\d{1,2}[-/]\d{1,2}[-/]\d{4}\b')

# Maps each text file's base name to the list of date strings found in it.
dateMatrix = {}

# startPath still holds whatever was assigned last in the session
# (the Text\002 directory in the steps above).
for f in os.listdir(startPath):
    with open(os.path.join(startPath, f)) as textFile:
        fContentsString = textFile.read()
    dateMatrix[os.path.splitext(f)[0]] = datePattern.findall(fContentsString)

startPath = r"\\iadcifs01\iproshares01\PSDU-Glassman-Metropolitan\Eclipse\PSDU-Glassman-Metropolitan-Review\Production Outgoing\WP001\Text\001"
for f in os.listdir(startPath):
    with open(os.path.join(startPath, f)) as textFile:
        fContentsString = textFile.read()
    dateMatrix[os.path.splitext(f)[0]] = datePattern.findall(fContentsString)
66    
67    
# For every document, parse the date strings that were found and keep the
# earliest one that is also usable for output.
formattedDateMatrix = {}
for bates in dateMatrix:
    formattedDateList = []
    for dt in dateMatrix[bates]:
        # Try the slash form first, then the dash form. Strings that fit
        # neither (mixed separators, month 13, ...) are skipped.
        for dateFormat in ("%m/%d/%Y", "%m-%d-%Y"):
            try:
                formattedDateList.append(datetime.datetime.strptime(dt, dateFormat))
            except ValueError:
                continue
            break
    formattedDateList.sort()
    # The original note on this step said that taking element 0 blindly was
    # a problem: the oldest parsed date is not always a valid one. Walk the
    # sorted list and keep the first date strftime accepts (presumably the
    # failures are very old years, which some Python versions reject --
    # TODO confirm). Documents with no usable date get no entry.
    for parsedDate in formattedDateList:
        try:
            parsedDate.strftime("%m/%d/%Y")
        except ValueError:
            continue
        formattedDateMatrix[bates] = parsedDate
        break
84    
# Roll the per-document earliest dates up to one earliest date per claim,
# stored as an "mm/dd/yyyy" string.
formattedClaimMatrix = {}
for claim in claimMatrix:
    # Membership is tested against formattedDateMatrix (keyed by Bates
    # number). The earlier version tested formattedDateList, a leftover list
    # of datetime objects, so no Bates number ever matched and every claim
    # ended up without a date.
    formattedClaimList = sorted(
        formattedDateMatrix[bates]
        for bates in claimMatrix[claim]
        if bates in formattedDateMatrix
    )
    # Keep the first (earliest) date that strftime can actually format.
    for dt in formattedClaimList:
        try:
            earliestDate = datetime.datetime.strftime(dt, "%m/%d/%Y")
        except ValueError:
            continue
        formattedClaimMatrix[claim] = earliestDate
        break
103    
# Write one "claim|earliest date" line per claim that has a date, in sorted
# claim order. formattedClaimDateList stays a list because the merge step
# below uses it for membership tests.
formattedClaimDateList = sorted(formattedClaimMatrix)

# NOTE(review): these notes never show a file being opened for this report;
# the only outputFile in scope was already closed after the count report, so
# writing to it raises "I/O operation on closed file". The path below is an
# assumption chosen to sit next to the other reports -- adjust as needed.
with open(r"C:\Test-PY\Glassman\20180918_claimDateReport.txt", 'w') as outputFile:
    for i in formattedClaimDateList:
        outputFile.write("%s|%s\n" % (i, formattedClaimMatrix[i]))
110    
## Just merging the two reports to make one: "claim|count|earliest date",
## with the date left empty for claims that have none.
# Read the per-claim count report first, so a missing input does not leave
# an empty output file behind.
with open(r"C:\Test-PY\Glassman\20180918_claimReport.txt") as countReport:
    contents = countReport.readlines()

with open(r"C:\Test-PY\Glassman\20180918_claimReportWithDates.txt", 'w') as outputFile:
    for line in contents:
        line = line.replace("\n", "")
        # Both reports are keyed by claim, so the first field is a claim
        # (not a Bates number).
        claim, count = line.split("|")
        # Dict membership; formattedClaimMatrix has the same keys as
        # formattedClaimDateList.
        if claim in formattedClaimMatrix:
            outputFile.write("%s|%s|%s\n" % (claim, count, formattedClaimMatrix[claim]))
        else:
            outputFile.write("%s|%s|\n" % (claim, count))
124    
125    
#### THIS IS THE SAME AS THE PROJECT ABOVE, BUT COUNTING "NON RESPONSIVE"
#### AND WITH THE DATES TAKEN FROM THE DAT INSTEAD ####
import datetime
import os
import re

startPath = r"\\iadcifs01\iproshares01\PSDU-Glassman-Metropolitan\Eclipse\PSDU-Glassman-Metropolitan-Review\Production Outgoing\NR001\Text\001"

# Maps each text file's base name (extension stripped) to the number of
# "Non ... Responsive" hits (at most two intervening words, case-sensitive).
matchMatrix = {}
for f in os.listdir(startPath):
    # "with" closes each handle as soon as the file has been read.
    with open(os.path.join(startPath, f)) as textFile:
        fContentsString = textFile.read()
    match = re.findall(r'\bNon\W+(?:\w+\W+){0,2}?Responsive\b', fContentsString)
    matchMatrix[os.path.splitext(f)[0]] = len(match)
140     >>> matchMatrix['MAPFRECFEDP000000252']
141     1
142    
with open(r"\\iadcifs01\iproshares01\PSDU-Glassman-Metropolitan\Eclipse\PSDU-Glassman-Metropolitan-Review\Production Outgoing\NR001\Data\CONCORD.DAT") as datFile:
    contents = datFile.readlines()
contents = contents[1:]  # skip the first line (presumably the header row)
claimMatrix = {}

# first we populate the date matrix: claim -> earliest date seen on any row
claimDtMatrix = {}

for line in contents:
    line = line.replace("\n", "")
    # Exactly four fields are expected; anything else raises ValueError.
    bates, claim, createdDt, lastmodDt = line.split("|")
    claim = claim.lower()
    # Rows with no created date contribute nothing, as before.
    if not createdDt:
        continue
    # An empty last-modified value is skipped rather than handed to
    # strptime, which would raise on "".
    tempDtList = [
        datetime.datetime.strptime(value, "%m/%d/%Y")
        for value in (createdDt, lastmodDt)
        if value
    ]
    rowEarliest = min(tempDtList)
    if claim not in claimDtMatrix or rowEarliest < claimDtMatrix[claim]:
        claimDtMatrix[claim] = rowEarliest
163     >>> claimDtMatrix["ccky60"]
164     datetime.datetime(2013, 3, 18, 0, 0)
165    
# Now the main matrix: (lower-cased) claim -> list of its Bates numbers.
for row in contents:
    fields = row.replace("\n", "").split("|")
    # Four fields per row; the two date fields are not used in this pass.
    bates, claim, createdDt, lastmodDt = fields
    claimMatrix.setdefault(claim.lower(), []).append(bates)
176    
# Write one "claim|count|earliest date" line per claim; the date field is
# left empty for claims with no date in claimDtMatrix. Claims are written
# in dictionary order (not sorted), as before.
claimList = list(claimMatrix)

# "with" guarantees the report is flushed and closed even if a lookup
# below raises part-way through.
with open(r"C:\Test-PY\Glassman\20181017_NR_claimReport", 'w') as outputFile:
    for claim in claimList:
        # Total hits across every document that belongs to this claim.
        # A Bates number with no scanned text file raises KeyError here.
        claimCount = sum(matchMatrix[bates] for bates in claimMatrix[claim])
        earliest = claimDtMatrix.get(claim)
        if earliest is not None:
            outputFile.write("%s|%s|%s\n" % (claim, claimCount, earliest.strftime("%m/%d/%Y")))
        else:
            outputFile.write("%s|%s|\n" % (claim, claimCount))