| 1 |
nino.borges |
742 |
### Aceto Project: take each search term file and write a copy with the terms parsed onto separate lines, then a second copy with the parsed terms deduped and sorted.
|
| 2 |
|
|
|
| 3 |
|
|
import os
import re

# Pass 1: split every search-term file in startDir into one term per line.
startDir = r"C:\Temp\Search Terms"
outputDir = r"\\sas01\sas01\19516\Reports\138\Search Terms Parsed"

# Connectors that separate terms.  They are applied one after another, in
# this order; merging them into a single alternation would change the result
# for inputs such as "a and or b", so they are kept separate.
_TERM_SEPARATORS = (
    re.compile(r' or ', re.IGNORECASE),
    re.compile(r' and ', re.IGNORECASE),
    re.compile(r' w/[0-9]+ ', re.IGNORECASE),
)


def parse_search_term_line(line):
    """Return *line* with its search connectors turned into newlines.

    Each space-delimited ``or``, ``and`` and ``w/<digits>`` connector
    (matched case-insensitively) is replaced by a newline, then every
    ``(`` and ``)`` character is removed.  Anything else, including a
    trailing newline, is returned unchanged.
    """
    for separator in _TERM_SEPARATORS:
        line = separator.sub('\n', line)
    return line.replace("(", "").replace(")", "")


def parse_search_term_files(start_dir, output_dir):
    """Write a parsed copy of every file in *start_dir* into *output_dir*.

    The output file name is the input name with ``Search`` replaced by
    ``Search_Parsed``.  Every directory entry returned by ``os.listdir`` is
    opened as a text file; errors from ``open`` are not caught.
    """
    for name in os.listdir(start_dir):
        # Read inside a `with` so the input handle is always released.
        with open(os.path.join(start_dir, name)) as source:
            contents = source.readlines()
        new_name = name.replace("Search", "Search_Parsed")
        # `with` also closes the output file if a write fails part-way.
        with open(os.path.join(output_dir, new_name), 'w') as output:
            output.writelines(parse_search_term_line(line) for line in contents)


# __name__ is "__main__" both when this is run as a script and when it is
# pasted into the interactive interpreter, so the pass still runs there.
if __name__ == "__main__":
    parse_search_term_files(startDir, outputDir)
|
| 19 |
|
|
|
| 20 |
|
|
|
| 21 |
|
|
import os

# Pass 2: dedupe and sort the terms of every parsed file in startDir.
startDir = r"\\sas01\sas01\19516\Reports\138\Search Terms Parsed"
outputDir = r"\\sas01\sas01\19516\Reports\138\Search Terms Deduped Sorted"


def dedupe_sort_terms(lines):
    """Return the distinct terms in *lines* as a sorted list.

    Newline characters are removed from each line before comparison.  A
    blank line therefore counts as the empty-string term and is kept.
    Sorting uses the default string ordering.
    """
    # sorted(set(...)) behaves the same on Python 2 and 3; calling .sort()
    # on dict.keys() fails on Python 3, where keys() returns a view.
    return sorted(set(line.replace("\n", "") for line in lines))


def dedupe_sort_term_files(start_dir, output_dir):
    """Write a deduped, sorted copy of every file in *start_dir*.

    The output file name is the input name with ``Search_Parsed`` replaced
    by ``Search_DupSorted``; each term is written on its own line.  Every
    directory entry returned by ``os.listdir`` is opened as a text file;
    errors from ``open`` are not caught.
    """
    for name in os.listdir(start_dir):
        # Read inside a `with` so the input handle is always released.
        with open(os.path.join(start_dir, name)) as source:
            contents = source.readlines()
        new_name = name.replace("Search_Parsed", "Search_DupSorted")
        # `with` also closes the output file if a write fails part-way.
        with open(os.path.join(output_dir, new_name), 'w') as output:
            output.writelines(term + "\n" for term in dedupe_sort_terms(contents))


# __name__ is "__main__" both when this is run as a script and when it is
# pasted into the interactive interpreter, so the pass still runs there.
if __name__ == "__main__":
    dedupe_sort_term_files(startDir, outputDir)
|
| 38 |
|
|
>>> |