| 1 |
### Aceto Project: take each search term file and write a copy with the terms parsed, then a second copy with the parsed terms deduped and sorted.
|
| 2 |
|
| 3 |
import os
import re

# Stage 1: split every search-term file in startDir into one term per line.
startDir = r"C:\Temp\Search Terms"
outputDir = r"\\sas01\sas01\19516\Reports\138\Search Terms Parsed"

# Connectors that separate terms, matched case-insensitively and only when
# surrounded by single spaces.  They are applied one after another in this
# order; merging them into a single alternation would change the result for
# inputs where two connectors share a space (e.g. "a and or b").
TERM_SEPARATORS = (
    re.compile(' or ', re.IGNORECASE),
    re.compile(' and ', re.IGNORECASE),
    re.compile(' w/[0-9]+ ', re.IGNORECASE),
)


def split_search_terms(line):
    """Return *line* with each connector replaced by a newline.

    The connectors ``or``, ``and`` and ``w/<digits>`` (each with a space on
    both sides) become ``"\\n"``, and every ``(`` and ``)`` is removed.  All
    other text, including any trailing newline, is returned unchanged.
    """
    for separator in TERM_SEPARATORS:
        line = separator.sub('\n', line)
    return line.replace("(", "").replace(")", "")


def parse_search_term_files(start_dir, output_dir):
    """Write a parsed copy of every file in *start_dir* into *output_dir*.

    Each output file is named after its input with ``"Search"`` replaced by
    ``"Search_Parsed"`` and contains the input's lines passed through
    ``split_search_terms``.  Any error raised by ``os.listdir`` or ``open``
    propagates to the caller.
    """
    for name in os.listdir(start_dir):
        # Read the whole input first so its handle is released before the
        # output file is opened.
        with open(os.path.join(start_dir, name)) as source:
            contents = source.readlines()
        new_name = name.replace("Search", "Search_Parsed")
        with open(os.path.join(output_dir, new_name), 'w') as output:
            for line in contents:
                output.write(split_search_terms(line))


# Runs when executed as a script or pasted into an interactive session, but
# not when this file is imported as a module.
if __name__ == "__main__":
    parse_search_term_files(startDir, outputDir)
|
| 19 |
|
| 20 |
|
| 21 |
import os

# Stage 2: reduce every parsed file in startDir to its distinct terms, sorted.
startDir = r"\\sas01\sas01\19516\Reports\138\Search Terms Parsed"
outputDir = r"\\sas01\sas01\19516\Reports\138\Search Terms Deduped Sorted"


def dedupe_sorted_terms(lines):
    """Return the distinct entries of *lines* as a sorted list.

    Newline characters are removed from each entry before comparison.  The
    ordering is the default string ordering, so it is case-sensitive.  A
    blank line is kept as an empty-string term.
    """
    # sorted() over a set works on both Python 2 and Python 3; calling
    # .sort() on dict.keys() only works on Python 2, where keys() is a list.
    return sorted(set(line.replace("\n", "") for line in lines))


def dedupe_search_term_files(start_dir, output_dir):
    """Write a deduped, sorted copy of every file in *start_dir*.

    Each output file is placed in *output_dir*, named after its input with
    ``"Search_Parsed"`` replaced by ``"Search_DupSorted"``, and holds one
    distinct term per line.  Any error raised by ``os.listdir`` or ``open``
    propagates to the caller.
    """
    for name in os.listdir(start_dir):
        # Read the whole input first so its handle is released before the
        # output file is opened.
        with open(os.path.join(start_dir, name)) as source:
            contents = source.readlines()
        new_name = name.replace("Search_Parsed", "Search_DupSorted")
        with open(os.path.join(output_dir, new_name), 'w') as output:
            for term in dedupe_sorted_terms(contents):
                output.write(term + "\n")


# Runs when executed as a script or pasted into an interactive session, but
# not when this file is imported as a module.
if __name__ == "__main__":
    dedupe_search_term_files(startDir, outputDir)
|
| 38 |
>>> |