| 187 |
|
y = date[4:] |
| 188 |
|
m = date[:2] |
| 189 |
|
d = date[2:4] |
| 190 |
< |
return y+m+d |
| 190 |
> |
return y+m+d |
| 191 |
> |
|
| 192 |
> |
|
| 193 |
> |
|
| 194 |
> |
class WordsUtilities:
    """A few methods that I often use to manipulate terms and words."""

    def GenerateWordList(self, rawContents, setToUpper=False):
        """Build a sorted list of the unique words found in rawContents.

        Newline characters are removed from each line, then the line is
        split on spaces, "/", "_", ",", "." and "|". Empty tokens and tokens
        made up entirely of numeric characters are discarded.

        Args:
            rawContents: An iterable of strings, e.g. the lines read from a
                file.
            setToUpper: When true, upper-case every word before collecting
                it, so words that differ only in case collapse into one
                entry and the list shrinks further.

        Returns:
            The unique words as a list in ascending sorted order.

        Side effects:
            Prints the number of unique words found.
        """
        # One translate pass: map every delimiter to "|" and delete "\n".
        delimiterTable = str.maketrans(" /_,.", "|||||", "\n")

        wordIndex = set()
        for line in rawContents:
            if setToUpper:
                # Previously this flag was accepted but silently ignored.
                line = line.upper()
            for word in line.translate(delimiterTable).split("|"):
                if word and not word.isnumeric():
                    wordIndex.add(word)

        print(f"There are {len(wordIndex)} unique words.")
        return sorted(wordIndex)