ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Tool_Box/NinoGenTools.py
Revision: 891
Committed: Sat May 24 02:11:55 2025 UTC (10 months ago) by nino.borges
Content type: text/x-python
File size: 8928 byte(s)
Log Message:
Added method that I often use to manipulate terms and words.

File Contents

# Content
1 """
2 NinoGenTools
3
4 Created by
5 Emanuel Borges
6 06.16.07
7
8 This is just going to be a collection of classes for general tools that I use often. Nothing product
9 specific.
10
11 """
12
13 import os, shutil, hashlib, time, stat, datetime, zipfile
14
class Counter:
    """Simple in-memory counter that can advance in arbitrary steps."""

    def __init__(self, count = 1):
        """Seed the counter with ``count`` (1 when omitted).

        Seeding it with 2 and always stepping by 2, for example, counts in
        multiples of 2.
        """
        self.count = count

    def inc(self, n=1):
        """Advance the stored count by ``n`` (a single step by default)."""
        self.count += n
24
25
class CopyFile:
    """Copy files into a destination tree and log every failure to disk.

    Failures are appended to ``copyError.log`` inside a ``_logs_`` directory
    under ``outputDir``; that directory is created the first time it is needed.
    """

    def __init__(self, outputDir):
        """Remember the output root and derive the error-log directory from it."""
        self.outputDir = outputDir
        # os.path.join keeps the "outputDir\_logs_" layout on Windows and yields
        # a real sub-directory on platforms whose separator is not a backslash.
        self.logsDir = os.path.join(outputDir, "_logs_")

    def Copy(self,absInputFile,outputFile,destinationDir,verify):
        """Copy ``absInputFile`` to ``destinationDir``/``outputFile``.

        Parameters:
            absInputFile:   path of the file to copy.
            outputFile:     file name to give the copy.
            destinationDir: directory receiving the copy; created if missing.
            verify:         the copy is only performed when this equals 0; for
                            any other value the method just prints its banner.

        An existing destination file is never overwritten (it is reported as a
        duplicate and skipped).  Any failure while creating the directory or
        copying is reported on stdout and appended to ``copyError.log`` instead
        of being raised, so a batch of copies can keep going.
        """
        print("-"*80)
        print("copying %s\n"% absInputFile)

        if verify != 0:
            return

        destinationFile = os.path.join(destinationDir, outputFile)
        try:
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(destinationDir, exist_ok=True)
            if os.path.isfile(destinationFile):
                print("duplicate File.  Skipping...")
            else:
                print(destinationFile)
                shutil.copy2(absInputFile, destinationFile)
                print("Done!\n")
        except Exception:
            # Deliberately broad so one bad file cannot stop a batch, but no
            # longer a bare except: Ctrl-C and SystemExit now propagate.
            print("This file could not be copied!  It either dosent exist or something is wrong with the path...")
            os.makedirs(self.logsDir, exist_ok=True)
            with open(os.path.join(self.logsDir, 'copyError.log'), 'a') as copyError:
                copyError.write("I can't find or process original file %s or destination %s\n"% (absInputFile, destinationFile))
61
class HashFileContents:
    """Hash the binary contents of files with a selectable algorithm.

    The digest is always computed over the raw bytes of the file, never over a
    text decoding of it.  Data is read in fixed-size blocks so large files are
    not loaded into memory in one piece.

    Supported algorithm names: md5, sha1, sha224, sha256.
    """

    # Number of bytes pulled from a file or zip entry per read.
    _BLOCK_SIZE = 1024**2  # 1M chunks

    # Algorithm name -> hashlib constructor.  Names outside this table are
    # reported as unsupported.
    _SUPPORTED = {
        'md5': hashlib.md5,
        'sha1': hashlib.sha1,
        'sha224': hashlib.sha224,
        'sha256': hashlib.sha256,
    }

    def __init__(self, hashAlgorithm):
        """Store the name of the algorithm that the hashing methods will use."""
        self.hashAlgorithm = hashAlgorithm

    def _NewHasher(self):
        """Return a fresh hash object for the configured algorithm.

        Prints "Unsupported algorithm!" and returns None when the name is not
        one of the supported algorithms.
        """
        factory = self._SUPPORTED.get(self.hashAlgorithm)
        if factory is None:
            print("Unsupported algorithm!")
            return None
        return factory()

    def HashFile(self, absFilePath):
        """Return the hex digest of the file at ``absFilePath``.

        Current supported Algorithms are: md5, sha1, sha224, sha256.  For any
        other algorithm name the string '0' is returned.  The file is opened
        before anything else happens, so an unreadable path raises regardless
        of the configured algorithm.
        """
        # The context manager closes the handle even if hashing raises.
        with open(absFilePath, 'rb') as binFile:
            print("-"*80)
            print("Creating a %s hash digest from file\n%s"%(self.hashAlgorithm, absFilePath))
            print("-"*80)
            hasher = self._NewHasher()
            if hasher is None:
                return '0'
            # iter() with a sentinel keeps reading until read() returns b''.
            for block in iter(lambda: binFile.read(self._BLOCK_SIZE), b''):
                hasher.update(block)
        return hasher.hexdigest()

    def HashZipFileContents(self, absFilePath):
        """Hash every member of a zip archive without extracting it to disk.

        Returns a list of ``[memberName, hexDigest]`` pairs in the order given
        by the archive's name list.  With an unsupported algorithm the
        "Unsupported algorithm!" message is printed once and an empty list is
        returned.
        """
        masterList = []

        with zipfile.ZipFile(absFilePath) as archive:
            # Check the algorithm once rather than once per member.
            if self._NewHasher() is None:
                return masterList
            for fname in archive.namelist():
                hashContainer = self._NewHasher()
                # Each member stream is closed as soon as it has been hashed.
                with archive.open(fname) as entry:
                    for block in iter(lambda: entry.read(self._BLOCK_SIZE), b''):
                        hashContainer.update(block)
                masterList.append([fname,hashContainer.hexdigest()])

        return masterList
118
119
class FileProperties:
    """Retrieve file properties (created date, etc.) in a human readable format
    and toggle a file's read-only state.
    """

    # All dates are reported as MM/DD/YY in local time.
    _DATE_FORMAT = "%m/%d/%y"

    # Write permission for owner, group and others.
    _WRITE_BITS = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH

    def GetCreatedDate(self, absFilePath):
        """Returns the Created Date for the file, formatted MM/DD/YY.

        This is read from ``st_ctime``.  NOTE(review): per the os.stat
        documentation that field is the creation time on Windows but the last
        metadata change on Unix - confirm which one callers expect.
        """
        stats = os.stat(absFilePath)
        return time.strftime(FileProperties._DATE_FORMAT, time.localtime(stats.st_ctime))

    def GetModifiedDate(self, absFilePath):
        """Returns the Last Modified Date for the file, formatted MM/DD/YY."""
        stats = os.stat(absFilePath)
        return time.strftime(FileProperties._DATE_FORMAT, time.localtime(stats.st_mtime))

    def GetAccessedDate(self, absFilePath):
        """Returns the Last Accessed Date for the file, formatted MM/DD/YY."""
        stats = os.stat(absFilePath)
        return time.strftime(FileProperties._DATE_FORMAT, time.localtime(stats.st_atime))

    def RemoveReadOnlyFlag(self, absFilePath, removeFlag = True):
        """Removes the Read Only flag on a file passed to it.  Alternatively, you
        can add the flag by making removeFlag = False.

        Only the write permission is touched: the existing mode is read first
        and then adjusted, so the remaining permission bits are left as they
        were instead of being overwritten by a bare S_IWRITE / S_IREAD.
        """
        currentMode = stat.S_IMODE(os.stat(absFilePath).st_mode)
        if removeFlag:
            # Grant the owner write access on top of whatever is already set.
            os.chmod(absFilePath, currentMode | stat.S_IWRITE)
        else:
            # Drop every write bit so nobody can modify the file.
            os.chmod(absFilePath, currentMode & ~FileProperties._WRITE_BITS)
151
class DateUtilities:
    """Helpers for enumerating and re-formatting date strings."""

    def EnumerateDateRange(self, startDate, endDate):
        """List every date from startDate through endDate, both inclusive.

        Both arguments must be in MM/DD/YYYY format.  The result is a list of
        zero-padded MM/DD/YYYY strings; it is empty when startDate falls after
        endDate.
        """
        bounds = []
        for text in (startDate, endDate):
            month, day, year = text.split("/")
            bounds.append(datetime.date(int(year), int(month), int(day)))
        current, last = bounds

        oneDay = datetime.timedelta(days=1)
        dateList = []
        while current <= last:
            dateList.append("%02d/%02d/%04d" % (current.month, current.day, current.year))
            current += oneDay
        return dateList

    def ConvertDateFormat(self,date, convertToFormat='computer'):
        """Convert between human MM/DD/YYYY and computer yyyymmdd.

        With convertToFormat == 'computer' the input is split on "/" and a
        single-character month or day gets a leading "0".  Any other value
        slices the input as yyyymmdd and returns MM/DD/YYYY.
        """
        if convertToFormat != 'computer':
            return f"{date[4:6]}/{date[6:]}/{date[:4]}"

        month, day, year = date.split("/")
        month = month if len(month) >= 2 else "0" + month
        day = day if len(day) >= 2 else "0" + day
        return year + month + day

    def ReverseDateFormat(self,date):
        """Re-order improper computer format MMDDYYYY into proper YYYYMMDD."""
        return date[4:] + date[:2] + date[2:4]
191
192
193
class WordsUtilities:
    """A few methods that I often use to manipulate terms and words."""

    # Space, "/", "_", "," and "." all become "|" so a single split handles
    # every separator; newline characters are removed outright.
    _SEPARATOR_TABLE = str.maketrans(" /_,.", "|||||", "\n")

    def GenerateWordList(self, rawContents, setToUpper = False):
        """Build a sorted list of the unique words found in ``rawContents``.

        Parameters:
            rawContents: an iterable of strings, usually the lines read in
                         from a file.
            setToUpper:  when true every word is upper-cased before it is
                         stored, so words differing only in case collapse into
                         one entry and the list gets shorter.

        Words are separated by space, "/", "_", ",", "." and "|".  Empty
        pieces and purely numeric pieces are skipped.  The number of unique
        words is printed before the sorted list is returned.
        """
        wordIndex = set()
        for line in rawContents:
            for word in line.translate(WordsUtilities._SEPARATOR_TABLE).split("|"):
                if not word or word.isnumeric():
                    continue
                # The flag used to be accepted but ignored; it is honoured here.
                wordIndex.add(word.upper() if setToUpper else word)

        print(f"There are {len(wordIndex)} unique words.")
        return sorted(wordIndex)
217 return wordIndexSorted