NinoCode/Tool_Box/NinoGenTools.py

"""
NinoGenTools

Created by
Emanuel Borges
06.16.07

This is just going to be a collection of classes for general tools that I use often.  Nothing product
specific.

"""

import os, shutil, hashlib, time, stat, datetime, zipfile

class Counter:
    """In-memory counter that can be advanced by an arbitrary step.

    Start it at ``count`` and call ``inc`` with a step to count in
    multiples; e.g. start at 2 and increment by 2 to count in twos.
    """

    def __init__(self, count=1):
        """Store the starting value (1 unless told otherwise) in ``self.count``."""
        self.count = count

    def inc(self, n=1):
        """Advance ``self.count`` by ``n`` (default 1); returns nothing."""
        self.count += n


class CopyFile:
    """Copy files into a destination folder, logging failures instead of raising.

    Failed copies are appended to ``copyError.log`` inside a ``_logs_``
    folder located under the output directory given at construction time.
    """

    def __init__(self, outputDir):
        """Remember the output directory and derive the error-log directory.

        Args:
            outputDir: Base directory; the ``_logs_`` folder lives under it.
        """
        self.outputDir = outputDir
        # os.path.join uses the platform separator, so the path is also
        # valid on hosts where "\\" is not a directory separator.
        self.logsDir = os.path.join(outputDir, "_logs_")

    def Copy(self,absInputFile,outputFile,destinationDir,verify):
        """Copy ``absInputFile`` to ``destinationDir``/``outputFile``.

        The copy is only performed when ``verify == 0``; for any other value
        the method just prints the banner and returns.  An existing
        destination file is never overwritten (it is reported as a duplicate
        and skipped).  Filesystem errors are not raised: they are reported on
        stdout and appended to ``copyError.log`` in ``self.logsDir``.

        Args:
            absInputFile: Path of the file to copy.
            outputFile: File name to give the copy.
            destinationDir: Directory to copy into; created if missing.
            verify: ``0`` to actually copy, anything else to skip the copy.
        """
        print("-"*80)
        print("copying %s\n"% absInputFile)

        if verify != 0:
            return

        destinationFile = os.path.join(destinationDir, outputFile)
        try:
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(destinationDir, exist_ok=True)
            if os.path.isfile(destinationFile):
                print("duplicate File. Skipping...")
            else:
                print(destinationFile)
                shutil.copy2(absInputFile, destinationFile)
                print("Done!\n")
        except OSError:
            # OSError covers missing files, bad paths, permission problems and
            # shutil's own errors, without swallowing KeyboardInterrupt or
            # SystemExit the way a bare ``except:`` would.
            print("This file could not be copied!  It either dosent exist or something is wrong with the path...")
            os.makedirs(self.logsDir, exist_ok=True)
            with open(os.path.join(self.logsDir, 'copyError.log'), 'a') as copyError:
                copyError.write("I can't find or process original file %s or destination %s\n"% (absInputFile, destinationFile))

class HashFileContents:
    """Hash the binary contents of files with a selectable algorithm.

    Supported algorithm names are ``md5``, ``sha1``, ``sha224`` and
    ``sha256``.  Data is always read in binary mode and fed to the hash in
    fixed-size blocks, so large files are never loaded into memory at once.
    """

    # Supported algorithm names mapped to their hashlib constructors.
    _CONSTRUCTORS = {
        'md5': hashlib.md5,
        'sha1': hashlib.sha1,
        'sha224': hashlib.sha224,
        'sha256': hashlib.sha256,
    }
    # Number of bytes read per chunk (1 MiB).
    _BLOCK_SIZE = 1024**2

    def __init__(self, hashAlgorithm):
        """Store the name of the algorithm used by the hashing methods."""
        self.hashAlgorithm = hashAlgorithm

    def _NewHasher(self):
        """Return a fresh hash object, or None (after printing) if unsupported."""
        constructor = self._CONSTRUCTORS.get(self.hashAlgorithm)
        if constructor is None:
            print("Unsupported algorithm!")
            return None
        return constructor()

    def _Digest(self, stream, hasher):
        """Feed a binary stream to ``hasher`` block by block; return the hex digest."""
        # iter(callable, sentinel) stops at the first empty read (EOF).
        for block in iter(lambda: stream.read(self._BLOCK_SIZE), b""):
            hasher.update(block)
        return hasher.hexdigest()

    def HashFile(self, absFilePath):
        """Return the hex digest of a file's binary contents.

        Current supported Algorithms are: md5, sha1, sha224, sha256

        Args:
            absFilePath: Path of the file to hash.

        Returns:
            The hex digest string, or ``'0'`` when ``self.hashAlgorithm`` is
            not one of the supported names.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        # The file is opened before anything is printed so a missing file
        # fails up front; ``with`` guarantees the handle is closed.
        with open(absFilePath, 'rb') as stream:
            print("-"*80)
            print("Creating a %s hash digest from file\n%s"%(self.hashAlgorithm, absFilePath))
            print("-"*80)
            hasher = self._NewHasher()
            if hasher is None:
                return '0'
            return self._Digest(stream, hasher)

    def HashZipFileContents(self, absFilePath):
        """Hash every member of a zip archive without extracting it to disk.

        Args:
            absFilePath: Path of the zip archive.

        Returns:
            A list of ``[member_name, hex_digest]`` pairs in archive order.
            When the algorithm is unsupported, a message is printed for each
            member and the returned list is empty.
        """
        masterList = []
        with zipfile.ZipFile(absFilePath) as archive:
            for fname in archive.namelist():
                # Each member stream is closed as soon as it has been hashed.
                with archive.open(fname) as entry:
                    hasher = self._NewHasher()
                    if hasher is not None:
                        masterList.append([fname, self._Digest(entry, hasher)])
        return masterList


class FileProperties:
    """Retrieve file properties (dates, read-only flag) in a human readable format.

    All dates are returned as ``MM/DD/YY`` strings in local time.
    """

    @staticmethod
    def _FormatTimestamp(timestamp):
        """Format a POSIX timestamp as a local-time ``MM/DD/YY`` string."""
        return time.strftime("%m/%d/%y", time.localtime(timestamp))

    def GetCreatedDate(self, absFilePath):
        """Returns the Created Date for the file.

        NOTE: this reads ``st_ctime``, whose meaning is platform dependent
        per the ``os.stat`` documentation (creation time on Windows, last
        metadata change on most Unix systems).
        """
        return self._FormatTimestamp(os.stat(absFilePath).st_ctime)

    def GetModifiedDate(self, absFilePath):
        """Returns the Last Modified Date (``st_mtime``) for the file."""
        return self._FormatTimestamp(os.stat(absFilePath).st_mtime)

    def GetAccessedDate(self, absFilePath):
        """Returns the Last Accessed Date (``st_atime``) for the file."""
        return self._FormatTimestamp(os.stat(absFilePath).st_atime)

    def RemoveReadOnlyFlag(self, absFilePath, removeFlag = True):
        """Clear (default) or set the read-only flag on a file.

        Only the write bits are changed; every other permission bit the file
        already has is preserved.  Replacing the whole mode with
        ``stat.S_IWRITE`` would leave a write-only file on POSIX systems.

        Args:
            absFilePath: Path of the file to modify.
            removeFlag: True to make the file writable by its owner, False to
                make it read-only by clearing all write bits.
        """
        currentMode = stat.S_IMODE(os.stat(absFilePath).st_mode)
        if removeFlag:
            newMode = currentMode | stat.S_IWRITE
        else:
            newMode = currentMode & ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH)
        os.chmod(absFilePath, newMode)

class DateUtilities:
    def EnumerateDateRange(self, startDate, endDate):
        """This method will enumerate all the values in a date range. Must be in MM/DD/YYYY format"""
        m,d,y = startDate.split("/")
        first=datetime.date(int(y),int(m),int(d))
        m,d,y = endDate.split("/")
        last=datetime.date(int(y),int(m),int(d))
        adate=first
        dates=[]
        while adate<=last:
            dates.append(adate)
            adate+=datetime.timedelta(1)
        dateList = []
        for i in dates:
            i = str(i)
            y,m,d = i.split("-")
            dateList.append("%s/%s/%s"%(m,d,y))
        return dateList
    def ConvertDateFormat(self,date, convertToFormat='computer'):
        """This method converts between human MM/DD/YYYY and computer yyyymmdd"""
        if convertToFormat == 'computer':
            m,d,y = date.split("/")
            if len(m)<2:
                m = "0"+m
            if len(d)<2:
                d = "0"+d
            return y+m+d
        else:
            y = date[:4]
            m = date[4:6]
            d = date[6:]
            return "%s/%s/%s"% (m,d,y)

    def ReverseDateFormat(self,date):
        """This method converts from improper computer format MMDDYYYY to proper YYYYMMDD"""
        y = date[4:]
        m = date[:2]
        d = date[2:4]
        return y+m+d


class WordsUtilities:
    """A few methods that I often use to manipulate terms and words."""

    # Characters treated as word separators, each mapped to "|" so a single
    # split("|") tokenizes the line (a literal "|" therefore separates too).
    _SEPARATORS = str.maketrans({ch: "|" for ch in " /_,."})

    def GenerateWordList(self, rawContents, setToUpper = False):
        """Build a sorted list of the unique words found in rawContents.

        Each line has its newline characters removed and is split on spaces
        and on the characters ``/ _ , . |``.  Empty tokens and purely numeric
        tokens are dropped.  The number of unique words is printed.

        Args:
            rawContents: Iterable of text lines, usually just read from a file.
            setToUpper: When True every word is upper-cased before being
                stored, so words differing only by case collapse into one
                entry.

        Returns:
            The unique words as a sorted list.
        """
        wordIndex = set()
        for line in rawContents:
            tokens = line.replace("\n", "").translate(self._SEPARATORS).split("|")
            for token in tokens:
                if not token or token.isnumeric():
                    continue
                wordIndex.add(token.upper() if setToUpper else token)

        print(f"There are {len(wordIndex)} unique words.")
        return sorted(wordIndex)
Revision:	891
Committed:	Sat May 24 02:11:55 2025 UTC (10 months ago) by nino.borges
Content type:	text/x-python
File size:	8928 byte(s)
Log Message:	Added method that I often use to manipulate terms and words.
#	Content
1	"""
2	NinoGenTools
3
4	Created by
5	Emanuel Borges
6	06.16.07
7
8	This is just going to be a collection of classes for general tools that I use often. Nothing product
9	specific.
10
11	"""
12
13	import os, shutil, hashlib, time, stat, datetime, zipfile
14
15	class Counter:
16	def __init__(self, count = 1):
17	"""Nino's main Counter Class. It will allow you to keep the counter in memory and you can
18	count in multiples. For example if you pass it 2 and inc by 2, your counting in multiples
19	of 2"""
20	#self.count = 1
21	self.count = count
22	def inc(self, n=1):
23	self.count += n
24
25
26	class CopyFile:
27	def __init__(self, outputDir):
28	self.outputDir = outputDir
29	self.logsDir = outputDir + "\\_logs_"
30
31	def Copy(self,absInputFile,outputFile,destinationDir,verify):
32	"""This was copied from SummationExt and modified a little to stand alone. It will copy a file,
33	write errors to an error log, etc.
34	This method will do the actual coying IF verify is == to 0. If not it will just populate file to be counted later"""
35	#print self.imagesDir
36	#destinationDir = self.imagesProcessedDir + "\\Printing\\" + projectName.split('\n')[0] + "\\" + os.path.split(absFile.lower().split(self.imagesDir.lower())[1])[0]
37	#print destinationDir
38	print("-"*80)
39	print("copying %s\n"% absInputFile)
40
41	try:
42	if verify == 0:
43	if os.path.exists(destinationDir) == 0:
44	os.makedirs(destinationDir)
45	if os.path.isfile(destinationDir + "\\" + outputFile):
46	print("duplicate File. Skipping...")
47	else:
48	print(destinationDir+"\\"+outputFile)
49	shutil.copy2(absInputFile,destinationDir+"\\"+outputFile)
50	print("Done!\n")
51	#copySuccess = open(self.workDir + '\\copySuccess.log','a')
52	#copySuccess.write(absFile+'\n')
53	#copySuccess.close()
54	except:
55	print("This file could not be copied! It either dosent exist or something is wrong with the path...")
56	if os.path.exists(self.logsDir) == 0:
57	os.makedirs(self.logsDir)
58	copyError = open(self.logsDir + '\\copyError.log','a')
59	copyError.write("I can't find or process original file %s or destination %s\n"% (absInputFile,destinationDir+ "\\"+outputFile))
60	copyError.close()
61
62	class HashFileContents:
63	"""This class will handle requests to hash file contents. It will alow you to choose between the
64	algorithms and will have some protection for opening really large files to memory. (eventually)
65	This hash class will always hash the binary contents of the file not the string representation.
66	Very important."""
67	def __init__(self, hashAlgorithm):
68	self.hashAlgorithm = hashAlgorithm
69
70	def HashFile(self, absFilePath):
71	"""Current supported Algorithms are: md5, sha1, sha224, sha256"""
72	binContents = open(absFilePath,'rb').read()
73	print("-"*80)
74	print("Creating a %s hash digest from file\n%s"%(self.hashAlgorithm, absFilePath))
75	print("-"*80)
76	if self.hashAlgorithm == 'md5':
77	hashVal = hashlib.md5(binContents).hexdigest()
78	elif self.hashAlgorithm == 'sha1':
79	hashVal = hashlib.sha1(binContents).hexdigest()
80	elif self.hashAlgorithm == 'sha224':
81	hashVal = hashlib.sha224(binContents).hexdigest()
82	elif self.hashAlgorithm == 'sha256':
83	hashVal = hashlib.sha256(binContents).hexdigest()
84	else:
85	print("Unsupported algorithm!")
86	hashVal = '0'
87	return hashVal
88
89	def HashZipFileContents(self, absFilePath):
90	"""This method will hash the contents of a zip file without having to extract it first. The zip lib doesnt support the buffer API, which is why I do it this way."""
91	blocksize = 1024**2 #1M chunks
92	masterList = []
93
94	archive = zipfile.ZipFile(absFilePath)
95	for fname in archive.namelist():
96	entry = archive.open(fname)
97	if self.hashAlgorithm == 'md5':
98	hashContainer = hashlib.md5()
99	elif self.hashAlgorithm == 'sha1':
100	hashContainer = hashlib.sha1()
101	elif self.hashAlgorithm == 'sha224':
102	hashContainer = hashlib.sha224()
103	elif self.hashAlgorithm == 'sha256':
104	hashContainer = hashlib.sha256()
105	else:
106	print("Unsupported algorithm!")
107	hashContainer = False
108	if hashContainer:
109	while True:
110	block = entry.read(blocksize)
111	if not block:
112	break
113	hashContainer.update(block)
114	#print(fname, md5.hexdigest())
115	masterList.append([fname,hashContainer.hexdigest()])
116
117	return masterList
118
119
120	class FileProperties:
121	"""This class will retrieve file properties (things like created date, etc)in a human readable format
122	"""
123	def GetCreatedDate(self, absFilePath):
124	"""Returns the Created Date for the file."""
125	stats = os.stat(absFilePath)
126	createDate = time.localtime(stats[9])
127	formatedDate = time.strftime("%m/%d/%y", createDate)
128	return formatedDate
129
130	def GetModifiedDate(self, absFilePath):
131	"""Returns the Last Modified Date for the file."""
132	stats = os.stat(absFilePath)
133	lastModDate = time.localtime(stats[8])
134	formatedDate = time.strftime("%m/%d/%y", lastModDate)
135	return formatedDate
136
137	def GetAccessedDate(self, absFilePath):
138	"""Returns the Last Accessed Date for the file."""
139	stats = os.stat(absFilePath)
140	lastAccDate = time.localtime(stats[7])
141	formatedDate = time.strftime("%m/%d/%y", lastAccDate)
142	return formatedDate
143
144	def RemoveReadOnlyFlag(self, absFilePath, removeFlag = True):
145	"""Removes the Read Only flag on a file passed to it. Alternativly, you can add a the
146	flag by making removeFlag = False"""
147	if removeFlag:
148	os.chmod(absFilePath,stat.S_IWRITE)
149	else:
150	os.chmod(absFilePath,stat.S_IREAD)
151
152	class DateUtilities:
153	def EnumerateDateRange(self, startDate, endDate):
154	"""This method will enumerate all the values in a date range. Must be in MM/DD/YYYY format"""
155	m,d,y = startDate.split("/")
156	first=datetime.date(int(y),int(m),int(d))
157	m,d,y = endDate.split("/")
158	last=datetime.date(int(y),int(m),int(d))
159	adate=first
160	dates=[]
161	while adate<=last:
162	dates.append(adate)
163	adate+=datetime.timedelta(1)
164	dateList = []
165	for i in dates:
166	i = str(i)
167	y,m,d = i.split("-")
168	dateList.append("%s/%s/%s"%(m,d,y))
169	return dateList
170	def ConvertDateFormat(self,date, convertToFormat='computer'):
171	"""This method converts between human MM/DD/YYYY and computer yyyymmdd"""
172	if convertToFormat == 'computer':
173	m,d,y = date.split("/")
174	if len(m)<2:
175	m = "0"+m
176	if len(d)<2:
177	d = "0"+d
178	return y+m+d
179	else:
180	y = date[:4]
181	m = date[4:6]
182	d = date[6:]
183	return "%s/%s/%s"% (m,d,y)
184
185	def ReverseDateFormat(self,date):
186	"""This method converts from improper computer format MMDDYYYY to proper YYYYMMDD"""
187	y = date[4:]
188	m = date[:2]
189	d = date[2:4]
190	return y+m+d
191
192
193
194	class WordsUtilities:
195	"""A few methods that I often use to manipulate terms and words."""
196	def GenerateWordList(self, rawContents, setToUpper = False):
197	"""Gets a word list from a contents, usually just read in from a file. optionally can set setToUpper to true to make everythign uppper, thereby further reducing the list. Returns a sorted list."""
198	wordIndex = set()
199	for line in rawContents:
200	line = line.replace("\n","")
201	line = line.replace(" ","\|")
202	line = line.replace("/","\|")
203	line = line.replace("_","\|")
204	line = line.replace(",","\|")
205	line = line.replace(".","\|")
206	line = line.split("\|")
207	for i in line:
208	if i:
209	if i.isnumeric():
210	pass
211	else:
212	wordIndex.add(i)
213
214	print(f"There are {len(wordIndex)} unique words.")
215	wordIndexSorted = list(wordIndex)
216	wordIndexSorted.sort()
217	return wordIndexSorted