| 1 |
"""
|
| 2 |
BatesRangeFunctions
|
| 3 |
|
| 4 |
Created by
|
| 5 |
Emanuel Borges
|
| 6 |
12.31.2019
|
| 7 |
|
| 8 |
A set of functions for dealing with Bates ranges, such as enumerating them and counting pages. Originally this was a program
|
| 9 |
called FixBatesRange_func in the summation tools package but I use the enumerate and page counts for lots of other reasons,
|
| 10 |
so I copied it out to tools and deleted the parts that I didn't need.
|
| 11 |
|
| 12 |
Note that seperate_alpha is the third version of this function, fixing lots of bugs, but it still calls the original _SeperateAlphaOrig
|
| 13 |
as a fall back for some reason.
|
| 14 |
|
| 15 |
"""
|
| 16 |
|
| 17 |
|
| 18 |
import re
|
| 19 |
|
| 20 |
class EnumError(Exception):
    """Raised by EnumerateBates when a Bates range cannot be enumerated.

    The object handed to the constructor is stored on ``value``; ``str()``
    of the exception is the ``repr()`` of that object.
    """

    def __init__(self, value):
        # Initialise the base Exception with the same payload.
        super(EnumError, self).__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)
|
| 25 |
|
| 26 |
|
| 27 |
def _SeperateAlphaOrig(bates):
    """Split one Bates fragment into an ``(alpha, number)`` pair.

    This is the original splitter; seperate_alpha() still uses it for the
    last fragment of a value.

    Every ASCII letter and punctuation character in *bates* is collected
    into the alpha part. The numeric text is whatever remains after removing
    that alpha text from *bates*. Leading zeros of the numeric text are moved
    onto the end of the alpha part so the padding is not lost, e.g.
    ``"ABC0012"`` gives ``("ABC00", 12)``.

    Returns:
        A tuple ``(alpha, number)`` where ``number`` is an int.

    Raises:
        IndexError: *bates* has no numeric text left once the alpha part is
            removed (same exception type as the original implementation).
        ValueError: the remaining text is not an integer, e.g. when letters
            appear on both sides of the digits.
    """
    import string

    # string.letters and string.join only exist on Python 2; ascii_letters
    # and str.join work on both Python 2 and Python 3.
    prefix_chars = string.ascii_letters + string.punctuation
    alpha = "".join(ch for ch in bates if ch in prefix_chars)

    num = bates.replace(alpha, "") if alpha else bates
    if not num:
        raise IndexError("no numeric portion in bates value %r" % (bates,))

    # Move leading zeros to the alpha part, but always keep the final digit
    # so an all-zero number such as "000" becomes 0 instead of crashing.
    while len(num) > 1 and num[0] == '0':
        alpha += num[0]
        num = num[1:]
    return (alpha, int(num))


def seperate_alpha(bates):
    """Split a Bates value into its alpha prefix and trailing number.

    The value is cut into runs of digits, runs of whitespace and everything
    else. The last run is handed to _SeperateAlphaOrig() to obtain the
    number and any zero padding; all earlier runs, followed by that padding,
    form the alpha part. For example ``"ABC 0012"`` gives ``("ABC 00", 12)``.

    Returns:
        A tuple ``(alpha, number)`` where ``number`` is an int.

    Raises:
        IndexError: *bates* is empty, or its last run contains no digits
            (for example ``"foo-001a"``).
        ValueError: the last run cannot be converted to an integer.
    """
    # A real list is required because the last element is indexed below;
    # filter() returns a non-indexable iterator on Python 3.
    parts = [part for part in re.split(r'(\d+|\s+)', bates) if part]
    tail_alpha, num = _SeperateAlphaOrig(parts[-1])
    alpha = "".join(parts[:-1]) + tail_alpha
    return (alpha, int(num))
|
| 60 |
|
| 61 |
|
| 62 |
def generate_index(str):
    """Split a string into alternating text and integer fragments.

    Consecutive characters are grouped by whether ``isdigit()`` is true for
    them. Digit groups are converted to ``int``; all other groups are kept
    as strings. The resulting tuple can be used as a sort key, e.g.
    ``"foo-0001a"`` gives ``("foo-", 1, "a")``.

    The parameter is named ``str`` for backward compatibility with existing
    callers, so the built-in ``str`` is not available inside this function.

    Returns:
        A tuple of fragments in their original order; an empty tuple for an
        empty string.
    """
    import itertools

    index = []
    # groupby yields one group per run of characters that share the same
    # isdigit() result, which is exactly the digit / non-digit split needed.
    for is_digit, chars in itertools.groupby(str, key=lambda ch: ch.isdigit()):
        fragment = "".join(chars)
        index.append(int(fragment) if is_digit else fragment)
    return tuple(index)
|
| 90 |
|
| 91 |
|
| 92 |
def GetBatesPageCount(begNo, endNo):
    """Return the number of pages in the Bates range begNo..endNo, inclusive.

    Both values are split with generate_index() and compared fragment by
    fragment. The difference of the last pair of fragments that are both
    numeric is the base distance between the two values.

    If endNo has more fragments than begNo (a "hanging chad"), the count is
    extended:
      * a numeric last fragment (e.g. the ``002`` of ``foo-0001_002``) adds
        that number minus one;
      * any other last fragment (e.g. the ``a`` of ``foo-0001a``) adds one.

    Returns:
        The page count as an int (base distance, plus any adjustment, plus
        one because the range is inclusive).

    Raises:
        ValueError: the two values share no numeric fragment at the same
            position, so no distance can be computed.
    """
    begNoIndex = generate_index(begNo)
    endNoIndex = generate_index(endNo)

    numb = None
    for beg_part, end_part in zip(begNoIndex, endNoIndex):
        try:
            numb = int(end_part) - int(beg_part)
        except (TypeError, ValueError):
            # At least one side is a text fragment (prefix or separator).
            continue

    if numb is None:
        raise ValueError(
            "no comparable numeric part in bates values %r and %r"
            % (begNo, endNo)
        )

    # Account for a hanging chad on the end value.
    if len(endNoIndex) > len(begNoIndex):
        last = endNoIndex[-1]
        if str(last).isdigit():
            # Works for begNo foo-0001 / endNo foo-0001_002, but not when
            # the end value carries yet another suffix (foo-0001_002a).
            # The original "numb =+ last" overwrote the distance; it is
            # accumulated here instead.
            numb += last
            numb -= 1
        else:
            # Probably a plain suffix such as foo-0001a: one extra page.
            numb += 1
    return numb + 1
|
| 112 |
|
| 113 |
def GetBatesRanges(batesList):
    """Collapse a flat list of Bates values into (first, last) ranges.

    *batesList* is a single list holding every begin and end value. The
    values are grouped by the alpha part returned by seperate_alpha(), and
    one ``(lowest, highest)`` tuple is produced per group. Because that
    alpha part includes any zero padding, values whose padding differs fall
    into different groups.

    Within a group the values are ordered by their numeric part rather than
    as plain strings, so unpadded numbers are handled correctly ("ABC9"
    comes before "ABC123").

    Returns:
        A list of ``(first_bates, last_bates)`` tuples, one per group.

    Raises:
        Whatever seperate_alpha() raises for a value it cannot split.
    """
    batesMatrix = {}
    for bates in batesList:
        alpha, numb = seperate_alpha(bates)
        # Keep the number next to the original text so min()/max() compare
        # numerically first.
        batesMatrix.setdefault(alpha, []).append((numb, bates))

    return [
        (min(members)[1], max(members)[1])
        for members in batesMatrix.values()
    ]
|
| 129 |
|
| 130 |
def EnumerateBates(begNo, endNo):
    """Return every Bates value from begNo to endNo, inclusive.

    Both values are split with seperate_alpha(). The prefix and the zero
    padding width are taken from begNo, and each number from the begin
    number to the end number is rendered with that prefix and width. The
    length of the result is then checked against GetBatesPageCount().

    Returns:
        A list of Bates strings in ascending order.

    Raises:
        EnumError: the two values have different prefixes, endNo is lower
            than begNo, or the number of generated values does not match
            the page count.
        Any exception raised by seperate_alpha() or GetBatesPageCount() for
        values they cannot parse (for example ``foo-001.a``).
    """
    pageCount = GetBatesPageCount(begNo, endNo)
    begAlpha, begNum = seperate_alpha(begNo)
    endAlpha, endNum = seperate_alpha(endNo)

    def _split_padding(alpha):
        # Separate the trailing digits (the zero padding that
        # seperate_alpha keeps on the alpha part) from the real prefix.
        cut = len(alpha)
        while cut and alpha[cut - 1].isdigit():
            cut -= 1
        return alpha[:cut], len(alpha) - cut

    prefix, pad_digits = _split_padding(begAlpha)
    end_prefix, _unused = _split_padding(endAlpha)

    # Enumerating with begNo's prefix when endNo uses another one would
    # silently produce values that do not belong to the range.
    if prefix != end_prefix:
        raise EnumError(
            "Could not enumerate fully: prefixes %r and %r differ."
            % (prefix, end_prefix)
        )

    # A range reversed by exactly one used to return an empty list because
    # the page count (0) happened to match; treat every reversed range as
    # an error.
    if endNum < begNum:
        raise EnumError("Could not enumerate fully: end is before begin.")

    # Total width of the numeric portion: digits of the begin number plus
    # the zero padding that preceded it.
    padding = len(str(begNum)) + pad_digits

    batesList = [
        prefix + str(number).zfill(padding)
        for number in range(begNum, endNum + 1)
    ]
    if len(batesList) != pageCount:
        # My own exception that I can catch.
        raise EnumError("Could not enumerate fully.")
    return batesList
|