Commit 8fbc5fb6 authored by Jerome Flesch

PEP8 compliance

Signed-off-by: Jerome Flesch <jflesch@gmail.com>
parent d07bd19a
......@@ -147,7 +147,8 @@ class WordBoxBuilder(object):
class WordHTMLParser(HTMLParser):
"""
Tesseract style: Tesseract provides handy but non-standard hOCR tags: ocr_word
Tesseract style: Tesseract provides handy but non-standard hOCR tags:
ocr_word
"""
def __init__(self):
HTMLParser.__init__(self)
......@@ -230,7 +231,8 @@ class WordBoxBuilder(object):
for attr in attrs:
if attr[0] == 'title':
self.__char_positions = attr[1].split(" ")
self.__char_positions = self.__char_positions[1:] # strip x_bboxes
# strip x_bboxes
self.__char_positions = self.__char_positions[1:]
if self.__char_positions[-1] == "":
self.__char_positions[:-1]
try:
......@@ -251,13 +253,17 @@ class WordBoxBuilder(object):
for word in words:
if word == "":
continue
positions = self.__char_positions[0:4*len(word)]
self.__char_positions = self.__char_positions[4*len(word):]
left_pos = min([int(positions[x]) for x in range(0, 4*len(word), 4)])
top_pos = min([int(positions[x]) for x in range(1, 4*len(word), 4)])
right_pos = max([int(positions[x]) for x in range(2, 4*len(word), 4)])
bottom_pos = max([int(positions[x]) for x in range(3, 4*len(word), 4)])
positions = self.__char_positions[0:4 * len(word)]
self.__char_positions = self.__char_positions[4 * len(word):]
left_pos = min([int(positions[x])
for x in range(0, 4 * len(word), 4)])
top_pos = min([int(positions[x])
for x in range(1, 4 * len(word), 4)])
right_pos = max([int(positions[x])
for x in range(2, 4 * len(word), 4)])
bottom_pos = max([int(positions[x])
for x in range(3, 4 * len(word), 4)])
box_pos = ((left_pos, top_pos), (right_pos, bottom_pos))
box = Box(word, box_pos)
......@@ -275,7 +281,7 @@ class WordBoxBuilder(object):
Return:
An array of Box.
"""
parsers = [ self.WordHTMLParser(), self.LineHTMLParser() ]
parsers = [self.WordHTMLParser(), self.LineHTMLParser()]
html_str = file_descriptor.read()
for p in parsers:
......@@ -301,4 +307,3 @@ class WordBoxBuilder(object):
# Human-readable label for this builder: always the literal "Word boxes".
# Declared as a static method, so it takes no instance argument.
@staticmethod
def __str__():
return "Word boxes"
......@@ -95,12 +95,12 @@ def image_to_string(image, lang=None, builder=None):
output_file_name = ('%s.%s' % (output_file_name_base,
builder.file_extension))
cmd = [ CUNEIFORM_CMD ]
cmd = [CUNEIFORM_CMD]
if lang != None:
cmd += [ "-l", lang ]
cmd += ["-l", lang]
cmd += builder.cuneiform_args
cmd += [ "-o", output_file_name ]
cmd += [ "-" ] # stdin
cmd += ["-o", output_file_name]
cmd += ["-"] # stdin
try:
img_data = StringIO.StringIO()
......@@ -128,7 +128,7 @@ def is_available():
def get_available_languages():
proc = subprocess.Popen([ CUNEIFORM_CMD, "-l" ], stdout=subprocess.PIPE,
proc = subprocess.Popen([CUNEIFORM_CMD, "-l"], stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
output = proc.stdout.read()
proc.wait()
......@@ -145,7 +145,7 @@ def get_available_languages():
def get_version():
proc = subprocess.Popen([ CUNEIFORM_CMD ], stdout=subprocess.PIPE,
proc = subprocess.Popen([CUNEIFORM_CMD], stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
output = proc.stdout.read()
proc.wait()
......
......@@ -54,7 +54,7 @@ __all__ = [
]
TOOLS = [ # in preference order
TOOLS = [ # in preference order
cuneiform,
tesseract,
]
......
......@@ -2,6 +2,7 @@
import os
def is_on_path(exec_name):
"""
Indicates if the command 'exec_name' appears to be installed.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment