Commit ecabf0f8 authored by Jerome Flesch

Tesseract C-API: Take into account the tesseract_layout specified in the builder

Signed-off-by: Jerome Flesch <jflesch@gmail.com>
parent dd05e84d
......@@ -242,6 +242,7 @@ class TextBuilder(object):
def __init__(self, tesseract_layout=3, cuneiform_dotmatrix=False,
cuneiform_fax=False, cuneiform_singlecolumn=False):
self.tesseract_configs = ["-psm", str(tesseract_layout)]
self.tesseract_layout = tesseract_layout
# Add custom cuneiform parameters if needed
if cuneiform_dotmatrix:
self.cuneiform_args.append("--dotmatrix")
......@@ -460,6 +461,7 @@ class WordBoxBuilder(object):
def __init__(self):
self.word_boxes = []
self.tesseract_layout = 3
def read_file(self, file_descriptor):
"""
......@@ -528,6 +530,7 @@ class LineBoxBuilder(object):
def __init__(self):
self.current_line = None
self.lines = []
self.tesseract_layout = 3
def read_file(self, file_descriptor):
"""
......
......@@ -92,6 +92,11 @@ def image_to_string(image, lang=None, builder=None):
lvl_word = tesseract_raw.PageIteratorLevel.WORD
try:
if builder.tesseract_layout != tesseract_raw.PageSegMode.AUTO:
tesseract_raw.set_page_seg_mode(
handle, builder.tesseract_layout
)
tesseract_raw.set_image(handle, image)
# XXX(JFlesch): PageIterator and ResultIterator are actually the
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment