Commit 67c3ca26 authored by Jerome Flesch

Libtesseract: Fix multi-language support

Signed-off-by: Jerome Flesch <jflesch@openpaper.work>
parent 3871684a
......@@ -95,11 +95,12 @@ def image_to_string(image, lang=None, builder=None):
# Tesseract TessBaseAPIRecognize() may segfault when the target
# language is not available
clang = lang if lang else "eng"
if clang not in tesseract_raw.get_available_languages(handle):
raise TesseractError(
"no lang",
"language {} is not available".format(clang)
)
for lang_item in clang.split("+"):
if lang_item not in tesseract_raw.get_available_languages(handle):
raise TesseractError(
"no lang",
"language {} is not available".format(lang_item)
)
tesseract_raw.set_page_seg_mode(
handle, builder.tesseract_layout
......
......@@ -99,6 +99,9 @@ class TestTxt(base.BaseTestText, BaseLibtesseract, unittest.TestCase):
def test_japanese(self):
self._test_txt('test-japanese.jpg', 'test-japanese.txt', 'jpn')
def test_multi(self):
self._test_txt('test-european.jpg', 'test-european.txt', 'eng+fra')
def test_nolangs(self):
"""
Issue #51: Running OCR without any language installed causes a SIGSEGV.
......@@ -289,6 +292,8 @@ def get_all_tests():
'test_basic',
'test_european',
'test_french',
'test_japanese',
'test_multi',
'test_nolangs',
]
tests = unittest.TestSuite(map(TestTxt, test_names))
......
......@@ -80,6 +80,9 @@ class TestTxt(base.BaseTestText, BaseTesseract, unittest.TestCase):
def test_japanese(self):
self._test_txt('test-japanese.jpg', 'test-japanese.txt', 'jpn')
def test_multi(self):
self._test_txt('test-european.jpg', 'test-european.txt', 'eng+fra')
class TestCharBox(base.BaseTestBox, BaseTesseract, unittest.TestCase):
"""
......@@ -258,6 +261,8 @@ def get_all_tests():
'test_basic',
'test_european',
'test_french',
'test_japanese',
'test_multi',
]
tests = unittest.TestSuite(map(TestTxt, test_names))
all_tests.addTest(tests)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment