Commit 181bae96 authored by Thomas Perret

Add tesseract 4.0.0 support

parent 3a10019e
......@@ -20,10 +20,16 @@ from . import tesseract_raw
from ..error import TesseractError
from ..util import digits_only
import locale
import logging
logger = logging.getLogger(__name__)
# Tesseract 4 workaround
if tesseract_raw.get_version() == "4.0.0":
locale.setlocale(locale.LC_ALL, "C")
__all__ = [
'can_detect_orientation',
'detect_orientation',
......
......@@ -79,7 +79,8 @@ class PageSegMode(object):
SINGLE_CHAR = 10
SPARSE_TEXT = 11
SPARSE_TEXT_OSD = 12
COUNT = 13
PSM_RAW_LINE = 13
COUNT = 14
class Orientation(object):
......
......@@ -186,11 +186,11 @@ def detect_orientation(image, lang=None):
with temp_dir() as tmpdir:
command = [TESSERACT_CMD, "input.bmp", 'stdout', psm_parameter(), "0"]
version = get_version()
if version[0] >= 4:
# XXX: temporary fix to remove once Tesseract 4 is stable
command += ["--oem", "0"]
if lang is not None:
command += ['-l', lang]
if version[0] < 4:
command += ['-l', lang]
else:
command += ['-l', 'osd']
if image.mode != "RGB":
image = image.convert("RGB")
......
......@@ -28,6 +28,7 @@ class TestContext(object):
(3, 4, 0),
(3, 4, 1),
(3, 5, 0),
(4, 0, 0),
)
def test_langs(self):
......
......@@ -23,6 +23,7 @@ class TestContext(object):
(3, 4, 0),
(3, 4, 1),
(3, 5, 0),
(4, 0, 0),
)
def test_langs(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment