Commit 40db86a0 authored by Jerome Flesch

Merge branch 'update_deprecated_psm_option_string' into 'master'

Update pyocr to use psm_parameter based on tesseract version

Closes #99

See merge request !103
parents eae6c473 a6e06b4c
......@@ -304,8 +304,9 @@ class TextBuilder(BaseBuilder):
def __init__(self, tesseract_layout=3, cuneiform_dotmatrix=False,
cuneiform_fax=False, cuneiform_singlecolumn=False):
from .tesseract import psm_parameter
tess_flags = [psm_parameter(), str(tesseract_layout)]
file_ext = ["txt"]
tess_flags = ["-psm", str(tesseract_layout)]
cun_args = ["-f", "text"]
# Add custom cuneiform parameters if needed
for par, arg in [(cuneiform_dotmatrix, "--dotmatrix"),
......@@ -561,8 +562,9 @@ class WordBoxBuilder(BaseBuilder):
"""
def __init__(self, tesseract_layout=1):
from .tesseract import psm_parameter
tess_flags = [psm_parameter(), str(tesseract_layout)]
file_ext = ["html", "hocr"]
tess_flags = ["-psm", str(tesseract_layout)]
tess_conf = ["hocr"]
cun_args = ["-f", "hocr"]
super(WordBoxBuilder, self).__init__(file_ext, tess_flags, tess_conf,
......@@ -637,8 +639,9 @@ class LineBoxBuilder(BaseBuilder):
"""
def __init__(self, tesseract_layout=1):
from .tesseract import psm_parameter
tess_flags = [psm_parameter(), str(tesseract_layout)]
file_ext = ["html", "hocr"]
tess_flags = ["-psm", str(tesseract_layout)]
tess_conf = ["hocr"]
cun_args = ["-f", "hocr"]
super(LineBoxBuilder, self).__init__(file_ext, tess_flags, tess_conf,
......
......@@ -161,6 +161,12 @@ def can_detect_orientation():
)
def psm_parameter():
    """Pick the page-segmentation-mode flag matching the Tesseract version.

    The first element of the value returned by get_version() is compared
    against 3: anything greater yields the double-dash spelling "--psm",
    everything else yields the single-dash spelling "-psm".
    """
    major = get_version()[0]
    if major > 3:
        return "--psm"
    return "-psm"
def detect_orientation(image, lang=None):
"""
Arguments:
......@@ -178,7 +184,7 @@ def detect_orientation(image, lang=None):
"""
_set_environment()
with temp_dir() as tmpdir:
command = [TESSERACT_CMD, "input.bmp", 'stdout', "-psm", "0"]
command = [TESSERACT_CMD, "input.bmp", 'stdout', psm_parameter(), "0"]
version = get_version()
if version[0] >= 4:
# XXX: temporary fix to remove once Tesseract 4 is stable
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment