Commit d18912d4 authored by Jerome Flesch

Tesseract C-API: Implements get_available_languages()

Signed-off-by: Jerome Flesch <jflesch@gmail.com>
parent bb1c3055
......@@ -83,8 +83,7 @@ def is_available():
def get_available_languages():
    """Return the languages known to the Tesseract C-API binding.

    Delegates to ``tesseract_raw.get_available_languages()`` and returns
    its result unchanged. The former placeholder (``return []``) is
    removed because it made this delegation unreachable.
    """
    return tesseract_raw.get_available_languages()
def get_version():
......
......@@ -2,7 +2,7 @@ import ctypes
import os
import sys
# Optional tessdata location taken from the environment. It is None when
# the variable is unset; the former "../" fallback was a dead store that
# was overwritten immediately by this assignment.
TESSDATA_PREFIX = os.getenv('TESSDATA_PREFIX', None)
if sys.platform[:3] == "win":
libnames = [
......@@ -29,6 +29,50 @@ if g_libtesseract:
# ctypes prototypes for the libtesseract entry points used by this module.
# Declaring argtypes/restype matters for pointers: without them ctypes
# marshals Python ints as C ints, which truncates 64-bit addresses.

g_libtesseract.TessVersion.argtypes = []
g_libtesseract.TessVersion.restype = ctypes.c_char_p

g_libtesseract.TessBaseAPICreate.argtypes = []
g_libtesseract.TessBaseAPICreate.restype = ctypes.c_void_p  # TessBaseAPI*

g_libtesseract.TessBaseAPIDelete.argtypes = [
    ctypes.c_void_p  # TessBaseAPI*
]
# The function returns nothing. This line used to assign ``argtypes = None``,
# which discarded the pointer argtype declared just above and left restype
# at the ctypes default (int).
g_libtesseract.TessBaseAPIDelete.restype = None

g_libtesseract.TessBaseAPIInit3.argtypes = [
    ctypes.c_void_p,  # TessBaseAPI*
    ctypes.c_char_p,  # datapath
    ctypes.c_char_p,  # language
]
g_libtesseract.TessBaseAPIInit3.restype = ctypes.c_int

g_libtesseract.TessBaseAPIGetAvailableLanguagesAsVector.argtypes = [
    ctypes.c_void_p  # TessBaseAPI*
]
# Declared as char**; callers walk it until they hit a NULL entry.
g_libtesseract.TessBaseAPIGetAvailableLanguagesAsVector.restype = \
    ctypes.POINTER(ctypes.c_char_p)
def _init(lang=None):
    """Create a Tesseract API handle and initialise it.

    Args:
        lang: optional language name (``str``). When truthy it is encoded
            to UTF-8 before being passed to the library; otherwise it is
            passed through as-is (``None`` becomes a NULL pointer).

    Returns:
        The value returned by ``TessBaseAPICreate()``. The caller is
        responsible for releasing it with ``_cleanup()``.

    Raises:
        AssertionError: if ``g_libtesseract`` is falsy.
    """
    assert g_libtesseract
    handle = g_libtesseract.TessBaseAPICreate()
    try:
        if lang:
            lang = lang.encode("utf-8")
        prefix = None
        if TESSDATA_PREFIX:
            prefix = TESSDATA_PREFIX.encode("utf-8")
        # NOTE(review): the int result of TessBaseAPIInit3 is not checked
        # here; confirm whether an initialisation failure should be
        # surfaced to the caller.
        g_libtesseract.TessBaseAPIInit3(
            handle,
            ctypes.c_char_p(prefix),
            ctypes.c_char_p(lang)
        )
    except BaseException:
        # Release the handle on any failure, then re-raise. The handle is
        # a plain Python int, so wrap it explicitly: this keeps the pointer
        # full-width even if no argtypes are in effect for
        # TessBaseAPIDelete.
        g_libtesseract.TessBaseAPIDelete(ctypes.c_void_p(handle))
        raise
    return handle
def _cleanup(handle):
    """Destroy a Tesseract API handle previously returned by ``_init()``.

    Args:
        handle: the handle value to release.
    """
    # The handle is a plain Python int. Wrapping it in c_void_p guarantees
    # it is passed as a full-width pointer even if no argtypes are in
    # effect for TessBaseAPIDelete (ctypes would otherwise mask it to a
    # C int).
    g_libtesseract.TessBaseAPIDelete(ctypes.c_void_p(handle))
def is_available():
global g_libtesseract
......@@ -38,3 +82,20 @@ def is_available():
def get_version():
    """Return the string reported by ``TessVersion()``, decoded as UTF-8."""
    raw_version = g_libtesseract.TessVersion()
    return raw_version.decode("utf-8")
def get_available_languages():
    """List the languages reported by the Tesseract library.

    A temporary API handle is created with ``_init()`` and always released
    with ``_cleanup()``, even if reading the list fails.

    Returns:
        A list of language names (``str``), decoded from UTF-8.
    """
    api = _init()
    try:
        names = g_libtesseract.TessBaseAPIGetAvailableLanguagesAsVector(
            api
        )
        # NOTE(review): the returned array is never handed back to the
        # library; confirm whether it needs to be freed (e.g. via
        # TessDeleteTextArray).
        found = []
        idx = 0
        # The array is walked until the first NULL/empty entry.
        while True:
            entry = names[idx]
            if not entry:
                break
            found.append(entry.decode("utf-8"))
            idx += 1
    finally:
        _cleanup(api)
    return found
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment