#!/usr/bin/env python
"""
Wrapper for various OCR tools.

USAGE:
from PIL import Image
import sys
from pyocr import pyocr

tools = pyocr.get_available_tools()[:]
if len(tools) == 0:
    print("No OCR tool found")
    sys.exit(1)
print("Using '%s'" % (tools[0].get_name()))
tools[0].image_to_string(Image.open('test.png'), lang='fra',
                         builder=TextBuilder())


DETAILS:
Each module wrapping an OCR tool provides the following functions:
- get_name(): Return the name of the tool
- is_available(): Return True if the tool is installed, False otherwise.
- get_version(): Return a tuple containing the version of the tool (if
  installed)
- get_available_builders(): Returns a list of builders that can be used with
  this tool (see image_to_string())
- get_available_languages(): Returns a list of languages supported by this
  tool. Languages are usually written as three-letter ISO language codes
- image_to_string():
    Takes 3 arguments:
    - an image (see python Imaging "Image" module) (mandatory)
    - lang=<language> (see get_available_languages()) (optional)
    - builder=<builder> (see get_available_builders() or the classes in the
      module 'pyocr.builders') (optional: default is
      pyocr.builders.TextBuilder)
    Returned value depends on the specified builder.


COPYRIGHT:
Pyocr is released under the GPL v3.
Copyright (c) Jerome Flesch, 2011
Tesseract module: Copyright (c) Samuel Hoffstaetter, 2009

WEBSITE:
https://github.com/jflesch/python-tesseract#readme
"""

from . import cuneiform
from . import tesseract
# Names exported by `from <this module> import *`.
__all__ = [
    'get_available_tools',
    'TOOLS',
    'VERSION',
]


# OCR tool wrapper modules known to this package, in preference order.
# get_available_tools() walks this list from first to last.
TOOLS = [
    tesseract,
    cuneiform,
]

# Package version as a tuple of integers; (0, 2, 4) corresponds to "v0.2.4".
VERSION = (0, 2, 4)


def get_available_tools():
    """
    List the OCR tool modules that can be used on this machine.

    Each entry of TOOLS is asked via is_available() whether it can be used;
    the entries answering with a true value are returned in a new list, in
    the same order as they appear in TOOLS.
    """
    return [candidate for candidate in TOOLS if candidate.is_available()]