Commit eae6c473 authored by Jerome Flesch

Merge branch 'pytest' into 'master'

Convert to pytest

See merge request !104
parents bf8f7678 3f5eb1d0
[nosetests]
verbosity=2
[tool:pytest]
addopts = -ra
python_files = tests_*.py
......@@ -41,7 +41,7 @@ class BaseTest(object):
def set_builder(self):
raise NotImplementedError("Implemented in subclasses.")
def setUp(self):
def setup(self):
self.set_builder()
def _test_txt(self, image_file, expected_output_file, lang='eng'):
......@@ -72,7 +72,7 @@ class BaseTestText(BaseTest):
return file_descriptor.read().strip()
def _test_equal(self, output, expected_output):
self.assertEqual(output, expected_output)
assert output == expected_output
class BaseTestDigit(BaseTestText):
......@@ -105,14 +105,13 @@ class BaseTestWordBox(BaseTestBox):
self._builder = builders.WordBoxBuilder()
def _test_equal(self, output, expected_output):
self.assertTrue(len(output) > 0)
self.assertEqual(len(output), len(expected_output))
assert len(output) > 0
assert len(output) == len(expected_output)
for i in range(0, min(len(output), len(expected_output))):
self.assertTrue(isinstance(expected_output[i].content,
six.text_type))
self.assertTrue(isinstance(output[i].content, six.text_type))
self.assertEqual(output[i], expected_output[i])
assert isinstance(expected_output[i].content, six.text_type)
assert isinstance(output[i].content, six.text_type)
assert output[i] == expected_output[i]
class BaseTestLineBox(BaseTestBox):
......@@ -120,15 +119,15 @@ class BaseTestLineBox(BaseTestBox):
self._builder = builders.LineBoxBuilder()
def _test_equal(self, output, expected_output):
self.assertEqual(len(output), len(expected_output))
assert len(output) == len(expected_output)
for i in range(0, min(len(output), len(expected_output))):
self.assertEqual(len(output[i].word_boxes),
len(expected_output[i].word_boxes))
assert (len(output[i].word_boxes) ==
len(expected_output[i].word_boxes))
for j in range(0, len(output[i].word_boxes)):
self.assertEqual(type(output[i].word_boxes[j]),
type(expected_output[i].word_boxes[j]))
self.assertEqual(output[i], expected_output[i])
assert (type(output[i].word_boxes[j]) ==
type(expected_output[i].word_boxes[j]))
assert output[i] == expected_output[i]
class BaseTestDigitLineBox(BaseTestLineBox):
......@@ -146,4 +145,4 @@ class BaseTestPdf(BaseTest):
return file_descriptor.read()
def _test_not_empty(self, output):
self.assertTrue(len(output) > 0)
assert len(output) > 0
import os
import codecs
import tempfile
import unittest
import pytest
from pyocr import cuneiform
from . import tests_base as base
class TestContext(unittest.TestCase):
class TestContext(object):
"""
These tests make sure the requirements for the tests are met.
"""
def setUp(self):
def setup(self):
pass
def test_available(self):
self.assertTrue(cuneiform.is_available(),
"cuneiform not found. Is it installed ?")
assert cuneiform.is_available()
def test_version(self):
self.assertEqual(cuneiform.get_version(), (1, 1, 0),
("cuneiform does not have the expected version"
" (1.1.0) ! Tests will fail !"))
assert cuneiform.get_version() == (1, 1, 0)
def test_langs(self):
langs = cuneiform.get_available_languages()
self.assertTrue("eng" in langs,
("English training does not appear to be installed."
" (required for the tests)"))
self.assertTrue("fra" in langs,
("French training does not appear to be installed."
" (required for the tests)"))
def tearDown(self):
assert "eng" in langs
assert "fra" in langs
def teardown(self):
pass
......@@ -49,13 +40,13 @@ class BaseCuneiform(base.BaseTest):
)
class TestTxt(unittest.TestCase, base.BaseTestText, BaseCuneiform):
class TestTxt(base.BaseTestText, BaseCuneiform):
"""
These tests make sure the "usual" OCR works fine. (the one generating
a .txt file)
"""
def setUp(self):
super(TestTxt, self).setUp()
def setup(self):
super(TestTxt, self).setup()
self.tool = cuneiform
self.set_builder()
......@@ -68,31 +59,28 @@ class TestTxt(unittest.TestCase, base.BaseTestText, BaseCuneiform):
def test_french(self):
self._test_txt('test-french.jpg', 'test-french.txt', 'fra')
def tearDown(self):
def teardown(self):
pass
class TestDigit(base.BaseTestDigit, BaseCuneiform, unittest.TestCase):
def setUp(self):
super(TestDigit, self).setUp()
class TestDigit(base.BaseTestDigit, BaseCuneiform):
def setup(self):
super(TestDigit, self).setup()
self.tool = cuneiform
self.set_builder()
def test_digits_not_implemented(self):
image_path = self._path_to_img("test-digits.png")
self.assertRaises(
NotImplementedError,
self._read_from_img,
image_path
)
with pytest.raises(NotImplementedError):
self._read_from_img(image_path)
class TestWordBox(base.BaseTestWordBox, BaseCuneiform, unittest.TestCase):
class TestWordBox(base.BaseTestWordBox, BaseCuneiform):
"""
These tests make sure that cuneiform box handling works fine.
"""
def setUp(self):
super(TestWordBox, self).setUp()
def setup(self):
super(TestWordBox, self).setup()
self.tool = cuneiform
self.set_builder()
......@@ -105,73 +93,23 @@ class TestWordBox(base.BaseTestWordBox, BaseCuneiform, unittest.TestCase):
def test_french(self):
self._test_txt('test-french.jpg', 'test-french.words', 'fra')
def test_write_read(self):
def test_write_read(self, tmpdir):
original_boxes = self._read_from_img(
os.path.join("tests", "input", "specific", "test.png")
)
self.assertTrue(len(original_boxes) > 0)
assert len(original_boxes) > 0
(file_descriptor, tmp_path) = tempfile.mkstemp()
try:
# we must open the file with codecs.open() for utf-8 support
os.close(file_descriptor)
tmp_path = tmpdir.join('test_write_read.txt')
with codecs.open(tmp_path, 'w', encoding='utf-8') as file_desc:
self._builder.write_file(file_desc, original_boxes)
with tmp_path.open('w', encoding='utf-8') as file_desc:
self._builder.write_file(file_desc, original_boxes)
with codecs.open(tmp_path, 'r', encoding='utf-8') as file_desc:
new_boxes = self._builder.read_file(file_desc)
with tmp_path.open('r', encoding='utf-8') as file_desc:
new_boxes = self._builder.read_file(file_desc)
self.assertEqual(len(new_boxes), len(original_boxes))
for i in range(0, len(original_boxes)):
self.assertEqual(new_boxes[i], original_boxes[i])
finally:
os.remove(tmp_path)
assert new_boxes == original_boxes
class TestOrientation(unittest.TestCase):
class TestOrientation(object):
def test_can_detect_orientation(self):
self.assertFalse(cuneiform.can_detect_orientation())
def get_all_tests():
all_tests = unittest.TestSuite()
test_names = [
'test_available',
'test_version',
'test_langs',
]
tests = unittest.TestSuite(map(TestContext, test_names))
all_tests.addTest(tests)
test_names = [
'test_basic',
'test_european',
'test_french',
]
tests = unittest.TestSuite(map(TestTxt, test_names))
all_tests.addTest(tests)
test_names = [
'test_basic',
'test_european',
'test_french',
'test_write_read',
]
tests = unittest.TestSuite(map(TestWordBox, test_names))
all_tests.addTest(tests)
test_names = [
'test_digits_not_implemented'
]
tests = unittest.TestSuite(map(TestDigit, test_names))
all_tests.addTest(tests)
test_names = [
'test_can_detect_orientation',
]
tests = unittest.TestSuite(map(TestOrientation, test_names))
all_tests.addTest(tests)
return all_tests
assert not cuneiform.can_detect_orientation()
import codecs
import os
import tempfile
import unittest
import pytest
import PIL.Image
......@@ -12,59 +10,38 @@ from pyocr import PyocrException
from . import tests_base as base
class TestContext(unittest.TestCase):
class TestContext(object):
"""
These tests make sure the requirements for the tests are met.
"""
def setUp(self):
def setup(self):
pass
def test_available(self):
self.assertTrue(
libtesseract.is_available(),
"Tesseract not found."
" Are libtesseract and libleptonica installed ? "
)
assert libtesseract.is_available()
def test_version(self):
self.assertTrue(
libtesseract.get_version() in (
assert libtesseract.get_version() in (
(3, 2, 1),
(3, 2, 2),
(3, 3, 0),
(3, 4, 0),
(3, 4, 1),
(3, 5, 0),
),
("Tesseract does not have the expected version")
)
)
def test_langs(self):
langs = libtesseract.get_available_languages()
self.assertTrue("eng" in langs,
("English training does not appear to be installed."
" (required for the tests)"))
self.assertTrue("fra" in langs,
("French training does not appear to be installed."
" (required for the tests)"))
self.assertTrue("jpn" in langs,
("Japanese training does not appear to be installed."
" (required for the tests)"))
def test_nolangs(self):
tessdata_prefix = os.getenv("TESSDATA_PREFIX", "")
os.environ['TESSDATA_PREFIX'] = '/opt/tulipe'
try:
langs = libtesseract.get_available_languages()
self.assertEqual(langs, [])
finally:
if tessdata_prefix == "":
os.environ['TESSDATA_PREFIX'] = ""
os.unsetenv("TESSDATA_PREFIX")
else:
os.environ['TESSDATA_PREFIX'] = tessdata_prefix
def tearDown(self):
assert "eng" in langs
assert "fra" in langs
assert "jpn" in langs
def test_nolangs(self, monkeypatch):
monkeypatch.setenv('TESSDATA_PREFIX', '/opt/tulipe')
langs = libtesseract.get_available_languages()
assert langs == []
def teardown(self):
pass
......@@ -82,7 +59,7 @@ class BaseLibtesseract(base.BaseTest):
)
class TestTxt(base.BaseTestText, BaseLibtesseract, unittest.TestCase):
class TestTxt(base.BaseTestText, BaseLibtesseract):
"""
These tests make sure the "usual" OCR works fine. (the one generating
a .txt file)
......@@ -102,34 +79,26 @@ class TestTxt(base.BaseTestText, BaseLibtesseract, unittest.TestCase):
def test_multi(self):
self._test_txt('test-european.jpg', 'test-european.txt', 'eng+fra')
def test_nolangs(self):
def test_nolangs(self, monkeypatch):
"""
Issue #51: Running OCR without any language installed causes a SIGSEGV.
"""
tessdata_prefix = os.getenv("TESSDATA_PREFIX", "")
os.environ['TESSDATA_PREFIX'] = '/opt/tulipe'
try:
with self.assertRaises(PyocrException):
self.tool.image_to_string(
PIL.Image.open(self._path_to_img('test-japanese.jpg')),
lang='fra'
)
finally:
if tessdata_prefix == "":
os.environ['TESSDATA_PREFIX'] = ""
os.unsetenv("TESSDATA_PREFIX")
else:
os.environ['TESSDATA_PREFIX'] = tessdata_prefix
monkeypatch.setenv('TESSDATA_PREFIX', '/opt/tulipe')
with pytest.raises(PyocrException):
self.tool.image_to_string(
PIL.Image.open(self._path_to_img('test-japanese.jpg')),
lang='fra'
)
def test_nolangs2(self):
with self.assertRaises(PyocrException):
with pytest.raises(PyocrException):
self.tool.image_to_string(
PIL.Image.open(self._path_to_img('test-japanese.jpg')),
lang='doesnotexist'
)
class TestDigit(base.BaseTestDigit, BaseLibtesseract, unittest.TestCase):
class TestDigit(base.BaseTestDigit, BaseLibtesseract):
"""
These tests make sure that Tesseract digits handling works fine.
"""
......@@ -137,7 +106,7 @@ class TestDigit(base.BaseTestDigit, BaseLibtesseract, unittest.TestCase):
self._test_txt('test-digits.png', 'test-digits.txt')
class TestWordBox(base.BaseTestWordBox, BaseLibtesseract, unittest.TestCase):
class TestWordBox(base.BaseTestWordBox, BaseLibtesseract):
"""
These tests make sure that Tesseract box handling works fine.
"""
......@@ -153,30 +122,23 @@ class TestWordBox(base.BaseTestWordBox, BaseLibtesseract, unittest.TestCase):
def test_japanese(self):
self._test_txt('test-japanese.jpg', 'test-japanese.words', 'jpn')
def test_write_read(self):
def test_write_read(self, tmpdir):
image_path = self._path_to_img("test.png")
original_boxes = self._read_from_img(image_path)
self.assertTrue(len(original_boxes) > 0)
assert len(original_boxes) > 0
(file_descriptor, tmp_path) = tempfile.mkstemp()
try:
# we must open the file with codecs.open() for utf-8 support
os.close(file_descriptor)
tmp_path = tmpdir.join('test_write_read.txt')
with codecs.open(tmp_path, 'w', encoding='utf-8') as fdescriptor:
self._builder.write_file(fdescriptor, original_boxes)
with tmp_path.open('w', encoding='utf-8') as fdescriptor:
self._builder.write_file(fdescriptor, original_boxes)
with codecs.open(tmp_path, 'r', encoding='utf-8') as fdescriptor:
new_boxes = self._builder.read_file(fdescriptor)
with tmp_path.open('r', encoding='utf-8') as fdescriptor:
new_boxes = self._builder.read_file(fdescriptor)
self.assertEqual(len(new_boxes), len(original_boxes))
for i in range(0, len(original_boxes)):
self.assertEqual(new_boxes[i], original_boxes[i])
finally:
os.remove(tmp_path)
assert new_boxes == original_boxes
class TestLineBox(base.BaseTestLineBox, BaseLibtesseract, unittest.TestCase):
class TestLineBox(base.BaseTestLineBox, BaseLibtesseract):
"""
These tests make sure that Tesseract box handling works fine.
"""
......@@ -192,54 +154,46 @@ class TestLineBox(base.BaseTestLineBox, BaseLibtesseract, unittest.TestCase):
def test_japanese(self):
self._test_txt('test-japanese.jpg', 'test-japanese.lines', 'jpn')
def test_write_read(self):
def test_write_read(self, tmpdir):
image_path = self._path_to_img("test.png")
original_boxes = self._read_from_img(image_path)
self.assertTrue(len(original_boxes) > 0)
assert len(original_boxes) > 0
(file_descriptor, tmp_path) = tempfile.mkstemp()
try:
# we must open the file with codecs.open() for utf-8 support
os.close(file_descriptor)
tmp_path = tmpdir.join('test_write_read.txt')
with codecs.open(tmp_path, 'w', encoding='utf-8') as fdescriptor:
self._builder.write_file(fdescriptor, original_boxes)
with tmp_path.open('w', encoding='utf-8') as fdescriptor:
self._builder.write_file(fdescriptor, original_boxes)
with codecs.open(tmp_path, 'r', encoding='utf-8') as fdescriptor:
new_boxes = self._builder.read_file(fdescriptor)
with tmp_path.open('r', encoding='utf-8') as fdescriptor:
new_boxes = self._builder.read_file(fdescriptor)
self.assertEqual(len(new_boxes), len(original_boxes))
for i in range(0, len(original_boxes)):
self.assertEqual(new_boxes[i], original_boxes[i])
finally:
os.remove(tmp_path)
assert new_boxes == original_boxes
class TestDigitLineBox(base.BaseTestDigitLineBox, BaseLibtesseract,
unittest.TestCase):
class TestDigitLineBox(base.BaseTestDigitLineBox, BaseLibtesseract):
def test_digits(self):
self._test_txt('test-digits.png', 'test-digits.lines')
class TestOrientation(BaseLibtesseract, unittest.TestCase):
class TestOrientation(BaseLibtesseract):
def set_builder(self):
self._builder = builders.TextBuilder()
def test_can_detect_orientation(self):
self.assertTrue(libtesseract.can_detect_orientation())
assert libtesseract.can_detect_orientation()
def test_orientation_0(self):
img = base.Image.open(self._path_to_img("test.png"))
result = libtesseract.detect_orientation(img, lang='eng')
self.assertEqual(result['angle'], 0)
assert result['angle'] == 0
def test_orientation_90(self):
img = base.Image.open(self._path_to_img("test-90.png"))
result = libtesseract.detect_orientation(img, lang='eng')
self.assertEqual(result['angle'], 90)
assert result['angle'] == 90
class TestBasicDoc(base.BaseTestLineBox, unittest.TestCase):
class TestBasicDoc(base.BaseTestLineBox):
"""
These tests make sure that Tesseract box handling works fine.
"""
......@@ -259,7 +213,7 @@ class TestBasicDoc(base.BaseTestLineBox, unittest.TestCase):
self._test_txt('basic_doc.jpg', 'basic_doc.lines')
class TestPdf(base.BaseTestPdf, unittest.TestCase):
class TestPdf(base.BaseTestPdf):
tool = libtesseract
def _path_to_img(self, image_file):
......@@ -274,63 +228,3 @@ class TestPdf(base.BaseTestPdf, unittest.TestCase):
def test_basic(self):
self._test_pdf('basic_doc.jpg')
def get_all_tests():
all_tests = unittest.TestSuite()
test_names = [
'test_available',
'test_version',
'test_langs',
'test_nolangs',
]
tests = unittest.TestSuite(map(TestContext, test_names))
all_tests.addTest(tests)
test_names = [
'test_basic',
'test_european',
'test_french',
'test_japanese',
'test_multi',
'test_nolangs',
]
tests = unittest.TestSuite(map(TestTxt, test_names))
all_tests.addTest(tests)
test_names = [
'test_basic',
'test_european',
'test_french',
'test_japanese',
'test_write_read',
]
tests = unittest.TestSuite(map(TestWordBox, test_names))
all_tests.addTest(tests)
tests = unittest.TestSuite(map(TestLineBox, test_names))
all_tests.addTest(tests)
test_names = [
'test_digits'
]
tests = unittest.TestSuite(map(TestDigit, test_names))
all_tests.addTest(tests)
tests = unittest.TestSuite(map(TestDigitLineBox, test_names))
all_tests.addTest(tests)
test_names = [
'test_can_detect_orientation',
'test_orientation_0',
'test_orientation_90',
]
tests = unittest.TestSuite(map(TestOrientation, test_names))
all_tests.addTest(tests)
test_names = [
'test_basic',
]
tests = unittest.TestSuite(map(TestBasicDoc, test_names))
all_tests.addTest(tests)
return all_tests
import os
import codecs
import tempfile
import unittest
from pyocr import builders
from pyocr import tesseract
from . import tests_base as base
class TestContext(unittest.TestCase):
class TestContext(object):
"""
These tests make sure the requirements for the tests are met.
"""
def setUp(self):
def setup(self):
pass
def test_available(self):
self.assertTrue(tesseract.is_available(),
"Tesseract not found. Is it installed ?")
assert tesseract.is_available()
def test_version(self):
self.assertTrue(
tesseract.get_version() in (
assert tesseract.get_version() in (
(3, 2, 1),
(3, 2, 2),
(3, 3, 0),
(3, 4, 0),
(3, 4, 1),
(3, 5, 0),
),
("Tesseract does not have the expected version")
)
)
def test_langs(self):
langs = tesseract.get_available_languages()
self.assertTrue("eng" in langs,
("English training does not appear to be installed."
" (required for the tests)"))
self.assertTrue("fra" in langs,
("French training does not appear to be installed."
" (required for the tests)"))
self.assertTrue("jpn" in langs,
("Japanese training does not appear to be installed."
" (required for the tests)"))
def tearDown(self):
assert "eng" in langs
assert "fra" in langs
assert "jpn" in langs
def teardown(self):
pass
......@@ -63,7 +49,7 @@ class BaseTesseract(base.BaseTest):
)
class TestTxt(base.BaseTestText, BaseTesseract, unittest.TestCase):
class TestTxt(base.BaseTestText, BaseTesseract):
"""
These tests make sure the "usual" OCR works fine. (the one generating
a .txt file)
......@@ -84,7 +70,7 @@ class TestTxt(base.BaseTestText, BaseTesseract, unittest.TestCase):
self._test_txt('test-european.jpg', 'test-european.txt', 'eng+fra')
class TestCharBox(base.BaseTestBox, BaseTesseract, unittest.TestCase):
class TestCharBox(base.BaseTestBox, BaseTesseract):
"""
These tests make sure that Tesseract box handling works fine.
"""
......@@ -92,10 +78,7 @@ class TestCharBox(base.BaseTestBox, BaseTesseract, unittest.TestCase):
self._builder = tesseract.CharBoxBuilder()
def _test_equal(self, output, expected_output):
self.assertEqual(len(output), len(expected_output))
for i in range(0, min(len(output), len(expected_output))):
self.assertEqual(output[i], expected_output[i])
assert output == expected_output
def test_basic(self):
self._test_txt('test.png', 'test.box')
......@@ -109,30 +92,23 @@ class TestCharBox(base.BaseTestBox, BaseTesseract, unittest.TestCase):
def test_japanese(self):
self._test_txt('test-japanese.jpg', 'test-japanese.box', 'jpn')
def test_write_read(self):
def test_write_read(self, tmpdir):
image_path = self._path_to_img("test.png")
original_boxes = self._read_from_img(image_path)
self.assertTrue(len(original_boxes) > 0)
assert len(original_boxes) > 0
(file_descriptor, tmp_path) = tempfile.mkstemp()
try:
# we must open the file with codecs.open() for utf-8 support
os.close(file_descriptor)
tmp_path = tmpdir.join('test_write_read.txt')
with codecs.open(tmp_path, 'w', encoding='utf-8') as fdescriptor:
self._builder.write_file(fdescriptor, original_boxes)
with tmp_path.open('w', encoding='utf-8') as fdescriptor:
self._builder.write_file(fdescriptor, original_boxes)
with codecs.open(tmp_path, 'r', encoding='utf-8') as fdescriptor:
new_boxes = self._builder.read_file(fdescriptor)
with tmp_path.open('r', encoding='utf-8') as fdescriptor:
new_boxes = self._builder.read_file(fdescriptor)
self.assertEqual(len(new_boxes), len(original_boxes))
for i in range(0, len(original_boxes)):
self.assertEqual(new_boxes[i], original_boxes[i])
finally:
os.remove(tmp_path)
assert new_boxes == original_boxes
class TestDigit(base.BaseTestDigit, BaseTesseract, unittest.TestCase):
class TestDigit(base.BaseTestDigit, BaseTesseract):
"""
These tests make sure that Tesseract digits handling works fine.
"""
......@@ -140,7 +116,7 @@ class TestDigit(base.BaseTestDigit, BaseTesseract, unittest.TestCase):
self._test_txt('test-digits.png', 'test-digits.txt')
class TestWordBox(base.BaseTestWordBox, BaseTesseract, unittest.TestCase):
class TestWordBox(base.BaseTestWordBox,