Commit cb9acca6 authored by Thomas Perret

Remove real images and use fake one

parent 199e4c5c
......@@ -17,7 +17,3 @@ class BaseTest(unittest.TestCase):
"data", filename), encoding="utf-8") as fh:
content = fh.read()
return content
def _get_file_path(self, filename):
return os.path.join(os.path.dirname(os.path.abspath(__file__)),
"data", filename)
......@@ -8,8 +8,8 @@ except ImportError:
from PIL import Image
from pyocr import cuneiform
from pyocr import builders
from pyocr import cuneiform
from .tests_base import BaseTest
......@@ -87,7 +87,8 @@ class TestCuneiformTxt(BaseTest):
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.builder = builders.TextBuilder()
self.image = Image.open(self._get_file_path("text.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.text_file = StringIO(self._get_file_content("text"))
self.stdout = MagicMock()
self.stdout.stdout.read.return_value = (
"Cuneiform for Linux 1.1.0\n".encode()
......@@ -106,7 +107,7 @@ class TestCuneiformTxt(BaseTest):
temp_file, get_version):
get_version.return_value = (4, 0, 0)
Popen.return_value = self.stdout
copen.return_value = StringIO(self._get_file_content("text"))
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(self.image)
self.assertEqual(output, self._get_file_content("text").strip())
......@@ -121,7 +122,7 @@ class TestCuneiformTxt(BaseTest):
@patch("subprocess.Popen")
def test_lang(self, Popen, copen, temp_file):
Popen.return_value = self.stdout
copen.return_value = StringIO(self._get_file_content("text"))
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(self.image, lang="fra",
builder=self.builder)
......@@ -138,7 +139,7 @@ class TestCuneiformTxt(BaseTest):
@patch("subprocess.Popen")
def test_text(self, Popen, copen, temp_file):
Popen.return_value = self.stdout
copen.return_value = StringIO(self._get_file_content("text"))
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(self.image,
builder=self.builder)
......@@ -162,6 +163,24 @@ class TestCuneiformTxt(BaseTest):
self.assertEqual(ce.exception.status, 1)
self.assertEqual(ce.exception.message, message)
@patch("pyocr.cuneiform.temp_file")
@patch("codecs.open")
@patch("subprocess.Popen")
def test_text_non_rgb_image(self, Popen, copen, temp_file):
"""This tests that image_to_string works with non RGB mode images and
that image is converted in function."""
image = self.image.convert("L")
Popen.return_value = self.stdout
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(image, builder=self.builder)
self.assertEqual(output, self._get_file_content("text").strip())
Popen.assert_called_once_with(
["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
class TestCuneiformDigits(BaseTest):
......@@ -169,16 +188,15 @@ class TestCuneiformDigits(BaseTest):
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.builder = builders.DigitBuilder()
self.image = Image.new(mode="RGB", size=(1, 1))
def test_digits_not_implemented(self):
image = Image.open(self._get_file_path("digits.png"))
with self.assertRaises(NotImplementedError):
cuneiform.image_to_string(image, builder=self.builder)
cuneiform.image_to_string(self.image, builder=self.builder)
def test_digits_box_not_implemented(self):
image = Image.open(self._get_file_path("digits.png"))
with self.assertRaises(NotImplementedError):
cuneiform.image_to_string(image,
cuneiform.image_to_string(self.image,
builder=self.builder)
......@@ -190,7 +208,8 @@ class TestCuneiformWordBox(BaseTest):
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.builder = builders.WordBoxBuilder()
self.image = Image.open(self._get_file_path("paragraph.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.text_file = StringIO(self._get_file_content("cuneiform.words"))
self.stdout = MagicMock()
self.stdout.stdout.read.return_value = (
"Cuneiform for Linux 1.1.0\n".encode()
......@@ -206,7 +225,7 @@ class TestCuneiformWordBox(BaseTest):
@patch("subprocess.Popen")
def test_word(self, Popen, copen, temp_file):
Popen.return_value = self.stdout
copen.return_value = StringIO(self._get_file_content("cuneiform.words"))
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(self.image,
builder=self.builder)
......@@ -242,7 +261,8 @@ class TestCuneiformLineBox(BaseTest):
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.builder = builders.LineBoxBuilder()
self.image = Image.open(self._get_file_path("paragraph.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.text_file = StringIO(self._get_file_content("cuneiform.lines"))
self.stdout = MagicMock()
self.stdout.stdout.read.return_value = (
"Cuneiform for Linux 1.1.0\n".encode()
......@@ -258,7 +278,7 @@ class TestCuneiformLineBox(BaseTest):
@patch("subprocess.Popen")
def test_line(self, Popen, copen, temp_file):
Popen.return_value = self.stdout
copen.return_value = StringIO(self._get_file_content("cuneiform.lines"))
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(self.image,
builder=self.builder)
......
......@@ -10,7 +10,8 @@ except ImportError:
from PIL import Image
from pyocr import builders, libtesseract
from pyocr import builders
from pyocr import libtesseract
from pyocr.error import TesseractError
from pyocr.libtesseract import tesseract_raw
......@@ -23,6 +24,7 @@ class TestLibTesseract(BaseTest):
"""
def setUp(self):
self.handle = randint(0, 2**32-1)
self.image = Image.new(mode="RGB", size=(1, 1))
@patch("pyocr.libtesseract.tesseract_raw.g_libtesseract")
def test_available(self, libtess):
......@@ -113,9 +115,8 @@ class TestLibTesseract(BaseTest):
"confidence": 87,
}
raw.detect_os.return_value = expected
image = Image.open(self._get_file_path("text.jpg"))
self.assertEqual(
libtesseract.detect_orientation(image),
libtesseract.detect_orientation(self.image),
{
"angle": 90,
"confidence": 87,
......@@ -125,16 +126,15 @@ class TestLibTesseract(BaseTest):
raw.set_page_seg_mode.assert_called_once_with(
self.handle, raw.PageSegMode.OSD_ONLY
)
raw.set_image.assert_called_once_with(self.handle, image)
raw.set_image.assert_called_once_with(self.handle, self.image)
raw.detect_os.assert_called_once_with(self.handle)
@patch("pyocr.libtesseract.tesseract_raw")
def test_detect_orientation_error(self, raw):
raw.init.return_value = self.handle
raw.detect_os.return_value = {"confidence": 0}
image = Image.open(self._get_file_path("text.jpg"))
with self.assertRaises(TesseractError) as te:
libtesseract.detect_orientation(image)
libtesseract.detect_orientation(self.image)
self.assertEqual(te.exception.status, "no script")
self.assertEqual(te.exception.message, "no script detected")
......@@ -144,6 +144,7 @@ class TestLibTesseractRaw(BaseTest):
def setUp(self):
self.handle = randint(0, 2**32-1)
self.iterator = randint(0, 2**32-1)
self.image = Image.new("RGB", size=(1, 1))
@patch("locale.setlocale")
@patch("pyocr.libtesseract.tesseract_raw.g_libtesseract")
......@@ -337,22 +338,16 @@ class TestLibTesseractRaw(BaseTest):
@patch("pyocr.libtesseract.tesseract_raw.g_libtesseract")
def test_set_image(self, libtess):
image = Image.open(self._get_file_path("text.jpg"))
dpi = image.info.get("dpi", [tesseract_raw.DPI_DEFAULT])[0]
tesseract_raw.set_image(self.handle, image)
self.assertEqual(
libtess.TessBaseAPISetImage.call_count,
1
)
tesseract_raw.set_image(self.handle, self.image)
self.assertEqual(libtess.TessBaseAPISetImage.call_count, 1)
args = libtess.TessBaseAPISetImage.call_args[0]
self.assertEqual(len(args), 6)
self.assertEqual(args[0].value, self.handle)
image = image.convert("RGB")
self.assertEqual(args[1], image.tobytes("raw", "RGB"))
self.assertEqual(args[2].value, image.width)
self.assertEqual(args[3].value, image.height)
self.assertEqual(args[1], self.image.tobytes("raw", "RGB"))
self.assertEqual(args[2].value, self.image.width)
self.assertEqual(args[3].value, self.image.height)
self.assertEqual(args[4].value, 3)
self.assertEqual(args[5].value, image.width * 3)
self.assertEqual(args[5].value, self.image.width * 3)
@patch("pyocr.libtesseract.tesseract_raw.g_libtesseract")
def test_recognize(self, libtess):
......@@ -779,7 +774,7 @@ class TestLibTesseractText(BaseTest):
@patch("pyocr.tesseract.get_version")
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.image = Image.open(self._get_file_path("text.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.builder = builders.TextBuilder()
self.handle = randint(0, 2**32-1)
self.iterator = randint(0, 2**32-1)
......@@ -1022,7 +1017,7 @@ class TestLibTesseractDigits(BaseTest):
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.builder = builders.DigitBuilder()
self.image = Image.open(self._get_file_path("digits.png"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.handle = randint(0, 2**32-1)
self.iterator = randint(0, 2**32-1)
......@@ -1093,7 +1088,7 @@ class TestLibTesseractWordBox(BaseTest):
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.builder = builders.WordBoxBuilder()
self.image = Image.open(self._get_file_path("paragraph.jpg"))
self.image = Image.new("RGB", size=(1, 1))
self.handle = randint(0, 2**32-1)
self.iterator = randint(0, 2**32-1)
......@@ -1196,7 +1191,7 @@ class TestLibTesseractLineBox(BaseTest):
@patch("pyocr.tesseract.get_version")
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.image = Image.open(self._get_file_path("paragraph.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.builder = builders.LineBoxBuilder()
self.handle = randint(0, 2**32-1)
self.iterator = randint(0, 2**32-1)
......@@ -1302,7 +1297,7 @@ class TestLibTesseractDigitsLineBox(BaseTest):
@patch("pyocr.tesseract.get_version")
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.image = Image.open(self._get_file_path("paragraph.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.builder = builders.DigitLineBoxBuilder()
self.handle = randint(0, 2**32-1)
self.iterator = randint(0, 2**32-1)
......@@ -1404,7 +1399,7 @@ class TestLibTesseractDigitsLineBox(BaseTest):
class TestLibTesseractPDF(BaseTest):
def setUp(self):
self.image = Image.open(self._get_file_path("paragraph.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.handle = randint(0, 2**32-1)
@patch("pyocr.libtesseract.tesseract_raw")
......
......@@ -25,6 +25,7 @@ class TestTesseract(BaseTest):
"""
def setUp(self):
self.stdout = MagicMock()
self.image = Image.new(mode="RGB", size=(1, 1))
self.message = (
"tesseract 4.0.0\n leptonica-1.76.0\n"
" libgif 5.1.4 : libjpeg 6b (libjpeg-turbo 1.5.2) : libpng 1.6.34 "
......@@ -221,10 +222,6 @@ class TestTesseract(BaseTest):
@patch("pyocr.tesseract.get_version")
@patch("subprocess.Popen")
def test_run_tesseract(self, Popen, get_version):
image = Image.open(self._get_file_path("text.jpg"))
if image.mode != "RGB":
image = image.convert("RGB")
message = (
"Tesseract Open Source OCR Engine v4.0.0 with Leptonica\n"
)
......@@ -232,7 +229,7 @@ class TestTesseract(BaseTest):
Popen.return_value = self.stdout
with tesseract.temp_dir() as tmpdir:
image.save(os.path.join(tmpdir, "input.bmp"))
self.image.save(os.path.join(tmpdir, "input.bmp"))
status, error = tesseract.run_tesseract(
"input.bmp",
"output",
......@@ -252,7 +249,7 @@ class TestTesseract(BaseTest):
get_version.return_value = (4, 0, 0)
builder = builders.TextBuilder()
with tesseract.temp_dir() as tmpdir:
image.save(os.path.join(tmpdir, "input2.bmp"))
self.image.save(os.path.join(tmpdir, "input2.bmp"))
status, error = tesseract.run_tesseract(
"input2.bmp",
"output2",
......@@ -278,7 +275,6 @@ class TestTesseract(BaseTest):
@patch("subprocess.Popen")
def test_detect_orientation_tesseract4(self, Popen, temp_dir, get_version):
get_version.return_value = (4, 0, 0)
image = Image.open(self._get_file_path("orientation.png"))
message = (
"Page number: 0\n"
"Orientation in degrees: 90\n"
......@@ -293,7 +289,7 @@ class TestTesseract(BaseTest):
enter = MagicMock()
enter.__enter__.return_value = tmpdir
temp_dir.return_value = enter
result = tesseract.detect_orientation(image)
result = tesseract.detect_orientation(self.image)
self.assertEqual(result["angle"], 90)
self.assertEqual(result["confidence"], 9.30)
Popen.assert_called_once_with(
......@@ -310,11 +306,12 @@ class TestTesseract(BaseTest):
@patch("pyocr.tesseract.get_version")
@patch("pyocr.tesseract.temp_dir")
@patch("subprocess.Popen")
def test_detect_orientation_tesseract4_rgb_image(self, Popen,
temp_dir, get_version):
def test_detect_orientation_tesseract4_non_rgb_image(self, Popen,
temp_dir, get_version):
"""This tests that detect_orientation works with non RGB mode images and
that image is converted in function."""
image = self.image.convert("L")
get_version.return_value = (4, 0, 0)
image = Image.open(self._get_file_path("orientation.png"))
image = image.convert("RGB")
message = (
"Page number: 0\n"
"Orientation in degrees: 90\n"
......@@ -350,7 +347,6 @@ class TestTesseract(BaseTest):
def test_detect_orientation_tesseract4_with_lang(self, Popen, temp_dir,
get_version):
get_version.return_value = (4, 0, 0)
image = Image.open(self._get_file_path("orientation.png"))
message = (
"Page number: 0\n"
"Orientation in degrees: 90\n"
......@@ -365,7 +361,7 @@ class TestTesseract(BaseTest):
enter = MagicMock()
enter.__enter__.return_value = tmpdir
temp_dir.return_value = enter
result = tesseract.detect_orientation(image, lang="fra")
result = tesseract.detect_orientation(self.image, lang="fra")
self.assertEqual(result["angle"], 90)
self.assertEqual(result["confidence"], 9.30)
Popen.assert_called_once_with(
......@@ -385,7 +381,6 @@ class TestTesseract(BaseTest):
def test_detect_orientation_tesseract4_error(self, Popen, temp_dir,
get_version):
get_version.return_value = (4, 0, 0)
image = Image.open(self._get_file_path("orientation.png"))
message = (
"Could not initialize tesseract\n"
)
......@@ -396,7 +391,7 @@ class TestTesseract(BaseTest):
enter.__enter__.return_value = tmpdir
temp_dir.return_value = enter
with self.assertRaises(tesseract.TesseractError) as te:
tesseract.detect_orientation(image)
tesseract.detect_orientation(self.image)
Popen.assert_called_once_with(
["tesseract", "input.bmp", "stdout", "--psm", "0"],
stdin=subprocess.PIPE,
......@@ -416,7 +411,6 @@ class TestTesseract(BaseTest):
def test_detect_orientation_tesseract4_bad_output(self, Popen, temp_dir,
get_version):
get_version.return_value = (4, 0, 0)
image = Image.open(self._get_file_path("orientation.png"))
message = (
"Page number: 0\n"
"Orientation in degrees: ABC\n"
......@@ -432,7 +426,7 @@ class TestTesseract(BaseTest):
enter.__enter__.return_value = tmpdir
temp_dir.return_value = enter
with self.assertRaises(tesseract.TesseractError) as te:
tesseract.detect_orientation(image)
tesseract.detect_orientation(self.image)
Popen.assert_called_once_with(
["tesseract", "input.bmp", "stdout", "--psm", "0"],
stdin=subprocess.PIPE,
......@@ -451,7 +445,6 @@ class TestTesseract(BaseTest):
@patch("subprocess.Popen")
def test_detect_orientation_tesseract3(self, Popen, temp_dir, get_version):
get_version.return_value = (3, 5, 0)
image = Image.open(self._get_file_path("orientation.png"))
message = (
"Page number: 0\n"
"Orientation in degrees: 90\n"
......@@ -466,7 +459,7 @@ class TestTesseract(BaseTest):
enter = MagicMock()
enter.__enter__.return_value = tmpdir
temp_dir.return_value = enter
result = tesseract.detect_orientation(image)
result = tesseract.detect_orientation(self.image)
self.assertEqual(result["angle"], 90)
self.assertEqual(result["confidence"], 9.30)
Popen.assert_called_once_with(
......@@ -486,7 +479,6 @@ class TestTesseract(BaseTest):
def test_detect_orientation_tesseract3_with_lang(self, Popen, temp_dir,
get_version):
get_version.return_value = (3, 5, 0)
image = Image.open(self._get_file_path("orientation.png"))
message = (
"Page number: 0\n"
"Orientation in degrees: 90\n"
......@@ -501,7 +493,7 @@ class TestTesseract(BaseTest):
enter = MagicMock()
enter.__enter__.return_value = tmpdir
temp_dir.return_value = enter
result = tesseract.detect_orientation(image, lang="fra")
result = tesseract.detect_orientation(self.image, lang="fra")
self.assertEqual(result["angle"], 90)
self.assertEqual(result["confidence"], 9.30)
Popen.assert_called_once_with(
......@@ -521,7 +513,6 @@ class TestTesseract(BaseTest):
def test_detect_orientation_tesseract3_error(self, Popen, temp_dir,
get_version):
get_version.return_value = (3, 5, 0)
image = Image.open(self._get_file_path("orientation.png"))
message = (
"Could not initialize tesseract\n"
)
......@@ -532,7 +523,7 @@ class TestTesseract(BaseTest):
enter.__enter__.return_value = tmpdir
temp_dir.return_value = enter
with self.assertRaises(tesseract.TesseractError) as te:
tesseract.detect_orientation(image)
tesseract.detect_orientation(self.image)
Popen.assert_called_once_with(
["tesseract", "input.bmp", "stdout", "-psm", "0"],
stdin=subprocess.PIPE,
......@@ -552,7 +543,6 @@ class TestTesseract(BaseTest):
def test_detect_orientation_tesseract3_bad_output(self, Popen, temp_dir,
get_version):
get_version.return_value = (3, 5, 0)
image = Image.open(self._get_file_path("orientation.png"))
message = (
"Page number: 0\n"
"Orientation in degrees: ABC\n"
......@@ -568,7 +558,7 @@ class TestTesseract(BaseTest):
enter.__enter__.return_value = tmpdir
temp_dir.return_value = enter
with self.assertRaises(tesseract.TesseractError) as te:
tesseract.detect_orientation(image)
tesseract.detect_orientation(self.image)
Popen.assert_called_once_with(
["tesseract", "input.bmp", "stdout", "-psm", "0"],
stdin=subprocess.PIPE,
......@@ -591,7 +581,7 @@ class TestTesseractTxt(BaseTest):
@patch("pyocr.tesseract.get_version")
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.image = Image.open(self._get_file_path("text.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.builder = builders.TextBuilder()
@patch("pyocr.tesseract.get_version")
......@@ -663,6 +653,30 @@ class TestTesseractTxt(BaseTest):
configs=self.builder.tesseract_configs,
)
@patch("pyocr.tesseract.temp_dir")
@patch("codecs.open")
@patch("pyocr.tesseract.run_tesseract")
def test_text_non_rgb_image(self, run_tesseract, copen, temp_dir):
"""This tests that image_to_string works with non RGB mode images and
that image is converted in function."""
image = self.image.convert("L")
run_tesseract.return_value = (0, "")
copen.return_value = StringIO(self._get_file_content("text"))
with TemporaryDirectory(prefix="tess_") as tmpdir:
enter = MagicMock()
enter.__enter__.return_value = tmpdir
temp_dir.return_value = enter
with open(os.path.join(tmpdir, "output.txt"), "w") as fh:
fh.write("")
result = tesseract.image_to_string(image, builder=self.builder)
self.assertEqual(result, self._get_file_content("text").strip())
run_tesseract.assert_called_once_with(
"input.bmp", "output", cwd=tmpdir, lang=None,
flags=self.builder.tesseract_flags,
configs=self.builder.tesseract_configs,
)
@patch("pyocr.tesseract.temp_dir")
@patch("codecs.open")
@patch("pyocr.tesseract.run_tesseract")
......@@ -735,7 +749,7 @@ class TestTesseractCharBox(BaseTest):
@patch("pyocr.tesseract.get_version")
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.image = Image.open(self._get_file_path("paragraph.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.builder = tesseract.CharBoxBuilder()
@patch("pyocr.tesseract.temp_dir")
......@@ -870,7 +884,7 @@ class TestTesseractDigits(BaseTest):
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.builder = builders.DigitBuilder()
self.image = Image.open(self._get_file_path("digits.png"))
self.image = Image.new(mode="RGB", size=(1, 1))
@patch("pyocr.tesseract.temp_dir")
@patch("codecs.open")
......@@ -901,7 +915,7 @@ class TestTesseractWordBox(BaseTest):
@patch("pyocr.tesseract.get_version")
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.image = Image.open(self._get_file_path("paragraph.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.builder = builders.WordBoxBuilder()
@patch("pyocr.tesseract.temp_dir")
......@@ -976,7 +990,7 @@ class TestTesseractLineBox(BaseTest):
@patch("pyocr.tesseract.get_version")
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.image = Image.open(self._get_file_path("paragraph.jpg"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.builder = builders.LineBoxBuilder()
@patch("pyocr.tesseract.temp_dir")
......@@ -1051,7 +1065,7 @@ class TestTesseractDigitsLineBox(BaseTest):
@patch("pyocr.tesseract.get_version")
def setUp(self, get_version):
get_version.return_value = (4, 0, 0)
self.image = Image.open(self._get_file_path("digits.png"))
self.image = Image.new(mode="RGB", size=(1, 1))
self.builder = builders.DigitLineBoxBuilder()
@patch("pyocr.tesseract.temp_dir")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment