Commit 666c9e5e authored by Thomas Perret

Fix PEP8 errors and add python2 tests dependencies

parent 27f2b30d
......@@ -17,7 +17,6 @@ https://github.com/openpaperwork/pyocr#readme
import codecs
from io import BytesIO
import os
import re
import subprocess
import tempfile
......
......@@ -30,10 +30,7 @@ from . import builders
from . import util
from .builders import DigitBuilder # backward compatibility
from .error import TesseractError # backward compatibility
from .util import (
digits_only,
to_unicode,
)
from .util import digits_only
try:
FileNotFoundError
......
......@@ -40,13 +40,14 @@ class TestBox(unittest.TestCase):
@unittest.skipUnless(sys.version_info < (3, 0), "python2 box str")
def test_str_python2(self):
self.assertEqual(str(self.box_unicode), u"\xe9 1 2 3 4".encode("utf-8"))
self.assertEqual(str(self.box_unicode),
u"\xe9 1 2 3 4".encode("utf-8"))
@unittest.skipIf(sys.version_info < (3, 0), "python3 box str")
def test_str_python3(self):
self.assertEqual(str(self.box_unicode), "\xe9 1 2 3 4")
def test_box_not_equal_None(self):
def test_box_not_equal_none(self):
self.assertNotEqual(self.box1, None)
def test_box_equal(self):
......@@ -132,7 +133,8 @@ class TestLineBox(unittest.TestCase):
def test_str_python2(self):
self.assertEqual(
str(self.line_unicode),
u"[\n word1 15 22 23 30\n \xe9 1 2 3 4\n] 1 2 3 4".encode("utf-8")
(u"[\n word1 15 22 23 30"
u"\n \xe9 1 2 3 4\n] 1 2 3 4").encode("utf-8")
)
@unittest.skipIf(sys.version_info < (3, 0), "python3 line str")
......@@ -142,7 +144,7 @@ class TestLineBox(unittest.TestCase):
"[\n word1 15 22 23 30\n \xe9 1 2 3 4\n] 1 2 3 4"
)
def test_line_not_equal_None(self):
def test_line_not_equal_none(self):
self.assertNotEqual(self.line1, None)
def test_box_equal(self):
......
......@@ -30,7 +30,8 @@ class TestTextBuilder(unittest.TestCase):
def test_init_cuneiform_params(self, get_version):
get_version.return_value = (4, 0, 0)
# XXX Maybe overkill?
for cun_dotmat, cun_fax, cun_sglcol in product(*((False, True),) *3):
# this checks all combinations of parameters
for cun_dotmat, cun_fax, cun_sglcol in product(*((False, True),) * 3):
builder = builders.TextBuilder(
cuneiform_dotmatrix=cun_dotmat,
cuneiform_fax=cun_fax,
......@@ -178,7 +179,6 @@ class TestWordBoxBuilder(BaseTest):
self.assertEqual(self.builder.word_boxes[0], box)
def test_end_line(self):
box = builders.Box("word", ((1, 2), (3, 4)))
before = list(self.builder.word_boxes)
self.builder.end_line()
self.assertEqual(self.builder.word_boxes, before)
......@@ -222,7 +222,7 @@ class TestLineBoxBuilder(BaseTest):
self.assertEqual(builder.tesseract_layout, 1)
@patch("pyocr.tesseract.get_version")
def test_init_tesseract_version_3(self, get_version):
def test_init_tesseract_version_4(self, get_version):
get_version.return_value = (4, 0, 0)
builder = builders.LineBoxBuilder()
self.assertListEqual(builder.tesseract_flags, ["--psm", "1"])
......@@ -270,10 +270,12 @@ class TestLineBoxBuilder(BaseTest):
position = ((1, 2), (3, 4))
self.builder.start_line(position)
self.assertEqual(len(self.builder.lines), 1)
self.assertListEqual(self.builder.lines, [builders.LineBox([], position)])
self.assertListEqual(self.builder.lines,
[builders.LineBox([], position)])
self.builder.start_line(position)
self.assertEqual(len(self.builder.lines), 1)
self.assertListEqual(self.builder.lines, [builders.LineBox([], position)])
self.assertListEqual(self.builder.lines,
[builders.LineBox([], position)])
def test_add_word_no_line(self):
box = builders.Box("word", ((1, 2), (3, 4)), 42)
......@@ -300,7 +302,8 @@ class TestLineBoxBuilder(BaseTest):
for line in lines:
self.builder.start_line(line.position)
for word in line.word_boxes:
self.builder.add_word(word.content, word.position, word.confidence)
self.builder.add_word(word.content, word.position,
word.confidence)
self.builder.end_line() # could be useful in future
output = self.builder.get_output()
for line, line_expected in zip(output, lines):
......
......@@ -27,36 +27,36 @@ class TestCuneiform(BaseTest):
is_on_path.assert_called_once_with("cuneiform")
@patch("subprocess.Popen")
def test_version(self, Popen):
def test_version(self, popen):
stdout = MagicMock()
stdout.stdout.read.return_value = (
"Cuneiform for Linux 1.1.0\n"
"Usage: cuneiform [-l languagename -f format --dotmatrix --fax"
" --singlecolumn -o result_file] imagefile"
).encode()
Popen.return_value = stdout
popen.return_value = stdout
self.assertSequenceEqual(cuneiform.get_version(), (1, 1, 0))
@patch("subprocess.Popen")
def test_version_error(self, Popen):
def test_version_error(self, popen):
stdout = MagicMock()
stdout.stdout.read.return_value = "\n".encode()
Popen.return_value = stdout
popen.return_value = stdout
self.assertIsNone(cuneiform.get_version())
@patch("subprocess.Popen")
def test_langs(self, Popen):
def test_langs(self, popen):
stdout = MagicMock()
stdout.stdout.read.return_value = (
"Cuneiform for Linux 1.1.0\n"
"Supported languages: eng ger fra rus swe spa ita ruseng ukr srp "
"hrv pol dan por dut cze rum hun bul slv lav lit est tur."
).encode()
Popen.return_value = stdout
popen.return_value = stdout
langs = cuneiform.get_available_languages()
self.assertIn("eng", langs)
self.assertIn("fra", langs)
Popen.assert_called_once_with(
popen.assert_called_once_with(
["cuneiform", "-l"],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
......@@ -97,21 +97,23 @@ class TestCuneiformTxt(BaseTest):
self.tmp_filename = "/tmp/cuneiform_n0qfk87otxt"
self.enter = MagicMock()
self.enter.__enter__.return_value = MagicMock()
self.enter.__enter__.return_value.configure_mock(name=self.tmp_filename)
self.enter.__enter__.return_value.configure_mock(
name=self.tmp_filename
)
@patch("pyocr.tesseract.get_version")
@patch("pyocr.cuneiform.temp_file")
@patch("codecs.open")
@patch("subprocess.Popen")
def test_image_to_string_defaults_to_text_buidler(self, Popen, copen,
def test_image_to_string_defaults_to_text_buidler(self, popen, copen,
temp_file, get_version):
get_version.return_value = (4, 0, 0)
Popen.return_value = self.stdout
popen.return_value = self.stdout
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(self.image)
self.assertEqual(output, self._get_file_content("text").strip())
Popen.assert_called_once_with(
popen.assert_called_once_with(
["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
......@@ -120,14 +122,14 @@ class TestCuneiformTxt(BaseTest):
@patch("pyocr.cuneiform.temp_file")
@patch("codecs.open")
@patch("subprocess.Popen")
def test_lang(self, Popen, copen, temp_file):
Popen.return_value = self.stdout
def test_lang(self, popen, copen, temp_file):
popen.return_value = self.stdout
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(self.image, lang="fra",
builder=self.builder)
self.assertEqual(output, self._get_file_content("text").strip())
Popen.assert_called_once_with(
popen.assert_called_once_with(
["cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename,
"-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
......@@ -137,27 +139,27 @@ class TestCuneiformTxt(BaseTest):
@patch("pyocr.cuneiform.temp_file")
@patch("codecs.open")
@patch("subprocess.Popen")
def test_text(self, Popen, copen, temp_file):
Popen.return_value = self.stdout
def test_text(self, popen, copen, temp_file):
popen.return_value = self.stdout
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(self.image,
builder=self.builder)
self.assertEqual(output, self._get_file_content("text").strip())
Popen.assert_called_once_with(
popen.assert_called_once_with(
["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
@patch("subprocess.Popen")
def test_text_error(self, Popen):
def test_text_error(self, popen):
message = ("Cuneiform for Linux 1.1.0\n"
"Magick: Improper image header (example.png) reported by "
"coders/png.c:2932 (ReadPNGImage)\n")
self.stdout.stdout.read.return_value = message.encode()
self.stdout.wait.return_value = 1
Popen.return_value = self.stdout
popen.return_value = self.stdout
with self.assertRaises(cuneiform.CuneiformError) as ce:
cuneiform.image_to_string(self.image, builder=self.builder)
self.assertEqual(ce.exception.status, 1)
......@@ -166,16 +168,16 @@ class TestCuneiformTxt(BaseTest):
@patch("pyocr.cuneiform.temp_file")
@patch("codecs.open")
@patch("subprocess.Popen")
def test_text_non_rgb_image(self, Popen, copen, temp_file):
def test_text_non_rgb_image(self, popen, copen, temp_file):
"""This tests that image_to_string works with non RGB mode images and
that image is converted in function."""
image = self.image.convert("L")
Popen.return_value = self.stdout
popen.return_value = self.stdout
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(image, builder=self.builder)
self.assertEqual(output, self._get_file_content("text").strip())
Popen.assert_called_once_with(
popen.assert_called_once_with(
["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
......@@ -218,18 +220,20 @@ class TestCuneiformWordBox(BaseTest):
self.tmp_filename = "/tmp/cuneiform_n0qfk87otxt"
self.enter = MagicMock()
self.enter.__enter__.return_value = MagicMock()
self.enter.__enter__.return_value.configure_mock(name=self.tmp_filename)
self.enter.__enter__.return_value.configure_mock(
name=self.tmp_filename
)
@patch("pyocr.cuneiform.temp_file")
@patch("codecs.open")
@patch("subprocess.Popen")
def test_word(self, Popen, copen, temp_file):
Popen.return_value = self.stdout
def test_word(self, popen, copen, temp_file):
popen.return_value = self.stdout
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(self.image,
builder=self.builder)
Popen.assert_called_once_with(
popen.assert_called_once_with(
["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
......@@ -238,14 +242,14 @@ class TestCuneiformWordBox(BaseTest):
self.assertIsInstance(box, builders.Box)
@patch("subprocess.Popen")
def test_word_error(self, Popen):
def test_word_error(self, popen):
stdout = MagicMock()
message = ("Cuneiform for Linux 1.1.0\n"
"Magick: Improper image header (example.png) reported by "
"coders/png.c:2932 (ReadPNGImage)\n")
stdout.stdout.read.return_value = message.encode()
stdout.wait.return_value = 1
Popen.return_value = stdout
popen.return_value = stdout
with self.assertRaises(cuneiform.CuneiformError) as ce:
cuneiform.image_to_string(self.image,
builder=self.builder)
......@@ -271,18 +275,20 @@ class TestCuneiformLineBox(BaseTest):
self.tmp_filename = "/tmp/cuneiform_n0qfk87otxt"
self.enter = MagicMock()
self.enter.__enter__.return_value = MagicMock()
self.enter.__enter__.return_value.configure_mock(name=self.tmp_filename)
self.enter.__enter__.return_value.configure_mock(
name=self.tmp_filename
)
@patch("pyocr.cuneiform.temp_file")
@patch("codecs.open")
@patch("subprocess.Popen")
def test_line(self, Popen, copen, temp_file):
Popen.return_value = self.stdout
def test_line(self, popen, copen, temp_file):
popen.return_value = self.stdout
copen.return_value = self.text_file
temp_file.return_value = self.enter
output = cuneiform.image_to_string(self.image,
builder=self.builder)
Popen.assert_called_once_with(
popen.assert_called_once_with(
["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
......@@ -291,13 +297,13 @@ class TestCuneiformLineBox(BaseTest):
self.assertIsInstance(box, builders.LineBox)
@patch("subprocess.Popen")
def test_line_error(self, Popen):
def test_line_error(self, popen):
message = ("Cuneiform for Linux 1.1.0\n"
"Magick: Improper image header (example.png) reported by "
"coders/png.c:2932 (ReadPNGImage)\n")
self.stdout.stdout.read.return_value = message.encode()
self.stdout.wait.return_value = 1
Popen.return_value = self.stdout
popen.return_value = self.stdout
with self.assertRaises(cuneiform.CuneiformError) as ce:
cuneiform.image_to_string(self.image,
builder=self.builder)
......
......@@ -214,50 +214,16 @@ class TestLibTesseractRaw(BaseTest):
self.assertEqual(args[2], b"F")
self.assertFalse(setlocale.called)
setlocale.assert_called_once_with(locale.LC_ALL, "C")
libtess.reset_mock()
setlocale.reset_mock()
@patch("locale.setlocale")
@patch("pyocr.libtesseract.tesseract_raw.g_libtesseract")
def test_init_tesseract3(self, libtess, setlocale):
libtess.TessVersion.return_value = b"3.5.0"
libtess.TessBaseAPICreate.return_value = self.handle
for lang in (None, "eng", "fra", "jpn", "osd"):
api = tesseract_raw.init(lang)
self.assertEqual(api, self.handle)
libtess.TessBaseAPICreate.assert_called_once_with()
self.assertEqual(
libtess.TessBaseAPIInit3.call_count,
1
)
args = libtess.TessBaseAPIInit3.call_args[0]
self.assertEqual(len(args), 3)
self.assertEqual(args[0].value, self.handle)
self.assertEqual(args[1].value, None)
self.assertEqual(args[2].value, lang.encode() if lang else None)
self.assertEqual(
libtess.TessBaseAPISetVariable.call_count,
1
)
args = libtess.TessBaseAPISetVariable.call_args[0]
self.assertEqual(len(args), 3)
self.assertEqual(args[0].value, self.handle)
self.assertEqual(args[1], b"tessedit_zero_rejection")
self.assertEqual(args[2], b"F")
setlocale.assert_not_called()
libtess.reset_mock()
@patch("pyocr.libtesseract.tesseract_raw.g_libtesseract")
def test_init_error(self, libtess):
libtess.TessBaseAPICreate.return_value = self.handle
libtess.TessBaseAPIInit3.side_effect = Exception("Could not initialize")
with self.assertRaises(Exception) as e:
libtess.TessBaseAPIInit3.side_effect = Exception(
"Could not initialize"
)
with self.assertRaises(Exception):
tesseract_raw.init()
self.assertEqual(
libtess.TessBaseAPICreate.call_count,
......@@ -453,7 +419,8 @@ class TestLibTesseractRaw(BaseTest):
@patch("pyocr.libtesseract.tesseract_raw.g_libtesseract")
def test_page_iterator_block_type(self, libtess):
libtess.TessPageIteratorBlockType.return_value = tesseract_raw.PolyBlockType.FLOWING_TEXT
flowing = tesseract_raw.PolyBlockType.FLOWING_TEXT
libtess.TessPageIteratorBlockType.return_value = flowing
self.assertEqual(tesseract_raw.page_iterator_block_type(self.iterator),
tesseract_raw.PolyBlockType.FLOWING_TEXT)
self.assertEqual(
......@@ -516,7 +483,8 @@ class TestLibTesseractRaw(BaseTest):
@patch("pyocr.libtesseract.tesseract_raw.g_libtesseract")
def test_get_iterator(self, libtess):
libtess.TessBaseAPIGetIterator.return_value = self.iterator
self.assertEqual(tesseract_raw.get_iterator(self.handle), self.iterator)
self.assertEqual(tesseract_raw.get_iterator(self.handle),
self.iterator)
self.assertEqual(
libtess.TessBaseAPIGetIterator.call_count,
1
......@@ -792,7 +760,8 @@ class TestLibTesseractText(BaseTest):
raw.result_iterator_get_utf8_text.side_effect = ("word1", "word2",
"word3")
raw.page_iterator_is_at_beginning_of.side_effect = (True, False, False)
raw.page_iterator_is_at_final_element.side_effect = (False, False, True)
raw.page_iterator_is_at_final_element.side_effect = (False, False,
True)
self.assertEqual(
libtesseract.image_to_string(self.image),
......@@ -853,7 +822,8 @@ class TestLibTesseractText(BaseTest):
raw.result_iterator_get_utf8_text.side_effect = ("word1", "word2",
"word3")
raw.page_iterator_is_at_beginning_of.side_effect = (True, False, False)
raw.page_iterator_is_at_final_element.side_effect = (False, False, True)
raw.page_iterator_is_at_final_element.side_effect = (False, False,
True)
self.assertEqual(
libtesseract.image_to_string(self.image, lang="eng",
......@@ -993,7 +963,8 @@ class TestLibTesseractText(BaseTest):
raw.result_iterator_get_utf8_text.side_effect = ("word1", "word2",
"word3")
raw.page_iterator_is_at_beginning_of.side_effect = (True, False, False)
raw.page_iterator_is_at_final_element.side_effect = (False, False, True)
raw.page_iterator_is_at_final_element.side_effect = (False, False,
True)
with self.assertRaises(TesseractError) as te:
libtesseract.image_to_string(self.image, builder=self.builder)
......@@ -1031,7 +1002,8 @@ class TestLibTesseractDigits(BaseTest):
raw.page_iterator_bounding_box.return_value = (True, (0, 0, 0, 0))
raw.result_iterator_get_utf8_text.side_effect = ("1", "2", "42")
raw.page_iterator_is_at_beginning_of.side_effect = (True, False, False)
raw.page_iterator_is_at_final_element.side_effect = (False, False, True)
raw.page_iterator_is_at_final_element.side_effect = (False, False,
True)
self.assertEqual(
libtesseract.image_to_string(self.image, builder=self.builder),
......@@ -1103,7 +1075,8 @@ class TestLibTesseractWordBox(BaseTest):
raw.result_iterator_get_utf8_text.side_effect = ("word1", "word2",
"word3")
raw.page_iterator_is_at_beginning_of.side_effect = (True, False, False)
raw.page_iterator_is_at_final_element.side_effect = (False, False, True)
raw.page_iterator_is_at_final_element.side_effect = (False, False,
True)
self.assertListEqual(
libtesseract.image_to_string(self.image, builder=self.builder),
......@@ -1168,7 +1141,8 @@ class TestLibTesseractWordBox(BaseTest):
raw.result_iterator_get_utf8_text.side_effect = ("word1", "word2",
"word3")
raw.page_iterator_is_at_beginning_of.side_effect = (True, False, False)
raw.page_iterator_is_at_final_element.side_effect = (False, False, True)
raw.page_iterator_is_at_final_element.side_effect = (False, False,
True)
with self.assertRaises(TesseractError) as te:
libtesseract.image_to_string(self.image, builder=self.builder)
......@@ -1207,7 +1181,8 @@ class TestLibTesseractLineBox(BaseTest):
raw.result_iterator_get_utf8_text.side_effect = ("word1", "word2",
"word3")
raw.page_iterator_is_at_beginning_of.side_effect = (True, False, False)
raw.page_iterator_is_at_final_element.side_effect = (False, False, True)
raw.page_iterator_is_at_final_element.side_effect = (False, False,
True)
self.assertListEqual(
libtesseract.image_to_string(self.image, builder=self.builder),
......@@ -1274,7 +1249,8 @@ class TestLibTesseractLineBox(BaseTest):
raw.result_iterator_get_utf8_text.side_effect = ("word1", "word2",
"word3")
raw.page_iterator_is_at_beginning_of.side_effect = (True, False, False)
raw.page_iterator_is_at_final_element.side_effect = (False, False, True)
raw.page_iterator_is_at_final_element.side_effect = (False, False,
True)
with self.assertRaises(TesseractError) as te:
libtesseract.image_to_string(self.image, builder=self.builder)
......@@ -1312,7 +1288,8 @@ class TestLibTesseractDigitsLineBox(BaseTest):
raw.page_iterator_bounding_box.return_value = (True, (0, 0, 0, 0))
raw.result_iterator_get_utf8_text.side_effect = ("1", "2", "42")
raw.page_iterator_is_at_beginning_of.side_effect = (True, False, False)
raw.page_iterator_is_at_final_element.side_effect = (False, False, True)
raw.page_iterator_is_at_final_element.side_effect = (False, False,
True)
self.assertListEqual(
libtesseract.image_to_string(self.image, builder=self.builder),
......@@ -1378,7 +1355,8 @@ class TestLibTesseractDigitsLineBox(BaseTest):
raw.page_iterator_bounding_box.return_value = (True, (0, 0, 0, 0))
raw.result_iterator_get_utf8_text.side_effect = ("1", "2", "42")
raw.page_iterator_is_at_beginning_of.side_effect = (True, False, False)
raw.page_iterator_is_at_final_element.side_effect = (False, False, True)
raw.page_iterator_is_at_final_element.side_effect = (False, False,
True)
with self.assertRaises(TesseractError) as te:
libtesseract.image_to_string(self.image, builder=self.builder)
......@@ -1434,7 +1412,7 @@ class TestLibTesseractPDF(BaseTest):
raw.init.return_value = self.handle
raw.init_pdf_renderer.return_value = renderer
with self.assertRaises(AssertionError) as ae:
with self.assertRaises(AssertionError):
libtesseract.image_to_pdf(self.image, "output")
raw.init.assert_called_once_with(lang=None)
......
This diff is collapsed.
......@@ -55,7 +55,7 @@ class TestPyOCR(unittest.TestCase):
@patch("pyocr.libtesseract.tesseract_raw.is_available")
@patch("pyocr.util.is_on_path")
def test_available_tools_tesseract3_0(self, is_on_path,
is_available, libtess):
is_available, libtess):
is_on_path.return_value = True
is_available.return_value = True
libtess.TessVersion.return_value = b"3.0.0"
......
......@@ -2,7 +2,10 @@
envlist=py27,py3
[testenv]
deps=pytest
deps=
pytest
py27: mock
py27: backports.tempfile
commands=pytest {posargs}
[flake8]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment