Revert Python 2 compatibility fixes for tests.

This partially reverts commits 27f2b30d,
199e4c5c, and
e37af8fa.
parent 8069b048
......@@ -60,20 +60,6 @@ class Box(object):
self.position = position
self.confidence = confidence
def get_unicode_string(self):
"""
Return the string corresponding to the box, in unicode (utf8).
This string can be stored in a file as-is (see write_box_file())
and reread using read_box_file().
"""
return to_unicode("%s %d %d %d %d") % (
self.content,
self.position[0][0],
self.position[0][1],
self.position[1][0],
self.position[1][1],
)
def get_xml_tag(self, parent_doc):
span_tag = parent_doc.createElement("span")
span_tag.setAttribute("class", "ocrx_word")
......@@ -87,7 +73,13 @@ class Box(object):
return span_tag
def __str__(self):
return self.get_unicode_string()
return "{} {} {} {} {}".format(
self.content,
self.position[0][0],
self.position[0][1],
self.position[1][0],
self.position[1][1],
)
def __box_cmp(self, other):
"""
......@@ -158,23 +150,6 @@ class LineBox(object):
txt = txt.strip()
return txt
def get_unicode_string(self):
"""
Return the string corresponding to the box, in unicode (utf8).
This string can be stored in a file as-is (see write_box_file())
and reread using read_box_file().
"""
txt = to_unicode("[\n")
for box in self.word_boxes:
txt += to_unicode(" %s\n") % box.get_unicode_string()
return to_unicode("%s] %d %d %d %d") % (
txt,
self.position[0][0],
self.position[0][1],
self.position[1][0],
self.position[1][1],
)
def get_xml_tag(self, parent_doc):
span_tag = parent_doc.createElement("span")
span_tag.setAttribute("class", "ocr_line")
......@@ -191,7 +166,22 @@ class LineBox(object):
return span_tag
def __str__(self):
return self.get_unicode_string()
txt = "[\n"
for box in self.word_boxes:
txt += " {} {} {} {} {}\n".format(
box.content,
box.position[0][0],
box.position[0][1],
box.position[1][0],
box.position[1][1],
)
return "{}] {} {} {} {}".format(
txt,
self.position[0][0],
self.position[0][1],
self.position[1][0],
self.position[1][1],
)
def __box_cmp(self, other):
"""
......
......@@ -16,7 +16,6 @@ https://gitlab.gnome.org/World/OpenPaperwork/pyocr#readme
'''
import codecs
import errno
import logging
import os
import subprocess
......@@ -31,12 +30,6 @@ from .builders import DigitBuilder # backward compatibility
from .error import TesseractError # backward compatibility
from .util import digits_only
try:
FileNotFoundError
except NameError:
# python2 does not have FileNotFoundError
FileNotFoundError = IOError
# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract'
......@@ -109,7 +102,7 @@ class CharBoxBuilder(builders.BaseBuilder):
The file_descriptor must support UTF-8 ! (see module 'codecs')
"""
for box in boxes:
file_descriptor.write(box.get_unicode_string() + " 0\n")
file_descriptor.write(str(box) + " 0\n")
def __str__(self):
return "Character boxes"
......@@ -389,15 +382,7 @@ def image_to_string(image, lang=None, builder=None):
with codecs.open(output_file_name, 'r', encoding='utf-8',
errors='replace') as file_desc:
return builder.read_file(file_desc)
except FileNotFoundError as exc:
if sys.version_info < (3, 0):
# python2 has no specific FileNotFoundError exception
# so we rely on the errno of the IOError exception
if exc.errno == errno.ENOENT:
# file not found
continue
else:
raise exc
except FileNotFoundError:
continue
finally:
cleanup(output_file_name)
......
import os
import unittest
from codecs import open
class BaseTest(unittest.TestCase):
tool = None
......
import sys
import unittest
import xml.dom.minidom
......@@ -13,7 +12,7 @@ class TestBox(unittest.TestCase):
self.box1 = builders.Box("word1", ((15, 22), (23, 42)))
self.box1_bis = builders.Box("word1_bis", ((15, 22), (23, 42)))
self.box2 = builders.Box("word2", ((30, 5), (40, 15)), 95)
self.box_unicode = builders.Box(u"\xe9", ((1, 2), (3, 4)))
self.box_unicode = builders.Box("\xe9", ((1, 2), (3, 4)))
def test_init(self):
self.assertEqual(self.box1.content, "word1")
......@@ -31,20 +30,10 @@ class TestBox(unittest.TestCase):
"bbox 15 22 23 42; x_wconf 0")
self.assertEqual(tag.firstChild.data, "word1")
def test_get_unicode_string(self):
self.assertEqual(self.box_unicode.get_unicode_string(),
u"\xe9 1 2 3 4")
def test_str_method(self):
self.assertEqual(str(self.box1), "word1 15 22 23 42")
@unittest.skipUnless(sys.version_info < (3, 0), "python2 box str")
def test_str_python2(self):
self.assertEqual(str(self.box_unicode),
u"\xe9 1 2 3 4".encode("utf-8"))
@unittest.skipIf(sys.version_info < (3, 0), "python3 box str")
def test_str_python3(self):
def test_str_unicode(self):
self.assertEqual(str(self.box_unicode), "\xe9 1 2 3 4")
def test_box_not_equal_none(self):
......@@ -80,7 +69,7 @@ class TestLineBox(unittest.TestCase):
box2 = builders.Box("word2", ((25, 23), (30, 32)))
box3 = builders.Box("word3", ((32, 25), (40, 32)), 95)
box4 = builders.Box("word4", ((41, 18), (44, 33)), 98)
box_unicode = builders.Box(u"\xe9", ((1, 2), (3, 4)), 98)
box_unicode = builders.Box("\xe9", ((1, 2), (3, 4)), 98)
self.line1 = builders.LineBox(
[box1, box2, box3, box4],
((14, 15), (45, 33))
......@@ -118,10 +107,6 @@ class TestLineBox(unittest.TestCase):
self.assertEqual(tag.firstChild.firstChild.data, "word1")
self.assertEqual(tag.lastChild.firstChild.data, "word4")
def test_get_unicode_string(self):
self.assertEqual(self.line_unicode.get_unicode_string(),
u"[\n word1 15 22 23 30\n \xe9 1 2 3 4\n] 1 2 3 4")
def test_line_str(self):
expected = "[\n"
for box in self.line1.word_boxes:
......@@ -129,16 +114,7 @@ class TestLineBox(unittest.TestCase):
expected += "] 14 15 45 33"
self.assertEqual(str(self.line1), expected)
@unittest.skipUnless(sys.version_info < (3, 0), "python2 line str")
def test_str_python2(self):
self.assertEqual(
str(self.line_unicode),
(u"[\n word1 15 22 23 30"
u"\n \xe9 1 2 3 4\n] 1 2 3 4").encode("utf-8")
)
@unittest.skipIf(sys.version_info < (3, 0), "python3 line str")
def test_str_python3(self):
def test_str_unicode(self):
self.assertEqual(
str(self.line_unicode),
"[\n word1 15 22 23 30\n \xe9 1 2 3 4\n] 1 2 3 4"
......
......@@ -3,10 +3,7 @@ import unittest
from io import StringIO
from itertools import product
from random import randint
try:
from unittest.mock import patch
except ImportError:
from mock import patch
from unittest.mock import patch
from pyocr import builders
......@@ -51,14 +48,14 @@ class TestTextBuilder(unittest.TestCase):
self.assertNotIn("--singlecolumn", builder.cuneiform_args)
def test_read_file(self):
txt = u"first line\nsecond line\n0123456789\n\U0001f5a8 "
txt = "first line\nsecond line\n0123456789\n🖨 "
input_fh = StringIO(txt)
output = self.builder.read_file(input_fh)
self.assertEqual(output, txt.strip())
def test_write_file(self):
output = StringIO()
txt = u"first line\nsecond line\n0123456789\n\U0001f5a8 "
txt = "first line\nsecond line\n0123456789\n🖨 "
self.builder.write_file(output, txt)
output.seek(0)
self.assertEqual(output.read(), txt)
......
import subprocess
from io import StringIO
try:
from unittest.mock import patch, MagicMock
except ImportError:
from mock import patch, MagicMock
from unittest.mock import patch, MagicMock
from PIL import Image
......
......@@ -3,10 +3,7 @@ import os
from ctypes import POINTER, cast, c_char_p, c_int
from random import randint
try:
from unittest.mock import patch, call
except ImportError:
from mock import patch, call
from unittest.mock import patch, call
from PIL import Image
......@@ -266,7 +263,7 @@ class TestLibTesseractRaw(BaseTest):
@patch("pyocr.libtesseract.tesseract_raw.g_libtesseract")
def test_set_debug_file(self, libtess):
for filename in (u"file", b"file"):
for filename in ("file", b"file"):
tesseract_raw.set_debug_file(self.handle, filename)
self.assertEqual(
libtess.TessBaseAPISetVariable.call_count,
......
......@@ -3,21 +3,8 @@ import os
import subprocess
from io import StringIO
try:
from unittest.mock import patch, MagicMock
except ImportError:
from mock import patch, MagicMock
try:
from tempfile import TemporaryDirectory
except ImportError:
from backports.tempfile import TemporaryDirectory
try:
FileNotFoundError
except NameError:
# python2 does not have FileNotFoundError and PermissionError
FileNotFoundError = IOError
PermissionError = IOError
from tempfile import TemporaryDirectory
from unittest.mock import patch, MagicMock
from PIL import Image
......@@ -864,14 +851,14 @@ class TestCharBoxBuilder(BaseTest):
builders.Box("b", ((11, 12), (13, 14))),
builders.Box("c", ((12, 13), (14, 15))),
builders.Box("d", ((13, 14), (15, 16)), 87),
builders.Box(u"\xe9", ((14, 15), (16, 17)), 88),
builders.Box("\xe9", ((14, 15), (16, 17)), 88),
]
builder.write_file(output, boxes)
output.seek(0)
output = output.read()
for box in boxes:
self.assertIn(box.content, output)
self.assertIn(u"{} {} {} {}".format(
self.assertIn("{} {} {} {}".format(
box.position[0][0], box.position[0][1],
box.position[1][0], box.position[1][1],
), output)
......
import unittest
import sys
try:
from unittest.mock import patch
except ImportError:
from mock import patch
from unittest.mock import patch
import pyocr
......@@ -83,17 +79,6 @@ class TestPyOCR(unittest.TestCase):
self.assertFalse(is_on_path("windows95"))
def test_to_unicode(self):
self.assertEqual(to_unicode("salut, \u00e7a va ?"),
u"salut, \u00e7a va ?")
@unittest.skipUnless(sys.version_info >= (3, 0),
"Test for python3 to_unicode")
def test_to_unicode_python3(self):
self.assertEqual(to_unicode("salut, \u00e7a va ?".encode("utf-8")),
u"salut, \u00e7a va ?".encode("utf-8"))
@unittest.skipIf(sys.version_info >= (3, 0),
"Test for python2 unicode support")
def test_to_unicode_python2(self):
self.assertEqual(to_unicode("salut, \u00e7a va ?".encode("utf-8")),
u"salut, \u00e7a va ?")
self.assertEqual(to_unicode("salut, ça va ?"), "salut, ça va ?")
self.assertEqual(to_unicode("salut, ça va ?".encode()),
"salut, ça va ?".encode())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment