diff --git a/src/ocrfeeder/studio/widgetPresenter.py b/src/ocrfeeder/studio/widgetPresenter.py index 088fa06c5d609060cb7f998ac303edbced8cbd72..47a8ca92ffe079b8e25a76d0b15531ed51ef4301 100644 --- a/src/ocrfeeder/studio/widgetPresenter.py +++ b/src/ocrfeeder/studio/widgetPresenter.py @@ -17,6 +17,7 @@ # along with this program. If not, see . ########################################################################### +import shlex from .dataHolder import DataBox, TEXT_TYPE, IMAGE_TYPE from ocrfeeder.util import lib, PAPER_SIZES from ocrfeeder.util.configuration import ConfigurationManager @@ -972,7 +973,7 @@ class UnpaperDialog(Gtk.Dialog): unpapered_image = os.path.splitext(name)[0] + '_unpapered.ppm' if os.path.exists(unpapered_image): unpapered_image = lib.getNonExistingFileName(unpapered_image) - command += ' %s %s' % (name, unpapered_image) + command += [name, unpapered_image] progress_bar = CommandProgressBarDialog(self, command, _('Performing Unpaper'), _('Performing unpaper. Please wait…')) progress_bar.run() self.unpapered_image = unpapered_image @@ -1107,22 +1108,23 @@ class UnpaperPreferences(Gtk.VBox): self.gray_filter_size.set_sensitive(has_gray_filter) def getUnpaperCommand(self): - command = '%s --layout single' % self.configuration_manager.unpaper + command = [ + self.configuration_manager.unpaper, + '--layout', 'single', + ] if not self.black_filter_usage.get_active(): - command += ' --no-blackfilter' + command.append('--no-blackfilter') if self.noise_filter_none.get_active(): - command += ' --no-noisefilter' + command.append('--no-noisefilter') elif self.noise_filter_custom.get_active(): - command += ' --noisefilter-intensity %s' % \ - self.noise_filter_intensity.get_value() + command += ['--noisefilter-intensity', self.noise_filter_intensity.get_value()] if self.gray_filter_none.get_active(): - command += ' --no-grayfilter' + command.append('--no-grayfilter') elif self.gray_filter_custom.get_active(): - command += ' --grayfilter-size %s' % \ - self.gray_filter_size.get_value() - extra_options_text = self.extra_options.get_text() - if extra_options_text.strip(): - command += ' %s ' % extra_options_text + command += ['--grayfilter-size', self.gray_filter_size.get_value()] + extra_options_text = self.extra_options.get_text().strip() + if extra_options_text: + command += shlex.split(extra_options_text) return command def save(self): @@ -1206,7 +1208,7 @@ class CommandProgressBarDialog(Gtk.Dialog): def __startPulse(self): try: - self.process = subprocess.Popen(self.command.split(), stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize=1) + self.process = subprocess.Popen(self.command, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize=1) except: warning = SimpleDialog(self, _('An error occurred!'), _('Error'), 'warning') warning.run() diff --git a/src/ocrfeeder/util/lib.py b/src/ocrfeeder/util/lib.py index b1c7767caf7ff9c5d9ddb198a0a897971f176cbc..cd4ab95fdd4bf6524e83dbd2675b8e7b8454a3fe 100644 --- a/src/ocrfeeder/util/lib.py +++ b/src/ocrfeeder/util/lib.py @@ -17,21 +17,23 @@ # along with this program. If not, see . ########################################################################### +import math +import mimetypes import os +import re +import shlex import subprocess -import mimetypes -from PIL import Image import tempfile +import xml.etree.ElementTree as etree + +import sane from gi.repository import Gtk -import math +from PIL import Image + from .constants import * -import sane -import tempfile -import locale -import re -import xml.etree.ElementTree as etree from .log import debug + def getIconOrLabel(icon_name, label_text, icon_size = Gtk.IconSize.SMALL_TOOLBAR): icon = Gtk.Image() theme = Gtk.IconTheme.get_default() @@ -47,11 +49,8 @@ def getIconOrLabel(icon_name, label_text, icon_size = Gtk.IconSize.SMALL_TOOLBAR def getSafeGhostscriptPath(file_path): return re.sub(r'[^\w !#$%&()*+,./:;<=>?@\[\\\]^_`{|}~-]', '_', file_path) -def getSafeGhostscriptInputFilename(file_name): - return re.sub(r'[/]', '_', getSafeGhostscriptPath(file_name)) - def getSafeGhostscriptOutputBasename(file_name): - return re.sub(r'[%]', '_', getSafeGhostscriptInputFilename(file_name)) + return re.sub(r'[%]', '_', getSafeGhostscriptPath(file_name)) def convertPdfToImages(pdf_file, temp_dir = '/tmp'): if not os.path.isfile(pdf_file): @@ -67,41 +66,27 @@ def convertPdfToImages(pdf_file, temp_dir = '/tmp'): file_name = os.path.basename(pdf_file) base_name = os.path.splitext(file_name)[0] - file_name_safe = getSafeGhostscriptInputFilename(file_name) + # GhostScript will interpret the '%' character, which is allowed + # in Unix file names, so we need to escape it base_name_safe = getSafeGhostscriptOutputBasename(base_name) - pdf_file_safe = getSafeGhostscriptPath(pdf_file) - if pdf_file != pdf_file_safe: - try: - # The prefix added here is for extra safety so there are less chances - # for this path to match the original one in future changes. - pdf_path_safe = os.path.join(dir_name, 'OCRFEEDER_' + file_name_safe) - os.symlink(pdf_file, pdf_path_safe) - except: - debug('Unable to convert PDF: Cannot create temp symlink in: %s', dir_name) - return None - - runGhostscript(dir_name, base_name_safe, pdf_path_safe) - try: - os.unlink(pdf_path_safe) - except: - debug('PDF conversion warning: Cannot remove temp symlink: %s', pdf_path_safe) - else: - runGhostscript(dir_name, base_name_safe, pdf_file) + runGhostscript(dir_name, base_name_safe, pdf_file) return dir_name def runGhostscript(dir_name, base_name, pdf_path, format = 'jpeg', resolution = 300, size = 'letter'): - command = 'gs -SDEVICE=%(format)s -r%(resolution)sx%(resolution)s -sPAPERSIZE=%(size)s ' \ - '-sOutputFile=\'%(temp_name)s/%(file_name)s_%%04d.jpg\' ' \ - '-dNOPAUSE -dBATCH -- \'%(pdf_file)s\'' % \ - {'format': format, - 'temp_name': dir_name, - 'file_name': base_name, - 'pdf_file': pdf_path, - 'resolution': resolution, - 'size': size} - process = subprocess.run(command, shell=True) + command = [ + 'gs', + f'-SDEVICE={format}', + f'-r{resolution}x{resolution}', + f'-sPAPERSIZE={size}', + f'-sOutputFile={dir_name}/{base_name}_%04d.jpg', + '-dNOPAUSE', + '-dBATCH', + '--', + pdf_path + ] + subprocess.run(command) def getImagesFromFolder(folder): if folder is None: @@ -159,20 +144,24 @@ def getExecPath(exec_name): return real_exec_name def getUnpaperCommand(configuration_manager): - command = '%s --layout single --overwrite ' % configuration_manager.unpaper + command = [ + configuration_manager.unpaper, + '--layout', 'single', + '--overwrite', + ] if not configuration_manager.unpaper_use_black_filter: - command += ' --no-blackfilter' + command.append('--no-blackfilter') if configuration_manager.unpaper_noise_filter_intensity == 'none': - command += ' --no-noisefilter' + command.append('--no-noisefilter') elif configuration_manager.unpaper_noise_filter_intensity != 'auto': - command += ' --noisefilter-intensity %s' % \ - configuration_manager.unpaper_noise_filter_intensity + command += ['--noisefilter-intensity', configuration_manager.unpaper_noise_filter_intensity] if configuration_manager.unpaper_gray_filter_size == 'none': - command += ' --no-grayfilter' + command.append('--no-grayfilter') elif configuration_manager.unpaper_gray_filter_size != 'auto': - command += ' --grayfilter-size %s' % \ - configuration_manager.unpaper_gray_filter_size - command += ' %s ' % configuration_manager.unpaper_extra_options + command += ['--grayfilter-size', configuration_manager.unpaper_gray_filter_size] + extra_options = configuration_manager.unpaper_extra_options.strip() + if extra_options: + command += shlex.split(extra_options) return command def unpaperImage(configuration_manager, image_path): @@ -186,10 +175,10 @@ def unpaperImage(configuration_manager, image_path): image_path = Image.open(image_path) image_path.save(unpapered_in, format = 'PPM') command = getUnpaperCommand(configuration_manager) - command += ' %s %s' % (unpapered_in, unpapered_name) + command += [unpapered_in, unpapered_name] debug(command) try: - os.system(command) + subprocess.run(command) except Exception as exception: debug(exception) return None