Commit 9ea7d270 authored by Kai Willadsen

Create new matchers package for diff-related code

This commit also splits the threading-related code for handling
asynchronous diffs into a new meld.matchers.helpers module.
parent 67e847ef
......@@ -16,12 +16,8 @@
import copy
import functools
import logging
import math
import os
import queue
import threading
import time
from gi.repository import GLib
from gi.repository import GObject
......@@ -31,11 +27,10 @@ from gi.repository import Gtk
from gi.repository import GtkSource
from meld.conf import _
from . import diffutil
from . import matchers
from meld.matchers import diffutil
from . import meldbuffer
from . import melddoc
from . import merge
from meld.matchers import merge
from . import misc
from . import patchdialog
from . import recent
......@@ -44,36 +39,11 @@ from .ui import findbar
from .ui import gnomeglade
from meld.const import MODE_REPLACE, MODE_DELETE, MODE_INSERT, NEWLINES
from meld.matchers.helpers import CachedSequenceMatcher
from meld.settings import bind_settings, meldsettings
from meld.sourceview import LanguageManager, get_custom_encoding_candidates
log = logging.getLogger(__name__)
class MatcherWorker(threading.Thread):
    """Daemon thread that computes inline diffs for queued text pairs

    Tasks are read from `tasks` as ``(task_id, (text1, textn))`` tuples;
    the opcodes computed for each pair are written to `results` as
    ``(task_id, opcodes)`` tuples.
    """

    matcher_class = matchers.InlineMyersSequenceMatcher

    def __init__(self, tasks, results):
        """Store the task/result queues and mark the thread as a daemon

        @param tasks: queue supplying ``(task_id, (text1, textn))`` items
        @param results: queue receiving ``(task_id, opcodes)`` items
        """
        super(MatcherWorker, self).__init__()
        self.tasks = tasks
        self.results = results
        self.daemon = True

    def run(self):
        """Process tasks from the queue forever; this never returns"""
        while True:
            task_id, (text1, textn) = self.tasks.get()
            try:
                matcher = self.matcher_class(None, text1, textn)
                self.results.put((task_id, matcher.get_opcodes()))
            except Exception as e:
                # Keep the worker alive after a failed diff, but record the
                # traceback so the failure can be diagnosed. Note that no
                # result is posted for a task that fails.
                log.exception("Exception while running diff: %s", e)
            finally:
                self.tasks.task_done()
            # NOTE(review): presumably a yield so that other threads get a
            # chance to run between tasks -- confirm before removing.
            time.sleep(0)
def with_focused_pane(function):
@functools.wraps(function)
def wrap_function(*args, **kwargs):
......@@ -84,67 +54,6 @@ def with_focused_pane(function):
return wrap_function
class CachedSequenceMatcher(object):
    """Simple class for caching diff results, with LRU-based eviction

    Results from the SequenceMatcher are cached and timestamped, and
    subsequently evicted based on least-recent generation/usage. The LRU-based
    eviction is overly simplistic, but is okay for our usage pattern.

    Diffs that are not cached are computed by a MatcherWorker thread;
    callbacks are always dispatched through GLib.idle_add.
    """

    def __init__(self):
        # Maps (text1, textn) -> [opcodes, last-used timestamp]
        self.cache = {}
        self.tasks = queue.Queue()
        # Limiting the result queue here has the effect of giving us
        # much better interactivity. Without this limit, the
        # result-checker tends to get starved and all highlights get
        # delayed until we're almost completely finished.
        self.results = queue.Queue(5)
        self.thread = MatcherWorker(self.tasks, self.results)
        self.task_id = 1
        # Maps task id -> (texts, callback) for requests awaiting a result
        self.queued_matches = {}
        GLib.idle_add(self.thread.start)

    def match(self, text1, textn, cb):
        """Request the opcodes for a pair of texts

        On a cache hit the entry's timestamp is refreshed and the callback
        is scheduled with the cached opcodes; otherwise the pair is
        scheduled to be queued for the worker thread.

        @param text1: first text of the pair
        @param textn: second text of the pair
        @param cb: callable invoked with the resulting opcodes
        """
        texts = (text1, textn)
        try:
            entry = self.cache[texts]
        except KeyError:
            GLib.idle_add(lambda: self.enqueue_task(texts, cb))
            return
        entry[1] = time.time()
        opcodes = entry[0]
        GLib.idle_add(lambda: cb(opcodes))

    def enqueue_task(self, texts, cb):
        """Hand a text pair to the worker and remember its callback

        @param texts: the (text1, textn) tuple to diff
        @param cb: callable invoked with the resulting opcodes
        """
        if not self.queued_matches:
            # Nothing was pending, so no result poller is currently
            # scheduled; start one.
            GLib.idle_add(self.check_results)
        self.queued_matches[self.task_id] = (texts, cb)
        self.tasks.put((self.task_id, texts))
        self.task_id += 1

    def check_results(self):
        """Collect at most one finished result and schedule its callback

        @return: True while matches are still outstanding, so that this
            can be used directly as a repeating idle callback
        """
        try:
            task_id, opcodes = self.results.get_nowait()
        except queue.Empty:
            pass
        else:
            texts, cb = self.queued_matches.pop(task_id)
            self.cache[texts] = [opcodes, time.time()]
            GLib.idle_add(lambda: cb(opcodes))
        return bool(self.queued_matches)

    def clean(self, size_hint):
        """Clean the cache if necessary

        Nothing happens until the cache holds at least three times
        `size_hint` entries; it is then trimmed down to the
        `size_hint * 2` most recently used entries.

        @param size_hint: the recommended minimum number of cache entries
        """
        if len(self.cache) < size_hint * 3:
            return
        items = list(self.cache.items())
        items.sort(key=lambda it: it[1][1])
        # Compute the eviction count explicitly: slicing with
        # ``[:-size_hint * 2]`` evicts nothing when size_hint is 0,
        # because ``-0`` is just ``0``.
        evict_count = len(items) - size_hint * 2
        for key, _entry in items[:evict_count]:
            del self.cache[key]
MASK_SHIFT, MASK_CTRL = 1, 2
PANE_LEFT, PANE_RIGHT = -1, +1
......
......@@ -16,7 +16,7 @@
from meld.conf import _
from . import filediff
from . import merge
from meld.matchers import merge
from . import recent
......
......@@ -16,8 +16,8 @@
from gi.repository import GObject
from .matchers import DiffChunk, MyersSequenceMatcher, \
SyncPointMyersSequenceMatcher
from meld.matchers.myers import (
DiffChunk, MyersSequenceMatcher, SyncPointMyersSequenceMatcher)
opcode_reverse = {
......
import logging
import queue
import threading
import time
from gi.repository import GLib
from meld.matchers import myers
log = logging.getLogger(__name__)
class MatcherWorker(threading.Thread):
    """Daemon thread that computes inline diffs for queued text pairs

    Tasks are read from `tasks` as ``(task_id, (text1, textn))`` tuples;
    the opcodes computed for each pair are written to `results` as
    ``(task_id, opcodes)`` tuples.
    """

    matcher_class = myers.InlineMyersSequenceMatcher

    def __init__(self, tasks, results):
        """Store the task/result queues and mark the thread as a daemon

        @param tasks: queue supplying ``(task_id, (text1, textn))`` items
        @param results: queue receiving ``(task_id, opcodes)`` items
        """
        super(MatcherWorker, self).__init__()
        self.tasks = tasks
        self.results = results
        self.daemon = True

    def run(self):
        """Process tasks from the queue forever; this never returns"""
        while True:
            task_id, (text1, textn) = self.tasks.get()
            try:
                matcher = self.matcher_class(None, text1, textn)
                self.results.put((task_id, matcher.get_opcodes()))
            except Exception as e:
                # Keep the worker alive after a failed diff, but record the
                # traceback so the failure can be diagnosed. Note that no
                # result is posted for a task that fails.
                log.exception("Exception while running diff: %s", e)
            finally:
                self.tasks.task_done()
            # NOTE(review): presumably a yield so that other threads get a
            # chance to run between tasks -- confirm before removing.
            time.sleep(0)
class CachedSequenceMatcher(object):
    """Simple class for caching diff results, with LRU-based eviction

    Results from the SequenceMatcher are cached and timestamped, and
    subsequently evicted based on least-recent generation/usage. The LRU-based
    eviction is overly simplistic, but is okay for our usage pattern.

    Diffs that are not cached are computed by a MatcherWorker thread;
    callbacks are always dispatched through GLib.idle_add.
    """

    def __init__(self):
        # Maps (text1, textn) -> [opcodes, last-used timestamp]
        self.cache = {}
        self.tasks = queue.Queue()
        # Limiting the result queue here has the effect of giving us
        # much better interactivity. Without this limit, the
        # result-checker tends to get starved and all highlights get
        # delayed until we're almost completely finished.
        self.results = queue.Queue(5)
        self.thread = MatcherWorker(self.tasks, self.results)
        self.task_id = 1
        # Maps task id -> (texts, callback) for requests awaiting a result
        self.queued_matches = {}
        GLib.idle_add(self.thread.start)

    def match(self, text1, textn, cb):
        """Request the opcodes for a pair of texts

        On a cache hit the entry's timestamp is refreshed and the callback
        is scheduled with the cached opcodes; otherwise the pair is
        scheduled to be queued for the worker thread.

        @param text1: first text of the pair
        @param textn: second text of the pair
        @param cb: callable invoked with the resulting opcodes
        """
        texts = (text1, textn)
        try:
            entry = self.cache[texts]
        except KeyError:
            GLib.idle_add(lambda: self.enqueue_task(texts, cb))
            return
        entry[1] = time.time()
        opcodes = entry[0]
        GLib.idle_add(lambda: cb(opcodes))

    def enqueue_task(self, texts, cb):
        """Hand a text pair to the worker and remember its callback

        @param texts: the (text1, textn) tuple to diff
        @param cb: callable invoked with the resulting opcodes
        """
        if not self.queued_matches:
            # Nothing was pending, so no result poller is currently
            # scheduled; start one.
            GLib.idle_add(self.check_results)
        self.queued_matches[self.task_id] = (texts, cb)
        self.tasks.put((self.task_id, texts))
        self.task_id += 1

    def check_results(self):
        """Collect at most one finished result and schedule its callback

        @return: True while matches are still outstanding, so that this
            can be used directly as a repeating idle callback
        """
        try:
            task_id, opcodes = self.results.get_nowait()
        except queue.Empty:
            pass
        else:
            texts, cb = self.queued_matches.pop(task_id)
            self.cache[texts] = [opcodes, time.time()]
            GLib.idle_add(lambda: cb(opcodes))
        return bool(self.queued_matches)

    def clean(self, size_hint):
        """Clean the cache if necessary

        Nothing happens until the cache holds at least three times
        `size_hint` entries; it is then trimmed down to the
        `size_hint * 2` most recently used entries.

        @param size_hint: the recommended minimum number of cache entries
        """
        if len(self.cache) < size_hint * 3:
            return
        items = list(self.cache.items())
        items.sort(key=lambda it: it[1][1])
        # Compute the eviction count explicitly: slicing with
        # ``[:-size_hint * 2]`` evicts nothing when size_hint is 0,
        # because ``-0`` is just ``0``.
        evict_count = len(items) - size_hint * 2
        for key, _entry in items[:evict_count]:
            del self.cache[key]
......@@ -13,13 +13,13 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from . import diffutil
from . import matchers
from meld.matchers import diffutil
from meld.matchers.myers import MyersSequenceMatcher
class AutoMergeDiffer(diffutil.Differ):
_matcher = matchers.MyersSequenceMatcher
_matcher = MyersSequenceMatcher
# _matcher = PatienceSequenceMatcher
def __init__(self):
......
......@@ -30,6 +30,7 @@ setup(
keywords=['diff', 'merge'],
packages=[
'meld',
'meld.matchers',
'meld.ui',
'meld.vc',
],
......
......@@ -130,6 +130,7 @@ setup(
],
packages=[
'meld',
'meld.matchers',
'meld.ui',
'meld.vc',
],
......
......@@ -5,7 +5,7 @@ import pytest
import meld.gutterrendererchunk
from meld.gutterrendererchunk import GutterRendererChunkAction
from meld.const import MODE_REPLACE, MODE_DELETE, MODE_INSERT
from meld.matchers import DiffChunk
from meld.matchers.myers import DiffChunk
def make_chunk(chunk_type):
......
import unittest
from meld import matchers
from meld.matchers import myers
class MatchersTests(unittest.TestCase):
......@@ -9,7 +9,7 @@ class MatchersTests(unittest.TestCase):
a = list('abcbdefgabcdefg')
b = list('gfabcdefcd')
r = [(0, 2, 3), (4, 5, 3), (10, 8, 2), (15, 10, 0)]
matcher = matchers.MyersSequenceMatcher(None, a, b)
matcher = myers.MyersSequenceMatcher(None, a, b)
blocks = matcher.get_matching_blocks()
self.assertEqual(blocks, r)
......@@ -17,7 +17,7 @@ class MatchersTests(unittest.TestCase):
a = list('abcfabgcd')
b = list('afabcgabgcabcd')
r = [(0, 2, 3), (4, 6, 3), (7, 12, 2), (9, 14, 0)]
matcher = matchers.MyersSequenceMatcher(None, a, b)
matcher = myers.MyersSequenceMatcher(None, a, b)
blocks = matcher.get_matching_blocks()
self.assertEqual(blocks, r)
......@@ -25,7 +25,7 @@ class MatchersTests(unittest.TestCase):
a = 'red, blue, yellow, white'
b = 'black green, hue, white'
r = [(17, 16, 7), (24, 23, 0)]
matcher = matchers.InlineMyersSequenceMatcher(None, a, b)
matcher = myers.InlineMyersSequenceMatcher(None, a, b)
blocks = matcher.get_matching_blocks()
self.assertEqual(blocks, r)
......@@ -33,7 +33,7 @@ class MatchersTests(unittest.TestCase):
a = list('012a3456c789')
b = list('0a3412b5678')
r = [(0, 0, 1), (3, 1, 3), (6, 7, 2), (9, 9, 2), (12, 11, 0)]
matcher = matchers.SyncPointMyersSequenceMatcher(None, a, b)
matcher = myers.SyncPointMyersSequenceMatcher(None, a, b)
blocks = matcher.get_matching_blocks()
self.assertEqual(blocks, r)
......@@ -41,7 +41,7 @@ class MatchersTests(unittest.TestCase):
a = list('012a3456c789')
b = list('0a3412b5678')
r = [(0, 0, 1), (1, 4, 2), (6, 7, 2), (9, 9, 2), (12, 11, 0)]
matcher = matchers.SyncPointMyersSequenceMatcher(None, a, b, [(3, 6)])
matcher = myers.SyncPointMyersSequenceMatcher(None, a, b, [(3, 6)])
blocks = matcher.get_matching_blocks()
self.assertEqual(blocks, r)
......@@ -49,7 +49,7 @@ class MatchersTests(unittest.TestCase):
a = list('012a3456c789')
b = list('02a341b5678')
r = [(0, 0, 1), (2, 1, 1), (3, 2, 3), (9, 9, 2), (12, 11, 0)]
matcher = matchers.SyncPointMyersSequenceMatcher(
matcher = myers.SyncPointMyersSequenceMatcher(
None, a, b, [(3, 2), (8, 6)])
blocks = matcher.get_matching_blocks()
self.assertEqual(blocks, r)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment