diffutil.py 8.85 KB
Newer Older
Stephen Kennedy's avatar
Stephen Kennedy committed
1
### Copyright (C) 2002-2006 Stephen Kennedy <stevek@gnome.org>
steve9000's avatar
steve9000 committed
2 3 4 5 6 7 8 9 10 11 12 13 14 15

### This program is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; either version 2 of the License, or
### (at your option) any later version.

### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
### GNU General Public License for more details.

### You should have received a copy of the GNU General Public License
### along with this program; if not, write to the Free Software
### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
steve9000's avatar
steve9000 committed
16 17 18

import difflib

steve9000's avatar
steve9000 committed
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
################################################################################
#
# Differ
#
################################################################################
class IncrementalSequenceMatcher(difflib.SequenceMatcher):
    def __init__(self, isjunk=None, a="", b=""):
        difflib.SequenceMatcher.__init__(self, isjunk, a, b)

    def initialise(self):
        la, lb = len(self.a), len(self.b)
        todo = [(0, la, 0, lb)]
        done = []
        while len(todo):
            alo, ahi, blo, bhi = todo.pop(0)
            i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
            if k:
                yield None
                done.append( (i,x) )
                if alo < i and blo < j:
                    todo.append( (alo, i, blo, j) )
                if i+k < ahi and j+k < bhi:
                    todo.append( (i+k, ahi, j+k, bhi) )
        done.append( (la, (la, lb, 0)) )
        done.sort()
        self.matching_blocks = [x[1] for x in done]
        yield 1

    def get_difference_opcodes(self):
        return filter(lambda x: x[0]!="equal", self.get_opcodes())
steve9000's avatar
steve9000 committed
49

50 51 52 53 54 55 56 57 58 59 60 61

opcode_reverse = {
    "replace"  : "replace",
    "insert"   : "delete",
    "delete"   : "insert",
    "conflict" : "conflict",
    "equal"    : "equal"
}

def reverse_chunk(chunk):
    return opcode_reverse[chunk[0]], chunk[3], chunk[4], chunk[1], chunk[2]

steve9000's avatar
steve9000 committed
62 63 64 65 66
################################################################################
#
# Differ
#
################################################################################
67
class Differ(object):
steve9000's avatar
steve9000 committed
68
    """Utility class to hold diff2 or diff3 chunks"""
69
    def __init__(self):
70
        # Internally, diffs are stored from text1 -> text0 and text1 -> text2.
71 72 73
        self.num_sequences = 0
        self.seqlength = [0, 0, 0]
        self.diffs = [[], []]
Stephen Kennedy's avatar
Stephen Kennedy committed
74

75
    def change_sequence(self, sequence, startidx, sizechange, texts):
Kai Willadsen's avatar
Kai Willadsen committed
76 77
        assert sequence in (0, 1, 2)
        if sequence == 0 or sequence == 1:
78
            self._change_sequence(0, sequence, startidx, sizechange, texts)
Kai Willadsen's avatar
Kai Willadsen committed
79 80
        if sequence == 2 or (sequence == 1 and self.num_sequences == 3):
            self._change_sequence(1, sequence, startidx, sizechange, texts)
81
        self.seqlength[sequence] += sizechange
steve9000's avatar
steve9000 committed
82

83 84
    def _locate_chunk(self, whichdiffs, sequence, line):
        """Find the index of the chunk which contains line."""
Kai Willadsen's avatar
Kai Willadsen committed
85 86 87 88 89
        high_index = 2 + 2 * int(sequence != 1)
        for i, c in enumerate(self.diffs[whichdiffs]):
            if line < c[high_index]:
                return i
        return len(self.diffs[whichdiffs])
90

91
    def _change_sequence(self, which, sequence, startidx, sizechange, texts):
92
        diffs = self.diffs[which]
steve9000's avatar
steve9000 committed
93 94
        lines_added = [0,0,0]
        lines_added[sequence] = sizechange
95
        loidx = self._locate_chunk(which, sequence, startidx)
steve9000's avatar
steve9000 committed
96
        if sizechange < 0:
97
            hiidx = self._locate_chunk(which, sequence, startidx-sizechange)
steve9000's avatar
steve9000 committed
98 99 100
        else:
            hiidx = loidx
        if loidx > 0:
steve9000's avatar
steve9000 committed
101
            loidx -= 1
102
            lorange = diffs[loidx][3], diffs[loidx][1]
steve9000's avatar
steve9000 committed
103 104 105 106
        else:
            lorange = (0,0)
        x = which*2
        if hiidx < len(diffs):
steve9000's avatar
steve9000 committed
107
            hiidx += 1
108
            hirange = diffs[hiidx-1][4], diffs[hiidx-1][2]
steve9000's avatar
steve9000 committed
109
        else:
110
            hirange = self.seqlength[x], self.seqlength[1]
steve9000's avatar
steve9000 committed
111
        #print "diffs", loidx, hiidx, len(diffs), lorange, hirange #diffs[loidx], diffs[hiidx-1]
steve9000's avatar
steve9000 committed
112 113
        rangex = lorange[0], hirange[0] + lines_added[x]
        range1 = lorange[1], hirange[1] + lines_added[1]
steve9000's avatar
steve9000 committed
114
        #print "^^^^^", rangex, range1
steve9000's avatar
steve9000 committed
115
        assert rangex[0] <= rangex[1] and range1[0] <= range1[1]
116 117
        linesx = texts[x][rangex[0]:rangex[1]]
        lines1 = texts[1][range1[0]:range1[1]]
steve9000's avatar
steve9000 committed
118
        #print "<<<\n%s\n===\n%s\n>>>" % ("\n".join(linesx),"\n".join(lines1))
steve9000's avatar
steve9000 committed
119
        newdiffs = IncrementalSequenceMatcher( None, lines1, linesx).get_difference_opcodes()
120
        newdiffs = [ (c[0], c[1]+range1[0],c[2]+range1[0], c[3]+rangex[0],c[4]+rangex[0]) for c in newdiffs]
steve9000's avatar
steve9000 committed
121 122
        if hiidx < len(self.diffs[which]):
            self.diffs[which][hiidx:] = [ (c[0],
123 124
                                           c[1] + lines_added[1], c[2] + lines_added[1],
                                           c[3] + lines_added[x], c[4] + lines_added[x])
steve9000's avatar
steve9000 committed
125 126 127
                                                for c in self.diffs[which][hiidx:] ]
        self.diffs[which][loidx:hiidx] = newdiffs

128 129 130 131 132 133 134
    def all_changes(self, texts):
        for c in self._merge_diffs(self.diffs[0], self.diffs[1], texts):
            yield c

    def pair_changes(self, fromindex, toindex, texts):
        """Give all changes between file1 and either file0 or file2.
        """
steve9000's avatar
steve9000 committed
135
        if fromindex == 1:
136 137 138 139 140 141 142 143
            seq = toindex/2
            for c in self.all_changes( texts ):
                if c[seq]:
                    yield c[seq]
        else:
            seq = fromindex/2
            for c in self.all_changes( texts ):
                if c[seq]:
144
                    yield reverse_chunk(c[seq])
145

146 147 148 149 150 151
    def single_changes(self, textindex, texts):
        """Give changes for single file only. do not return 'equal' hunks.
        """
        if textindex in (0,2):
            seq = textindex/2
            for cs in self.all_changes( texts ):
152 153
                if cs[seq]:
                    yield reverse_chunk(cs[seq]) + (1,)
154 155 156
        else:
            for cs in self.all_changes( texts ):
                if cs[0]:
157
                    yield cs[0] + (0,)
158
                elif cs[1]:
159
                    yield cs[1] + (2,)
160

161 162 163
    def sequences_identical(self):
        return self.diffs == [[], []]

164
    def _merge_blocks(self, using):
165
        LO, HI = 1,2
166 167
        lowc  =  min(using[0][ 0][LO], using[1][ 0][LO])
        highc =  max(using[0][-1][HI], using[1][-1][HI])
steve9000's avatar
steve9000 committed
168 169 170
        low = []
        high = []
        for i in (0,1):
171 172 173 174
            d = using[i][0]
            low.append(lowc - d[LO] + d[2+LO])
            d = using[i][-1]
            high.append(highc - d[HI] + d[2+HI])
steve9000's avatar
steve9000 committed
175
        return low[0], high[0], lowc, highc, low[1], high[1]
steve9000's avatar
steve9000 committed
176

steve9000's avatar
steve9000 committed
177
    def _merge_diffs(self, seq0, seq1, texts):
178
        seq0, seq1 = seq0[:], seq1[:]
steve9000's avatar
steve9000 committed
179
        seq = seq0, seq1
180
        LO, HI = 1,2
steve9000's avatar
steve9000 committed
181 182
        while len(seq0) or len(seq1):
            if len(seq0) == 0:
183
                high_seq = 1
steve9000's avatar
steve9000 committed
184
            elif len(seq1) == 0:
185
                high_seq = 0
186
            else:
187
                high_seq = int(seq0[0][LO] > seq1[0][LO])
steve9000's avatar
steve9000 committed
188

steve9000's avatar
steve9000 committed
189 190
            high_diff = seq[high_seq].pop(0)
            high_mark = high_diff[HI]
191
            other_seq = high_seq ^ 1
steve9000's avatar
steve9000 committed
192

steve9000's avatar
steve9000 committed
193 194
            using = [[], []]
            using[high_seq].append(high_diff)
steve9000's avatar
steve9000 committed
195

196 197 198 199
            while seq[other_seq]:
                other_diff = seq[other_seq][0]
                if high_mark < other_diff[LO]:
                    break
200

steve9000's avatar
steve9000 committed
201 202 203 204
                using[other_seq].append(other_diff)
                seq[other_seq].pop(0)

                if high_mark < other_diff[HI]:
205
                    (high_seq, other_seq) = (other_seq, high_seq)
steve9000's avatar
steve9000 committed
206 207 208
                    high_mark = other_diff[HI]

            if len(using[0])==0:
209 210
                assert len(using[1])==1
                yield None, using[1][0]
steve9000's avatar
steve9000 committed
211
            elif len(using[1])==0:
212 213
                assert len(using[0])==1
                yield using[0][0], None
steve9000's avatar
steve9000 committed
214
            else:
215
                l0, h0, l1, h1, l2, h2 = self._merge_blocks(using)
steve9000's avatar
steve9000 committed
216
                if h0-l0 == h2-l2 and texts[0][l0:h0] == texts[2][l2:h2]:
217
                    if l1 != h1:
218
                        tag = "replace"
219
                    else:
220
                        tag = "insert"
221
                else:
222 223 224
                    tag = "conflict"
                out0 = (tag, l1, h1, l0, h0)
                out1 = (tag, l1, h1, l2, h2)
225
                yield out0, out1
steve9000's avatar
steve9000 committed
226

227
    def set_sequences_iter(self, *sequences):
228 229 230 231 232 233 234
        assert 0 <= len(sequences) <= 3
        self.diffs = [[], []]
        self.num_sequences = len(sequences)
        self.seqlength = [len(s) for s in sequences]

        for i in range(self.num_sequences - 1):
            matcher = IncrementalSequenceMatcher(None, sequences[1], sequences[i*2])
235 236 237
            work = matcher.initialise()
            while work.next() == None:
                yield None
238
            self.diffs[i] = matcher.get_difference_opcodes()
239 240
        yield 1