001    /*--------------------------------------------------------------------------+
002    $Id: Diff.java 26283 2010-02-18 11:18:57Z juergens $
003    |                                                                          |
004    | Copyright 2005-2010 Technische Universitaet Muenchen                     |
005    |                                                                          |
006    | Licensed under the Apache License, Version 2.0 (the "License");          |
007    | you may not use this file except in compliance with the License.         |
008    | You may obtain a copy of the License at                                  |
009    |                                                                          |
010    |    http://www.apache.org/licenses/LICENSE-2.0                            |
011    |                                                                          |
012    | Unless required by applicable law or agreed to in writing, software      |
013    | distributed under the License is distributed on an "AS IS" BASIS,        |
014    | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
015    | See the License for the specific language governing permissions and      |
016    | limitations under the License.                                           |
017    +--------------------------------------------------------------------------*/
018    package edu.tum.cs.commons.algo;
019    
020    import java.util.ArrayList;
021    import java.util.Arrays;
022    import java.util.List;
023    
024    import edu.tum.cs.commons.assertion.CCSMPre;
025    import edu.tum.cs.commons.string.StringUtils;
026    
027    /**
028     * Implementation of the diff algorithm described in: E.W. Myers: "An O(ND)
029     * Difference Algorithm and Its Variations".
030     * <p>
031     * Let N be the sum of the concatenated input strings and D the size of the
032     * delta (i.e. the number of changes required to transform one string into the
033     * other). Then the time complexity is O(ND) and the space complexity is O(D^2).
034     * 
035     * @param <T>
036     *            The type of objects for which the diff is constructed.
037     * 
038     * @author hummelb
039     * @author $Author: juergens $
040     * @version $Rev: 26283 $
041     * @levd.rating GREEN Hash: 2B0182DC254FAFBACB0727A1A353E51C
042     */
043    public class Diff<T> {
044    
045            /** The first list of objects. */
046            private final List<T> a;
047    
048            /** The second list of objects. */
049            private final List<T> b;
050    
051            /** Length of {@link #a}. */
052            private final int n;
053    
054            /** Length of {@link #b}. */
055            private final int m;
056    
057            /** The maximal possible difference between {@link #a} and {@link #b}. */
058            private final int max;
059    
060            /**
061             * The array for storing the positions on each diagonal. This is an
062             * "unrolled" version compared to the original paper, i.e. we create a new
063             * array for each iteration of the main loop.
064             */
065            private final int[][] v;
066    
067            /**
068             * This array stores from where we came during the
069             * {@link #calculateDeltaSize()} method. Its structure is the same as
070             * {@link #v}.
071             */
072            private final boolean[][] from;
073    
074            /**
075             * Hidden constructor. Use one of the {@link #computeDelta(List, List)} or
076             * {@link #computeDelta(Object[], Object[])} methods instead.
077             */
078            private Diff(List<T> a, List<T> b) {
079                    this.a = a;
080                    this.b = b;
081    
082                    n = a.size();
083                    m = b.size();
084                    max = n + m;
085                    v = new int[max + 1][];
086                    from = new boolean[max + 1][];
087            }
088    
089            /** Performs the actual computations. */
090            private Delta<T> computeDelta() {
091                    return constructDelta(calculateDeltaSize());
092            }
093    
094            /** Constructs the actual delta. */
095            private Delta<T> constructDelta(int size) {
096                    int d = size;
097                    int k = -size;
098                    while (v[size][size + k] < n || v[size][d + k] - k < m) {
099                            ++k;
100                    }
101    
102                    Delta<T> delta = new Delta<T>(size, n, m);
103    
104                    int difference = n - m;
105                    while (d > 0) {
106                            if (from[d][d + k]) {
107                                    ++k;
108                            } else {
109                                    --k;
110                            }
111                            --d;
112    
113                            int x = v[d][d + k];
114                            int y = x - k;
115    
116                            int newDifference = x - y;
117                            if (newDifference > difference) {
118                                    delta.position[d] = y + 1;
119                                    delta.t[d] = b.get(y);
120                            } else {
121                                    delta.position[d] = -x - 1;
122                                    delta.t[d] = a.get(x);
123                            }
124                            difference = newDifference;
125                    }
126                    return delta;
127            }
128    
129            /**
130             * Calculates the size of the delta (i.e. the number of additions and
131             * deletions. Additionally the {@link #v} and {@link #from} arrays are
132             * filled.
133             */
134            private int calculateDeltaSize() {
135                    int size = -1;
136                    for (int d = 0; size < 0 && d <= max; ++d) {
137                            v[d] = new int[2 * d + 1];
138                            from[d] = new boolean[2 * d + 1];
139                            for (int k = -d; k <= d; k += 2) {
140                                    int x = 0;
141                                    if (d > 0) {
142                                            if (k == -d
143                                                            || k != d
144                                                            && v[d - 1][d - 1 + k - 1] < v[d - 1][d - 1 + k + 1]) {
145                                                    x = v[d - 1][d - 1 + k + 1];
146                                                    from[d][d + k] = true;
147                                            } else {
148                                                    x = v[d - 1][d - 1 + k - 1] + 1;
149                                                    from[d][d + k] = false;
150                                            }
151                                    }
152                                    int y = x - k;
153                                    while (x < n && y < m && a.get(x).equals(b.get(y))) {
154                                            ++x;
155                                            ++y;
156                                    }
157                                    v[d][d + k] = x;
158                                    if (x >= n && y >= m) {
159                                            size = d;
160                                    }
161                            }
162                    }
163                    return size;
164            }
165    
166            /**
167             * Applies the diff algorithm on the supplied arrays and returns the delta
168             * between them.
169             * 
170             * @param a
171             *            the first "word", i.e., array of objects to produce a delta
172             *            for.
173             * @param b
174             *            the second "word", i.e., array of objects to produce a delta
175             *            for.
176             * @return a delta containing the differences between a and b.
177             */
178            public static <T> Delta<T> computeDelta(T[] a, T[] b) {
179                    return computeDelta(Arrays.asList(a), Arrays.asList(b));
180            }
181    
182            /**
183             * Applies the diff algorithm on the supplied lists and returns the delta
184             * between them.
185             * 
186             * @param a
187             *            the first "word", i.e., list of objects to produce a delta
188             *            for.
189             * @param b
190             *            the second "word", i.e., list of objects to produce a delta
191             *            for.
192             * @return a delta containing the differences between a and b.
193             */
194            public static <T> Delta<T> computeDelta(List<T> a, List<T> b) {
195                    return new Diff<T>(a, b).computeDelta();
196            }
197    
198            /**
199             * Objects of this class describe the additions and deletions used to
200             * transform between two words.
201             */
202            public static class Delta<T> {
203    
204                    /** The size of the first word. */
205                    private final int n;
206    
207                    /** The size of the second word. */
208                    private final int m;
209    
210                    /**
211                     * This array stores the position at which a string is changed. If it is
212                     * positive, it indicates an addition (i.e. the position is for the
213                     * second string). Otherwise it is a deletion (i.e. the (negated)
214                     * position is for the first string). To allow storing a sign for
215                     * position 0, all positions are incremented before (so this has to be
216                     * compensated for).
217                     */
218                    private final int[] position;
219    
220                    /**
221                     * This array stores the characters which are added or deleted
222                     * (interpretation depends on {@link #position}).
223                     */
224                    private final T[] t;
225    
226                    /** Create new delta of given size. */
227                    @SuppressWarnings("unchecked")
228                    private Delta(int size, int n, int m) {
229                            this.n = n;
230                            this.m = m;
231                            position = new int[size];
232                            t = (T[]) new Object[size];
233                    }
234    
235                    /**
236                     * Returns the size of the delta, i.e. the number of additions and
237                     * deletions.
238                     */
239                    public int getSize() {
240                            return position.length;
241                    }
242    
243                    /** Returns the size of the first word the delta was created for. */
244                    public int getN() {
245                            return n;
246                    }
247    
248                    /** Returns the size of the second word the delta was created for. */
249                    public int getM() {
250                            return m;
251                    }
252    
253                    /** Returns the i-th element stored as addition or deletion. */
254                    public T getT(int i) {
255                            return t[i];
256                    }
257    
258                    /**
259                     * Returns the i-th element of the change positions. If it is positive,
260                     * it indicates an addition (i.e. the position is for the second
261                     * string). Otherwise it is a deletion (i.e. the (negated) position is
262                     * for the first string). To allow storing a sign for position 0, all
263                     * positions are incremented before (so this has to be compensated for).
264                     */
265                    public int getPosition(int i) {
266                            return position[i];
267                    }
268    
269                    /**
270                     * Applies the forward patch, i.e. if the first string is inserted, then
271                     * the second string is returned. The input word must be of length n,
272                     * the output word will be of length m.
273                     */
274                    public List<T> forwardPatch(List<T> a) {
275                            CCSMPre.isTrue(a.size() == n, "Input word must be of size " + n);
276                            return doPatch(a, new ArrayList<T>(m), 1);
277                    }
278    
279                    /**
280                     * Applies the backward patch, i.e. if the second string is inserted,
281                     * then the first string is returned. The input word must be of length
282                     * m, the output word will be of length n.
283                     */
284                    public List<T> backwardPatch(List<T> b) {
285                            CCSMPre.isTrue(b.size() == m, "Input word must be of size " + m);
286                            return doPatch(b, new ArrayList<T>(n), -1);
287                    }
288    
289                    /**
290                     * Performs the patching from a to b put pre-multiplying the positions
291                     * with the given factor.
292                     */
293                    private List<T> doPatch(List<T> a, List<T> b, int positionFactor) {
294                            int posA = 0;
295                            int posB = 0;
296    
297                            for (int j = 0; j < position.length; ++j) {
298                                    int k = position[j] * positionFactor;
299                                    if (k > 0) {
300                                            // add character
301                                            k = k - 1;
302                                            while (posB < k) {
303                                                    b.add(a.get(posA));
304                                                    ++posA;
305                                                    ++posB;
306                                            }
307                                            b.add(t[j]);
308                                            ++posB;
309                                    } else {
310                                            // delete character
311                                            k = -k - 1;
312                                            while (posA < k) {
313                                                    b.add(a.get(posA));
314                                                    ++posA;
315                                                    ++posB;
316                                            }
317                                            ++posA;
318                                    }
319                            }
320                            while (posA < a.size()) {
321                                    b.add(a.get(posA));
322                                    ++posA;
323                                    ++posB;
324                            }
325    
326                            return b;
327                    }
328    
329                    /** {@inheritDoc} */
330                    @Override
331                    public String toString() {
332                            StringBuilder sb = new StringBuilder();
333                            for (int i = 0; i < position.length; ++i) {
334                                    sb.append(Math.abs(position[i]) - 1);
335                                    if (position[i] > 0) {
336                                            sb.append("+ ");
337                                    } else {
338                                            sb.append("- ");
339                                    }
340                                    sb.append(t[i] + StringUtils.CR);
341                            }
342                            return sb.toString();
343                    }
344            }
345    }