001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.tools;
003
004//// Taken from http://www.bmsi.com/java/#diff
005
006// http://www.bmsi.com/java/DiffPrint.java could also be useful
007
008/*
009 * $Log: Diff.java,v $
010 * Revision 1.15  2013/04/01 16:27:31  stuart
011 * Fix DiffPrint unified format with test cases.
012 * Begin porting some diff-2.7 features.
013 *
014 * Revision 1.14  2010/03/03 21:21:25  stuart
015 * Test new direct equivalence API
016 *
017 * Revision 1.13  2009/12/07 17:43:17  stuart
018 * Compute equivMax for int[] ctor
019 *
020 * Revision 1.12  2009/12/07 17:34:46  stuart
021 * Ctor with int[].
022 *
023 * Revision 1.11  2009/11/15 01:11:54  stuart
024 * Diff doesn't need to be generic
025 *
026 * Revision 1.10  2009/11/15 00:54:03  stuart
027 * Update to Java 5 containers
028 *
029 * Revision 1.7  2009/01/19 03:05:26  stuart
030 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
031 *
032 * Revision 1.6  2003/03/06 22:51:32  stuart
033 * Convert to CVS
034 *
035 * Revision 1.5  2002/07/19  19:14:40  stuart
036 * fix reverseScript, make change ctor public, update docs
037 *
038 * Revision 1.4  2002/04/09  17:53:39  stuart
039 * More flexible interface for diff() function.
040 *
041 * Revision 1.3  2000/03/03 21:58:03  stuart
042 * move discard_confusing_lines and shift_boundaries to class file_data
043 *
044 * Revision 1.2  2000/03/02  16:37:38  stuart
045 * Add GPL and copyright
046 *
047 */
048
049import java.util.HashMap;
050import java.util.Map;
051
052/** A class to compare vectors of objects.  The result of comparison
053    is a list of <code>change</code> objects which form an
054    edit script.  The objects compared are traditionally lines
055    of text from two files.  Comparison options such as "ignore
056    whitespace" are implemented by modifying the <code>equals</code>
057    and <code>hashcode</code> methods for the objects compared.
058<p>
059   The basic algorithm is described in: <br>
060   "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
061   Algorithmica Vol. 1 No. 2, 1986, p 251.
062<p>
063   This class outputs different results from GNU diff 1.15 on some
064   inputs.  Our results are actually better (smaller change list, smaller
065   total size of changes), but it would be nice to know why.  Perhaps
066   there is a memory overwrite bug in GNU diff 1.15.
067
068  @author Stuart D. Gathman, translated from GNU diff 1.15
069    Copyright (C) 2000  Business Management Systems, Inc.
070<p>
071    This program is free software; you can redistribute it and/or modify
072    it under the terms of the GNU General Public License as published by
073    the Free Software Foundation; either version 1, or (at your option)
074    any later version.
075<p>
076    This program is distributed in the hope that it will be useful,
077    but WITHOUT ANY WARRANTY; without even the implied warranty of
078    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
079    GNU General Public License for more details.
080<p>
081    You should have received a copy of the <a href=COPYING.txt>
082    GNU General Public License</a>
083    along with this program; if not, write to the Free Software
084    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
085 */
086public class Diff {
087
088    /** Prepare to find differences between two arrays.  Each element of
089      the arrays is translated to an "equivalence number" based on
090      the result of <code>equals</code>.  The original Object arrays
091      are no longer needed for computing the differences.  They will
092      be needed again later to print the results of the comparison as
093      an edit script, if desired.
094     * @param a first array
095     * @param b second array
096     */
097    public Diff(Object[] a, Object[] b) {
098        Map<Object, Integer> h = new HashMap<>(a.length + b.length);
099        filevec = new FileData[] {new FileData(a, h), new FileData(b, h)};
100    }
101
102    /** 1 more than the maximum equivalence value used for this or its
103     sibling file. */
104    private int equivMax = 1;
105
106    private int[] xvec, yvec; /* Vectors being compared. */
107    private int[] fdiag;      /* Vector, indexed by diagonal, containing
108                   the X coordinate of the point furthest
109                   along the given diagonal in the forward
110                   search of the edit matrix. */
111    private int[] bdiag;      /* Vector, indexed by diagonal, containing
112                   the X coordinate of the point furthest
113                   along the given diagonal in the backward
114                   search of the edit matrix. */
115    private int fdiagoff, bdiagoff;
116    private final FileData[] filevec;
117    private int cost;
118
119    /**
120     * Find the midpoint of the shortest edit script for a specified
121     * portion of the two files.
122     *
123     * We scan from the beginnings of the files, and simultaneously from the ends,
124     * doing a breadth-first search through the space of edit-sequence.
125     * When the two searches meet, we have found the midpoint of the shortest
126     * edit sequence.
127     *
128     * The value returned is the number of the diagonal on which the midpoint lies.
129     * The diagonal number equals the number of inserted lines minus the number
130     * of deleted lines (counting only lines before the midpoint).
131     * The edit cost is stored into COST; this is the total number of
132     * lines inserted or deleted (counting only lines before the midpoint).
133     *
134     * This function assumes that the first lines of the specified portions
135     * of the two files do not match, and likewise that the last lines do not
136     * match.  The caller must trim matching lines from the beginning and end
137     * of the portions it is going to specify.
138     *
139     * Note that if we return the "wrong" diagonal value, or if
140     * the value of bdiag at that diagonal is "wrong",
141     * the worst this can do is cause suboptimal diff output.
142     * It cannot cause incorrect diff output.
143     * @param xoff xoff
144     * @param xlim xlim
145     * @param yoff yoff
146     * @param ylim ylim
147     * @return midpoint of the shortest edit script
148     */
149    private int diag(int xoff, int xlim, int yoff, int ylim) {
150        final int[] fd = fdiag; // Give the compiler a chance.
151        final int[] bd = bdiag; // Additional help for the compiler.
152        final int[] xv = xvec;      // Still more help for the compiler.
153        final int[] yv = yvec;      // And more and more . . .
154        final int dmin = xoff - ylim;   // Minimum valid diagonal.
155        final int dmax = xlim - yoff;   // Maximum valid diagonal.
156        final int fmid = xoff - yoff;   // Center diagonal of top-down search.
157        final int bmid = xlim - ylim;   // Center diagonal of bottom-up search.
158        int fmin = fmid, fmax = fmid;   // Limits of top-down search.
159        int bmin = bmid, bmax = bmid;   // Limits of bottom-up search.
160        // True if southeast corner is on an odd diagonal with respect to the northwest.
161        final boolean odd = (fmid - bmid & 1) != 0;
162
163        fd[fdiagoff + fmid] = xoff;
164        bd[bdiagoff + bmid] = xlim;
165
166        for (int c = 1;; ++c) {
167            int d;          /* Active diagonal. */
168
169            /* Extend the top-down search by an edit step in each diagonal. */
170            if (fmin > dmin) {
171                fd[fdiagoff + --fmin - 1] = -1;
172            } else {
173                ++fmin;
174            }
175            if (fmax < dmax) {
176                fd[fdiagoff + ++fmax + 1] = -1;
177            } else {
178                --fmax;
179            }
180            for (d = fmax; d >= fmin; d -= 2) {
181                int x;
182                int y;
183                int tlo = fd[fdiagoff + d - 1];
184                int thi = fd[fdiagoff + d + 1];
185
186                if (tlo >= thi) {
187                    x = tlo + 1;
188                } else {
189                    x = thi;
190                }
191                y = x - d;
192                while (x < xlim && y < ylim && xv[x] == yv[y]) {
193                    ++x; ++y;
194                }
195                fd[fdiagoff + d] = x;
196                if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
197                    cost = 2 * c - 1;
198                    return d;
199                }
200            }
201
202            /* Similar extend the bottom-up search. */
203            if (bmin > dmin) {
204                bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
205            } else {
206                ++bmin;
207            }
208            if (bmax < dmax) {
209                bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
210            } else {
211                --bmax;
212            }
213            for (d = bmax; d >= bmin; d -= 2) {
214                int x, y, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
215
216                if (tlo < thi) {
217                    x = tlo;
218                } else {
219                    x = thi - 1;
220                }
221                y = x - d;
222                while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
223                    --x; --y;
224                }
225                bd[bdiagoff + d] = x;
226                if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
227                    cost = 2 * c;
228                    return d;
229                }
230            }
231        }
232    }
233
234    /**
235     * Compare in detail contiguous subsequences of the two files
236     * which are known, as a whole, to match each other.
237     *
238     * The results are recorded in the vectors filevec[N].changed_flag, by
239     * storing a 1 in the element for each line that is an insertion or deletion.
240     *
241     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
242     *
243     * Note that XLIM, YLIM are exclusive bounds.
244     * All line numbers are origin-0 and discarded lines are not counted.
245     * @param xoff xoff
246     * @param xlim xlim
247     * @param yoff yoff
248     * @param ylim ylim
249     */
250    private void compareseq(int xoff, int xlim, int yoff, int ylim) {
251        /* Slide down the bottom initial diagonal. */
252        while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
253            ++xoff; ++yoff;
254        }
255        /* Slide up the top initial diagonal. */
256        while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
257            --xlim; --ylim;
258        }
259
260        /* Handle simple cases. */
261        if (xoff == xlim) {
262            while (yoff < ylim) {
263                filevec[1].changedFlag[1+filevec[1].realindexes[yoff++]] = true;
264            }
265        } else if (yoff == ylim) {
266            while (xoff < xlim) {
267                filevec[0].changedFlag[1+filevec[0].realindexes[xoff++]] = true;
268            }
269        } else {
270            /* Find a point of correspondence in the middle of the files.  */
271
272            int d = diag(xoff, xlim, yoff, ylim);
273            int c = cost;
274            int b = bdiag[bdiagoff + d];
275
276            if (c == 1)
277                /* This should be impossible, because it implies that
278                   one of the two subsequences is empty,
279                   and that case was handled above without calling `diag'.
280                   Let's verify that this is true.  */
281                throw new IllegalArgumentException("Empty subsequence");
282            else {
283                /* Use that point to split this problem into two subproblems.  */
284                compareseq(xoff, b, yoff, b - d);
285                /* This used to use f instead of b,
286                   but that is incorrect!
287                   It is not necessarily the case that diagonal d
288                   has a snake from b to f.  */
289                compareseq(b, xlim, b - d, ylim);
290            }
291        }
292    }
293
294    /** Discard lines from one file that have no matches in the other file.
295     */
296    private void discardConfusingLines() {
297        filevec[0].discardConfusingLines(filevec[1]);
298        filevec[1].discardConfusingLines(filevec[0]);
299    }
300
301    /**
302     * Adjust inserts/deletes of blank lines to join changes as much as possible.
303     */
304    private void shiftBoundaries() {
305        filevec[0].shiftBoundaries(filevec[1]);
306        filevec[1].shiftBoundaries(filevec[0]);
307    }
308
309    /**
310     * Script builder.
311     * @since 10600 (functional interface)
312     */
313    @FunctionalInterface
314    public interface ScriptBuilder {
315        /**
316         * Scan the tables of which lines are inserted and deleted, producing an edit script.
317         * @param changed0 true for lines in first file which do not match 2nd
318         * @param len0 number of lines in first file
319         * @param changed1 true for lines in 2nd file which do not match 1st
320         * @param len1 number of lines in 2nd file
321         * @return a linked list of changes - or null
322         * @since 10600 (renamed)
323         */
324        Change buildScript(
325                boolean[] changed0, int len0,
326                boolean[] changed1, int len1
327        );
328    }
329
330    /**
331     * Scan the tables of which lines are inserted and deleted, producing an edit script in reverse order.
332     */
333    static class ReverseScript implements ScriptBuilder {
334        @Override
335        public Change buildScript(
336                final boolean[] changed0, int len0,
337                final boolean[] changed1, int len1) {
338            Change script = null;
339            int i0 = 0, i1 = 0;
340            while (i0 < len0 || i1 < len1) {
341                if (changed0[1+i0] || changed1[1+i1]) {
342                    int line0 = i0, line1 = i1;
343
344                    /* Find # lines changed here in each file.  */
345                    while (changed0[1+i0]) {
346                        ++i0;
347                    }
348                    while (changed1[1+i1]) {
349                        ++i1;
350                    }
351
352                    /* Record this change.  */
353                    script = new Change(line0, line1, i0 - line0, i1 - line1, script);
354                }
355
356                /* We have reached lines in the two files that match each other.  */
357                i0++; i1++;
358            }
359
360            return script;
361        }
362    }
363
364    static class ForwardScript implements ScriptBuilder {
365        /** Scan the tables of which lines are inserted and deleted,
366            producing an edit script in forward order.  */
367        @Override
368        public Change buildScript(
369                final boolean[] changed0, int len0,
370                final boolean[] changed1, int len1) {
371            Change script = null;
372            int i0 = len0, i1 = len1;
373
374            while (i0 >= 0 || i1 >= 0) {
375                if (changed0[i0] || changed1[i1]) {
376                    int line0 = i0, line1 = i1;
377
378                    /* Find # lines changed here in each file.  */
379                    while (changed0[i0]) {
380                        --i0;
381                    }
382                    while (changed1[i1]) {
383                        --i1;
384                    }
385
386                    /* Record this change.  */
387                    script = new Change(i0, i1, line0 - i0, line1 - i1, script);
388                }
389
390                /* We have reached lines in the two files that match each other.  */
391                i0--; i1--;
392            }
393
394            return script;
395        }
396    }
397
398    /** Standard Forward ScriptBuilder. */
399    public static final ScriptBuilder forwardScript = new ForwardScript();
400    /** Standard Reverse ScriptBuilder. */
401    public static final ScriptBuilder reverseScript = new ReverseScript();
402
403    /**
404     * Report the differences of two files. DEPTH is the current directory depth.
405     * @param reverse if {@code true} use {@link #reverseScript} else use {@link #forwardScript}
406     * @return the differences of two files
407     */
408    public final Change diff2(final boolean reverse) {
409        return diff(reverse ? reverseScript : forwardScript);
410    }
411
412    /**
413     * Get the results of comparison as an edit script.  The script
414     * is described by a list of changes.  The standard ScriptBuilder
415     * implementations provide for forward and reverse edit scripts.
416     * Alternate implementations could, for instance, list common elements
417     * instead of differences.
418     * @param bld an object to build the script from change flags
419     * @return the head of a list of changes
420     */
421    public Change diff(final ScriptBuilder bld) {
422
423        // Some lines are obviously insertions or deletions because they don't match anything.
424        // Detect them now, and avoid even thinking about them in the main comparison algorithm.
425        discardConfusingLines();
426
427        // Now do the main comparison algorithm, considering just the undiscarded lines.
428        xvec = filevec[0].undiscarded;
429        yvec = filevec[1].undiscarded;
430
431        int diags = filevec[0].nondiscardedLines + filevec[1].nondiscardedLines + 3;
432        fdiag = new int[diags];
433        fdiagoff = filevec[1].nondiscardedLines + 1;
434        bdiag = new int[diags];
435        bdiagoff = filevec[1].nondiscardedLines + 1;
436
437        compareseq(0, filevec[0].nondiscardedLines,
438                   0, filevec[1].nondiscardedLines);
439        fdiag = null;
440        bdiag = null;
441
442        // Modify the results slightly to make them prettier in cases where that can validly be done.
443        shiftBoundaries();
444
445        // Get the results of comparison in the form of a chain of `struct change's -- an edit script.
446        return bld.buildScript(
447                filevec[0].changedFlag,
448                filevec[0].bufferedLines,
449                filevec[1].changedFlag,
450                filevec[1].bufferedLines
451        );
452    }
453
454    /** The result of comparison is an "edit script": a chain of change objects.
455     Each change represents one place where some lines are deleted
456     and some are inserted.
457
458     LINE0 and LINE1 are the first affected lines in the two files (origin 0).
459     DELETED is the number of lines deleted here from file 0.
460     INSERTED is the number of lines inserted here in file 1.
461
462     If DELETED is 0 then LINE0 is the number of the line before
463     which the insertion was done; vice versa for INSERTED and LINE1.  */
464
465    public static class Change {
466        /** Previous or next edit command. */
467        public Change link;
468        /** # lines of file 1 changed here.  */
469        public final int inserted;
470        /** # lines of file 0 changed here.  */
471        public final int deleted;
472        /** Line number of 1st deleted line.  */
473        public final int line0;
474        /** Line number of 1st inserted line.  */
475        public final int line1;
476
477        /**
478         * Cons an additional entry onto the front of an edit script OLD.
479         * LINE0 and LINE1 are the first affected lines in the two files (origin 0).
480         * DELETED is the number of lines deleted here from file 0.
481         * INSERTED is the number of lines inserted here in file 1.
482         *
483         * If DELETED is 0 then LINE0 is the number of the line before
484         * which the insertion was done; vice versa for INSERTED and LINE1.
485         * @param line0 first affected lines in the two files (origin 0)
486         * @param line1 first affected lines in the two files (origin 0)
487         * @param deleted the number of lines deleted here from file 0
488         * @param inserted the number of lines inserted here in file 1
489         * @param old edit script
490         */
491        public Change(int line0, int line1, int deleted, int inserted, Change old) {
492            this.line0 = line0;
493            this.line1 = line1;
494            this.inserted = inserted;
495            this.deleted = deleted;
496            this.link = old;
497        }
498
499        /**
500         * Returns the number of insertions and deletions of this change as well as
501         * (recursively) the changes linked via {@link #link}.
502         * @return recursive number of insertions and deletions
503         */
504        public int getTotalNumberOfChanges() {
505            return inserted + deleted + (link != null ? link.getTotalNumberOfChanges() : 0);
506        }
507
508        @Override
509        public String toString() {
510            String s = String.format("%d -%d +%d %d", line0, deleted, inserted, line1);
511            return (link != null) ? s + '\n' + link : s;
512        }
513    }
514
515    /**
516     * Data on one input file being compared.
517     */
518    class FileData {
519
520        /** Allocate changed array for the results of comparison.  */
521        void clear() {
522            // Allocate a flag for each line of each file, saying whether that line is an insertion or deletion.
523            // Allocate an extra element, always zero, at each end of each vector.
524            changedFlag = new boolean[bufferedLines + 2];
525        }
526
527        /**
528         * Return equiv_count[I] as the number of lines in this file that fall in equivalence class I.
529         * @return the array of equivalence class counts.
530         */
531        int[] equivCount() {
532            int[] equivCount = new int[equivMax];
533            for (int i = 0; i < bufferedLines; ++i) {
534                ++equivCount[equivs[i]];
535            }
536            return equivCount;
537        }
538
539        /**
540         * Discard lines that have no matches in another file.
541         *
542         * A line which is discarded will not be considered by the actual comparison algorithm;
543         * it will be as if that line were not in the file.
544         * The file's `realindexes' table maps virtual line numbers
545         * (which don't count the discarded lines) into real line numbers;
546         * this is how the actual comparison algorithm produces results
547         * that are comprehensible when the discarded lines are counted.
548         * <p>
549         * When we discard a line, we also mark it as a deletion or insertion so that it will be printed in the output.
550         * @param f the other file
551         */
552        void discardConfusingLines(FileData f) {
553            clear();
554            // Set up table of which lines are going to be discarded.
555            final byte[] discarded = discardable(f.equivCount());
556
557            // Don't really discard the provisional lines except when they occur in a run of discardables,
558            // with nonprovisionals at the beginning and end.
559            filterDiscards(discarded);
560
561            // Actually discard the lines.
562            discard(discarded);
563        }
564
565        /**
566         * Mark to be discarded each line that matches no line of another file.
567         * If a line matches many lines, mark it as provisionally discardable.
568         * @param counts The count of each equivalence number for the other file.
569         * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each line
570         * @see #equivCount()
571         */
572        private byte[] discardable(final int ... counts) {
573            final int end = bufferedLines;
574            final byte[] discards = new byte[end];
575            final int[] equivs = this.equivs;
576            int many = 5;
577            int tem = end / 64;
578
579            /* Multiply MANY by approximate square root of number of lines.
580               That is the threshold for provisionally discardable lines.  */
581            while ((tem = tem >> 2) > 0) {
582                many *= 2;
583            }
584
585            for (int i = 0; i < end; i++) {
586                int nmatch;
587                if (equivs[i] == 0) {
588                    continue;
589                }
590                nmatch = counts[equivs[i]];
591                if (nmatch == 0) {
592                    discards[i] = 1;
593                } else if (nmatch > many) {
594                    discards[i] = 2;
595                }
596            }
597            return discards;
598        }
599
600        /**
601         * Don't really discard the provisional lines except when they occur
602         * in a run of discardables, with nonprovisionals at the beginning and end.
603         * @param discards discards
604         */
605        private void filterDiscards(final byte[] discards) {
606            final int end = bufferedLines;
607
608            for (int i = 0; i < end; i++) {
609                /* Cancel provisional discards not in middle of run of discards.  */
610                if (discards[i] == 2) {
611                    discards[i] = 0;
612                } else if (discards[i] != 0) {
613                    /* We have found a nonprovisional discard.  */
614                    int j;
615                    int length;
616                    int provisional = 0;
617
618                    /* Find end of this run of discardable lines.
619                       Count how many are provisionally discardable.  */
620                    for (j = i; j < end; j++) {
621                        if (discards[j] == 0) {
622                            break;
623                        }
624                        if (discards[j] == 2) {
625                            ++provisional;
626                        }
627                    }
628
629                    /* Cancel provisional discards at end, and shrink the run.  */
630                    while (j > i && discards[j - 1] == 2) {
631                        discards[--j] = 0; --provisional;
632                    }
633
634                    /* Now we have the length of a run of discardable lines
635                       whose first and last are not provisional.  */
636                    length = j - i;
637
638                    /* If 1/4 of the lines in the run are provisional,
639                       cancel discarding of all provisional lines in the run.  */
640                    if (provisional * 4 > length) {
641                        while (j > i)
642                            if (discards[--j] == 2) {
643                                discards[j] = 0;
644                            }
645                    } else {
646                        int consec;
647                        int minimum = 1;
648                        int tem = length / 4;
649
650                        /* MINIMUM is approximate square root of LENGTH/4.
651                           A subrun of two or more provisionals can stand
652                           when LENGTH is at least 16.
653                           A subrun of 4 or more can stand when LENGTH >= 64.  */
654                        while ((tem = tem >> 2) > 0) {
655                            minimum *= 2;
656                        }
657                        minimum++;
658
659                        /* Cancel any subrun of MINIMUM or more provisionals
660                           within the larger run.  */
661                        for (j = 0, consec = 0; j < length; j++) {
662                            if (discards[i + j] != 2) {
663                                consec = 0;
664                            } else if (minimum == ++consec) {
665                                /* Back up to start of subrun, to cancel it all.  */
666                                j -= consec;
667                            } else if (minimum < consec) {
668                                discards[i + j] = 0;
669                            }
670                        }
671
672                        /* Scan from beginning of run
673                           until we find 3 or more nonprovisionals in a row
674                           or until the first nonprovisional at least 8 lines in.
675                           Until that point, cancel any provisionals.  */
676                        for (j = 0, consec = 0; j < length; j++) {
677                            if (j >= 8 && discards[i + j] == 1) {
678                                break;
679                            }
680                            if (discards[i + j] == 2) {
681                                consec = 0; discards[i + j] = 0;
682                            } else if (discards[i + j] == 0) {
683                                consec = 0;
684                            } else {
685                                consec++;
686                            }
687                            if (consec == 3) {
688                                break;
689                            }
690                        }
691
692                        /* I advances to the last line of the run.  */
693                        i += length - 1;
694
695                        /* Same thing, from end.  */
696                        for (j = 0, consec = 0; j < length; j++) {
697                            if (j >= 8 && discards[i - j] == 1) {
698                                break;
699                            }
700                            if (discards[i - j] == 2) {
701                                consec = 0; discards[i - j] = 0;
702                            } else if (discards[i - j] == 0) {
703                                consec = 0;
704                            } else {
705                                consec++;
706                            }
707                            if (consec == 3) {
708                                break;
709                            }
710                        }
711                    }
712                }
713            }
714        }
715
716        /** Actually discard the lines.
717            @param discards flags lines to be discarded
718         */
719        private void discard(final byte[] discards) {
720            final int end = bufferedLines;
721            int j = 0;
722            for (int i = 0; i < end; ++i) {
723                if (discards[i] == 0) {
724                    undiscarded[j] = equivs[i];
725                    realindexes[j++] = i;
726                } else {
727                    changedFlag[1+i] = true;
728                }
729            }
730            nondiscardedLines = j;
731        }
732
733        FileData(int length) {
734            bufferedLines = length;
735            equivs = new int[length];
736            undiscarded = new int[bufferedLines];
737            realindexes = new int[bufferedLines];
738        }
739
740        FileData(Object[] data, Map<Object, Integer> h) {
741            this(data.length);
742            // FIXME: diff 2.7 removes common prefix and common suffix
743            for (int i = 0; i < data.length; ++i) {
744                Integer ir = h.get(data[i]);
745                if (ir == null) {
746                    equivs[i] = equivMax++;
747                    h.put(data[i], equivs[i]);
748                } else {
749                    equivs[i] = ir.intValue();
750                }
751            }
752        }
753
754        /**
755         * Adjust inserts/deletes of blank lines to join changes as much as possible.
756         *
757         * We do something when a run of changed lines include a blank line at one end and have an excluded blank line at the other.
758         * We are free to choose which blank line is included.
759         * `compareseq' always chooses the one at the beginning, but usually it is cleaner to consider the following blank line
760         * to be the "change". The only exception is if the preceding blank line would join this change to other changes.
761         * @param f the file being compared against
762         */
763        void shiftBoundaries(FileData f) {
764            final boolean[] changed = changedFlag;
765            final boolean[] otherChanged = f.changedFlag;
766            int i = 0;
767            int j = 0;
768            int iEnd = bufferedLines;
769            int preceding = -1;
770            int otherPreceding = -1;
771
772            for (;;) {
773                int start, end, otherStart;
774
775                /* Scan forwards to find beginning of another run of changes.
776                   Also keep track of the corresponding point in the other file.  */
777
778                while (i < iEnd && !changed[1+i]) {
779                    while (otherChanged[1+j++]) {
780                        /* Non-corresponding lines in the other file
781                           will count as the preceding batch of changes.  */
782                        otherPreceding = j;
783                    }
784                    i++;
785                }
786
787                if (i == iEnd) {
788                    break;
789                }
790
791                start = i;
792                otherStart = j;
793
794                boolean loop = true;
795                while (loop) {
796                    /* Now find the end of this run of changes.  */
797
798                    while (i < iEnd && changed[1+i]) {
799                        i++;
800                    }
801                    end = i;
802
803                    /* If the first changed line matches the following unchanged one,
804                       and this run does not follow right after a previous run,
805                       and there are no lines deleted from the other file here,
806                       then classify the first changed line as unchanged
807                       and the following line as changed in its place.  */
808
809                    /* You might ask, how could this run follow right after another?
810                       Only because the previous run was shifted here.  */
811
812                    if (end != iEnd && equivs[start] == equivs[end] && !otherChanged[1+j]
813                         && !((preceding >= 0 && start == preceding) || (otherPreceding >= 0 && otherStart == otherPreceding))) {
814                        changed[1+end++] = true;
815                        changed[1+start++] = false;
816                        ++i;
817                        /* Since one line-that-matches is now before this run
818                           instead of after, we must advance in the other file
819                           to keep in synch.  */
820                        ++j;
821                    } else {
822                        loop = false;
823                    }
824                }
825
826                preceding = i;
827                otherPreceding = j;
828            }
829        }
830
831        /** Number of elements (lines) in this file. */
832        private final int bufferedLines;
833
834        /** Vector, indexed by line number, containing an equivalence code for each line.
835         * It is this vector that is actually compared with that of another file to generate differences. */
836        private final int[] equivs;
837
838        /** Vector, like the previous one except that the elements for discarded lines have been squeezed out. */
839        private final int[] undiscarded;
840
841        /** Vector mapping virtual line numbers (not counting discarded lines) to real ones (counting those lines).
842         * Both are origin-0.  */
843        private final int[] realindexes;
844
845        /** Total number of nondiscarded lines. */
846        private int nondiscardedLines;
847
848        /** Array, indexed by real origin-1 line number, containing true for a line that is an insertion or a deletion.
849           The results of comparison are stored here. */
850        private boolean[] changedFlag;
851    }
852}