Code/Dependencies/Pixie/Pixie/Diff.cs
using System;
using System.Collections.Generic;
using System.Linq;

namespace WasmBox.Pixie {
    /// <summary>
    /// A helper class that creates diffs.
    /// </summary>
    public static class Diff {
        /// <summary>
        /// Creates a diff from an old sequence and a new sequence.
        /// </summary>
        /// <param name="oldSequence">A 'before' sequence for the diff.</param>
        /// <param name="newSequence">An 'after' sequence of the diff.</param>
        /// <returns>
        /// A diff that, when applied, turns <paramref name="oldSequence"/> into
        /// <paramref name="newSequence"/>.
        /// </returns>
        public static Diff<T> Create<T>(
            IEnumerable<T> oldSequence,
            IEnumerable<T> newSequence) {
            return Create( oldSequence, newSequence, EqualityComparer<T>.Default);
        }

        /// <summary>
        /// Creates a diff from an old sequence, a new sequence
        /// and an equality comparer.
        /// </summary>
        /// <param name="oldSequence">A 'before' sequence for the diff.</param>
        /// <param name="newSequence">An 'after' sequence of the diff.</param>
        /// <param name="comparer">
        /// The equality comparer to use for comparing elements.
        /// </param>
        /// <returns>
        /// A diff that, when applied, turns <paramref name="oldSequence"/> into
        /// <paramref name="newSequence"/>.
        /// </returns>
        public static Diff<T> Create<T>(
            IEnumerable<T> oldSequence,
            IEnumerable<T> newSequence,
            IEqualityComparer<T> comparer) {
            // Algorithm based on the Python version of simplediff by
            // Paul Butler. Original source code at
            // https://github.com/paulgb/simplediff.
            // Comments are mostly preserved from original algorithm.

            // Create a map from old values to their indices.
            var oldIndexMap = new Dictionary<T, List<int>>(comparer);
            int oldSequenceLength = 0; {
                foreach (var val in oldSequence) {
                    List<int> indexList;
                    if (!oldIndexMap.TryGetValue(val, out indexList)) {
                        indexList = new List<int>();
                        oldIndexMap[val] = indexList;
                    }
                    indexList.Add(oldSequenceLength);
                    oldSequenceLength++;
                }
            }

            // Find the largest substring common to old and new.
            // We use a dynamic programming approach here.
            //
            // We iterate over each value in the `new` list, calling the
            // index `inew`. At each iteration, `overlap[i]` is the
            // length of the largest suffix of `old[:i]` equal to a suffix
            // of `new[:inew]` (or unset when `old[i]` != `new[inew]`).
            //
            // At each stage of iteration, the new `overlap` (called
            // `_overlap` until the original `overlap` is no longer needed)
            // is built from the old one.
            //
            // If the length of overlap exceeds the largest substring
            // seen so far (`sub_length`), we update the largest substring
            // to the overlapping strings.

            var overlap = new Dictionary<int, int>();

            // `sub_start_old` is the index of the beginning of the largest overlapping
            // substring in the old list. `sub_start_new` is the index of the beginning
            // of the same substring in the new list. `sub_length` is the length that
            // overlaps in both.
            // These track the largest overlapping substring seen so far, so naturally
            // we start with a 0-length substring.
            var sub_start_old = 0;
            var sub_start_new = 0;
            var sub_length = 0;

            int inew = 0;
            foreach (var val in newSequence) {
                var _overlap = new Dictionary<int, int>();
                List<int> oldIndexList;
                if (oldIndexMap.TryGetValue(val, out oldIndexList)) {
                    for (int i = 0; i < oldIndexList.Count; i++) {
                        int iold = oldIndexList[i];
                        // Now we are considering all values of iold such that
                        // `old[iold] == new[inew]`.
                        _overlap[iold] = (iold == 0 ? 0 : GetOrDefault( overlap, iold - 1, 0)) + 1;
                        if (_overlap[iold] > sub_length) {
                            // This is the largest substring seen so far, so store its
                            // indices.
                            sub_length = _overlap[iold];
                            sub_start_old = iold - sub_length + 1;
                            sub_start_new = inew - sub_length + 1;
                        }
                    }
                }
                overlap = _overlap;

                inew++;
            }
            int newSequenceLength = inew;

            var diffElems = new List<DiffElement<T>>();
            if (sub_length == 0) {
                // If no common substring is found, we return an insert and delete...
                if (oldSequenceLength > 0) {
                    diffElems.Add(
                        new DiffElement<T>(
                            DiffOperation.Deletion,
                            oldSequence));
                }
                if (newSequenceLength > 0) {
                    diffElems.Add(
                        new DiffElement<T>(
                            DiffOperation.Insertion,
                            newSequence));
                }
            }
            else {
                // ...otherwise, the common substring is unchanged and we recursively
                // diff the text before and after that substring.
                diffElems.AddRange(
                    Create<T>(
                        oldSequence.Take( sub_start_old),
                        newSequence.Take( sub_start_new),
                        comparer)
                    .Elements);

                diffElems.Add(
                    new DiffElement<T>(
                        DiffOperation.Unchanged,
                        newSequence
                            .Skip( sub_start_new)
                            .Take( sub_length)));

                diffElems.AddRange(
                    Create<T>(
                        oldSequence.Skip( sub_start_old + sub_length),
                        newSequence.Skip( sub_start_new + sub_length),
                        comparer)
                    .Elements);
            }
            return new Diff<T>(diffElems);
        }

        private static V GetOrDefault<K, V>(Dictionary<K, V> dictionary, K key, V defaultValue) {
            V result;
            if (dictionary.TryGetValue(key, out result)) {
                return result;
            }
            else {
                return defaultValue;
            }
        }
    }

    /// <summary>
    /// Represents the changes required to turn one sequence
    /// into another.
    /// </summary>
    public struct Diff<T> : IEquatable<Diff<T>> {
        /// <summary>
        /// Creates a diff from a sequence of diff elements.
        /// </summary>
        /// <param name="elements">The diff elements.</param>
        public Diff(IReadOnlyList<DiffElement<T>> elements) {
            this.Elements = elements;
        }

        /// <summary>
        /// Gets the elements in this diff.
        /// </summary>
        /// <returns>The diff elements.</returns>
        public IReadOnlyList<DiffElement<T>> Elements { get; private set; }

        /// <summary>
        /// Recreates the old sequence, that is, the sequence
        /// prior to applying this diff.
        /// </summary>
        /// <returns>The old sequence.</returns>
        public IReadOnlyList<T> OldSequence {
            get {
                var result = new List<T>();
                int elemCount = Elements.Count;
                for (int i = 0; i < elemCount; i++) {
                    var elem = Elements[i];
                    if (elem.Operation != DiffOperation.Insertion) {
                        result.AddRange(elem.Values);
                    }
                }
                return result;
            }
        }

        /// <summary>
        /// Recreates the new sequence, that is, the sequence
        /// produced by applying this diff.
        /// </summary>
        /// <returns>The new sequence.</returns>
        public IReadOnlyList<T> NewSequence {
            get {
                var result = new List<T>();
                int elemCount = Elements.Count;
                for (int i = 0; i < elemCount; i++) {
                    var elem = Elements[i];
                    if (elem.Operation != DiffOperation.Deletion) {
                        result.AddRange(elem.Values);
                    }
                }
                return result;
            }
        }

        /// <inheritdoc/>
        public override bool Equals(object obj) {
            return obj is Diff<T> && Equals((Diff<T>)obj);
        }

        /// <summary>
        /// Tests if this diff is the same as another diff.
        /// </summary>
        /// <param name="other">The other diff.</param>
        /// <returns>
        /// <c>true</c> if this diff equals the other diff; otherwise, <c>false</c>.
        /// </returns>
        public bool Equals(Diff<T> other) {
            return Enumerable.SequenceEqual( Elements, other.Elements);
        }

        /// <inheritdoc/>
        public override int GetHashCode() {
            int elemCount = Elements.Count;
            int hash = 0;
            for (int i = 0; i < elemCount; i++) {
                hash = (hash << 1) ^ Elements[i].GetHashCode();
            }
            return hash;
        }

        /// <inheritdoc/>
        public override string ToString() {
            return "[" + string.Join( ", ", Elements) + "]";
        }
    }

    /// <summary>
    /// An enumeration of possible operations a diff element
    /// can perform.
    /// </summary>
    public enum DiffOperation {
        /// <summary>
        /// The diff element leaves a number of values in place.
        /// </summary>
        Unchanged,

        /// <summary>
        /// The diff inserts a number of new values into the
        /// sequence.
        /// </summary>
        Insertion,

        /// <summary>
        /// The diff deletes a number of old values from the
        /// sequence.
        /// </summary>
        Deletion
    }

    /// <summary>
    /// An element of a diff.
    /// </summary>
    public struct DiffElement<T> : IEquatable<DiffElement<T>> {
        /// <summary>
        /// Creates a diff element.
        /// </summary>
        /// <param name="operation">
        /// The operation to perform.
        /// </param>
        /// <param name="values">
        /// The sequence of values to operate on.
        /// </param>
        public DiffElement(
            DiffOperation operation,
            IEnumerable<T> values)
            : this(operation, values.ToArray()) { }

        /// <summary>
        /// Creates a diff element.
        /// </summary>
        /// <param name="operation">
        /// The operation to perform.
        /// </param>
        /// <param name="values">
        /// The sequence of values to operate on.
        /// </param>
        public DiffElement(
            DiffOperation operation,
            IReadOnlyList<T> values) {
            this.Operation = operation;
            this.Values = values;
        }

        /// <summary>
        /// Gets the operation this diff element performs.
        /// </summary>
        /// <returns>The operation it performs.</returns>
        public DiffOperation Operation { get; private set; }

        /// <summary>
        /// Gets the sequence of values this diff element inserts,
        /// deletes or leaves unchanged.
        /// </summary>
        /// <returns>The sequence of values.</returns>
        public IReadOnlyList<T> Values { get; private set; }

        /// <summary>
        /// Tests if this diff elements equals another diff element.
        /// </summary>
        /// <param name="other">A diff element to test for equality.</param>
        /// <returns>
        /// <c>true</c> if this diff element equals the other diff element;
        /// otherwise, <c>false</c>.
        /// </returns>
        public bool Equals(DiffElement<T> other) {
            return Operation == other.Operation
                && Enumerable.SequenceEqual( Values, other.Values);
        }

        /// <inheritdoc/>
        public override bool Equals(object obj) {
            return obj is DiffElement<T> && Equals((DiffElement<T>)obj);
        }

        /// <inheritdoc/>
        public override int GetHashCode() {
            int valueCount = Values.Count;
            int hash = Operation.GetHashCode();
            for (int i = 0; i < valueCount; i++) {
                hash = (hash << 1) ^ Values[i].GetHashCode();
            }
            return hash;
        }

        /// <inheritdoc/>
        public override string ToString() {
            string prefix = Operation == DiffOperation.Insertion
                ? "+"
                : Operation == DiffOperation.Deletion
                    ? "-"
                    : "=";
            return prefix + " " + string.Concat( Values);
        }
    }
}