citadel/poky/bitbake/lib/simplediff/__init__.py

'''
Simple Diff for Python version 1.0

Annotate two versions of a list with the values that have been
changed between the versions, similar to unix's `diff` but with
a dead-simple Python interface.

(C) Paul Butler 2008-2012 <http://www.paulbutler.org/>
May be used and distributed under the zlib/libpng license
<http://www.opensource.org/licenses/zlib-license.php>
'''

__all__ = ['diff', 'string_diff', 'html_diff']
__version__ = '1.0'


def diff(old, new):
    '''
    Find the differences between two lists. Returns a list of pairs, where the
    first value is in ['+','-','='] and represents an insertion, deletion, or
    no change for that list. The second value of the pair is the list
    of elements.

    Params:
        old     the old list of immutable, comparable values (ie. a list
                of strings)
        new     the new list of immutable, comparable values
   
    Returns:
        A list of pairs, with the first part of the pair being one of three
        strings ('-', '+', '=') and the second part being a list of values from
        the original old and/or new lists. The first part of the pair
        corresponds to whether the list of values is a deletion, insertion, or
        unchanged, respectively.

    Examples:
        >>> diff([1,2,3,4],[1,3,4])
        [('=', [1]), ('-', [2]), ('=', [3, 4])]

        >>> diff([1,2,3,4],[2,3,4,1])
        [('-', [1]), ('=', [2, 3, 4]), ('+', [1])]

        >>> diff('The quick brown fox jumps over the lazy dog'.split(),
        ...      'The slow blue cheese drips over the lazy carrot'.split())
        ... # doctest: +NORMALIZE_WHITESPACE
        [('=', ['The']),
         ('-', ['quick', 'brown', 'fox', 'jumps']),
         ('+', ['slow', 'blue', 'cheese', 'drips']),
         ('=', ['over', 'the', 'lazy']),
         ('-', ['dog']),
         ('+', ['carrot'])]

    '''

    # Create a map from old values to their indices
    old_index_map = dict()
    for i, val in enumerate(old):
        old_index_map.setdefault(val,list()).append(i)

    # Find the largest substring common to old and new.
    # We use a dynamic programming approach here.
    # 
    # We iterate over each value in the `new` list, calling the
    # index `inew`. At each iteration, `overlap[i]` is the
    # length of the largest suffix of `old[:i]` equal to a suffix
    # of `new[:inew]` (or unset when `old[i]` != `new[inew]`).
    #
    # At each stage of iteration, the new `overlap` (called
    # `_overlap` until the original `overlap` is no longer needed)
    # is built from the old one.
    #
    # If the length of overlap exceeds the largest substring
    # seen so far (`sub_length`), we update the largest substring
    # to the overlapping strings.

    overlap = dict()
    # `sub_start_old` is the index of the beginning of the largest overlapping
    # substring in the old list. `sub_start_new` is the index of the beginning
    # of the same substring in the new list. `sub_length` is the length that
    # overlaps in both.
    # These track the largest overlapping substring seen so far, so naturally
    # we start with a 0-length substring.
    sub_start_old = 0
    sub_start_new = 0
    sub_length = 0

    for inew, val in enumerate(new):
        _overlap = dict()
        for iold in old_index_map.get(val,list()):
            # now we are considering all values of iold such that
            # `old[iold] == new[inew]`.
            _overlap[iold] = (iold and overlap.get(iold - 1, 0)) + 1
            if(_overlap[iold] > sub_length):
                # this is the largest substring seen so far, so store its
                # indices
                sub_length = _overlap[iold]
                sub_start_old = iold - sub_length + 1
                sub_start_new = inew - sub_length + 1
        overlap = _overlap

    if sub_length == 0:
        # If no common substring is found, we return an insert and delete...
        return (old and [('-', old)] or []) + (new and [('+', new)] or [])
    else:
        # ...otherwise, the common substring is unchanged and we recursively
        # diff the text before and after that substring
        return diff(old[ : sub_start_old], new[ : sub_start_new]) + \
               [('=', new[sub_start_new : sub_start_new + sub_length])] + \
               diff(old[sub_start_old + sub_length : ],
                       new[sub_start_new + sub_length : ])


def string_diff(old, new):
    '''
    Returns the difference between the old and new strings when split on
    whitespace. Considers punctuation a part of the word

    This function is intended as an example; you'll probably want
    a more sophisticated wrapper in practice.

    Params:
        old     the old string
        new     the new string

    Returns:
        the output of `diff` on the two strings after splitting them
        on whitespace (a list of change instructions; see the docstring
        of `diff`)

    Examples:
        >>> string_diff('The quick brown fox', 'The fast blue fox')
        ... # doctest: +NORMALIZE_WHITESPACE
        [('=', ['The']),
         ('-', ['quick', 'brown']),
         ('+', ['fast', 'blue']),
         ('=', ['fox'])]

    '''
    return diff(old.split(), new.split())


def html_diff(old, new):
    '''
    Returns the difference between two strings (as in stringDiff) in
    HTML format. HTML code in the strings is NOT escaped, so you
    will get weird results if the strings contain HTML.

    This function is intended as an example; you'll probably want
    a more sophisticated wrapper in practice.

    Params:
        old     the old string
        new     the new string

    Returns:
        the output of the diff expressed with HTML <ins> and <del>
        tags.

    Examples:
        >>> html_diff('The quick brown fox', 'The fast blue fox')
        'The <del>quick brown</del> <ins>fast blue</ins> fox'
    '''
    con = {'=': (lambda x: x),
           '+': (lambda x: "<ins>" + x + "</ins>"),
           '-': (lambda x: "<del>" + x + "</del>")}
    return " ".join([(con[a])(" ".join(b)) for a, b in string_diff(old, new)])


def check_diff(old, new):
    '''
    This tests that diffs returned by `diff` are valid. You probably won't
    want to use this function, but it's provided for documentation and
    testing.

    A diff should satisfy the property that the old input is equal to the
    elements of the result annotated with '-' or '=' concatenated together.
    Likewise, the new input is equal to the elements of the result annotated
    with '+' or '=' concatenated together. This function compares `old`,
    `new`, and the results of `diff(old, new)` to ensure this is true.

    Tests:
        >>> check_diff('ABCBA', 'CBABA')
        >>> check_diff('Foobarbaz', 'Foobarbaz')
        >>> check_diff('Foobarbaz', 'Boobazbam')
        >>> check_diff('The quick brown fox', 'Some quick brown car')
        >>> check_diff('A thick red book', 'A quick blue book')
        >>> check_diff('dafhjkdashfkhasfjsdafdasfsda', 'asdfaskjfhksahkfjsdha')
        >>> check_diff('88288822828828288282828', '88288882882828282882828')
        >>> check_diff('1234567890', '24689')
    '''
    old = list(old)
    new = list(new)
    result = diff(old, new)
    _old = [val for (a, vals) in result if (a in '=-') for val in vals]
    assert old == _old, 'Expected %s, got %s' % (old, _old)
    _new = [val for (a, vals) in result if (a in '=+') for val in vals]
    assert new == _new, 'Expected %s, got %s' % (new, _new)
Squashed 'poky/' content from commit 4469acdf1d git-subtree-dir: poky git-subtree-split: 4469acdf1d0338220f3fe2ecb5e079eea6fda375 2017-12-04 16:35:29 -05:00			`'''`
			`Simple Diff for Python version 1.0`

			`Annotate two versions of a list with the values that have been`
			changed between the versions, similar to unix's `diff` but with
			`a dead-simple Python interface.`

			`(C) Paul Butler 2008-2012 <http://www.paulbutler.org/>`
			`May be used and distributed under the zlib/libpng license`
			`<http://www.opensource.org/licenses/zlib-license.php>`
			`'''`

			`__all__ = ['diff', 'string_diff', 'html_diff']`
			`__version__ = '1.0'`


			`def diff(old, new):`
			`'''`
			`Find the differences between two lists. Returns a list of pairs, where the`
			`first value is in ['+','-','='] and represents an insertion, deletion, or`
			`no change for that list. The second value of the pair is the list`
			`of elements.`

			`Params:`
			`old the old list of immutable, comparable values (ie. a list`
			`of strings)`
			`new the new list of immutable, comparable values`

			`Returns:`
			`A list of pairs, with the first part of the pair being one of three`
			`strings ('-', '+', '=') and the second part being a list of values from`
			`the original old and/or new lists. The first part of the pair`
			`corresponds to whether the list of values is a deletion, insertion, or`
			`unchanged, respectively.`

			`Examples:`
			`>>> diff([1,2,3,4],[1,3,4])`
			`[('=', [1]), ('-', [2]), ('=', [3, 4])]`

			`>>> diff([1,2,3,4],[2,3,4,1])`
			`[('-', [1]), ('=', [2, 3, 4]), ('+', [1])]`

			`>>> diff('The quick brown fox jumps over the lazy dog'.split(),`
			`... 'The slow blue cheese drips over the lazy carrot'.split())`
			`... # doctest: +NORMALIZE_WHITESPACE`
			`[('=', ['The']),`
			`('-', ['quick', 'brown', 'fox', 'jumps']),`
			`('+', ['slow', 'blue', 'cheese', 'drips']),`
			`('=', ['over', 'the', 'lazy']),`
			`('-', ['dog']),`
			`('+', ['carrot'])]`

			`'''`

			`# Create a map from old values to their indices`
			`old_index_map = dict()`
			`for i, val in enumerate(old):`
			`old_index_map.setdefault(val,list()).append(i)`

			`# Find the largest substring common to old and new.`
			`# We use a dynamic programming approach here.`
			`#`
			# We iterate over each value in the `new` list, calling the
			# index `inew`. At each iteration, `overlap[i]` is the
			# length of the largest suffix of `old[:i]` equal to a suffix
			# of `new[:inew]` (or unset when `old[i]` != `new[inew]`).
			`#`
			# At each stage of iteration, the new `overlap` (called
			# `_overlap` until the original `overlap` is no longer needed)
			`# is built from the old one.`
			`#`
			`# If the length of overlap exceeds the largest substring`
			# seen so far (`sub_length`), we update the largest substring
			`# to the overlapping strings.`

			`overlap = dict()`
			# `sub_start_old` is the index of the beginning of the largest overlapping
			# substring in the old list. `sub_start_new` is the index of the beginning
			# of the same substring in the new list. `sub_length` is the length that
			`# overlaps in both.`
			`# These track the largest overlapping substring seen so far, so naturally`
			`# we start with a 0-length substring.`
			`sub_start_old = 0`
			`sub_start_new = 0`
			`sub_length = 0`

			`for inew, val in enumerate(new):`
			`_overlap = dict()`
			`for iold in old_index_map.get(val,list()):`
			`# now we are considering all values of iold such that`
			# `old[iold] == new[inew]`.
			`_overlap[iold] = (iold and overlap.get(iold - 1, 0)) + 1`
			`if(_overlap[iold] > sub_length):`
			`# this is the largest substring seen so far, so store its`
			`# indices`
			`sub_length = _overlap[iold]`
			`sub_start_old = iold - sub_length + 1`
			`sub_start_new = inew - sub_length + 1`
			`overlap = _overlap`

			`if sub_length == 0:`
			`# If no common substring is found, we return an insert and delete...`
			`return (old and [('-', old)] or []) + (new and [('+', new)] or [])`
			`else:`
			`# ...otherwise, the common substring is unchanged and we recursively`
			`# diff the text before and after that substring`
			`return diff(old[ : sub_start_old], new[ : sub_start_new]) + \`
			`[('=', new[sub_start_new : sub_start_new + sub_length])] + \`
			`diff(old[sub_start_old + sub_length : ],`
			`new[sub_start_new + sub_length : ])`


			`def string_diff(old, new):`
			`'''`
			`Returns the difference between the old and new strings when split on`
			`whitespace. Considers punctuation a part of the word`

			`This function is intended as an example; you'll probably want`
			`a more sophisticated wrapper in practice.`

			`Params:`
			`old the old string`
			`new the new string`

			`Returns:`
			the output of `diff` on the two strings after splitting them
			`on whitespace (a list of change instructions; see the docstring`
			of `diff`)

			`Examples:`
			`>>> string_diff('The quick brown fox', 'The fast blue fox')`
			`... # doctest: +NORMALIZE_WHITESPACE`
			`[('=', ['The']),`
			`('-', ['quick', 'brown']),`
			`('+', ['fast', 'blue']),`
			`('=', ['fox'])]`

			`'''`
			`return diff(old.split(), new.split())`


			`def html_diff(old, new):`
			`'''`
			`Returns the difference between two strings (as in stringDiff) in`
			`HTML format. HTML code in the strings is NOT escaped, so you`
			`will get weird results if the strings contain HTML.`

			`This function is intended as an example; you'll probably want`
			`a more sophisticated wrapper in practice.`

			`Params:`
			`old the old string`
			`new the new string`

			`Returns:`
			`the output of the diff expressed with HTML <ins> and <del>`
			`tags.`

			`Examples:`
			`>>> html_diff('The quick brown fox', 'The fast blue fox')`
			`'The <del>quick brown</del> <ins>fast blue</ins> fox'`
			`'''`
			`con = {'=': (lambda x: x),`
			`'+': (lambda x: "<ins>" + x + "</ins>"),`
			`'-': (lambda x: "<del>" + x + "</del>")}`
			`return " ".join([(con[a])(" ".join(b)) for a, b in string_diff(old, new)])`


			`def check_diff(old, new):`
			`'''`
			This tests that diffs returned by `diff` are valid. You probably won't
			`want to use this function, but it's provided for documentation and`
			`testing.`

			`A diff should satisfy the property that the old input is equal to the`
			`elements of the result annotated with '-' or '=' concatenated together.`
			`Likewise, the new input is equal to the elements of the result annotated`
			with '+' or '=' concatenated together. This function compares `old`,
			`new`, and the results of `diff(old, new)` to ensure this is true.

			`Tests:`
			`>>> check_diff('ABCBA', 'CBABA')`
			`>>> check_diff('Foobarbaz', 'Foobarbaz')`
			`>>> check_diff('Foobarbaz', 'Boobazbam')`
			`>>> check_diff('The quick brown fox', 'Some quick brown car')`
			`>>> check_diff('A thick red book', 'A quick blue book')`
			`>>> check_diff('dafhjkdashfkhasfjsdafdasfsda', 'asdfaskjfhksahkfjsdha')`
			`>>> check_diff('88288822828828288282828', '88288882882828282882828')`
			`>>> check_diff('1234567890', '24689')`
			`'''`
			`old = list(old)`
			`new = list(new)`
			`result = diff(old, new)`
			`_old = [val for (a, vals) in result if (a in '=-') for val in vals]`
			`assert old == _old, 'Expected %s, got %s' % (old, _old)`
			`_new = [val for (a, vals) in result if (a in '=+') for val in vals]`
			`assert new == _new, 'Expected %s, got %s' % (new, _new)`