# Source code for superdiff.differ

import difflib
import itertools
from typing import Iterable, Tuple

from .parser import Parser


class Differ:
    r'''
    This class can be used to flexibly compare two pieces of text and return
    diff information in a variety of formats.

    Definitions of common terms used:

    - non-newline whitespace: Tabs and spaces
    - newline: Any of the following line endings: ``\n``, ``\r``, or ``\r\n``
    - whitespace: A combination of newlines and non-newline whitespace
    - empty line: A line consisting of only whitespace
    '''

    def __init__(self,
                 ignore_case: bool = False,
                 ignore_non_newline_whitespace: bool = False,
                 ignore_non_newline_whitespace_changes: bool = False,
                 ignore_newline_changes: bool = False,
                 ignore_blank_lines: bool = False,
                 ignore_leading_whitespace: bool = False,
                 ignore_trailing_whitespace: bool = False) -> None:
        r'''
        Create a differ whose comparison rules are set by the given flags.

        All flags are forwarded unchanged to the :class:`Parser` that
        :meth:`compare` uses to parse both texts before comparing them.

        :param ignore_case: Ignore case differences between the two texts.
        :param ignore_non_newline_whitespace: Completely ignore differences
            in non-newline whitespace.
        :param ignore_non_newline_whitespace_changes: Treat consecutive
            sequences of non-newline whitespace as equal. For example, when
            this option is True, a single space, two spaces, a tab character,
            and a mix of tabs and spaces will be considered equal.
        :param ignore_newline_changes: Treat consecutive sequences of newline
            characters as equal. For example, when this option is True,
            ``\r``, ``\r\r\n``, and ``\n\n`` will all be considered equal.
        :param ignore_blank_lines: Ignore lines consisting of only
            whitespace.
        :param ignore_leading_whitespace: Ignore whitespace characters at the
            beginning of lines. Note that this will cause empty lines to be
            treated as the empty string.
        :param ignore_trailing_whitespace: Ignore whitespace characters at
            the end of lines. Note that this will cause empty lines to be
            treated as the empty string.
        '''
        self._parser = Parser(
            ignore_case=ignore_case,
            ignore_non_newline_whitespace=ignore_non_newline_whitespace,
            ignore_non_newline_whitespace_changes=ignore_non_newline_whitespace_changes,
            ignore_newline_changes=ignore_newline_changes,
            ignore_blank_lines=ignore_blank_lines,
            ignore_leading_whitespace=ignore_leading_whitespace,
            ignore_trailing_whitespace=ignore_trailing_whitespace,
        )

    def compare(self, first: str, second: str) -> Iterable[Tuple[str, str, str]]:
        '''
        Performs a line-by-line comparison of the strings first and second
        and returns a sequence of ``(tag, left, right)`` tuples specifying
        the differences between the strings.

        ``tag`` can be any of the values of "tag" used in
        https://docs.python.org/3.5/library/difflib.html#difflib.SequenceMatcher.get_opcodes
        and have the same meanings. ``left`` and ``right`` are the
        ``original_text`` of the parsed lines covered by the opcode, paired
        up positionally; when one side of an opcode has fewer lines than the
        other, the missing entries are the empty string.

        Equality is decided on the parsed lines, so texts that differ only in
        ways the configured parser ignores are reported as equal.

        :param first: The text shown on the left side of the comparison.
        :param second: The text shown on the right side of the comparison.
        :return: A tuple of ``(tag, left, right)`` tuples covering every
            opcode, or an empty tuple if the two strings are equal.
        '''
        parsed_first = self._parser.parse(first)
        parsed_second = self._parser.parse(second)
        matcher = difflib.SequenceMatcher(a=parsed_first, b=parsed_second)

        # Collect into one flat list. Re-wrapping the running result in a new
        # itertools.chain per opcode would nest one iterator layer per opcode.
        result = []
        sequences_equal = True
        for tag, first_start, first_end, second_start, second_end in matcher.get_opcodes():
            if tag != 'equal':
                sequences_equal = False
            lefts = (line.original_text
                     for line in parsed_first[first_start:first_end])
            rights = (line.original_text
                      for line in parsed_second[second_start:second_end])
            # zip_longest pads the shorter side so 'insert' and 'delete'
            # opcodes (and uneven 'replace' opcodes) still yield full pairs.
            result.extend(
                (tag, left, right)
                for left, right in itertools.zip_longest(lefts, rights, fillvalue='')
            )

        # 'equal' opcodes are only reported alongside at least one difference.
        if sequences_equal:
            return tuple()
        return tuple(result)