404

[ Avaa Bypassed ]




Upload:

Command:

elspacio@13.59.69.58: ~ $
"""
lxml-based doctest output comparison.

Note: normally, you should just import the `lxml.usedoctest` and
`lxml.html.usedoctest` modules from within a doctest, instead of this
one::

    >>> import lxml.usedoctest # for XML output

    >>> import lxml.html.usedoctest # for HTML output

To use this module directly, you must call ``lxmldoctest.install()``,
which will cause doctest to use this in all subsequent calls.

This changes the way output is checked and comparisons are made for
XML or HTML-like content.

XML or HTML content is noticed because the example starts with ``<``
(it's HTML if it starts with ``<html``).  You can also use the
``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.

Some rough wildcard-like things are allowed.  Whitespace is generally
ignored (except in attributes).  In text (attributes and text in the
body) you can use ``...`` as a wildcard.  In an example it also
matches any trailing tags in the element, though it does not match
leading tags.  You may create a tag ``<any>`` or include an ``any``
attribute in the tag.  An ``any`` tag matches any tag, while the
attribute matches any and all attributes.

When a match fails, the reformatted example and the actual output are
displayed (indented), and a rough diff-like output is given.  Anything
marked with ``+`` is in the output but wasn't supposed to be, and
similarly ``-`` means it's in the example but wasn't in the output.

You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
"""

from lxml import etree
import sys
import re
import doctest
try:
    from html import escape as html_escape
except ImportError:
    from cgi import escape as html_escape

# Public names of this module.
__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
           'LHTMLOutputChecker', 'install', 'temp_install']

# ``basestring`` only exists on Python 2; where it is missing, fall back to a
# tuple that is usable as the second argument of isinstance().
try:
    _basestring = basestring
except NameError:
    _basestring = (str, bytes)

# True when running on Python 3 or later; used to pick the attribute names
# (``__func__``/``__code__`` vs. ``im_func``/``func_code``) when patching.
_IS_PYTHON_3 = sys.version_info[0] >= 3

# Custom doctest option flags: force HTML parsing, force XML parsing, or
# disable markup-aware comparison for an example.
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
PARSE_XML = doctest.register_optionflag('PARSE_XML')
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')

# Captured at import time, so it keeps pointing at doctest's own checker class
# even after install() rebinds ``doctest.OutputChecker``.
OutputChecker = doctest.OutputChecker

def strip(v):
    """Return ``v.strip()``, passing ``None`` through unchanged."""
    return None if v is None else v.strip()

def norm_whitespace(v):
    """Replace every match of ``_norm_whitespace_re`` in *v* with one space.

    The pattern matches runs of two or more spaces, tabs or newlines, so a
    single whitespace character on its own is left untouched.
    """
    return _norm_whitespace_re.sub(' ', v)

# Shared parser instance used for all HTML comparisons.
# NOTE(review): per the lxml docs, recover=False should make broken HTML fail
# instead of being repaired, and remove_blank_text=True should drop
# whitespace-only text between tags -- confirm against the lxml version in use.
_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)

def html_fromstring(html):
    """Parse the string *html* with the module's shared HTML parser.

    Returns whatever ``etree.fromstring`` returns for that parser.
    """
    return etree.fromstring(html, _html_parser)

# We use this to distinguish repr()s from elements: it matches text such as
# ``<Foo object at 0x...>`` or ``<Element a at 0x...>``.
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
# Runs of two or more spaces, tabs or newlines (see norm_whitespace()).
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')

class LXMLOutputChecker(OutputChecker):
    """Doctest output checker that compares XML/HTML output structurally.

    When both the expected and the actual output look like markup, they are
    parsed and compared element by element instead of character by character.
    ``...`` acts as a wildcard in text and attribute values, an ``<any>`` tag
    matches any tag and an ``any`` attribute matches any set of attributes.
    Everything else is delegated to the standard doctest checker.
    """

    # HTML elements that are formatted without content and without end tag.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used for markup that was not detected as HTML."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if *got* matches the expected output *want*.

        Falls back to the plain doctest comparison when no markup parser
        applies.  If either side fails to parse, the outputs do not match.
        """
        # NOTE: temp_install() copies this method's code object into the
        # check function of the running doctest.  The body must therefore
        # only use globals that also exist in the ``doctest`` module
        # (``OutputChecker`` and the injected ``etree``) and must not
        # introduce closure variables (e.g. zero-argument ``super()``).
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            # Running as the patched-in clone: act on behalf of the
            # temporarily installed checker.
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Choose the parser for comparing *want* and *got*, or None.

        Precedence: ``NOPARSE_MARKUP`` disables parsing; ``PARSE_HTML`` and
        ``PARSE_XML`` force a parser; otherwise HTML is used when both texts
        start with ``<html`` (case-insensitively), and the default parser
        when both merely look like markup.
        """
        parser = None
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            parser = html_fromstring
        elif PARSE_XML & optionflags:
            parser = etree.XML
        elif (want.strip().lower().startswith('<html')
              # Lower-case the actual output as well, so that ``<HTML>``
              # output is detected the same way as an ``<HTML>`` example.
              and got.strip().lower().startswith('<html')):
            parser = html_fromstring
        elif (self._looks_like_markup(want)
              and self._looks_like_markup(got)):
            parser = self.get_default_parser()
        return parser

    def _looks_like_markup(self, s):
        """Return True if *s* starts with ``<`` but is not an object repr."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare the element *want* against the element *got*.

        Tag, text, tail and attributes must match (honouring wildcards);
        children are compared pairwise in document order.
        """
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        if 'any' not in want.attrib:
            # Without the ``any`` attribute, both elements must carry exactly
            # the same attribute names with matching values.
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(want.attrib[key], got.attrib[key], False):
                    return False
        # An example element whose only content is ``...`` matches any
        # content, so its children are not compared at all.
        if want.text != '...' or len(want):
            want_children = list(want)
            got_children = list(got)
            while want_children or got_children:
                if not want_children or not got_children:
                    # One side has children left over.
                    return False
                want_first = want_children.pop(0)
                got_first = got_children.pop(0)
                if not self.compare_docs(want_first, got_first):
                    return False
                # Stop once the actual output has no children left and the
                # example child just compared is followed by ``...``; any
                # remaining example children are then not compared.
                if not got_children and want_first.tail == '...':
                    break
        return True

    def text_compare(self, want, got, strip):
        """Return True if the text *got* matches the example text *want*.

        ``None`` is treated as the empty string.  If *strip* is true, both
        texts are whitespace-normalised and stripped first.  ``...`` in
        *want* matches any run of characters accepted by regex ``.*``.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        # Build an anchored regex from the literal text and then turn the
        # escaped ellipsis back into a wildcard.
        pattern = '^%s$' % re.escape(want)
        pattern = pattern.replace(r'\.\.\.', '.*')
        return bool(re.search(pattern, got))

    def tag_compare(self, want, got):
        """Return True if the tag *got* matches the example tag *want*.

        ``any`` matches every tag.  Non-string tags are compared for
        equality.  A ``{...}`` namespace prefix in *want* matches any
        namespace, so only the local names are compared.
        """
        if want == 'any':
            return True
        if (not isinstance(want, _basestring)
            or not isinstance(got, _basestring)):
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace
            return want.split('}')[-1] == got.split('}')[-1]
        else:
            return want == got

    def output_difference(self, example, got, optionflags):
        """Return a description of how *got* differs from ``example.want``.

        For markup this is the reformatted example, the reformatted output
        and a rough diff.  If no parser applies or parsing fails, the
        standard doctest difference is returned, preceded by any parse
        errors.
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError:
                # sys.exc_info() keeps this working on old Python versions.
                e = sys.exc_info()[1]
                errors.append('In example: %s' % e)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError:
                e = sys.exc_info()[1]
                errors.append('In actual output: %s' % e)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            else:
                return value
        html = parser is html_fromstring
        diff_parts = ['Expected:',
                      self.format_doc(want_doc, html, 2),
                      'Got:',
                      self.format_doc(got_doc, html, 2),
                      'Diff:',
                      self.collect_diff(want_doc, got_doc, html, 2)]
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """Return True if *el* is an empty HTML tag without any content.

        Always False when *html* is false.
        """
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Return the element *doc* formatted as indented text.

        *html* enables the empty-tag handling, *indent* is the number of
        leading spaces and *prefix* is inserted before the start tag (the
        diff output uses ``+`` and ``-``).  Children are indented by two
        more spaces.
        """
        parts = []
        if not len(doc):
            # No children...
            parts.append(' '*indent)
            parts.append(prefix)
            parts.append(self.format_tag(doc))
            if not self.html_empty_tag(doc, html):
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        # Element with children: start tag, text, children and end tag each
        # go on their own line(s).
        parts.append(' '*indent)
        parts.append(prefix)
        parts.append(self.format_tag(doc))
        if not self.html_empty_tag(doc, html):
            parts.append('\n')
            if strip(doc.text):
                parts.append(' '*indent)
                parts.append(self.format_text(doc.text))
                parts.append('\n')
            for el in doc:
                parts.append(self.format_doc(el, html, indent+2))
            parts.append(' '*indent)
            parts.append(self.format_end_tag(doc))
            parts.append('\n')
        if strip(doc.tail):
            parts.append(' '*indent)
            parts.append(self.format_text(doc.tail))
            parts.append('\n')
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return *text* HTML-escaped (quotes included), '' for ``None``.

        If *strip* is true, surrounding whitespace is removed first.
        """
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, 1)

    def format_tag(self, el):
        """Return the start tag of *el* with its attributes sorted by name.

        Comments are rendered as ``<!--``.
        """
        attrs = []
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        for name, value in sorted(el.attrib.items()):
            attrs.append('%s="%s"' % (name, self.format_text(value, False)))
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Return the end tag of *el* (``-->`` for comments)."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Return a rough, indented diff of the elements *want* and *got*.

        Children are paired up in order; surplus children of the output are
        prefixed with ``+`` and surplus children of the example with ``-``.
        """
        parts = []
        if not len(want) and not len(got):
            # Neither side has children: render everything on one line.
            parts.append(' '*indent)
            parts.append(self.collect_diff_tag(want, got))
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(self.collect_diff_tag(want, got))
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.text, got.text))
            parts.append('\n')
        want_children = list(want)
        got_children = list(got)
        while want_children or got_children:
            if not want_children:
                parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+'))
                continue
            if not got_children:
                parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-'))
                continue
            parts.append(self.collect_diff(
                want_children.pop(0), got_children.pop(0), html, indent+2))
        parts.append(' '*indent)
        parts.append(self.collect_diff_end_tag(want, got))
        parts.append('\n')
        if strip(want.tail) or strip(got.tail):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Return the start tag for the diff of *want* and *got*.

        A tag mismatch is shown as ``want (got: got)``.  Attributes only in
        the output are prefixed with ``+``, attributes only in the example
        with ``-``; both are suppressed when the example allows any
        attributes.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        # Named so that it does not shadow the builtin any().
        match_any_attrs = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name not in want.attrib and not match_any_attrs:
                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
            else:
                if name in want.attrib:
                    text = self.collect_diff_text(want.attrib[name], value, False)
                else:
                    text = self.format_text(value, False)
                attrs.append('%s="%s"' % (name, text))
        if not match_any_attrs:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            tag = '<%s %s>' % (tag, ' '.join(attrs))
        else:
            tag = '<%s>' % tag
        return tag

    def collect_diff_end_tag(self, want, got):
        """Return the end tag for the diff of *want* and *got*.

        Uses the same wildcard-aware tag comparison as the start tag, so an
        ``<any>`` or ``{...}name`` example tag that matches is not reported
        as a mismatch on the end tag.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Return the escaped text for the diff of *want* and *got*.

        Matching text is shown as is; a mismatch as ``want (got: got)``.
        """
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)

class LHTMLOutputChecker(LXMLOutputChecker):
    """Output checker whose default parser is the HTML parser."""
    def get_default_parser(self):
        """Return the HTML parsing function as the default parser."""
        return html_fromstring
    
def install(html=False):
    """
    Make doctest use the markup-aware checker from now on.

    Rebinds ``doctest.OutputChecker``: to the HTML flavour if html is
    true, to the XML flavour otherwise.
    """
    doctest.OutputChecker = LHTMLOutputChecker if html else LXMLOutputChecker

def temp_install(html=False, del_module=None):
    """
    Enable this checker for the currently running doctest only; call it
    *inside* a doctest.

    The HTML parser is the default if html is true, the XML parser
    otherwise.  ``del_module`` is handed on to ``_RestoreChecker``, which
    removes that module from ``sys.modules`` after the doctest has run.
    """
    checker_class = LHTMLOutputChecker if html else LXMLOutputChecker
    frame = _find_doctest_frame()
    runner = frame.f_locals['self']
    new_checker = checker_class()
    previous_checker = runner._checker
    runner._checker = new_checker
    # The function running the doctests keeps a local variable 'check',
    # a bound method of the output checker.  Frames cannot be modified,
    # so the function object behind that method is patched in place by
    # swapping its code.  The swap is undone when __record_outcome runs,
    # which marks the end of the __run method.
    func_attr = '__func__' if _IS_PYTHON_3 else 'im_func'
    check_func = getattr(frame.f_locals['check'], func_attr)
    checker_check_func = getattr(new_checker.check_output, func_attr)
    # The function's globals cannot be patched; 'etree' is the only
    # global of check_output that has to be made available to doctest.
    doctest.etree = etree
    _RestoreChecker(runner, previous_checker, new_checker,
                    check_func, checker_check_func,
                    del_module)

class _RestoreChecker(object):
    """Patch a running doctest's checker and undo the patch afterwards.

    On creation, the code of ``check_func`` is replaced by the code of
    ``clone_func`` and the instance installs itself as the runner's
    ``__record_outcome``.  When the runner calls it, everything is restored
    and the call is forwarded to the original ``__record_outcome``.
    """
    def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
                 del_module):
        """Store the participants and install both patches.

        dt_self: the object whose ``_DocTestRunner__record_outcome`` is hooked.
        old_checker: checker that receives the temporary override attributes.
        new_checker: checker the patched code acts on behalf of.
        check_func: function whose code object is replaced.
        clone_func: function providing the replacement code object.
        del_module: name of a module to drop from ``sys.modules`` at the end,
            or a false value to skip that step.
        """
        self.dt_self = dt_self
        self.checker = old_checker
        # These two attributes are read by the patched-in check_output code.
        self.checker._temp_call_super_check_output = self.call_super
        self.checker._temp_override_self = new_checker
        self.check_func = check_func
        self.clone_func = clone_func
        self.del_module = del_module
        self.install_clone()
        self.install_dt_self()
    def install_clone(self):
        """Remember check_func's code and globals, then swap in the clone's code."""
        if _IS_PYTHON_3:
            self.func_code = self.check_func.__code__
            self.func_globals = self.check_func.__globals__
            self.check_func.__code__ = self.clone_func.__code__
        else:
            self.func_code = self.check_func.func_code
            self.func_globals = self.check_func.func_globals
            self.check_func.func_code = self.clone_func.func_code
    def uninstall_clone(self):
        """Put the remembered code object back onto check_func."""
        if _IS_PYTHON_3:
            self.check_func.__code__ = self.func_code
        else:
            self.check_func.func_code = self.func_code
    def install_dt_self(self):
        """Replace the runner's ``__record_outcome`` with this instance."""
        self.prev_func = self.dt_self._DocTestRunner__record_outcome
        self.dt_self._DocTestRunner__record_outcome = self
    def uninstall_dt_self(self):
        """Restore the runner's previous ``__record_outcome``."""
        self.dt_self._DocTestRunner__record_outcome = self.prev_func
    def uninstall_module(self):
        """Remove ``del_module`` from ``sys.modules`` and from its package.

        Does nothing if ``del_module`` is false.
        """
        if self.del_module:
            import sys
            del sys.modules[self.del_module]
            if '.' in self.del_module:
                # Also drop the submodule attribute from the parent package.
                package, module = self.del_module.rsplit('.', 1)
                package_mod = sys.modules[package]
                delattr(package_mod, module)
    def __call__(self, *args, **kw):
        """Undo all patches, then forward to the original ``__record_outcome``."""
        self.uninstall_clone()
        self.uninstall_dt_self()
        del self.checker._temp_override_self
        del self.checker._temp_call_super_check_output
        result = self.prev_func(*args, **kw)
        self.uninstall_module()
        return result
    def call_super(self, *args, **kw):
        """Call check_func with its original code, then re-install the clone."""
        self.uninstall_clone()
        try:
            return self.check_func(*args, **kw)
        finally:
            # Re-apply the patch even if the original check raised.
            self.install_clone()
            
def _find_doctest_frame():
    """Return the nearest calling frame that has a local named ``BOOM``.

    The search starts at the caller's frame and walks outwards.  Raises
    LookupError if no such frame exists.
    """
    import sys
    current = sys._getframe(1)
    while current:
        # Sign of doctest
        if 'BOOM' in current.f_locals:
            return current
        current = current.f_back
    raise LookupError(
        "Could not find doctest (only use this function *inside* a doctest)")
    
# Self-test doctests picked up by doctest.testmod().  The examples call
# print() with parentheses so that they are valid on Python 2 and Python 3.
__test__ = {
    'basic': '''
    >>> temp_install()
    >>> print("""<xml a="1" b="2">stuff</xml>""")
    <xml b="2" a="1">...</xml>
    >>> print("""<xml xmlns="http://example.com"><tag   attr="bar"   /></xml>""")
    <xml xmlns="...">
      <tag attr="..." />
    </xml>
    >>> print("""<xml>blahblahblah<foo /></xml>""") # doctest: +NOPARSE_MARKUP, +ELLIPSIS
    <xml>...foo /></xml>
    '''}

if __name__ == '__main__':
    import doctest
    doctest.testmod()
    
    

Filemanager

Name Type Size Permission Actions
__pycache__ Folder 0755
html Folder 0755
includes Folder 0755
isoschematron Folder 0755
ElementInclude.py File 8.36 KB 0644
__init__.py File 575 B 0644
_elementpath.cpython-311-x86_64-linux-gnu.so File 225.27 KB 0755
_elementpath.py File 10.49 KB 0644
apihelpers.pxi File 62.94 KB 0644
builder.cpython-311-x86_64-linux-gnu.so File 120.73 KB 0755
builder.py File 7.96 KB 0644
classlookup.pxi File 21.94 KB 0644
cleanup.pxi File 8.26 KB 0644
cssselect.py File 3.29 KB 0644
debug.pxi File 3.21 KB 0644
docloader.pxi File 5.65 KB 0644
doctestcompare.py File 17.91 KB 0644
dtd.pxi File 14.86 KB 0644
etree.cpython-311-x86_64-linux-gnu.so File 5.53 MB 0755
etree.h File 8.37 KB 0644
etree.pyx File 129.31 KB 0644
etree_api.h File 17.06 KB 0644
extensions.pxi File 32.46 KB 0644
iterparse.pxi File 16.22 KB 0644
lxml.etree.h File 8.37 KB 0644
lxml.etree_api.h File 17.06 KB 0644
nsclasses.pxi File 8.93 KB 0644
objectify.cpython-311-x86_64-linux-gnu.so File 3.2 MB 0755
objectify.pyx File 75.29 KB 0644
objectpath.pxi File 11.21 KB 0644
parser.pxi File 76.41 KB 0644
parsertarget.pxi File 6.7 KB 0644
proxy.pxi File 23.01 KB 0644
public-api.pxi File 6.5 KB 0644
pyclasslookup.py File 92 B 0644
readonlytree.pxi File 18.6 KB 0644
relaxng.pxi File 5.94 KB 0644
sax.cpython-311-x86_64-linux-gnu.so File 196.95 KB 0755
sax.py File 9.18 KB 0644
saxparser.pxi File 31.78 KB 0644
schematron.pxi File 5.65 KB 0644
serializer.pxi File 66.41 KB 0644
usedoctest.py File 230 B 0644
xinclude.pxi File 2.4 KB 0644
xmlerror.pxi File 48.37 KB 0644
xmlid.pxi File 5.92 KB 0644
xmlschema.pxi File 7.89 KB 0644
xpath.pxi File 19.11 KB 0644
xslt.pxi File 35.83 KB 0644
xsltext.pxi File 10.83 KB 0644