diff --git a/SickBeard.py b/SickBeard.py index 3ef924a4648d183631de3cf1f51c6ed329b889dc..1f4bf18be5e2358d56041b17e4d0396098380cb7 100755 --- a/SickBeard.py +++ b/SickBeard.py @@ -456,7 +456,7 @@ class SickRage(object): # shutdown web server if self.web_server: logger.log('Shutting down Tornado') - self.web_server.shutDown() + self.web_server.shutdown() try: self.web_server.join(10) @@ -497,8 +497,8 @@ class SickRage(object): def force_update(): """ Forces SickRage to update to the latest version and exit. - - :return: True if successful, False otherwise + + :return: True if successful, False otherwise """ def update_with_git(): diff --git a/lib/MultipartPostHandler.py b/lib/MultipartPostHandler.py deleted file mode 100644 index 284500202dd02664511919e3e922c3de56d0cc14..0000000000000000000000000000000000000000 --- a/lib/MultipartPostHandler.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/python - -#### -# 06/2010 Nic Wolfe <nic@wolfeden.ca> -# 02/2006 Will Holcomb <wholcomb@gmail.com> -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# - -import urllib -import urllib2 -import mimetools, mimetypes -import os, sys - -# Controls how sequences are uncoded. If true, elements may be given multiple values by -# assigning a sequence. 
-doseq = 1 - -class MultipartPostHandler(urllib2.BaseHandler): - handler_order = urllib2.HTTPHandler.handler_order - 10 # needs to run first - - def http_request(self, request): - data = request.get_data() - if data is not None and type(data) != str: - v_files = [] - v_vars = [] - try: - for(key, value) in data.items(): - if type(value) in (file, list, tuple): - v_files.append((key, value)) - else: - v_vars.append((key, value)) - except TypeError: - systype, value, traceback = sys.exc_info() - raise TypeError, "not a valid non-string sequence or mapping object", traceback - - if len(v_files) == 0: - data = urllib.urlencode(v_vars, doseq) - else: - boundary, data = MultipartPostHandler.multipart_encode(v_vars, v_files) - contenttype = 'multipart/form-data; boundary=%s' % boundary - if(request.has_header('Content-Type') - and request.get_header('Content-Type').find('multipart/form-data') != 0): - print "Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data') - request.add_unredirected_header('Content-Type', contenttype) - - request.add_data(data) - return request - - @staticmethod - def multipart_encode(vars, files, boundary = None, buffer = None): - if boundary is None: - boundary = mimetools.choose_boundary() - if buffer is None: - buffer = '' - for(key, value) in vars: - buffer += '--%s\r\n' % boundary - buffer += 'Content-Disposition: form-data; name="%s"' % key - buffer += '\r\n\r\n' + value + '\r\n' - for(key, fd) in files: - - # allow them to pass in a file or a tuple with name & data - if type(fd) == file: - name_in = fd.name - fd.seek(0) - data_in = fd.read() - elif type(fd) in (tuple, list): - name_in, data_in = fd - - filename = os.path.basename(name_in) - contenttype = mimetypes.guess_type(filename)[0] or 'application/octet-stream' - buffer += '--%s\r\n' % boundary - buffer += 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename) - buffer += 'Content-Type: %s\r\n' % contenttype - # buffer += 
'Content-Length: %s\r\n' % file_size - buffer += '\r\n' + data_in + '\r\n' - buffer += '--%s--\r\n\r\n' % boundary - return boundary, buffer - - https_request = http_request - diff --git a/lib/README.md b/lib/README.md new file mode 100644 index 0000000000000000000000000000000000000000..abcf2bd20206a01791779d910512419a073c3c01 --- /dev/null +++ b/lib/README.md @@ -0,0 +1,32 @@ +Libraries directory +====================== + +Vendored python packages and custom libraries go in this folder. + +Keep this list updated with installed versions and their dependencies,<br/> +and ordered by the top-level library name. + +Adding a new package +--------- +The best practice is to install the package within a Python **virtual environment** (using `virtualenv`),<br/> +then use `pipdeptree -p PACKAGE` to get a list of the package (+dependencies) versions.<br/> +Add the output to the list below to the appropriate location (based on the top-level package name) + +*** + +Packages List +========= +``` +beautifulsoup4==4.5.3 +html5lib==0.999999999 + - six [required: Any, installed: 1.10.0] + - webencodings [required: Any, installed: 0.5.1] +Mako==1.0.6 + - MarkupSafe [required: >=0.9.2, installed: 1.0] +tornado==4.5.1 + - backports-abc [required: >=0.4, installed: 0.5] + - backports.ssl-match-hostname [required: Any, installed: 3.5.0.1] + - certifi [required: Any, installed: 2017.4.17] + - singledispatch [required: Any, installed: 3.4.0.3] + - six [required: Any, installed: 1.10.0] +``` diff --git a/lib/backports/ssl_match_hostname/__init__.py b/lib/backports/ssl_match_hostname/__init__.py index 34f248f336d55d00b2c3f101c75879cc5cc95c46..06538ec6899adc761df2952cf9f2a01c3fe73aca 100644 --- a/lib/backports/ssl_match_hostname/__init__.py +++ b/lib/backports/ssl_match_hostname/__init__.py @@ -1,8 +1,20 @@ """The match_hostname() function from Python 3.3.3, essential when using SSL.""" import re +import sys + +# ipaddress has been backported to 2.6+ in pypi. 
If it is installed on the +# system, use it to handle IPAddress ServerAltnames (this was added in +# python-3.5) otherwise only do DNS matching. This allows +# backports.ssl_match_hostname to continue to be used all the way back to +# python-2.4. +try: + import ipaddress +except ImportError: + ipaddress = None + +__version__ = '3.5.0.1' -__version__ = '3.4.0.2' class CertificateError(ValueError): pass @@ -61,6 +73,23 @@ def _dnsname_match(dn, hostname, max_wildcards=1): return pat.match(hostname) +def _to_unicode(obj): + if isinstance(obj, str) and sys.version_info < (3,): + obj = unicode(obj, encoding='ascii', errors='strict') + return obj + +def _ipaddress_match(ipname, host_ip): + """Exact matching of IP addresses. + + RFC 6125 explicitly doesn't define an algorithm for this + (section 1.7.2 - "Out of Scope"). + """ + # OpenSSL may add a trailing newline to a subjectAltName's IP address + # Divergence from upstream: ipaddress can't handle byte str + ip = ipaddress.ip_address(_to_unicode(ipname).rstrip()) + return ip == host_ip + + def match_hostname(cert, hostname): """Verify that *cert* (in decoded format as returned by SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 @@ -70,12 +99,35 @@ def match_hostname(cert, hostname): returns nothing. """ if not cert: - raise ValueError("empty or no certificate") + raise ValueError("empty or no certificate, match_hostname needs a " + "SSL socket or SSL context with either " + "CERT_OPTIONAL or CERT_REQUIRED") + try: + # Divergence from upstream: ipaddress can't handle byte str + host_ip = ipaddress.ip_address(_to_unicode(hostname)) + except ValueError: + # Not an IP address (common case) + host_ip = None + except UnicodeError: + # Divergence from upstream: Have to deal with ipaddress not taking + # byte strings. 
addresses should be all ascii, so we consider it not + # an ipaddress in this case + host_ip = None + except AttributeError: + # Divergence from upstream: Make ipaddress library optional + if ipaddress is None: + host_ip = None + else: + raise dnsnames = [] san = cert.get('subjectAltName', ()) for key, value in san: if key == 'DNS': - if _dnsname_match(value, hostname): + if host_ip is None and _dnsname_match(value, hostname): + return + dnsnames.append(value) + elif key == 'IP Address': + if host_ip is not None and _ipaddress_match(value, host_ip): return dnsnames.append(value) if not dnsnames: diff --git a/lib/backports_abc.py b/lib/backports_abc.py index c48b7b0d54e63557956a395f76d28d27943fc718..da4cb3298328bdc24783d7b9887b5e0c149bfe3d 100644 --- a/lib/backports_abc.py +++ b/lib/backports_abc.py @@ -21,6 +21,20 @@ except ImportError: import collections as _collections_abc +def get_mro(cls): + try: + return cls.__mro__ + except AttributeError: + return old_style_mro(cls) + + +def old_style_mro(cls): + yield cls + for base in cls.__bases__: + for c in old_style_mro(base): + yield c + + def mk_gen(): from abc import abstractmethod @@ -63,7 +77,7 @@ def mk_gen(): @classmethod def __subclasshook__(cls, C): if cls is Generator: - mro = C.__mro__ + mro = get_mro(C) for method in required_methods: for base in mro: if method in base.__dict__: @@ -88,7 +102,7 @@ def mk_awaitable(): @classmethod def __subclasshook__(cls, C): if cls is Awaitable: - for B in C.__mro__: + for B in get_mro(C): if '__await__' in B.__dict__: if B.__dict__['__await__']: return True @@ -144,7 +158,7 @@ def mk_coroutine(): @classmethod def __subclasshook__(cls, C): if cls is Coroutine: - mro = C.__mro__ + mro = get_mro(C) for method in ('__await__', 'send', 'throw', 'close'): for base in mro: if method in base.__dict__: diff --git a/lib/bs4/__init__.py b/lib/bs4/__init__.py index f3dd75573ad383b891da4104792c369295a17709..46caac049263938325fcad3a11e77d340925e9d7 100644 --- a/lib/bs4/__init__.py +++ 
b/lib/bs4/__init__.py @@ -5,26 +5,31 @@ http://www.crummy.com/software/BeautifulSoup/ Beautiful Soup uses a pluggable XML or HTML parser to parse a (possibly invalid) document into a tree representation. Beautiful Soup -provides provides methods and Pythonic idioms that make it easy to -navigate, search, and modify the parse tree. +provides methods and Pythonic idioms that make it easy to navigate, +search, and modify the parse tree. -Beautiful Soup works with Python 2.6 and up. It works better if lxml +Beautiful Soup works with Python 2.7 and up. It works better if lxml and/or html5lib is installed. For more than you ever wanted to know about Beautiful Soup, see the documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ + """ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.4.1" -__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson" +__version__ = "4.5.3" +__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson" __license__ = "MIT" __all__ = ['BeautifulSoup'] import os import re +import traceback import warnings from .builder import builder_registry, ParserRejectedMarkup @@ -77,7 +82,7 @@ class BeautifulSoup(Tag): ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' - NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). 
This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, @@ -137,6 +142,10 @@ class BeautifulSoup(Tag): from_encoding = from_encoding or deprecated_argument( "fromEncoding", "from_encoding") + if from_encoding and isinstance(markup, unicode): + warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") + from_encoding = None + if len(kwargs) > 0: arg = kwargs.keys().pop() raise TypeError( @@ -161,19 +170,29 @@ class BeautifulSoup(Tag): markup_type = "XML" else: markup_type = "HTML" + + caller = traceback.extract_stack()[0] + filename = caller[0] + line_number = caller[1] warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( + filename=filename, + line_number=line_number, parser=builder.NAME, markup_type=markup_type)) self.builder = builder self.is_xml = builder.is_xml + self.known_xml = self.is_xml self.builder.soup = self self.parse_only = parse_only if hasattr(markup, 'read'): # It's a file-type object. markup = markup.read() - elif len(markup) <= 256: + elif len(markup) <= 256 and ( + (isinstance(markup, bytes) and not b'<' in markup) + or (isinstance(markup, unicode) and not u'<' in markup) + ): # Print out warnings for a couple beginner problems # involving passing non-markup to Beautiful Soup. # Beautiful Soup will still parse the input as markup, @@ -195,16 +214,10 @@ class BeautifulSoup(Tag): if isinstance(markup, unicode): markup = markup.encode("utf8") warnings.warn( - '"%s" looks like a filename, not markup. 
You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) - if markup[:5] == "http:" or markup[:6] == "https:": - # TODO: This is ugly but I couldn't get it to work in - # Python 3 otherwise. - if ((isinstance(markup, bytes) and not b' ' in markup) - or (isinstance(markup, unicode) and not u' ' in markup)): - if isinstance(markup, unicode): - markup = markup.encode("utf8") - warnings.warn( - '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) + '"%s" looks like a filename, not markup. You should' + 'probably open this file and pass the filehandle into' + 'Beautiful Soup.' % markup) + self._check_markup_is_url(markup) for (self.markup, self.original_encoding, self.declared_html_encoding, self.contains_replacement_characters) in ( @@ -223,15 +236,52 @@ class BeautifulSoup(Tag): self.builder.soup = None def __copy__(self): - return type(self)(self.encode(), builder=self.builder) + copy = type(self)( + self.encode('utf-8'), builder=self.builder, from_encoding='utf-8' + ) + + # Although we encoded the tree to UTF-8, that may not have + # been the encoding of the original markup. Set the copy's + # .original_encoding to reflect the original object's + # .original_encoding. + copy.original_encoding = self.original_encoding + return copy def __getstate__(self): # Frequently a tree builder can't be pickled. d = dict(self.__dict__) if 'builder' in d and not self.builder.picklable: - del d['builder'] + d['builder'] = None return d + @staticmethod + def _check_markup_is_url(markup): + """ + Check if markup looks like it's actually a url and raise a warning + if so. Markup can be unicode or str (py2) / bytes (py3). 
+ """ + if isinstance(markup, bytes): + space = b' ' + cant_start_with = (b"http:", b"https:") + elif isinstance(markup, unicode): + space = u' ' + cant_start_with = (u"http:", u"https:") + else: + return + + if any(markup.startswith(prefix) for prefix in cant_start_with): + if not space in markup: + if isinstance(markup, bytes): + decoded_markup = markup.decode('utf-8', 'replace') + else: + decoded_markup = markup + warnings.warn( + '"%s" looks like a URL. Beautiful Soup is not an' + ' HTTP client. You should probably use an HTTP client like' + ' requests to get the document behind the URL, and feed' + ' that document to Beautiful Soup.' % decoded_markup + ) + def _feed(self): # Convert the document to Unicode. self.builder.reset() @@ -335,7 +385,18 @@ class BeautifulSoup(Tag): if parent.next_sibling: # This node is being inserted into an element that has # already been parsed. Deal with any dangling references. - index = parent.contents.index(o) + index = len(parent.contents)-1 + while index >= 0: + if parent.contents[index] is o: + break + index -= 1 + else: + raise ValueError( + "Error building tree: supposedly %r was inserted " + "into %r after the fact, but I don't see it!" % ( + o, parent + ) + ) if index == 0: previous_element = parent previous_sibling = None @@ -387,7 +448,7 @@ class BeautifulSoup(Tag): """Push a start tag on to the stack. If this method returns None, the tag was rejected by the - SoupStrainer. You should proceed as if the tag had not occured + SoupStrainer. You should proceed as if the tag had not occurred in the document. For instance, if this was a self-closing tag, don't call handle_endtag. 
""" diff --git a/lib/bs4/builder/__init__.py b/lib/bs4/builder/__init__.py index f8fce5681c27aea87ce74fb11a978231886f3269..601979bf4e06ea12749f88270a37bc3534ce18a4 100644 --- a/lib/bs4/builder/__init__.py +++ b/lib/bs4/builder/__init__.py @@ -1,9 +1,13 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + from collections import defaultdict import itertools import sys from bs4.element import ( CharsetMetaAttributeValue, ContentMetaAttributeValue, + HTMLAwareEntitySubstitution, whitespace_re ) @@ -227,7 +231,7 @@ class HTMLTreeBuilder(TreeBuilder): Such as which tags are empty-element tags. """ - preserve_whitespace_tags = set(['pre', 'textarea']) + preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta', 'spacer', 'link', 'frame', 'base']) diff --git a/lib/bs4/builder/_html5lib.py b/lib/bs4/builder/_html5lib.py index 8725a65885b28f3973f098a61afa11623d3aa987..5f548935835ec8b720efcf76958a6d3d09fd93af 100644 --- a/lib/bs4/builder/_html5lib.py +++ b/lib/bs4/builder/_html5lib.py @@ -1,9 +1,12 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ __all__ = [ 'HTML5TreeBuilder', ] -from pdb import set_trace import warnings +import re from bs4.builder import ( PERMISSIVE, HTML, @@ -15,7 +18,10 @@ from bs4.element import ( whitespace_re, ) import html5lib -from html5lib.constants import namespaces +from html5lib.constants import ( + namespaces, + prefixes, + ) from bs4.element import ( Comment, Doctype, @@ -23,6 +29,15 @@ from bs4.element import ( Tag, ) +try: + # Pre-0.99999999 + from html5lib.treebuilders import _base as treebuilder_base + new_html5lib = False +except ImportError, e: + # 0.99999999 and up + from html5lib.treebuilders import base as treebuilder_base + new_html5lib = True + class HTML5TreeBuilder(HTMLTreeBuilder): """Use html5lib to build a tree.""" @@ -47,7 +62,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder): if self.soup.parse_only is not None: warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.") parser = html5lib.HTMLParser(tree=self.create_treebuilder) - doc = parser.parse(markup, encoding=self.user_specified_encoding) + + extra_kwargs = dict() + if not isinstance(markup, unicode): + if new_html5lib: + extra_kwargs['override_encoding'] = self.user_specified_encoding + else: + extra_kwargs['encoding'] = self.user_specified_encoding + doc = parser.parse(markup, **extra_kwargs) # Set the character encoding detected by the tokenizer. if isinstance(markup, unicode): @@ -55,11 +77,17 @@ class HTML5TreeBuilder(HTMLTreeBuilder): # charEncoding to UTF-8 if it gets Unicode input. doc.original_encoding = None else: - doc.original_encoding = parser.tokenizer.stream.charEncoding[0] + original_encoding = parser.tokenizer.stream.charEncoding[0] + if not isinstance(original_encoding, basestring): + # In 0.99999999 and up, the encoding is an html5lib + # Encoding object. We want to use a string for compatibility + # with other tree builders. 
+ original_encoding = original_encoding.name + doc.original_encoding = original_encoding def create_treebuilder(self, namespaceHTMLElements): self.underlying_builder = TreeBuilderForHtml5lib( - self.soup, namespaceHTMLElements) + namespaceHTMLElements, self.soup) return self.underlying_builder def test_fragment_to_document(self, fragment): @@ -67,10 +95,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder): return u'<html><head></head><body>%s</body></html>' % fragment -class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): +class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): - def __init__(self, soup, namespaceHTMLElements): - self.soup = soup + def __init__(self, namespaceHTMLElements, soup=None): + if soup: + self.soup = soup + else: + from bs4 import BeautifulSoup + self.soup = BeautifulSoup("", "html.parser") super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements) def documentClass(self): @@ -93,7 +125,8 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): return TextNode(Comment(data), self.soup) def fragmentClass(self): - self.soup = BeautifulSoup("") + from bs4 import BeautifulSoup + self.soup = BeautifulSoup("", "html.parser") self.soup.name = "[document_fragment]" return Element(self.soup, self.soup, None) @@ -105,7 +138,57 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): return self.soup def getFragment(self): - return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element + return treebuilder_base.TreeBuilder.getFragment(self).element + + def testSerializer(self, element): + from bs4 import BeautifulSoup + rv = [] + doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$') + + def serializeElement(element, indent=0): + if isinstance(element, BeautifulSoup): + pass + if isinstance(element, Doctype): + m = doctype_re.match(element) + if m: + name = m.group(1) + if m.lastindex > 1: + publicId = m.group(2) or "" + systemId = m.group(3) or 
m.group(4) or "" + rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" % + (' ' * indent, name, publicId, systemId)) + else: + rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name)) + else: + rv.append("|%s<!DOCTYPE >" % (' ' * indent,)) + elif isinstance(element, Comment): + rv.append("|%s<!-- %s -->" % (' ' * indent, element)) + elif isinstance(element, NavigableString): + rv.append("|%s\"%s\"" % (' ' * indent, element)) + else: + if element.namespace: + name = "%s %s" % (prefixes[element.namespace], + element.name) + else: + name = element.name + rv.append("|%s<%s>" % (' ' * indent, name)) + if element.attrs: + attributes = [] + for name, value in element.attrs.items(): + if isinstance(name, NamespacedAttribute): + name = "%s %s" % (prefixes[name.namespace], name.name) + if isinstance(value, list): + value = " ".join(value) + attributes.append((name, value)) + + for name, value in sorted(attributes): + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + indent += 2 + for child in element.children: + serializeElement(child, indent) + serializeElement(element, 0) + + return "\n".join(rv) class AttrList(object): def __init__(self, element): @@ -137,9 +220,9 @@ class AttrList(object): return name in list(self.attrs.keys()) -class Element(html5lib.treebuilders._base.Node): +class Element(treebuilder_base.Node): def __init__(self, element, soup, namespace): - html5lib.treebuilders._base.Node.__init__(self, element.name) + treebuilder_base.Node.__init__(self, element.name) self.element = element self.soup = soup self.namespace = namespace @@ -158,8 +241,10 @@ class Element(html5lib.treebuilders._base.Node): child = node elif node.element.__class__ == NavigableString: string_child = child = node.element + node.parent = self else: child = node.element + node.parent = self if not isinstance(child, basestring) and child.parent is not None: node.element.extract() @@ -197,6 +282,8 @@ class Element(html5lib.treebuilders._base.Node): most_recent_element=most_recent_element) def 
getAttributes(self): + if isinstance(self.element, Comment): + return {} return AttrList(self.element) def setAttributes(self, attributes): @@ -224,11 +311,11 @@ class Element(html5lib.treebuilders._base.Node): attributes = property(getAttributes, setAttributes) def insertText(self, data, insertBefore=None): + text = TextNode(self.soup.new_string(data), self.soup) if insertBefore: - text = TextNode(self.soup.new_string(data), self.soup) - self.insertBefore(data, insertBefore) + self.insertBefore(text, insertBefore) else: - self.appendChild(data) + self.appendChild(text) def insertBefore(self, node, refNode): index = self.element.index(refNode.element) @@ -250,6 +337,7 @@ class Element(html5lib.treebuilders._base.Node): # print "MOVE", self.element.contents # print "FROM", self.element # print "TO", new_parent.element + element = self.element new_parent_element = new_parent.element # Determine what this tag's next_element will be once all the children @@ -268,7 +356,6 @@ class Element(html5lib.treebuilders._base.Node): new_parents_last_descendant_next_element = new_parent_element.next_element to_append = element.contents - append_after = new_parent_element.contents if len(to_append) > 0: # Set the first child's previous_element and previous_sibling # to elements within the new parent @@ -285,12 +372,19 @@ class Element(html5lib.treebuilders._base.Node): if new_parents_last_child: new_parents_last_child.next_sibling = first_child - # Fix the last child's next_element and next_sibling - last_child = to_append[-1] - last_child.next_element = new_parents_last_descendant_next_element + # Find the very last element being moved. It is now the + # parent's last descendant. It has no .next_sibling and + # its .next_element is whatever the previous last + # descendant had. 
+ last_childs_last_descendant = to_append[-1]._last_descendant(False, True) + + last_childs_last_descendant.next_element = new_parents_last_descendant_next_element if new_parents_last_descendant_next_element: - new_parents_last_descendant_next_element.previous_element = last_child - last_child.next_sibling = None + # TODO: This code has no test coverage and I'm not sure + # how to get html5lib to go through this path, but it's + # just the other side of the previous line. + new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant + last_childs_last_descendant.next_sibling = None for child in to_append: child.parent = new_parent_element @@ -324,7 +418,7 @@ class Element(html5lib.treebuilders._base.Node): class TextNode(Element): def __init__(self, element, soup): - html5lib.treebuilders._base.Node.__init__(self, None) + treebuilder_base.Node.__init__(self, None) self.element = element self.soup = soup diff --git a/lib/bs4/builder/_htmlparser.py b/lib/bs4/builder/_htmlparser.py index 0101d647bdd99674422c1edeab205f01060cd341..823ca15aacb3693ab1d41004bb6ca5680f11b17a 100644 --- a/lib/bs4/builder/_htmlparser.py +++ b/lib/bs4/builder/_htmlparser.py @@ -1,5 +1,8 @@ """Use the HTMLParser library to parse HTML files that aren't too bad.""" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + __all__ = [ 'HTMLParserTreeBuilder', ] diff --git a/lib/bs4/builder/_lxml.py b/lib/bs4/builder/_lxml.py index 9e8f88fb56e682da0e3c35a4c8b3bdeda1233654..d2ca2872d17b625a9ef3abdc3df3b6d89cbf9947 100644 --- a/lib/bs4/builder/_lxml.py +++ b/lib/bs4/builder/_lxml.py @@ -1,3 +1,5 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
__all__ = [ 'LXMLTreeBuilderForXML', 'LXMLTreeBuilder', @@ -12,6 +14,7 @@ from bs4.element import ( Doctype, NamespacedAttribute, ProcessingInstruction, + XMLProcessingInstruction, ) from bs4.builder import ( FAST, @@ -29,6 +32,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): DEFAULT_PARSER_CLASS = etree.XMLParser is_xml = True + processing_instruction_class = XMLProcessingInstruction NAME = "lxml-xml" ALTERNATE_NAMES = ["xml"] @@ -87,6 +91,16 @@ class LXMLTreeBuilderForXML(TreeBuilder): Each 4-tuple represents a strategy for parsing the document. """ + # Instead of using UnicodeDammit to convert the bytestring to + # Unicode using different encodings, use EncodingDetector to + # iterate over the encodings, and tell lxml to try to parse + # the document as each one in turn. + is_html = not self.is_xml + if is_html: + self.processing_instruction_class = ProcessingInstruction + else: + self.processing_instruction_class = XMLProcessingInstruction + if isinstance(markup, unicode): # We were given Unicode. Maybe lxml can parse Unicode on # this system? @@ -98,11 +112,6 @@ class LXMLTreeBuilderForXML(TreeBuilder): yield (markup.encode("utf8"), "utf8", document_declared_encoding, False) - # Instead of using UnicodeDammit to convert the bytestring to - # Unicode using different encodings, use EncodingDetector to - # iterate over the encodings, and tell lxml to try to parse - # the document as each one in turn. 
- is_html = not self.is_xml try_encodings = [user_specified_encoding, document_declared_encoding] detector = EncodingDetector( markup, try_encodings, is_html, exclude_encodings) @@ -201,7 +210,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): def pi(self, target, data): self.soup.endData() self.soup.handle_data(target + ' ' + data) - self.soup.endData(ProcessingInstruction) + self.soup.endData(self.processing_instruction_class) def data(self, content): self.soup.handle_data(content) @@ -229,6 +238,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE] is_xml = False + processing_instruction_class = ProcessingInstruction def default_parser(self, encoding): return etree.HTMLParser diff --git a/lib/bs4/dammit.py b/lib/bs4/dammit.py index 636f81b4c00bac992c037560d02b694b56906366..7965565f5cf79fef34d69a3252b500c1df1d6402 100644 --- a/lib/bs4/dammit.py +++ b/lib/bs4/dammit.py @@ -6,9 +6,10 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal Feed Parser. It works best on XML and HTML, but it does not rewrite the XML or HTML to reflect a new encoding; that's the tree builder's job. """ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
__license__ = "MIT" -from pdb import set_trace import codecs from htmlentitydefs import codepoint2name import re @@ -309,7 +310,7 @@ class EncodingDetector: else: xml_endpos = 1024 html_endpos = max(2048, int(len(markup) * 0.05)) - + declared_encoding = None declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos) if not declared_encoding_match and is_html: @@ -346,7 +347,7 @@ class UnicodeDammit: self.tried_encodings = [] self.contains_replacement_characters = False self.is_html = is_html - + self.log = logging.getLogger(__name__) self.detector = EncodingDetector( markup, override_encodings, is_html, exclude_encodings) @@ -376,9 +377,10 @@ class UnicodeDammit: if encoding != "ascii": u = self._convert_from(encoding, "replace") if u is not None: - logging.warning( + self.log.warning( "Some characters could not be decoded, and were " - "replaced with REPLACEMENT CHARACTER.") + "replaced with REPLACEMENT CHARACTER." + ) self.contains_replacement_characters = True break @@ -734,7 +736,7 @@ class UnicodeDammit: 0xde : b'\xc3\x9e', # Þ 0xdf : b'\xc3\x9f', # ß 0xe0 : b'\xc3\xa0', # à - 0xe1 : b'\xa1', # á + 0xe1 : b'\xa1', # á 0xe2 : b'\xc3\xa2', # â 0xe3 : b'\xc3\xa3', # ã 0xe4 : b'\xc3\xa4', # ä diff --git a/lib/bs4/diagnose.py b/lib/bs4/diagnose.py index c04d23c3592b3b4db2d103f702b2737cc29cf609..8768332f5e6a4a03b9e041cb93b25bc47e4acb0e 100644 --- a/lib/bs4/diagnose.py +++ b/lib/bs4/diagnose.py @@ -1,5 +1,7 @@ """Diagnostic functions, mainly for use when doing tech support.""" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __license__ = "MIT" import cProfile @@ -56,7 +58,8 @@ def diagnose(data): data = data.read() elif os.path.exists(data): print '"%s" looks like a filename. Reading data from the file.' % data - data = open(data).read() + with open(data) as fp: + data = fp.read() elif data.startswith("http:") or data.startswith("https:"): print '"%s" looks like a URL. 
Beautiful Soup is not an HTTP client.' % data print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup." diff --git a/lib/bs4/element.py b/lib/bs4/element.py index ecf2b2804d1caba3a71c6405dc8decfcff094723..b100d18bbb608015845609f55cc52bebc448fa0b 100644 --- a/lib/bs4/element.py +++ b/lib/bs4/element.py @@ -1,8 +1,10 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __license__ = "MIT" -from pdb import set_trace import collections import re +import shlex import sys import warnings from bs4.dammit import EntitySubstitution @@ -99,6 +101,8 @@ class HTMLAwareEntitySubstitution(EntitySubstitution): preformatted_tags = set(["pre"]) + preserve_whitespace_tags = set(['pre', 'textarea']) + @classmethod def _substitute_if_appropriate(cls, ns, f): if (isinstance(ns, NavigableString) @@ -169,11 +173,19 @@ class PageElement(object): This is used when mapping a formatter name ("minimal") to an appropriate function (one that performs entity-substitution on - the contents of <script> and <style> tags, or not). It's + the contents of <script> and <style> tags, or not). It can be inefficient, but it should be called very rarely. """ + if self.known_xml is not None: + # Most of the time we will have determined this when the + # document is parsed. + return self.known_xml + + # Otherwise, it's likely that this element was created by + # direct invocation of the constructor from within the user's + # Python code. if self.parent is None: - # This is the top-level object. It should have .is_xml set + # This is the top-level object. It should have .known_xml set # from tree creation. If not, take a guess--BS is usually # used on HTML markup. 
return getattr(self, 'is_xml', False) @@ -637,7 +649,7 @@ class PageElement(object): return lambda el: el._attr_value_as_string( attribute, '').startswith(value) elif operator == '$': - # string represenation of `attribute` ends with `value` + # string representation of `attribute` ends with `value` return lambda el: el._attr_value_as_string( attribute, '').endswith(value) elif operator == '*': @@ -677,6 +689,11 @@ class NavigableString(unicode, PageElement): PREFIX = '' SUFFIX = '' + # We can't tell just by looking at a string whether it's contained + # in an XML document or an HTML document. + + known_xml = None + def __new__(cls, value): """Create a new NavigableString. @@ -743,10 +760,16 @@ class CData(PreformattedString): SUFFIX = u']]>' class ProcessingInstruction(PreformattedString): + """A SGML processing instruction.""" PREFIX = u'<?' SUFFIX = u'>' +class XMLProcessingInstruction(ProcessingInstruction): + """An XML processing instruction.""" + PREFIX = u'<?' + SUFFIX = u'?>' + class Comment(PreformattedString): PREFIX = u'<!--' @@ -781,7 +804,8 @@ class Tag(PageElement): """Represents a found HTML tag with its attributes and contents.""" def __init__(self, parser=None, builder=None, name=None, namespace=None, - prefix=None, attrs=None, parent=None, previous=None): + prefix=None, attrs=None, parent=None, previous=None, + is_xml=None): "Basic constructor." 
if parser is None: @@ -795,6 +819,14 @@ class Tag(PageElement): self.name = name self.namespace = namespace self.prefix = prefix + if builder is not None: + preserve_whitespace_tags = builder.preserve_whitespace_tags + else: + if is_xml: + preserve_whitespace_tags = [] + else: + preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags + self.preserve_whitespace_tags = preserve_whitespace_tags if attrs is None: attrs = {} elif attrs: @@ -805,6 +837,13 @@ class Tag(PageElement): attrs = dict(attrs) else: attrs = dict(attrs) + + # If possible, determine ahead of time whether this tag is an + # XML tag. + if builder: + self.known_xml = builder.is_xml + else: + self.known_xml = is_xml self.attrs = attrs self.contents = [] self.setup(parent, previous) @@ -824,7 +863,7 @@ class Tag(PageElement): Its contents are a copy of the old Tag's contents. """ clone = type(self)(None, self.builder, self.name, self.namespace, - self.nsprefix, self.attrs) + self.nsprefix, self.attrs, is_xml=self._is_xml) for attr in ('can_be_empty_element', 'hidden'): setattr(clone, attr, getattr(self, attr)) for child in self.contents: @@ -997,7 +1036,7 @@ class Tag(PageElement): tag_name, tag_name)) return self.find(tag_name) # We special case contents to avoid recursion. 
- elif not tag.startswith("__") and not tag=="contents": + elif not tag.startswith("__") and not tag == "contents": return self.find(tag) raise AttributeError( "'%s' object has no attribute '%s'" % (self.__class__, tag)) @@ -1057,10 +1096,11 @@ class Tag(PageElement): def _should_pretty_print(self, indent_level): """Should this tag be pretty-printed?""" + return ( - indent_level is not None and - (self.name not in HTMLAwareEntitySubstitution.preformatted_tags - or self._is_xml)) + indent_level is not None + and self.name not in self.preserve_whitespace_tags + ) def decode(self, indent_level=None, eventual_encoding=DEFAULT_OUTPUT_ENCODING, @@ -1280,6 +1320,7 @@ class Tag(PageElement): _selector_combinators = ['>', '+', '~'] _select_debug = False + quoted_colon = re.compile('"[^"]*:[^"]*"') def select_one(self, selector): """Perform a CSS selection operation on the current element.""" value = self.select(selector, limit=1) @@ -1305,8 +1346,7 @@ class Tag(PageElement): if limit and len(context) >= limit: break return context - - tokens = selector.split() + tokens = shlex.split(selector) current_context = [self] if tokens[-1] in self._selector_combinators: @@ -1358,7 +1398,7 @@ class Tag(PageElement): return classes.issubset(candidate.get('class', [])) checker = classes_match - elif ':' in token: + elif ':' in token and not self.quoted_colon.search(token): # Pseudo-class tag_name, pseudo = token.split(':', 1) if tag_name == '': @@ -1389,11 +1429,8 @@ class Tag(PageElement): self.count += 1 if self.count == self.destination: return True - if self.count > self.destination: - # Stop the generator that's sending us - # these things. - raise StopIteration() - return False + else: + return False checker = Counter(pseudo_value).nth_child_of_type else: raise NotImplementedError( @@ -1498,13 +1535,12 @@ class Tag(PageElement): # don't include it in the context more than once. 
new_context.append(candidate) new_context_ids.add(id(candidate)) - if limit and len(new_context) >= limit: - break elif self._select_debug: print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs)) - current_context = new_context + if limit and len(current_context) >= limit: + current_context = current_context[:limit] if self._select_debug: print "Final verdict:" @@ -1668,21 +1704,15 @@ class SoupStrainer(object): if isinstance(markup, list) or isinstance(markup, tuple): # This should only happen when searching a multi-valued attribute # like 'class'. - if (isinstance(match_against, unicode) - and ' ' in match_against): - # A bit of a special case. If they try to match "foo - # bar" on a multivalue attribute's value, only accept - # the literal value "foo bar" - # - # XXX This is going to be pretty slow because we keep - # splitting match_against. But it shouldn't come up - # too often. - return (whitespace_re.split(match_against) == markup) - else: - for item in markup: - if self._matches(item, match_against): - return True - return False + for item in markup: + if self._matches(item, match_against): + return True + # We didn't match any particular value of the multivalue + # attribute, but maybe we match the attribute value when + # considered as a string. + if self._matches(' '.join(markup), match_against): + return True + return False if match_against is True: # True matches any non-None value. diff --git a/lib/bs4/testing.py b/lib/bs4/testing.py index 7ba54ab39589dad6e1ab3ba45528ecac45ef2a4d..3a6ed4251fb84ab57c268285b86146caff30d753 100644 --- a/lib/bs4/testing.py +++ b/lib/bs4/testing.py @@ -1,5 +1,7 @@ """Helper classes for tests.""" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
__license__ = "MIT" import pickle @@ -137,6 +139,14 @@ class HTMLTreeBuilderSmokeTest(object): markup.replace(b"\n", b"")) def test_processing_instruction(self): + # We test both Unicode and bytestring to verify that + # process_markup correctly sets processing_instruction_class + # even when the markup is already Unicode and there is no + # need to process anything. + markup = u"""<?PITarget PIContent?>""" + soup = self.soup(markup) + self.assertEqual(markup, soup.decode()) + markup = b"""<?PITarget PIContent?>""" soup = self.soup(markup) self.assertEqual(markup, soup.encode("utf8")) @@ -215,9 +225,22 @@ Hello, world! self.assertEqual(comment, baz.previous_element) def test_preserved_whitespace_in_pre_and_textarea(self): - """Whitespace must be preserved in <pre> and <textarea> tags.""" - self.assertSoupEquals("<pre> </pre>") - self.assertSoupEquals("<textarea> woo </textarea>") + """Whitespace must be preserved in <pre> and <textarea> tags, + even if that would mean not prettifying the markup. + """ + pre_markup = "<pre> </pre>" + textarea_markup = "<textarea> woo\nwoo </textarea>" + self.assertSoupEquals(pre_markup) + self.assertSoupEquals(textarea_markup) + + soup = self.soup(pre_markup) + self.assertEqual(soup.pre.prettify(), pre_markup) + + soup = self.soup(textarea_markup) + self.assertEqual(soup.textarea.prettify(), textarea_markup) + + soup = self.soup("<textarea></textarea>") + self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>") def test_nested_inline_elements(self): """Inline elements can be nested indefinitely.""" @@ -480,7 +503,9 @@ Hello, world! hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>' soup = self.soup( hebrew_document, from_encoding="iso8859-8") - self.assertEqual(soup.original_encoding, 'iso8859-8') + # Some tree builders call it iso8859-8, others call it iso-8859-9. 
+ # That's not a difference we really care about. + assert soup.original_encoding in ('iso8859-8', 'iso-8859-8') self.assertEqual( soup.encode('utf-8'), hebrew_document.decode("iso8859-8").encode("utf-8")) @@ -563,6 +588,11 @@ class XMLTreeBuilderSmokeTest(object): soup = self.soup(markup) self.assertEqual(markup, soup.encode("utf8")) + def test_processing_instruction(self): + markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>""" + soup = self.soup(markup) + self.assertEqual(markup, soup.encode("utf8")) + def test_real_xhtml_document(self): """A real XHTML document should come out *exactly* the same as it went in.""" markup = b"""<?xml version="1.0" encoding="utf-8"?> diff --git a/lib/bs4/tests/test_html5lib.py b/lib/bs4/tests/test_html5lib.py index 65536c2a2482b4e35e459e984f29c7fb2672dae3..0f89d62445a3f608fa61052c912b46d3c621bea3 100644 --- a/lib/bs4/tests/test_html5lib.py +++ b/lib/bs4/tests/test_html5lib.py @@ -84,6 +84,33 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode()) self.assertEqual(2, len(soup.find_all('p'))) + def test_reparented_markup_containing_identical_whitespace_nodes(self): + """Verify that we keep the two whitespace nodes in this + document distinct when reparenting the adjacent <tbody> tags. 
+ """ + markup = '<table> <tbody><tbody><ims></tbody> </table>' + soup = self.soup(markup) + space1, space2 = soup.find_all(string=' ') + tbody1, tbody2 = soup.find_all('tbody') + assert space1.next_element is tbody1 + assert tbody2.next_element is space2 + + def test_reparented_markup_containing_children(self): + markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>' + soup = self.soup(markup) + noscript = soup.noscript + self.assertEqual("target", noscript.next_element) + target = soup.find(string='target') + + # The 'aftermath' string was duplicated; we want the second one. + final_aftermath = soup.find_all(string='aftermath')[-1] + + # The <noscript> tag was moved beneath a copy of the <a> tag, + # but the 'target' string within is still connected to the + # (second) 'aftermath' string. + self.assertEqual(final_aftermath, target.next_element) + self.assertEqual(target, final_aftermath.previous_element) + def test_processing_instruction(self): """Processing instructions become comments.""" markup = b"""<?PITarget PIContent?>""" @@ -96,3 +123,8 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): a1, a2 = soup.find_all('a') self.assertEqual(a1, a2) assert a1 is not a2 + + def test_foster_parenting(self): + markup = b"""<table><td></tbody>A""" + soup = self.soup(markup) + self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode()) diff --git a/lib/bs4/tests/test_soup.py b/lib/bs4/tests/test_soup.py index 1238af22fb8faf2909ec7337cb544cc282181f25..f3e69edf308d1a3468190dbf28f8d953341f9524 100644 --- a/lib/bs4/tests/test_soup.py +++ b/lib/bs4/tests/test_soup.py @@ -35,7 +35,6 @@ try: except ImportError, e: LXML_PRESENT = False -PYTHON_2_PRE_2_7 = (sys.version_info < (2,7)) PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2)) class TestConstructor(SoupTest): @@ -77,7 +76,7 @@ class TestWarnings(SoupTest): def 
test_no_warning_if_explicit_parser_specified(self): with warnings.catch_warnings(record=True) as w: soup = self.soup("<a><b></b></a>", "html.parser") - self.assertEquals([], w) + self.assertEqual([], w) def test_parseOnlyThese_renamed_to_parse_only(self): with warnings.catch_warnings(record=True) as w: @@ -118,15 +117,34 @@ class TestWarnings(SoupTest): soup = self.soup(filename) self.assertEqual(0, len(w)) - def test_url_warning(self): - with warnings.catch_warnings(record=True) as w: - soup = self.soup("http://www.crummy.com/") - msg = str(w[0].message) - self.assertTrue("looks like a URL" in msg) + def test_url_warning_with_bytes_url(self): + with warnings.catch_warnings(record=True) as warning_list: + soup = self.soup(b"http://www.crummybytes.com/") + # Be aware this isn't the only warning that can be raised during + # execution.. + self.assertTrue(any("looks like a URL" in str(w.message) + for w in warning_list)) + + def test_url_warning_with_unicode_url(self): + with warnings.catch_warnings(record=True) as warning_list: + # note - this url must differ from the bytes one otherwise + # python's warnings system swallows the second warning + soup = self.soup(u"http://www.crummyunicode.com/") + self.assertTrue(any("looks like a URL" in str(w.message) + for w in warning_list)) + + def test_url_warning_with_bytes_and_space(self): + with warnings.catch_warnings(record=True) as warning_list: + soup = self.soup(b"http://www.crummybytes.com/ is great") + self.assertFalse(any("looks like a URL" in str(w.message) + for w in warning_list)) + + def test_url_warning_with_unicode_and_space(self): + with warnings.catch_warnings(record=True) as warning_list: + soup = self.soup(u"http://www.crummyuncode.com/ is great") + self.assertFalse(any("looks like a URL" in str(w.message) + for w in warning_list)) - with warnings.catch_warnings(record=True) as w: - soup = self.soup("http://www.crummy.com/ is great") - self.assertEqual(0, len(w)) class TestSelectiveParsing(SoupTest): @@ 
-260,7 +278,7 @@ class TestEncodingConversion(SoupTest): self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data) @skipIf( - PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2, + PYTHON_3_PRE_3_2, "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.") def test_attribute_name_containing_unicode_characters(self): markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>' diff --git a/lib/bs4/tests/test_tree.py b/lib/bs4/tests/test_tree.py index 6b2a1239a91a7ad501e20c1aec8939b2cb1ad5b6..a4fe0b1664d6f3e85f592392a404e501e6ad7054 100644 --- a/lib/bs4/tests/test_tree.py +++ b/lib/bs4/tests/test_tree.py @@ -222,6 +222,17 @@ class TestFindAllByName(TreeTest): self.assertSelects( tree.find_all(id_matches_name), ["Match 1.", "Match 2."]) + def test_find_with_multi_valued_attribute(self): + soup = self.soup( + "<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>" + ) + r1 = soup.find('div', 'a d'); + r2 = soup.find('div', re.compile(r'a d')); + r3, r4 = soup.find_all('div', ['a b', 'a d']); + self.assertEqual('3', r1.string) + self.assertEqual('3', r2.string) + self.assertEqual('1', r3.string) + self.assertEqual('3', r4.string) class TestFindAllByAttribute(TreeTest): @@ -294,10 +305,10 @@ class TestFindAllByAttribute(TreeTest): f = tree.find_all("gar", class_=re.compile("a")) self.assertSelects(f, ["Found it"]) - # Since the class is not the string "foo bar", but the two - # strings "foo" and "bar", this will not find anything. + # If the search fails to match the individual strings "foo" and "bar", + # it will be tried against the combined string "foo bar". 
f = tree.find_all("gar", class_=re.compile("o b")) - self.assertSelects(f, []) + self.assertSelects(f, ["Found it"]) def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self): soup = self.soup("<a class='bar'>Found it</a>") @@ -335,7 +346,7 @@ class TestFindAllByAttribute(TreeTest): strainer = SoupStrainer(attrs={'id' : 'first'}) self.assertSelects(tree.find_all(strainer), ['Match.']) - def test_find_all_with_missing_atribute(self): + def test_find_all_with_missing_attribute(self): # You can pass in None as the value of an attribute to find_all. # This will match tags that do not have that attribute set. tree = self.soup("""<a id="1">ID present.</a> @@ -1328,6 +1339,13 @@ class TestPersistence(SoupTest): copied = copy.deepcopy(self.tree) self.assertEqual(copied.decode(), self.tree.decode()) + def test_copy_preserves_encoding(self): + soup = BeautifulSoup(b'<p> </p>', 'html.parser') + encoding = soup.original_encoding + copy = soup.__copy__() + self.assertEqual(u"<p> </p>", unicode(copy)) + self.assertEqual(encoding, copy.original_encoding) + def test_unicode_pickle(self): # A tree containing Unicode characters can be pickled. 
html = u"<b>\N{SNOWMAN}</b>" @@ -1676,8 +1694,8 @@ class TestSoupSelector(TreeTest): def setUp(self): self.soup = BeautifulSoup(self.HTML, 'html.parser') - def assertSelects(self, selector, expected_ids): - el_ids = [el['id'] for el in self.soup.select(selector)] + def assertSelects(self, selector, expected_ids, **kwargs): + el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)] el_ids.sort() expected_ids.sort() self.assertEqual(expected_ids, el_ids, @@ -1720,6 +1738,13 @@ class TestSoupSelector(TreeTest): for selector in ('html div', 'html body div', 'body div'): self.assertSelects(selector, ['data1', 'main', 'inner', 'footer']) + + def test_limit(self): + self.assertSelects('html div', ['main'], limit=1) + self.assertSelects('html body div', ['inner', 'main'], limit=2) + self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'], + limit=10) + def test_tag_no_match(self): self.assertEqual(len(self.soup.select('del')), 0) @@ -1902,6 +1927,14 @@ class TestSoupSelector(TreeTest): ('div[data-tag]', ['data1']) ) + def test_quoted_space_in_selector_name(self): + html = """<div style="display: wrong">nope</div> + <div style="display: right">yes</div> + """ + soup = BeautifulSoup(html, 'html.parser') + [chosen] = soup.select('div[style="display: right"]') + self.assertEqual("yes", chosen.string) + def test_unsupported_pseudoclass(self): self.assertRaises( NotImplementedError, self.soup.select, "a:no-such-pseudoclass") diff --git a/lib/certifi/__init__.py b/lib/certifi/__init__.py index ed3dd1685b4be4015e8f2b9b96cf473d4cc7c7ca..b8cd2894de4321ceb8867f1a5c6a84855037527d 100644 --- a/lib/certifi/__init__.py +++ b/lib/certifi/__init__.py @@ -1,3 +1,3 @@ from .core import where, old_where -__version__ = "2016.09.26" +__version__ = "2017.04.17" diff --git a/lib/certifi/cacert.pem b/lib/certifi/cacert.pem index 6b7bccfbaf8bb16b8a97f7eeb3a6518d7e70332c..e5f0896cd1fb098bfdb370b0347ddfc522892f76 100644 --- a/lib/certifi/cacert.pem +++ b/lib/certifi/cacert.pem 
@@ -317,35 +317,6 @@ eu6FSqdQgPCnXEqULl8FmTxSQeDNtGPPAUO6nIPcj2A781q0tHuu2guQOHXvgR1m 0vdXcDazv/wor3ElhVsT/h5/WrQ8 -----END CERTIFICATE----- -# Issuer: O=RSA Security Inc OU=RSA Security 2048 V3 -# Subject: O=RSA Security Inc OU=RSA Security 2048 V3 -# Label: "RSA Security 2048 v3" -# Serial: 13297492616345471454730593562152402946 -# MD5 Fingerprint: 77:0d:19:b1:21:fd:00:42:9c:3e:0c:a5:dd:0b:02:8e -# SHA1 Fingerprint: 25:01:90:19:cf:fb:d9:99:1c:b7:68:25:74:8d:94:5f:30:93:95:42 -# SHA256 Fingerprint: af:8b:67:62:a1:e5:28:22:81:61:a9:5d:5c:55:9e:e2:66:27:8f:75:d7:9e:83:01:89:a5:03:50:6a:bd:6b:4c ------BEGIN CERTIFICATE----- -MIIDYTCCAkmgAwIBAgIQCgEBAQAAAnwAAAAKAAAAAjANBgkqhkiG9w0BAQUFADA6 -MRkwFwYDVQQKExBSU0EgU2VjdXJpdHkgSW5jMR0wGwYDVQQLExRSU0EgU2VjdXJp -dHkgMjA0OCBWMzAeFw0wMTAyMjIyMDM5MjNaFw0yNjAyMjIyMDM5MjNaMDoxGTAX -BgNVBAoTEFJTQSBTZWN1cml0eSBJbmMxHTAbBgNVBAsTFFJTQSBTZWN1cml0eSAy -MDQ4IFYzMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAt49VcdKA3Xtp -eafwGFAyPGJn9gqVB93mG/Oe2dJBVGutn3y+Gc37RqtBaB4Y6lXIL5F4iSj7Jylg -/9+PjDvJSZu1pJTOAeo+tWN7fyb9Gd3AIb2E0S1PRsNO3Ng3OTsor8udGuorryGl -wSMiuLgbWhOHV4PR8CDn6E8jQrAApX2J6elhc5SYcSa8LWrg903w8bYqODGBDSnh -AMFRD0xS+ARaqn1y07iHKrtjEAMqs6FPDVpeRrc9DvV07Jmf+T0kgYim3WBU6JU2 -PcYJk5qjEoAAVZkZR73QpXzDuvsf9/UP+Ky5tfQ3mBMY3oVbtwyCO4dvlTlYMNpu -AWgXIszACwIDAQABo2MwYTAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIB -BjAfBgNVHSMEGDAWgBQHw1EwpKrpRa41JPr/JCwz0LGdjDAdBgNVHQ4EFgQUB8NR -MKSq6UWuNST6/yQsM9CxnYwwDQYJKoZIhvcNAQEFBQADggEBAF8+hnZuuDU8TjYc -HnmYv/3VEhF5Ug7uMYm83X/50cYVIeiKAVQNOvtUudZj1LGqlk2iQk3UUx+LEN5/ -Zb5gEydxiKRz44Rj0aRV4VCT5hsOedBnvEbIvz8XDZXmxpBp3ue0L96VfdASPz0+ -f00/FGj1EVDVwfSQpQgdMWD/YIwjVAqv/qFuxdF6Kmh4zx6CCiC0H63lhbJqaHVO -rSU3lIW+vaHU6rcMSzyd6BIA8F+sDeGscGNz9395nzIlQnQFgCi/vcEkllgVsRch -6YlL2weIZ/QVrXA+L02FO8K32/6YaCOJ4XQP3vTFhGMpG8zLB8kApKnXwiJPZ9d3 -7CAFYd4= ------END CERTIFICATE----- - # Issuer: CN=GeoTrust Global CA O=GeoTrust Inc. # Subject: CN=GeoTrust Global CA O=GeoTrust Inc. 
# Label: "GeoTrust Global CA" @@ -1643,42 +1614,6 @@ wKeI8lN3s2Berq4o2jUsbzRF0ybh3uxbTydrFny9RAQYgrOJeRcQcT16ohZO9QHN pGxlaKFJdlxDydi8NmdspZS11My5vWo1ViHe2MPr+8ukYEywVaCge1ey -----END CERTIFICATE----- -# Issuer: CN=WellsSecure Public Root Certificate Authority O=Wells Fargo WellsSecure OU=Wells Fargo Bank NA -# Subject: CN=WellsSecure Public Root Certificate Authority O=Wells Fargo WellsSecure OU=Wells Fargo Bank NA -# Label: "WellsSecure Public Root Certificate Authority" -# Serial: 1 -# MD5 Fingerprint: 15:ac:a5:c2:92:2d:79:bc:e8:7f:cb:67:ed:02:cf:36 -# SHA1 Fingerprint: e7:b4:f6:9d:61:ec:90:69:db:7e:90:a7:40:1a:3c:f4:7d:4f:e8:ee -# SHA256 Fingerprint: a7:12:72:ae:aa:a3:cf:e8:72:7f:7f:b3:9f:0f:b3:d1:e5:42:6e:90:60:b0:6e:e6:f1:3e:9a:3c:58:33:cd:43 ------BEGIN CERTIFICATE----- -MIIEvTCCA6WgAwIBAgIBATANBgkqhkiG9w0BAQUFADCBhTELMAkGA1UEBhMCVVMx -IDAeBgNVBAoMF1dlbGxzIEZhcmdvIFdlbGxzU2VjdXJlMRwwGgYDVQQLDBNXZWxs -cyBGYXJnbyBCYW5rIE5BMTYwNAYDVQQDDC1XZWxsc1NlY3VyZSBQdWJsaWMgUm9v -dCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwHhcNMDcxMjEzMTcwNzU0WhcNMjIxMjE0 -MDAwNzU0WjCBhTELMAkGA1UEBhMCVVMxIDAeBgNVBAoMF1dlbGxzIEZhcmdvIFdl -bGxzU2VjdXJlMRwwGgYDVQQLDBNXZWxscyBGYXJnbyBCYW5rIE5BMTYwNAYDVQQD -DC1XZWxsc1NlY3VyZSBQdWJsaWMgUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkw -ggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDub7S9eeKPCCGeOARBJe+r -WxxTkqxtnt3CxC5FlAM1iGd0V+PfjLindo8796jE2yljDpFoNoqXjopxaAkH5OjU -Dk/41itMpBb570OYj7OeUt9tkTmPOL13i0Nj67eT/DBMHAGTthP796EfvyXhdDcs -HqRePGj4S78NuR4uNuip5Kf4D8uCdXw1LSLWwr8L87T8bJVhHlfXBIEyg1J55oNj -z7fLY4sR4r1e6/aN7ZVyKLSsEmLpSjPmgzKuBXWVvYSV2ypcm44uDLiBK0HmOFaf -SZtsdvqKXfcBeYF8wYNABf5x/Qw/zE5gCQ5lRxAvAcAFP4/4s0HvWkJ+We/Slwxl -AgMBAAGjggE0MIIBMDAPBgNVHRMBAf8EBTADAQH/MDkGA1UdHwQyMDAwLqAsoCqG -KGh0dHA6Ly9jcmwucGtpLndlbGxzZmFyZ28uY29tL3dzcHJjYS5jcmwwDgYDVR0P -AQH/BAQDAgHGMB0GA1UdDgQWBBQmlRkQ2eihl5H/3BnZtQQ+0nMKajCBsgYDVR0j -BIGqMIGngBQmlRkQ2eihl5H/3BnZtQQ+0nMKaqGBi6SBiDCBhTELMAkGA1UEBhMC -VVMxIDAeBgNVBAoMF1dlbGxzIEZhcmdvIFdlbGxzU2VjdXJlMRwwGgYDVQQLDBNX 
-ZWxscyBGYXJnbyBCYW5rIE5BMTYwNAYDVQQDDC1XZWxsc1NlY3VyZSBQdWJsaWMg -Um9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHmCAQEwDQYJKoZIhvcNAQEFBQADggEB -ALkVsUSRzCPIK0134/iaeycNzXK7mQDKfGYZUMbVmO2rvwNa5U3lHshPcZeG1eMd -/ZDJPHV3V3p9+N701NX3leZ0bh08rnyd2wIDBSxxSyU+B+NemvVmFymIGjifz6pB -A4SXa5M4esowRBskRDPQ5NHcKDj0E0M1NSljqHyita04pO2t/caaH/+Xc/77szWn -k4bGdpEA5qxRFsQnMlzbc9qlk1eOPm01JghZ1edE13YgY+esE2fDbbFwRnzVlhE9 -iW9dqKHrjQrawx0zbKPqZxmamX9LPYNRKh3KL4YMon4QLSvUFpULB6ouFJJJtylv -2G0xffX8oRAHh84vWdw+WNs= ------END CERTIFICATE----- - # Issuer: CN=COMODO ECC Certification Authority O=COMODO CA Limited # Subject: CN=COMODO ECC Certification Authority O=COMODO CA Limited # Label: "COMODO ECC Certification Authority" @@ -1764,57 +1699,6 @@ Fj4A4xylNoEYokxSdsARo27mHbrjWr42U8U+dY+GaSlYU7Wcu2+fXMUY7N0v4ZjJ /L7fCg0= -----END CERTIFICATE----- -# Issuer: CN=Microsec e-Szigno Root CA O=Microsec Ltd. OU=e-Szigno CA -# Subject: CN=Microsec e-Szigno Root CA O=Microsec Ltd. OU=e-Szigno CA -# Label: "Microsec e-Szigno Root CA" -# Serial: 272122594155480254301341951808045322001 -# MD5 Fingerprint: f0:96:b6:2f:c5:10:d5:67:8e:83:25:32:e8:5e:2e:e5 -# SHA1 Fingerprint: 23:88:c9:d3:71:cc:9e:96:3d:ff:7d:3c:a7:ce:fc:d6:25:ec:19:0d -# SHA256 Fingerprint: 32:7a:3d:76:1a:ba:de:a0:34:eb:99:84:06:27:5c:b1:a4:77:6e:fd:ae:2f:df:6d:01:68:ea:1c:4f:55:67:d0 ------BEGIN CERTIFICATE----- -MIIHqDCCBpCgAwIBAgIRAMy4579OKRr9otxmpRwsDxEwDQYJKoZIhvcNAQEFBQAw -cjELMAkGA1UEBhMCSFUxETAPBgNVBAcTCEJ1ZGFwZXN0MRYwFAYDVQQKEw1NaWNy -b3NlYyBMdGQuMRQwEgYDVQQLEwtlLVN6aWdubyBDQTEiMCAGA1UEAxMZTWljcm9z -ZWMgZS1Temlnbm8gUm9vdCBDQTAeFw0wNTA0MDYxMjI4NDRaFw0xNzA0MDYxMjI4 -NDRaMHIxCzAJBgNVBAYTAkhVMREwDwYDVQQHEwhCdWRhcGVzdDEWMBQGA1UEChMN -TWljcm9zZWMgTHRkLjEUMBIGA1UECxMLZS1Temlnbm8gQ0ExIjAgBgNVBAMTGU1p -Y3Jvc2VjIGUtU3ppZ25vIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAw -ggEKAoIBAQDtyADVgXvNOABHzNuEwSFpLHSQDCHZU4ftPkNEU6+r+ICbPHiN1I2u -uO/TEdyB5s87lozWbxXGd36hL+BfkrYn13aaHUM86tnsL+4582pnS4uCzyL4ZVX+ 
-LMsvfUh6PXX5qqAnu3jCBspRwn5mS6/NoqdNAoI/gqyFxuEPkEeZlApxcpMqyabA -vjxWTHOSJ/FrtfX9/DAFYJLG65Z+AZHCabEeHXtTRbjcQR/Ji3HWVBTji1R4P770 -Yjtb9aPs1ZJ04nQw7wHb4dSrmZsqa/i9phyGI0Jf7Enemotb9HI6QMVJPqW+jqpx -62z69Rrkav17fVVA71hu5tnVvCSrwe+3AgMBAAGjggQ3MIIEMzBnBggrBgEFBQcB -AQRbMFkwKAYIKwYBBQUHMAGGHGh0dHBzOi8vcmNhLmUtc3ppZ25vLmh1L29jc3Aw -LQYIKwYBBQUHMAKGIWh0dHA6Ly93d3cuZS1zemlnbm8uaHUvUm9vdENBLmNydDAP -BgNVHRMBAf8EBTADAQH/MIIBcwYDVR0gBIIBajCCAWYwggFiBgwrBgEEAYGoGAIB -AQEwggFQMCgGCCsGAQUFBwIBFhxodHRwOi8vd3d3LmUtc3ppZ25vLmh1L1NaU1ov -MIIBIgYIKwYBBQUHAgIwggEUHoIBEABBACAAdABhAG4A+gBzAO0AdAB2AOEAbgB5 -ACAA6QByAHQAZQBsAG0AZQB6AOkAcwDpAGgAZQB6ACAA6QBzACAAZQBsAGYAbwBn -AGEAZADhAHMA4QBoAG8AegAgAGEAIABTAHoAbwBsAGcA4QBsAHQAYQB0APMAIABT -AHoAbwBsAGcA4QBsAHQAYQB0AOEAcwBpACAAUwB6AGEAYgDhAGwAeQB6AGEAdABh -ACAAcwB6AGUAcgBpAG4AdAAgAGsAZQBsAGwAIABlAGwAagDhAHIAbgBpADoAIABo -AHQAdABwADoALwAvAHcAdwB3AC4AZQAtAHMAegBpAGcAbgBvAC4AaAB1AC8AUwBa -AFMAWgAvMIHIBgNVHR8EgcAwgb0wgbqggbeggbSGIWh0dHA6Ly93d3cuZS1zemln -bm8uaHUvUm9vdENBLmNybIaBjmxkYXA6Ly9sZGFwLmUtc3ppZ25vLmh1L0NOPU1p -Y3Jvc2VjJTIwZS1Temlnbm8lMjBSb290JTIwQ0EsT1U9ZS1Temlnbm8lMjBDQSxP -PU1pY3Jvc2VjJTIwTHRkLixMPUJ1ZGFwZXN0LEM9SFU/Y2VydGlmaWNhdGVSZXZv -Y2F0aW9uTGlzdDtiaW5hcnkwDgYDVR0PAQH/BAQDAgEGMIGWBgNVHREEgY4wgYuB -EGluZm9AZS1zemlnbm8uaHWkdzB1MSMwIQYDVQQDDBpNaWNyb3NlYyBlLVN6aWdu -w7MgUm9vdCBDQTEWMBQGA1UECwwNZS1TemlnbsOzIEhTWjEWMBQGA1UEChMNTWlj -cm9zZWMgS2Z0LjERMA8GA1UEBxMIQnVkYXBlc3QxCzAJBgNVBAYTAkhVMIGsBgNV -HSMEgaQwgaGAFMegSXUWYYTbMUuE0vE3QJDvTtz3oXakdDByMQswCQYDVQQGEwJI -VTERMA8GA1UEBxMIQnVkYXBlc3QxFjAUBgNVBAoTDU1pY3Jvc2VjIEx0ZC4xFDAS -BgNVBAsTC2UtU3ppZ25vIENBMSIwIAYDVQQDExlNaWNyb3NlYyBlLVN6aWdubyBS -b290IENBghEAzLjnv04pGv2i3GalHCwPETAdBgNVHQ4EFgQUx6BJdRZhhNsxS4TS -8TdAkO9O3PcwDQYJKoZIhvcNAQEFBQADggEBANMTnGZjWS7KXHAM/IO8VbH0jgds -ZifOwTsgqRy7RlRw7lrMoHfqaEQn6/Ip3Xep1fvj1KcExJW4C+FEaGAHQzAxQmHl -7tnlJNUb3+FKG6qfx1/4ehHqE5MAyopYse7tDk2016g2JnzgOsHVV4Lxdbb9iV/a -86g4nzUGCM4ilb7N1fy+W955a9x6qWVmvrElWl/tftOsRm1M9DKHtCAE4Gx4sHfR 
-hUZLphK3dehKyVZs15KrnfVJONJPU+NVkBHbmJbGSfI+9J8b4PeI3CVimUTYc78/ -MPMMNz7UwiiAc7EBt51alhQBS6kRnSlqLtBdgcDPsiBDxwPgN05dCtxZICU= ------END CERTIFICATE----- - # Issuer: CN=Certigna O=Dhimyotis # Subject: CN=Certigna O=Dhimyotis # Label: "Certigna" @@ -1946,8 +1830,8 @@ W9c3rkIO3aQab3yIVMUWbuF6aC74Or8NpDyJO3inTmODBCEIZ43ygknQW/2xzQ+D hNQ+IIX3Sj0rnP0qCglN6oH4EZw= -----END CERTIFICATE----- -# Issuer: CN=TÜBİTAK UEKAE Kök Sertifika Hizmet Sağlayıcısı - Sürüm 3 O=Türkiye Bilimsel ve Teknolojik Araştırma Kurumu - TÜBİTAK OU=Ulusal Elektronik ve Kriptoloji Araştırma Enstitüsü - UEKAE/Kamu Sertifikasyon Merkezi -# Subject: CN=TÜBİTAK UEKAE Kök Sertifika Hizmet Sağlayıcısı - Sürüm 3 O=Türkiye Bilimsel ve Teknolojik Araştırma Kurumu - TÜBİTAK OU=Ulusal Elektronik ve Kriptoloji Araştırma Enstitüsü - UEKAE/Kamu Sertifikasyon Merkezi +# Issuer: CN=T\xdcB\u0130TAK UEKAE K\xf6k Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 - S\xfcr\xfcm 3 O=T\xfcrkiye Bilimsel ve Teknolojik Ara\u015ft\u0131rma Kurumu - T\xdcB\u0130TAK OU=Ulusal Elektronik ve Kriptoloji Ara\u015ft\u0131rma Enstit\xfcs\xfc - UEKAE/Kamu Sertifikasyon Merkezi +# Subject: CN=T\xdcB\u0130TAK UEKAE K\xf6k Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 - S\xfcr\xfcm 3 O=T\xfcrkiye Bilimsel ve Teknolojik Ara\u015ft\u0131rma Kurumu - T\xdcB\u0130TAK OU=Ulusal Elektronik ve Kriptoloji Ara\u015ft\u0131rma Enstit\xfcs\xfc - UEKAE/Kamu Sertifikasyon Merkezi # Label: "T\xc3\x9c\x42\xC4\xB0TAK UEKAE K\xC3\xB6k Sertifika Hizmet Sa\xC4\x9Flay\xc4\xb1\x63\xc4\xb1s\xc4\xb1 - S\xC3\xBCr\xC3\xBCm 3" # Serial: 17 # MD5 Fingerprint: ed:41:f5:8c:50:c5:2b:9c:73:e6:ee:6c:eb:c2:a8:26 @@ -1984,34 +1868,6 @@ oN+J1q2MdqMTw5RhK2vZbMEHCiIHhWyFJEapvj+LeISCfiQMnf2BN+MlqO02TpUs yZyQ2uypQjyttgI= -----END CERTIFICATE----- -# Issuer: CN=Buypass Class 2 CA 1 O=Buypass AS-983163327 -# Subject: CN=Buypass Class 2 CA 1 O=Buypass AS-983163327 -# Label: "Buypass Class 2 CA 1" -# Serial: 1 -# MD5 Fingerprint: 
b8:08:9a:f0:03:cc:1b:0d:c8:6c:0b:76:a1:75:64:23 -# SHA1 Fingerprint: a0:a1:ab:90:c9:fc:84:7b:3b:12:61:e8:97:7d:5f:d3:22:61:d3:cc -# SHA256 Fingerprint: 0f:4e:9c:dd:26:4b:02:55:50:d1:70:80:63:40:21:4f:e9:44:34:c9:b0:2f:69:7e:c7:10:fc:5f:ea:fb:5e:38 ------BEGIN CERTIFICATE----- -MIIDUzCCAjugAwIBAgIBATANBgkqhkiG9w0BAQUFADBLMQswCQYDVQQGEwJOTzEd -MBsGA1UECgwUQnV5cGFzcyBBUy05ODMxNjMzMjcxHTAbBgNVBAMMFEJ1eXBhc3Mg -Q2xhc3MgMiBDQSAxMB4XDTA2MTAxMzEwMjUwOVoXDTE2MTAxMzEwMjUwOVowSzEL -MAkGA1UEBhMCTk8xHTAbBgNVBAoMFEJ1eXBhc3MgQVMtOTgzMTYzMzI3MR0wGwYD -VQQDDBRCdXlwYXNzIENsYXNzIDIgQ0EgMTCCASIwDQYJKoZIhvcNAQEBBQADggEP -ADCCAQoCggEBAIs8B0XY9t/mx8q6jUPFR42wWsE425KEHK8T1A9vNkYgxC7McXA0 -ojTTNy7Y3Tp3L8DrKehc0rWpkTSHIln+zNvnma+WwajHQN2lFYxuyHyXA8vmIPLX -l18xoS830r7uvqmtqEyeIWZDO6i88wmjONVZJMHCR3axiFyCO7srpgTXjAePzdVB -HfCuuCkslFJgNJQ72uA40Z0zPhX0kzLFANq1KWYOOngPIVJfAuWSeyXTkh4vFZ2B -5J2O6O+JzhRMVB0cgRJNcKi+EAUXfh/RuFdV7c27UsKwHnjCTTZoy1YmwVLBvXb3 -WNVyfh9EdrsAiR0WnVE1703CVu9r4Iw7DekCAwEAAaNCMEAwDwYDVR0TAQH/BAUw -AwEB/zAdBgNVHQ4EFgQUP42aWYv8e3uco684sDntkHGA1sgwDgYDVR0PAQH/BAQD -AgEGMA0GCSqGSIb3DQEBBQUAA4IBAQAVGn4TirnoB6NLJzKyQJHyIdFkhb5jatLP -gcIV1Xp+DCmsNx4cfHZSldq1fyOhKXdlyTKdqC5Wq2B2zha0jX94wNWZUYN/Xtm+ -DKhQ7SLHrQVMdvvt7h5HZPb3J31cKA9FxVxiXqaakZG3Uxcu3K1gnZZkOb1naLKu -BctN518fV4bVIJwo+28TOPX2EZL2fZleHwzoq0QkKXJAPTZSr4xYkHPB7GEseaHs -h7U/2k3ZIQAw3pDaDtMaSKk+hQsUi4y8QZ5q9w5wwDX3OaJdZtB7WZ+oRxKaJyOk -LY4ng5IgodcVf/EuGO70SH8vf/GhGLWhC5SgYiAynB321O+/TIho ------END CERTIFICATE----- - # Issuer: O=certSIGN OU=certSIGN ROOT CA # Subject: O=certSIGN OU=certSIGN ROOT CA # Label: "certSIGN ROOT CA" @@ -2068,36 +1924,6 @@ buXf6iFViZx9fX+Y9QCJ7uOEwFyWtcVG6kbghVW2G8kS1sHNzYDzAgE8yGnLRUhj 2JTQ7IUOO04RZfSCjKY9ri4ilAnIXOo8gV0WKgOXFlUJ24pBgp5mmxE= -----END CERTIFICATE----- -# Issuer: O=Japanese Government OU=ApplicationCA -# Subject: O=Japanese Government OU=ApplicationCA -# Label: "ApplicationCA - Japanese Government" -# Serial: 49 -# MD5 Fingerprint: 7e:23:4e:5b:a7:a5:b4:25:e9:00:07:74:11:62:ae:d6 -# 
SHA1 Fingerprint: 7f:8a:b0:cf:d0:51:87:6a:66:f3:36:0f:47:c8:8d:8c:d3:35:fc:74 -# SHA256 Fingerprint: 2d:47:43:7d:e1:79:51:21:5a:12:f3:c5:8e:51:c7:29:a5:80:26:ef:1f:cc:0a:5f:b3:d9:dc:01:2f:60:0d:19 ------BEGIN CERTIFICATE----- -MIIDoDCCAoigAwIBAgIBMTANBgkqhkiG9w0BAQUFADBDMQswCQYDVQQGEwJKUDEc -MBoGA1UEChMTSmFwYW5lc2UgR292ZXJubWVudDEWMBQGA1UECxMNQXBwbGljYXRp -b25DQTAeFw0wNzEyMTIxNTAwMDBaFw0xNzEyMTIxNTAwMDBaMEMxCzAJBgNVBAYT -AkpQMRwwGgYDVQQKExNKYXBhbmVzZSBHb3Zlcm5tZW50MRYwFAYDVQQLEw1BcHBs -aWNhdGlvbkNBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAp23gdE6H -j6UG3mii24aZS2QNcfAKBZuOquHMLtJqO8F6tJdhjYq+xpqcBrSGUeQ3DnR4fl+K -f5Sk10cI/VBaVuRorChzoHvpfxiSQE8tnfWuREhzNgaeZCw7NCPbXCbkcXmP1G55 -IrmTwcrNwVbtiGrXoDkhBFcsovW8R0FPXjQilbUfKW1eSvNNcr5BViCH/OlQR9cw -FO5cjFW6WY2H/CPek9AEjP3vbb3QesmlOmpyM8ZKDQUXKi17safY1vC+9D/qDiht -QWEjdnjDuGWk81quzMKq2edY3rZ+nYVunyoKb58DKTCXKB28t89UKU5RMfkntigm -/qJj5kEW8DOYRwIDAQABo4GeMIGbMB0GA1UdDgQWBBRUWssmP3HMlEYNllPqa0jQ -k/5CdTAOBgNVHQ8BAf8EBAMCAQYwWQYDVR0RBFIwUKROMEwxCzAJBgNVBAYTAkpQ -MRgwFgYDVQQKDA/ml6XmnKzlm73mlL/lupwxIzAhBgNVBAsMGuOCouODl+ODquOC -seODvOOCt+ODp+ODs0NBMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD -ggEBADlqRHZ3ODrso2dGD/mLBqj7apAxzn7s2tGJfHrrLgy9mTLnsCTWw//1sogJ -hyzjVOGjprIIC8CFqMjSnHH2HZ9g/DgzE+Ge3Atf2hZQKXsvcJEPmbo0NI2VdMV+ -eKlmXb3KIXdCEKxmJj3ekav9FfBv7WxfEPjzFvYDio+nEhEMy/0/ecGc/WLuo89U -DNErXxc+4z6/wCs+CZv+iKZ+tJIX/COUgb1up8WMwusRRdv4QcmWdupwX3kSa+Sj -B1oF7ydJzyGfikwJcGapJsErEU4z0g781mzSDjJkaP+tBXhfAx2o45CsJOAPQKdL -rosot4LKGAfmt1t06SAZf7IbiVQ= ------END CERTIFICATE----- - # Issuer: CN=GeoTrust Primary Certification Authority - G3 O=GeoTrust Inc. OU=(c) 2008 GeoTrust Inc. - For authorized use only # Subject: CN=GeoTrust Primary Certification Authority - G3 O=GeoTrust Inc. OU=(c) 2008 GeoTrust Inc. 
- For authorized use only # Label: "GeoTrust Primary Certification Authority - G3" @@ -2277,9 +2103,9 @@ kf3upm7ktS5Jj4d4gYDs5bG1MAoGCCqGSM49BAMDA2gAMGUCMGYhDBgmYFo4e1ZC FRJZap7v1VmyHVIsmXHNxynfGyphe3HR3vPA5Q06Sqotp9iGKt0uEA== -----END CERTIFICATE----- -# Issuer: CN=NetLock Arany (Class Gold) Főtanúsítvány O=NetLock Kft. OU=Tanúsítványkiadók (Certification Services) -# Subject: CN=NetLock Arany (Class Gold) Főtanúsítvány O=NetLock Kft. OU=Tanúsítványkiadók (Certification Services) -# Label: "NetLock Arany (Class Gold) Főtanúsítvány" +# Issuer: CN=NetLock Arany (Class Gold) F\u0151tan\xfas\xedtv\xe1ny O=NetLock Kft. OU=Tan\xfas\xedtv\xe1nykiad\xf3k (Certification Services) +# Subject: CN=NetLock Arany (Class Gold) F\u0151tan\xfas\xedtv\xe1ny O=NetLock Kft. OU=Tan\xfas\xedtv\xe1nykiad\xf3k (Certification Services) +# Label: "NetLock Arany (Class Gold) F\u0151tan\xfas\xedtv\xe1ny" # Serial: 80544274841616 # MD5 Fingerprint: c5:a1:b7:ff:73:dd:d6:d7:34:32:18:df:fc:3c:ad:88 # SHA1 Fingerprint: 06:08:3f:59:3f:15:a1:04:a0:69:a4:6b:a9:03:d0:06:b7:97:09:91 @@ -2933,9 +2759,9 @@ VoNzcOSGGtIxQbovvi0TWnZvTuhOgQ4/WwMioBK+ZlgRSssDxLQqKi2WF+A5VLxI 03YnnZotBqbJ7DnSq9ufmgsnAjUpsUCV5/nonFWIGUbWtzT1fs45mtk48VH3Tyw= -----END CERTIFICATE----- -# Issuer: CN=Certinomis - Autorité Racine O=Certinomis OU=0002 433998903 -# Subject: CN=Certinomis - Autorité Racine O=Certinomis OU=0002 433998903 -# Label: "Certinomis - Autorité Racine" +# Issuer: CN=Certinomis - Autorit\xe9 Racine O=Certinomis OU=0002 433998903 +# Subject: CN=Certinomis - Autorit\xe9 Racine O=Certinomis OU=0002 433998903 +# Label: "Certinomis - Autorit\xe9 Racine" # Serial: 1 # MD5 Fingerprint: 7f:30:78:8c:03:e3:ca:c9:0a:e2:c9:ea:1e:aa:55:1a # SHA1 Fingerprint: 2e:14:da:ec:28:f0:fa:1e:8e:38:9a:4e:ab:eb:26:c0:0a:d3:83:c3 @@ -2973,51 +2799,6 @@ dsLLO7XSAPCjDuGtbkD326C00EauFddEwk01+dIL8hf2rGbVJLJP0RyZwG71fet0 BLj5TXcJ17TPBzAJ8bgAVtkXFhYKK4bfjwEZGuW7gmP/vgt2Fl43N+bYdJeimUV5 -----END CERTIFICATE----- -# Issuer: CN=Root CA 
Generalitat Valenciana O=Generalitat Valenciana OU=PKIGVA -# Subject: CN=Root CA Generalitat Valenciana O=Generalitat Valenciana OU=PKIGVA -# Label: "Root CA Generalitat Valenciana" -# Serial: 994436456 -# MD5 Fingerprint: 2c:8c:17:5e:b1:54:ab:93:17:b5:36:5a:db:d1:c6:f2 -# SHA1 Fingerprint: a0:73:e5:c5:bd:43:61:0d:86:4c:21:13:0a:85:58:57:cc:9c:ea:46 -# SHA256 Fingerprint: 8c:4e:df:d0:43:48:f3:22:96:9e:7e:29:a4:cd:4d:ca:00:46:55:06:1c:16:e1:b0:76:42:2e:f3:42:ad:63:0e ------BEGIN CERTIFICATE----- -MIIGizCCBXOgAwIBAgIEO0XlaDANBgkqhkiG9w0BAQUFADBoMQswCQYDVQQGEwJF -UzEfMB0GA1UEChMWR2VuZXJhbGl0YXQgVmFsZW5jaWFuYTEPMA0GA1UECxMGUEtJ -R1ZBMScwJQYDVQQDEx5Sb290IENBIEdlbmVyYWxpdGF0IFZhbGVuY2lhbmEwHhcN -MDEwNzA2MTYyMjQ3WhcNMjEwNzAxMTUyMjQ3WjBoMQswCQYDVQQGEwJFUzEfMB0G -A1UEChMWR2VuZXJhbGl0YXQgVmFsZW5jaWFuYTEPMA0GA1UECxMGUEtJR1ZBMScw -JQYDVQQDEx5Sb290IENBIEdlbmVyYWxpdGF0IFZhbGVuY2lhbmEwggEiMA0GCSqG -SIb3DQEBAQUAA4IBDwAwggEKAoIBAQDGKqtXETcvIorKA3Qdyu0togu8M1JAJke+ -WmmmO3I2F0zo37i7L3bhQEZ0ZQKQUgi0/6iMweDHiVYQOTPvaLRfX9ptI6GJXiKj -SgbwJ/BXufjpTjJ3Cj9BZPPrZe52/lSqfR0grvPXdMIKX/UIKFIIzFVd0g/bmoGl -u6GzwZTNVOAydTGRGmKy3nXiz0+J2ZGQD0EbtFpKd71ng+CT516nDOeB0/RSrFOy -A8dEJvt55cs0YFAQexvba9dHq198aMpunUEDEO5rmXteJajCq+TA81yc477OMUxk -Hl6AovWDfgzWyoxVjr7gvkkHD6MkQXpYHYTqWBLI4bft75PelAgxAgMBAAGjggM7 -MIIDNzAyBggrBgEFBQcBAQQmMCQwIgYIKwYBBQUHMAGGFmh0dHA6Ly9vY3NwLnBr -aS5ndmEuZXMwEgYDVR0TAQH/BAgwBgEB/wIBAjCCAjQGA1UdIASCAiswggInMIIC -IwYKKwYBBAG/VQIBADCCAhMwggHoBggrBgEFBQcCAjCCAdoeggHWAEEAdQB0AG8A -cgBpAGQAYQBkACAAZABlACAAQwBlAHIAdABpAGYAaQBjAGEAYwBpAPMAbgAgAFIA -YQDtAHoAIABkAGUAIABsAGEAIABHAGUAbgBlAHIAYQBsAGkAdABhAHQAIABWAGEA -bABlAG4AYwBpAGEAbgBhAC4ADQAKAEwAYQAgAEQAZQBjAGwAYQByAGEAYwBpAPMA -bgAgAGQAZQAgAFAAcgDhAGMAdABpAGMAYQBzACAAZABlACAAQwBlAHIAdABpAGYA -aQBjAGEAYwBpAPMAbgAgAHEAdQBlACAAcgBpAGcAZQAgAGUAbAAgAGYAdQBuAGMA -aQBvAG4AYQBtAGkAZQBuAHQAbwAgAGQAZQAgAGwAYQAgAHAAcgBlAHMAZQBuAHQA -ZQAgAEEAdQB0AG8AcgBpAGQAYQBkACAAZABlACAAQwBlAHIAdABpAGYAaQBjAGEA 
-YwBpAPMAbgAgAHMAZQAgAGUAbgBjAHUAZQBuAHQAcgBhACAAZQBuACAAbABhACAA -ZABpAHIAZQBjAGMAaQDzAG4AIAB3AGUAYgAgAGgAdAB0AHAAOgAvAC8AdwB3AHcA -LgBwAGsAaQAuAGcAdgBhAC4AZQBzAC8AYwBwAHMwJQYIKwYBBQUHAgEWGWh0dHA6 -Ly93d3cucGtpLmd2YS5lcy9jcHMwHQYDVR0OBBYEFHs100DSHHgZZu90ECjcPk+y -eAT8MIGVBgNVHSMEgY0wgYqAFHs100DSHHgZZu90ECjcPk+yeAT8oWykajBoMQsw -CQYDVQQGEwJFUzEfMB0GA1UEChMWR2VuZXJhbGl0YXQgVmFsZW5jaWFuYTEPMA0G -A1UECxMGUEtJR1ZBMScwJQYDVQQDEx5Sb290IENBIEdlbmVyYWxpdGF0IFZhbGVu -Y2lhbmGCBDtF5WgwDQYJKoZIhvcNAQEFBQADggEBACRhTvW1yEICKrNcda3Fbcrn -lD+laJWIwVTAEGmiEi8YPyVQqHxK6sYJ2fR1xkDar1CdPaUWu20xxsdzCkj+IHLt -b8zog2EWRpABlUt9jppSCS/2bxzkoXHPjCpaF3ODR00PNvsETUlR4hTJZGH71BTg -9J63NI8KJr2XXPR5OkowGcytT6CYirQxlyric21+eLj4iIlPsSKRZEv1UN4D2+XF -ducTZnV+ZfsBn5OHiJ35Rld8TWCvmHMTI6QgkYH60GFmuH3Rr9ZvHmw96RH9qfmC -IoaZM3Fa6hlXPZHNqcCjbgcTpsnt+GijnsNacgmHKNHEc8RzGF9QdRYxn7fofMM= ------END CERTIFICATE----- - # Issuer: CN=TWCA Root Certification Authority O=TAIWAN-CA OU=Root CA # Subject: CN=TWCA Root Certification Authority O=TAIWAN-CA OU=Root CA # Label: "TWCA Root Certification Authority" @@ -3410,8 +3191,8 @@ iAYLtqZLICjU3j2LrTcFU3T+bsy8QxdxXvnFzBqpYe73dgzzcvRyrc9yAjYHR8/v GVCJYMzpJJUPwssd8m92kMfMdcGWxZ0= -----END CERTIFICATE----- -# Issuer: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. (c) Aralık 2007 -# Subject: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. (c) Aralık 2007 +# Issuer: CN=T\xdcRKTRUST Elektronik Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 O=T\xdcRKTRUST Bilgi \u0130leti\u015fim ve Bili\u015fim G\xfcvenli\u011fi Hizmetleri A.\u015e. (c) Aral\u0131k 2007 +# Subject: CN=T\xdcRKTRUST Elektronik Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 O=T\xdcRKTRUST Bilgi \u0130leti\u015fim ve Bili\u015fim G\xfcvenli\u011fi Hizmetleri A.\u015e. 
(c) Aral\u0131k 2007 # Label: "TURKTRUST Certificate Services Provider Root 2007" # Serial: 1 # MD5 Fingerprint: 2b:70:20:56:86:82:a0:18:c8:07:53:12:28:70:21:72 @@ -3893,8 +3674,8 @@ HL/EVlP6Y2XQ8xwOFvVrhlhNGNTkDY6lnVuR3HYkUD/GKvvZt5y11ubQ2egZixVx SK236thZiNSQvxaz2emsWWFUyBy6ysHK4bkgTI86k4mloMy/0/Z1pHWWbVY= -----END CERTIFICATE----- -# Issuer: CN=E-Tugra Certification Authority O=E-Tuğra EBG Bilişim Teknolojileri ve Hizmetleri A.Ş. OU=E-Tugra Sertifikasyon Merkezi -# Subject: CN=E-Tugra Certification Authority O=E-Tuğra EBG Bilişim Teknolojileri ve Hizmetleri A.Ş. OU=E-Tugra Sertifikasyon Merkezi +# Issuer: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi +# Subject: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi # Label: "E-Tugra Certification Authority" # Serial: 7667447206703254355 # MD5 Fingerprint: b8:a1:03:63:b0:bd:21:71:70:8a:6f:13:3a:bb:79:49 @@ -4300,8 +4081,8 @@ OtzCWfHjXEa7ZywCRuoeSKbmW9m1vFGikpbbqsY3Iqb+zCB0oy2pLmvLwIIRIbWT ee5Ehr7XHuQe+w== -----END CERTIFICATE----- -# Issuer: CN=CA 沃通根证书 O=WoSign CA Limited -# Subject: CN=CA 沃通根证书 O=WoSign CA Limited +# Issuer: CN=CA \u6c83\u901a\u6839\u8bc1\u4e66 O=WoSign CA Limited +# Subject: CN=CA \u6c83\u901a\u6839\u8bc1\u4e66 O=WoSign CA Limited # Label: "WoSign China" # Serial: 106921963437422998931660691310149453965 # MD5 Fingerprint: 78:83:5b:52:16:76:c4:24:3b:83:78:e8:ac:da:9a:93 @@ -4747,9 +4528,9 @@ AAoACxGV2lZFA4gKn2fQ1XmxqI1AbQ3CekD6819kR5LLU7m7Wc5P/dAVUwHY3+vZ 5nbv0CO7O6l5s9UCKc2Jo5YPSjXnTkLAdc0Hz+Ys63su -----END CERTIFICATE----- -# Issuer: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H5 O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. -# Subject: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H5 O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. 
-# Label: "TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H5" +# Issuer: CN=T\xdcRKTRUST Elektronik Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 H5 O=T\xdcRKTRUST Bilgi \u0130leti\u015fim ve Bili\u015fim G\xfcvenli\u011fi Hizmetleri A.\u015e. +# Subject: CN=T\xdcRKTRUST Elektronik Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 H5 O=T\xdcRKTRUST Bilgi \u0130leti\u015fim ve Bili\u015fim G\xfcvenli\u011fi Hizmetleri A.\u015e. +# Label: "T\xdcRKTRUST Elektronik Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 H5" # Serial: 156233699172481 # MD5 Fingerprint: da:70:8e:f0:22:df:93:26:f6:5f:9f:d3:15:06:52:4e # SHA1 Fingerprint: c4:18:f6:4d:46:d1:df:00:3d:27:30:13:72:43:a9:12:11:c6:75:fb @@ -4780,39 +4561,6 @@ Yv4HAqGEVka+lgqaE9chTLd8B59OTj+RdPsnnRHM3eaxynFNExc5JsUpISuTKWqW +qtB4Uu2NQvAmxU= -----END CERTIFICATE----- -# Issuer: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H6 O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. -# Subject: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H6 O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. 
-# Label: "TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H6" -# Serial: 138134509972618 -# MD5 Fingerprint: f8:c5:ee:2a:6b:be:95:8d:08:f7:25:4a:ea:71:3e:46 -# SHA1 Fingerprint: 8a:5c:8c:ee:a5:03:e6:05:56:ba:d8:1b:d4:f6:c9:b0:ed:e5:2f:e0 -# SHA256 Fingerprint: 8d:e7:86:55:e1:be:7f:78:47:80:0b:93:f6:94:d2:1d:36:8c:c0:6e:03:3e:7f:ab:04:bb:5e:b9:9d:a6:b7:00 ------BEGIN CERTIFICATE----- -MIIEJjCCAw6gAwIBAgIGfaHyZeyKMA0GCSqGSIb3DQEBCwUAMIGxMQswCQYDVQQG -EwJUUjEPMA0GA1UEBwwGQW5rYXJhMU0wSwYDVQQKDERUw5xSS1RSVVNUIEJpbGdp -IMSwbGV0acWfaW0gdmUgQmlsacWfaW0gR8O8dmVubGnEn2kgSGl6bWV0bGVyaSBB -LsWeLjFCMEAGA1UEAww5VMOcUktUUlVTVCBFbGVrdHJvbmlrIFNlcnRpZmlrYSBI -aXptZXQgU2HEn2xhecSxY8Sxc8SxIEg2MB4XDTEzMTIxODA5MDQxMFoXDTIzMTIx -NjA5MDQxMFowgbExCzAJBgNVBAYTAlRSMQ8wDQYDVQQHDAZBbmthcmExTTBLBgNV -BAoMRFTDnFJLVFJVU1QgQmlsZ2kgxLBsZXRpxZ9pbSB2ZSBCaWxpxZ9pbSBHw7x2 -ZW5sacSfaSBIaXptZXRsZXJpIEEuxZ4uMUIwQAYDVQQDDDlUw5xSS1RSVVNUIEVs -ZWt0cm9uaWsgU2VydGlmaWthIEhpem1ldCBTYcSfbGF5xLFjxLFzxLEgSDYwggEi -MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCdsGjW6L0UlqMACprx9MfMkU1x -eHe59yEmFXNRFpQJRwXiM/VomjX/3EsvMsew7eKC5W/a2uqsxgbPJQ1BgfbBOCK9 -+bGlprMBvD9QFyv26WZV1DOzXPhDIHiTVRZwGTLmiddk671IUP320EEDwnS3/faA -z1vFq6TWlRKb55cTMgPp1KtDWxbtMyJkKbbSk60vbNg9tvYdDjTu0n2pVQ8g9P0p -u5FbHH3GQjhtQiht1AH7zYiXSX6484P4tZgvsycLSF5W506jM7NE1qXyGJTtHB6p -lVxiSvgNZ1GpryHV+DKdeboaX+UEVU0TRv/yz3THGmNtwx8XEsMeED5gCLMxAgMB -AAGjQjBAMB0GA1UdDgQWBBTdVRcT9qzoSCHK77Wv0QAy7Z6MtTAOBgNVHQ8BAf8E -BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAb1gNl0Oq -FlQ+v6nfkkU/hQu7VtMMUszIv3ZnXuaqs6fvuay0EBQNdH49ba3RfdCaqaXKGDsC -QC4qnFAUi/5XfldcEQlLNkVS9z2sFP1E34uXI9TDwe7UU5X+LEr+DXCqu4svLcsy -o4LyVN/Y8t3XSHLuSqMplsNEzm61kod2pLv0kmzOLBQJZo6NrRa1xxsJYTvjIKID -gI6tflEATseWhvtDmHd9KMeP2Cpu54Rvl0EpABZeTeIT6lnAY2c6RPuY/ATTMHKm -9ocJV612ph1jmv3XZch4gyt1O6VbuA1df74jrlZVlFjvH4GMKrLN5ptjnhi85WsG -tAuYSyher4hYyw== ------END CERTIFICATE----- - # Issuer: CN=Certinomis - Root CA O=Certinomis OU=0002 433998903 # Subject: CN=Certinomis - Root CA 
O=Certinomis OU=0002 433998903 # Label: "Certinomis - Root CA" @@ -5273,3 +5021,226 @@ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc= -----END CERTIFICATE----- + +# Issuer: O=FNMT-RCM OU=AC RAIZ FNMT-RCM +# Subject: O=FNMT-RCM OU=AC RAIZ FNMT-RCM +# Label: "AC RAIZ FNMT-RCM" +# Serial: 485876308206448804701554682760554759 +# MD5 Fingerprint: e2:09:04:b4:d3:bd:d1:a0:14:fd:1a:d2:47:c4:57:1d +# SHA1 Fingerprint: ec:50:35:07:b2:15:c4:95:62:19:e2:a8:9a:5b:42:99:2c:4c:2c:20 +# SHA256 Fingerprint: eb:c5:57:0c:29:01:8c:4d:67:b1:aa:12:7b:af:12:f7:03:b4:61:1e:bc:17:b7:da:b5:57:38:94:17:9b:93:fa +-----BEGIN CERTIFICATE----- +MIIFgzCCA2ugAwIBAgIPXZONMGc2yAYdGsdUhGkHMA0GCSqGSIb3DQEBCwUAMDsx +CzAJBgNVBAYTAkVTMREwDwYDVQQKDAhGTk1ULVJDTTEZMBcGA1UECwwQQUMgUkFJ +WiBGTk1ULVJDTTAeFw0wODEwMjkxNTU5NTZaFw0zMDAxMDEwMDAwMDBaMDsxCzAJ +BgNVBAYTAkVTMREwDwYDVQQKDAhGTk1ULVJDTTEZMBcGA1UECwwQQUMgUkFJWiBG +Tk1ULVJDTTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALpxgHpMhm5/ +yBNtwMZ9HACXjywMI7sQmkCpGreHiPibVmr75nuOi5KOpyVdWRHbNi63URcfqQgf +BBckWKo3Shjf5TnUV/3XwSyRAZHiItQDwFj8d0fsjz50Q7qsNI1NOHZnjrDIbzAz +WHFctPVrbtQBULgTfmxKo0nRIBnuvMApGGWn3v7v3QqQIecaZ5JCEJhfTzC8PhxF +tBDXaEAUwED653cXeuYLj2VbPNmaUtu1vZ5Gzz3rkQUCwJaydkxNEJY7kvqcfw+Z +374jNUUeAlz+taibmSXaXvMiwzn15Cou08YfxGyqxRxqAQVKL9LFwag0Jl1mpdIC +IfkYtwb1TplvqKtMUejPUBjFd8g5CSxJkjKZqLsXF3mwWsXmo8RZZUc1g16p6DUL +mbvkzSDGm0oGObVo/CK67lWMK07q87Hj/LaZmtVC+nFNCM+HHmpxffnTtOmlcYF7 +wk5HlqX2doWjKI/pgG6BU6VtX7hI+cL5NqYuSf+4lsKMB7ObiFj86xsc3i1w4peS +MKGJ47xVqCfWS+2QrYv6YyVZLag13cqXM7zlzced0ezvXg5KkAYmY6252TUtB7p2 +ZSysV4999AeU14ECll2jB0nVetBX+RvnU0Z1qrB5QstocQjpYL05ac70r8NWQMet +UqIJ5G+GR4of6ygnXYMgrwTJbFaai0b1AgMBAAGjgYMwgYAwDwYDVR0TAQH/BAUw +AwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFPd9xf3E6Jobd2Sn9R2gzL+H +YJptMD4GA1UdIAQ3MDUwMwYEVR0gADArMCkGCCsGAQUFBwIBFh1odHRwOi8vd3d3 
+LmNlcnQuZm5tdC5lcy9kcGNzLzANBgkqhkiG9w0BAQsFAAOCAgEAB5BK3/MjTvDD +nFFlm5wioooMhfNzKWtN/gHiqQxjAb8EZ6WdmF/9ARP67Jpi6Yb+tmLSbkyU+8B1 +RXxlDPiyN8+sD8+Nb/kZ94/sHvJwnvDKuO+3/3Y3dlv2bojzr2IyIpMNOmqOFGYM +LVN0V2Ue1bLdI4E7pWYjJ2cJj+F3qkPNZVEI7VFY/uY5+ctHhKQV8Xa7pO6kO8Rf +77IzlhEYt8llvhjho6Tc+hj507wTmzl6NLrTQfv6MooqtyuGC2mDOL7Nii4LcK2N +JpLuHvUBKwrZ1pebbuCoGRw6IYsMHkCtA+fdZn71uSANA+iW+YJF1DngoABd15jm +fZ5nc8OaKveri6E6FO80vFIOiZiaBECEHX5FaZNXzuvO+FB8TxxuBEOb+dY7Ixjp +6o7RTUaN8Tvkasq6+yO3m/qZASlaWFot4/nUbQ4mrcFuNLwy+AwF+mWj2zs3gyLp +1txyM/1d8iC9djwj2ij3+RvrWWTV3F9yfiD8zYm1kGdNYno/Tq0dwzn+evQoFt9B +9kiABdcPUXmsEKvU7ANm5mqwujGSQkBqvjrTcuFqN1W8rB2Vt2lh8kORdOag0wok +RqEIr9baRRmW1FMdW4R58MD3R++Lj8UGrp1MYp3/RgT408m2ECVAdf4WqslKYIYv +uu8wd+RU4riEmViAqhOLUTpPSPaLtrM= +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 1 O=Amazon +# Subject: CN=Amazon Root CA 1 O=Amazon +# Label: "Amazon Root CA 1" +# Serial: 143266978916655856878034712317230054538369994 +# MD5 Fingerprint: 43:c6:bf:ae:ec:fe:ad:2f:18:c6:88:68:30:fc:c8:e6 +# SHA1 Fingerprint: 8d:a7:f9:65:ec:5e:fc:37:91:0f:1c:6e:59:fd:c1:cc:6a:6e:de:16 +# SHA256 Fingerprint: 8e:cd:e6:88:4f:3d:87:b1:12:5b:a3:1a:c3:fc:b1:3d:70:16:de:7f:57:cc:90:4f:e1:cb:97:c6:ae:98:19:6e +-----BEGIN CERTIFICATE----- +MIIDQTCCAimgAwIBAgITBmyfz5m/jAo54vB4ikPmljZbyjANBgkqhkiG9w0BAQsF +ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6 +b24gUm9vdCBDQSAxMB4XDTE1MDUyNjAwMDAwMFoXDTM4MDExNzAwMDAwMFowOTEL +MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv +b3QgQ0EgMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALJ4gHHKeNXj +ca9HgFB0fW7Y14h29Jlo91ghYPl0hAEvrAIthtOgQ3pOsqTQNroBvo3bSMgHFzZM +9O6II8c+6zf1tRn4SWiw3te5djgdYZ6k/oI2peVKVuRF4fn9tBb6dNqcmzU5L/qw +IFAGbHrQgLKm+a/sRxmPUDgH3KKHOVj4utWp+UhnMJbulHheb4mjUcAwhmahRWa6 +VOujw5H5SNz/0egwLX0tdHA114gk957EWW67c4cX8jJGKLhD+rcdqsq08p8kDi1L +93FcXmn/6pUCyziKrlA4b9v7LWIbxcceVOF34GfID5yHI9Y/QCB/IIDEgEw+OyQm +jgSubJrIqg0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMC 
+AYYwHQYDVR0OBBYEFIQYzIU07LwMlJQuCFmcx7IQTgoIMA0GCSqGSIb3DQEBCwUA +A4IBAQCY8jdaQZChGsV2USggNiMOruYou6r4lK5IpDB/G/wkjUu0yKGX9rbxenDI +U5PMCCjjmCXPI6T53iHTfIUJrU6adTrCC2qJeHZERxhlbI1Bjjt/msv0tadQ1wUs +N+gDS63pYaACbvXy8MWy7Vu33PqUXHeeE6V/Uq2V8viTO96LXFvKWlJbYK8U90vv +o/ufQJVtMVT8QtPHRh8jrdkPSHCa2XV4cdFyQzR1bldZwgJcJmApzyMZFo6IQ6XU +5MsI+yMRQ+hDKXJioaldXgjUkK642M4UwtBV8ob2xJNDd2ZhwLnoQdeXeGADbkpy +rqXRfboQnoZsG4q5WTP468SQvvG5 +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 2 O=Amazon +# Subject: CN=Amazon Root CA 2 O=Amazon +# Label: "Amazon Root CA 2" +# Serial: 143266982885963551818349160658925006970653239 +# MD5 Fingerprint: c8:e5:8d:ce:a8:42:e2:7a:c0:2a:5c:7c:9e:26:bf:66 +# SHA1 Fingerprint: 5a:8c:ef:45:d7:a6:98:59:76:7a:8c:8b:44:96:b5:78:cf:47:4b:1a +# SHA256 Fingerprint: 1b:a5:b2:aa:8c:65:40:1a:82:96:01:18:f8:0b:ec:4f:62:30:4d:83:ce:c4:71:3a:19:c3:9c:01:1e:a4:6d:b4 +-----BEGIN CERTIFICATE----- +MIIFQTCCAymgAwIBAgITBmyf0pY1hp8KD+WGePhbJruKNzANBgkqhkiG9w0BAQwF +ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6 +b24gUm9vdCBDQSAyMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTEL +MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv +b3QgQ0EgMjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK2Wny2cSkxK +gXlRmeyKy2tgURO8TW0G/LAIjd0ZEGrHJgw12MBvIITplLGbhQPDW9tK6Mj4kHbZ +W0/jTOgGNk3Mmqw9DJArktQGGWCsN0R5hYGCrVo34A3MnaZMUnbqQ523BNFQ9lXg +1dKmSYXpN+nKfq5clU1Imj+uIFptiJXZNLhSGkOQsL9sBbm2eLfq0OQ6PBJTYv9K +8nu+NQWpEjTj82R0Yiw9AElaKP4yRLuH3WUnAnE72kr3H9rN9yFVkE8P7K6C4Z9r +2UXTu/Bfh+08LDmG2j/e7HJV63mjrdvdfLC6HM783k81ds8P+HgfajZRRidhW+me +z/CiVX18JYpvL7TFz4QuK/0NURBs+18bvBt+xa47mAExkv8LV/SasrlX6avvDXbR +8O70zoan4G7ptGmh32n2M8ZpLpcTnqWHsFcQgTfJU7O7f/aS0ZzQGPSSbtqDT6Zj +mUyl+17vIWR6IF9sZIUVyzfpYgwLKhbcAS4y2j5L9Z469hdAlO+ekQiG+r5jqFoz +7Mt0Q5X5bGlSNscpb/xVA1wf+5+9R+vnSUeVC06JIglJ4PVhHvG/LopyboBZ/1c6 ++XUyo05f7O0oYtlNc/LMgRdg7c3r3NunysV+Ar3yVAhU/bQtCSwXVEqY0VThUWcI +0u1ufm8/0i2BWSlmy5A5lREedCf+3euvAgMBAAGjQjBAMA8GA1UdEwEB/wQFMAMB 
+Af8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSwDPBMMPQFWAJI/TPlUq9LhONm +UjANBgkqhkiG9w0BAQwFAAOCAgEAqqiAjw54o+Ci1M3m9Zh6O+oAA7CXDpO8Wqj2 +LIxyh6mx/H9z/WNxeKWHWc8w4Q0QshNabYL1auaAn6AFC2jkR2vHat+2/XcycuUY ++gn0oJMsXdKMdYV2ZZAMA3m3MSNjrXiDCYZohMr/+c8mmpJ5581LxedhpxfL86kS +k5Nrp+gvU5LEYFiwzAJRGFuFjWJZY7attN6a+yb3ACfAXVU3dJnJUH/jWS5E4ywl +7uxMMne0nxrpS10gxdr9HIcWxkPo1LsmmkVwXqkLN1PiRnsn/eBG8om3zEK2yygm +btmlyTrIQRNg91CMFa6ybRoVGld45pIq2WWQgj9sAq+uEjonljYE1x2igGOpm/Hl +urR8FLBOybEfdF849lHqm/osohHUqS0nGkWxr7JOcQ3AWEbWaQbLU8uz/mtBzUF+ +fUwPfHJ5elnNXkoOrJupmHN5fLT0zLm4BwyydFy4x2+IoZCn9Kr5v2c69BoVYh63 +n749sSmvZ6ES8lgQGVMDMBu4Gon2nL2XA46jCfMdiyHxtN/kHNGfZQIG6lzWE7OE +76KlXIx3KadowGuuQNKotOrN8I1LOJwZmhsoVLiJkO/KdYE+HvJkJMcYr07/R54H +9jVlpNMKVv/1F2Rs76giJUmTtt8AF9pYfl3uxRuw0dFfIRDH+fO6AgonB8Xx1sfT +4PsJYGw= +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 3 O=Amazon +# Subject: CN=Amazon Root CA 3 O=Amazon +# Label: "Amazon Root CA 3" +# Serial: 143266986699090766294700635381230934788665930 +# MD5 Fingerprint: a0:d4:ef:0b:f7:b5:d8:49:95:2a:ec:f5:c4:fc:81:87 +# SHA1 Fingerprint: 0d:44:dd:8c:3c:8c:1a:1a:58:75:64:81:e9:0f:2e:2a:ff:b3:d2:6e +# SHA256 Fingerprint: 18:ce:6c:fe:7b:f1:4e:60:b2:e3:47:b8:df:e8:68:cb:31:d0:2e:bb:3a:da:27:15:69:f5:03:43:b4:6d:b3:a4 +-----BEGIN CERTIFICATE----- +MIIBtjCCAVugAwIBAgITBmyf1XSXNmY/Owua2eiedgPySjAKBggqhkjOPQQDAjA5 +MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g +Um9vdCBDQSAzMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG +A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg +Q0EgMzBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABCmXp8ZBf8ANm+gBG1bG8lKl +ui2yEujSLtf6ycXYqm0fc4E7O5hrOXwzpcVOho6AF2hiRVd9RFgdszflZwjrZt6j +QjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSr +ttvXBp43rDCGB5Fwx5zEGbF4wDAKBggqhkjOPQQDAgNJADBGAiEA4IWSoxe3jfkr +BqWTrBqYaGFy+uGh0PsceGCmQ5nFuMQCIQCcAu/xlJyzlvnrxir4tiz+OpAUFteM +YyRIHN8wfdVoOw== +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 4 O=Amazon +# Subject: CN=Amazon 
Root CA 4 O=Amazon +# Label: "Amazon Root CA 4" +# Serial: 143266989758080763974105200630763877849284878 +# MD5 Fingerprint: 89:bc:27:d5:eb:17:8d:06:6a:69:d5:fd:89:47:b4:cd +# SHA1 Fingerprint: f6:10:84:07:d6:f8:bb:67:98:0c:c2:e2:44:c2:eb:ae:1c:ef:63:be +# SHA256 Fingerprint: e3:5d:28:41:9e:d0:20:25:cf:a6:90:38:cd:62:39:62:45:8d:a5:c6:95:fb:de:a3:c2:2b:0b:fb:25:89:70:92 +-----BEGIN CERTIFICATE----- +MIIB8jCCAXigAwIBAgITBmyf18G7EEwpQ+Vxe3ssyBrBDjAKBggqhkjOPQQDAzA5 +MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g +Um9vdCBDQSA0MB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG +A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg +Q0EgNDB2MBAGByqGSM49AgEGBSuBBAAiA2IABNKrijdPo1MN/sGKe0uoe0ZLY7Bi +9i0b2whxIdIA6GO9mif78DluXeo9pcmBqqNbIJhFXRbb/egQbeOc4OO9X4Ri83Bk +M6DLJC9wuoihKqB1+IGuYgbEgds5bimwHvouXKNCMEAwDwYDVR0TAQH/BAUwAwEB +/zAOBgNVHQ8BAf8EBAMCAYYwHQYDVR0OBBYEFNPsxzplbszh2naaVvuc84ZtV+WB +MAoGCCqGSM49BAMDA2gAMGUCMDqLIfG9fhGt0O9Yli/W651+kI0rz2ZVwyzjKKlw +CkcO8DdZEv8tmZQoTipPNU0zWgIxAOp1AE47xDqUEpHJWEadIRNyp4iciuRMStuW +1KyLa2tJElMzrdfkviT8tQp21KW8EA== +-----END CERTIFICATE----- + +# Issuer: CN=LuxTrust Global Root 2 O=LuxTrust S.A. +# Subject: CN=LuxTrust Global Root 2 O=LuxTrust S.A. 
+# Label: "LuxTrust Global Root 2" +# Serial: 59914338225734147123941058376788110305822489521 +# MD5 Fingerprint: b2:e1:09:00:61:af:f7:f1:91:6f:c4:ad:8d:5e:3b:7c +# SHA1 Fingerprint: 1e:0e:56:19:0a:d1:8b:25:98:b2:04:44:ff:66:8a:04:17:99:5f:3f +# SHA256 Fingerprint: 54:45:5f:71:29:c2:0b:14:47:c4:18:f9:97:16:8f:24:c5:8f:c5:02:3b:f5:da:5b:e2:eb:6e:1d:d8:90:2e:d5 +-----BEGIN CERTIFICATE----- +MIIFwzCCA6ugAwIBAgIUCn6m30tEntpqJIWe5rgV0xZ/u7EwDQYJKoZIhvcNAQEL +BQAwRjELMAkGA1UEBhMCTFUxFjAUBgNVBAoMDUx1eFRydXN0IFMuQS4xHzAdBgNV +BAMMFkx1eFRydXN0IEdsb2JhbCBSb290IDIwHhcNMTUwMzA1MTMyMTU3WhcNMzUw +MzA1MTMyMTU3WjBGMQswCQYDVQQGEwJMVTEWMBQGA1UECgwNTHV4VHJ1c3QgUy5B +LjEfMB0GA1UEAwwWTHV4VHJ1c3QgR2xvYmFsIFJvb3QgMjCCAiIwDQYJKoZIhvcN +AQEBBQADggIPADCCAgoCggIBANeFl78RmOnwYoNMPIf5U2o3C/IPPIfOb9wmKb3F +ibrJgz337spbxm1Jc7TJRqMbNBM/wYlFV/TZsfs2ZUv7COJIcRHIbjuend+JZTem +hfY7RBi2xjcwYkSSl2l9QjAk5A0MiWtj3sXh306pFGxT4GHO9hcvHTy95iJMHZP1 +EMShduxq3sVs35a0VkBCwGKSMKEtFZSg0iAGCW5qbeXrt77U8PEVfIvmTroTzEsn +Xpk8F12PgX8zPU/TPxvsXD/wPEx1bvKm1Z3aLQdjAsZy6ZS8TEmVT4hSyNvoaYL4 +zDRbIvCGp4m9SAptZoFtyMhk+wHh9OHe2Z7d21vUKpkmFRseTJIpgp7VkoGSQXAZ +96Tlk0u8d2cx3Rz9MXANF5kM+Qw5GSoXtTBxVdUPrljhPS80m8+f9niFwpN6cj5m +j5wWEWCPnolvZ77gR1o7DJpni89Gxq44o/KnvObWhWszJHAiS8sIm7vI+AIpHb4g +DEa/a4ebsypmQjVGbKq6rfmYe+lQVRQxv7HaLe2ArWgk+2mr2HETMOZns4dA/Yl+ +8kPREd8vZS9kzl8UubG/Mb2HeFpZZYiq/FkySIbWTLkpS5XTdvN3JW1CHDiDTf2j +X5t/Lax5Gw5CMZdjpPuKadUiDTSQMC6otOBttpSsvItO13D8xTiOZCXhTTmQzsmH +hFhxAgMBAAGjgagwgaUwDwYDVR0TAQH/BAUwAwEB/zBCBgNVHSAEOzA5MDcGByuB +KwEBAQowLDAqBggrBgEFBQcCARYeaHR0cHM6Ly9yZXBvc2l0b3J5Lmx1eHRydXN0 +Lmx1MA4GA1UdDwEB/wQEAwIBBjAfBgNVHSMEGDAWgBT/GCh2+UgFLKGu8SsbK7JT ++Et8szAdBgNVHQ4EFgQU/xgodvlIBSyhrvErGyuyU/hLfLMwDQYJKoZIhvcNAQEL +BQADggIBAGoZFO1uecEsh9QNcH7X9njJCwROxLHOk3D+sFTAMs2ZMGQXvw/l4jP9 +BzZAcg4atmpZ1gDlaCDdLnINH2pkMSCEfUmmWjfrRcmF9dTHF5kH5ptV5AzoqbTO +jFu1EVzPig4N1qx3gf4ynCSecs5U89BvolbW7MM3LGVYvlcAGvI1+ut7MV3CwRI9 +loGIlonBWVx65n9wNOeD4rHh4bhY79SV5GCc8JaXcozrhAIuZY+kt9J/Z93I055c 
+qqmkoCUUBpvsT34tC38ddfEz2O3OuHVtPlu5mB0xDVbYQw8wkbIEa91WvpWAVWe+ +2M2D2RjuLg+GLZKecBPs3lHJQ3gCpU3I+V/EkVhGFndadKpAvAefMLmx9xIX3eP/ +JEAdemrRTxgKqpAd60Ae36EeRJIQmvKN4dFLRp7oRUKX6kWZ8+xm1QL68qZKJKre +zrnK+T+Tb/mjuuqlPpmt/f97mfVl7vBZKGfXkJWkE4SphMHozs51k2MavDzq1WQf +LSoSOcbDWjLtR5EWDrw4wVDej8oqkDQc7kGUnF4ZLvhFSZl0kbAEb+MEWrGrKqv+ +x9CWttrhSmQGbmBNvUJO/3jaJMobtNeWOWyu8Q6qp31IiyBMz2TWuJdGsE7RKlY6 +oJO9r4Ak4Ap+58rVyuiFVdw2KuGUaJPHZnJED4AhMmwlxyOAgwrr +-----END CERTIFICATE----- + +# Issuer: CN=TUBITAK Kamu SM SSL Kok Sertifikasi - Surum 1 O=Turkiye Bilimsel ve Teknolojik Arastirma Kurumu - TUBITAK OU=Kamu Sertifikasyon Merkezi - Kamu SM +# Subject: CN=TUBITAK Kamu SM SSL Kok Sertifikasi - Surum 1 O=Turkiye Bilimsel ve Teknolojik Arastirma Kurumu - TUBITAK OU=Kamu Sertifikasyon Merkezi - Kamu SM +# Label: "TUBITAK Kamu SM SSL Kok Sertifikasi - Surum 1" +# Serial: 1 +# MD5 Fingerprint: dc:00:81:dc:69:2f:3e:2f:b0:3b:f6:3d:5a:91:8e:49 +# SHA1 Fingerprint: 31:43:64:9b:ec:ce:27:ec:ed:3a:3f:0b:8f:0d:e4:e8:91:dd:ee:ca +# SHA256 Fingerprint: 46:ed:c3:68:90:46:d5:3a:45:3f:b3:10:4a:b8:0d:ca:ec:65:8b:26:60:ea:16:29:dd:7e:86:79:90:64:87:16 +-----BEGIN CERTIFICATE----- +MIIEYzCCA0ugAwIBAgIBATANBgkqhkiG9w0BAQsFADCB0jELMAkGA1UEBhMCVFIx +GDAWBgNVBAcTD0dlYnplIC0gS29jYWVsaTFCMEAGA1UEChM5VHVya2l5ZSBCaWxp +bXNlbCB2ZSBUZWtub2xvamlrIEFyYXN0aXJtYSBLdXJ1bXUgLSBUVUJJVEFLMS0w +KwYDVQQLEyRLYW11IFNlcnRpZmlrYXN5b24gTWVya2V6aSAtIEthbXUgU00xNjA0 +BgNVBAMTLVRVQklUQUsgS2FtdSBTTSBTU0wgS29rIFNlcnRpZmlrYXNpIC0gU3Vy +dW0gMTAeFw0xMzExMjUwODI1NTVaFw00MzEwMjUwODI1NTVaMIHSMQswCQYDVQQG +EwJUUjEYMBYGA1UEBxMPR2ViemUgLSBLb2NhZWxpMUIwQAYDVQQKEzlUdXJraXll +IEJpbGltc2VsIHZlIFRla25vbG9qaWsgQXJhc3Rpcm1hIEt1cnVtdSAtIFRVQklU +QUsxLTArBgNVBAsTJEthbXUgU2VydGlmaWthc3lvbiBNZXJrZXppIC0gS2FtdSBT +TTE2MDQGA1UEAxMtVFVCSVRBSyBLYW11IFNNIFNTTCBLb2sgU2VydGlmaWthc2kg +LSBTdXJ1bSAxMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAr3UwM6q7 +a9OZLBI3hNmNe5eA027n/5tQlT6QlVZC1xl8JoSNkvoBHToP4mQ4t4y86Ij5iySr 
+LqP1N+RAjhgleYN1Hzv/bKjFxlb4tO2KRKOrbEz8HdDc72i9z+SqzvBV96I01INr +N3wcwv61A+xXzry0tcXtAA9TNypN9E8Mg/uGz8v+jE69h/mniyFXnHrfA2eJLJ2X +YacQuFWQfw4tJzh03+f92k4S400VIgLI4OD8D62K18lUUMw7D8oWgITQUVbDjlZ/ +iSIzL+aFCr2lqBs23tPcLG07xxO9WSMs5uWk99gL7eqQQESolbuT1dCANLZGeA4f +AJNG4e7p+exPFwIDAQABo0IwQDAdBgNVHQ4EFgQUZT/HiobGPN08VFw1+DrtUgxH +V8gwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEL +BQADggEBACo/4fEyjq7hmFxLXs9rHmoJ0iKpEsdeV31zVmSAhHqT5Am5EM2fKifh +AHe+SMg1qIGf5LgsyX8OsNJLN13qudULXjS99HMpw+0mFZx+CFOKWI3QSyjfwbPf +IPP54+M638yclNhOT8NrF7f3cuitZjO1JVOr4PhMqZ398g26rrnZqsZr+ZO7rqu4 +lzwDGrpDxpa5RXI4s6ehlj2Re37AIVNMh+3yC1SVUZPVIqUNivGTDj5UDrDYyU7c +8jEyVupk+eq1nRZmQnLzf9OxMUP8pI4X8W0jq5Rm+K37DwhuJi1/FwcJsoz7UMCf +lo3Ptv0AnVoUmr8CRPXBwp8iXqIPoeM= +-----END CERTIFICATE----- diff --git a/lib/certifi/weak.pem b/lib/certifi/weak.pem index 108f9d631a376e4e1057f9c11df3e95c3908eb62..7691c0765895e9efb40cdbd5efa8804b70923175 100644 --- a/lib/certifi/weak.pem +++ b/lib/certifi/weak.pem @@ -317,35 +317,6 @@ eu6FSqdQgPCnXEqULl8FmTxSQeDNtGPPAUO6nIPcj2A781q0tHuu2guQOHXvgR1m 0vdXcDazv/wor3ElhVsT/h5/WrQ8 -----END CERTIFICATE----- -# Issuer: O=RSA Security Inc OU=RSA Security 2048 V3 -# Subject: O=RSA Security Inc OU=RSA Security 2048 V3 -# Label: "RSA Security 2048 v3" -# Serial: 13297492616345471454730593562152402946 -# MD5 Fingerprint: 77:0d:19:b1:21:fd:00:42:9c:3e:0c:a5:dd:0b:02:8e -# SHA1 Fingerprint: 25:01:90:19:cf:fb:d9:99:1c:b7:68:25:74:8d:94:5f:30:93:95:42 -# SHA256 Fingerprint: af:8b:67:62:a1:e5:28:22:81:61:a9:5d:5c:55:9e:e2:66:27:8f:75:d7:9e:83:01:89:a5:03:50:6a:bd:6b:4c ------BEGIN CERTIFICATE----- -MIIDYTCCAkmgAwIBAgIQCgEBAQAAAnwAAAAKAAAAAjANBgkqhkiG9w0BAQUFADA6 -MRkwFwYDVQQKExBSU0EgU2VjdXJpdHkgSW5jMR0wGwYDVQQLExRSU0EgU2VjdXJp -dHkgMjA0OCBWMzAeFw0wMTAyMjIyMDM5MjNaFw0yNjAyMjIyMDM5MjNaMDoxGTAX -BgNVBAoTEFJTQSBTZWN1cml0eSBJbmMxHTAbBgNVBAsTFFJTQSBTZWN1cml0eSAy -MDQ4IFYzMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAt49VcdKA3Xtp 
-eafwGFAyPGJn9gqVB93mG/Oe2dJBVGutn3y+Gc37RqtBaB4Y6lXIL5F4iSj7Jylg -/9+PjDvJSZu1pJTOAeo+tWN7fyb9Gd3AIb2E0S1PRsNO3Ng3OTsor8udGuorryGl -wSMiuLgbWhOHV4PR8CDn6E8jQrAApX2J6elhc5SYcSa8LWrg903w8bYqODGBDSnh -AMFRD0xS+ARaqn1y07iHKrtjEAMqs6FPDVpeRrc9DvV07Jmf+T0kgYim3WBU6JU2 -PcYJk5qjEoAAVZkZR73QpXzDuvsf9/UP+Ky5tfQ3mBMY3oVbtwyCO4dvlTlYMNpu -AWgXIszACwIDAQABo2MwYTAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIB -BjAfBgNVHSMEGDAWgBQHw1EwpKrpRa41JPr/JCwz0LGdjDAdBgNVHQ4EFgQUB8NR -MKSq6UWuNST6/yQsM9CxnYwwDQYJKoZIhvcNAQEFBQADggEBAF8+hnZuuDU8TjYc -HnmYv/3VEhF5Ug7uMYm83X/50cYVIeiKAVQNOvtUudZj1LGqlk2iQk3UUx+LEN5/ -Zb5gEydxiKRz44Rj0aRV4VCT5hsOedBnvEbIvz8XDZXmxpBp3ue0L96VfdASPz0+ -f00/FGj1EVDVwfSQpQgdMWD/YIwjVAqv/qFuxdF6Kmh4zx6CCiC0H63lhbJqaHVO -rSU3lIW+vaHU6rcMSzyd6BIA8F+sDeGscGNz9395nzIlQnQFgCi/vcEkllgVsRch -6YlL2weIZ/QVrXA+L02FO8K32/6YaCOJ4XQP3vTFhGMpG8zLB8kApKnXwiJPZ9d3 -7CAFYd4= ------END CERTIFICATE----- - # Issuer: CN=GeoTrust Global CA O=GeoTrust Inc. # Subject: CN=GeoTrust Global CA O=GeoTrust Inc. # Label: "GeoTrust Global CA" @@ -1643,42 +1614,6 @@ wKeI8lN3s2Berq4o2jUsbzRF0ybh3uxbTydrFny9RAQYgrOJeRcQcT16ohZO9QHN pGxlaKFJdlxDydi8NmdspZS11My5vWo1ViHe2MPr+8ukYEywVaCge1ey -----END CERTIFICATE----- -# Issuer: CN=WellsSecure Public Root Certificate Authority O=Wells Fargo WellsSecure OU=Wells Fargo Bank NA -# Subject: CN=WellsSecure Public Root Certificate Authority O=Wells Fargo WellsSecure OU=Wells Fargo Bank NA -# Label: "WellsSecure Public Root Certificate Authority" -# Serial: 1 -# MD5 Fingerprint: 15:ac:a5:c2:92:2d:79:bc:e8:7f:cb:67:ed:02:cf:36 -# SHA1 Fingerprint: e7:b4:f6:9d:61:ec:90:69:db:7e:90:a7:40:1a:3c:f4:7d:4f:e8:ee -# SHA256 Fingerprint: a7:12:72:ae:aa:a3:cf:e8:72:7f:7f:b3:9f:0f:b3:d1:e5:42:6e:90:60:b0:6e:e6:f1:3e:9a:3c:58:33:cd:43 ------BEGIN CERTIFICATE----- -MIIEvTCCA6WgAwIBAgIBATANBgkqhkiG9w0BAQUFADCBhTELMAkGA1UEBhMCVVMx -IDAeBgNVBAoMF1dlbGxzIEZhcmdvIFdlbGxzU2VjdXJlMRwwGgYDVQQLDBNXZWxs -cyBGYXJnbyBCYW5rIE5BMTYwNAYDVQQDDC1XZWxsc1NlY3VyZSBQdWJsaWMgUm9v 
-dCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwHhcNMDcxMjEzMTcwNzU0WhcNMjIxMjE0 -MDAwNzU0WjCBhTELMAkGA1UEBhMCVVMxIDAeBgNVBAoMF1dlbGxzIEZhcmdvIFdl -bGxzU2VjdXJlMRwwGgYDVQQLDBNXZWxscyBGYXJnbyBCYW5rIE5BMTYwNAYDVQQD -DC1XZWxsc1NlY3VyZSBQdWJsaWMgUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkw -ggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDub7S9eeKPCCGeOARBJe+r -WxxTkqxtnt3CxC5FlAM1iGd0V+PfjLindo8796jE2yljDpFoNoqXjopxaAkH5OjU -Dk/41itMpBb570OYj7OeUt9tkTmPOL13i0Nj67eT/DBMHAGTthP796EfvyXhdDcs -HqRePGj4S78NuR4uNuip5Kf4D8uCdXw1LSLWwr8L87T8bJVhHlfXBIEyg1J55oNj -z7fLY4sR4r1e6/aN7ZVyKLSsEmLpSjPmgzKuBXWVvYSV2ypcm44uDLiBK0HmOFaf -SZtsdvqKXfcBeYF8wYNABf5x/Qw/zE5gCQ5lRxAvAcAFP4/4s0HvWkJ+We/Slwxl -AgMBAAGjggE0MIIBMDAPBgNVHRMBAf8EBTADAQH/MDkGA1UdHwQyMDAwLqAsoCqG -KGh0dHA6Ly9jcmwucGtpLndlbGxzZmFyZ28uY29tL3dzcHJjYS5jcmwwDgYDVR0P -AQH/BAQDAgHGMB0GA1UdDgQWBBQmlRkQ2eihl5H/3BnZtQQ+0nMKajCBsgYDVR0j -BIGqMIGngBQmlRkQ2eihl5H/3BnZtQQ+0nMKaqGBi6SBiDCBhTELMAkGA1UEBhMC -VVMxIDAeBgNVBAoMF1dlbGxzIEZhcmdvIFdlbGxzU2VjdXJlMRwwGgYDVQQLDBNX -ZWxscyBGYXJnbyBCYW5rIE5BMTYwNAYDVQQDDC1XZWxsc1NlY3VyZSBQdWJsaWMg -Um9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHmCAQEwDQYJKoZIhvcNAQEFBQADggEB -ALkVsUSRzCPIK0134/iaeycNzXK7mQDKfGYZUMbVmO2rvwNa5U3lHshPcZeG1eMd -/ZDJPHV3V3p9+N701NX3leZ0bh08rnyd2wIDBSxxSyU+B+NemvVmFymIGjifz6pB -A4SXa5M4esowRBskRDPQ5NHcKDj0E0M1NSljqHyita04pO2t/caaH/+Xc/77szWn -k4bGdpEA5qxRFsQnMlzbc9qlk1eOPm01JghZ1edE13YgY+esE2fDbbFwRnzVlhE9 -iW9dqKHrjQrawx0zbKPqZxmamX9LPYNRKh3KL4YMon4QLSvUFpULB6ouFJJJtylv -2G0xffX8oRAHh84vWdw+WNs= ------END CERTIFICATE----- - # Issuer: CN=COMODO ECC Certification Authority O=COMODO CA Limited # Subject: CN=COMODO ECC Certification Authority O=COMODO CA Limited # Label: "COMODO ECC Certification Authority" @@ -1764,57 +1699,6 @@ Fj4A4xylNoEYokxSdsARo27mHbrjWr42U8U+dY+GaSlYU7Wcu2+fXMUY7N0v4ZjJ /L7fCg0= -----END CERTIFICATE----- -# Issuer: CN=Microsec e-Szigno Root CA O=Microsec Ltd. OU=e-Szigno CA -# Subject: CN=Microsec e-Szigno Root CA O=Microsec Ltd. 
OU=e-Szigno CA -# Label: "Microsec e-Szigno Root CA" -# Serial: 272122594155480254301341951808045322001 -# MD5 Fingerprint: f0:96:b6:2f:c5:10:d5:67:8e:83:25:32:e8:5e:2e:e5 -# SHA1 Fingerprint: 23:88:c9:d3:71:cc:9e:96:3d:ff:7d:3c:a7:ce:fc:d6:25:ec:19:0d -# SHA256 Fingerprint: 32:7a:3d:76:1a:ba:de:a0:34:eb:99:84:06:27:5c:b1:a4:77:6e:fd:ae:2f:df:6d:01:68:ea:1c:4f:55:67:d0 ------BEGIN CERTIFICATE----- -MIIHqDCCBpCgAwIBAgIRAMy4579OKRr9otxmpRwsDxEwDQYJKoZIhvcNAQEFBQAw -cjELMAkGA1UEBhMCSFUxETAPBgNVBAcTCEJ1ZGFwZXN0MRYwFAYDVQQKEw1NaWNy -b3NlYyBMdGQuMRQwEgYDVQQLEwtlLVN6aWdubyBDQTEiMCAGA1UEAxMZTWljcm9z -ZWMgZS1Temlnbm8gUm9vdCBDQTAeFw0wNTA0MDYxMjI4NDRaFw0xNzA0MDYxMjI4 -NDRaMHIxCzAJBgNVBAYTAkhVMREwDwYDVQQHEwhCdWRhcGVzdDEWMBQGA1UEChMN -TWljcm9zZWMgTHRkLjEUMBIGA1UECxMLZS1Temlnbm8gQ0ExIjAgBgNVBAMTGU1p -Y3Jvc2VjIGUtU3ppZ25vIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAw -ggEKAoIBAQDtyADVgXvNOABHzNuEwSFpLHSQDCHZU4ftPkNEU6+r+ICbPHiN1I2u -uO/TEdyB5s87lozWbxXGd36hL+BfkrYn13aaHUM86tnsL+4582pnS4uCzyL4ZVX+ -LMsvfUh6PXX5qqAnu3jCBspRwn5mS6/NoqdNAoI/gqyFxuEPkEeZlApxcpMqyabA -vjxWTHOSJ/FrtfX9/DAFYJLG65Z+AZHCabEeHXtTRbjcQR/Ji3HWVBTji1R4P770 -Yjtb9aPs1ZJ04nQw7wHb4dSrmZsqa/i9phyGI0Jf7Enemotb9HI6QMVJPqW+jqpx -62z69Rrkav17fVVA71hu5tnVvCSrwe+3AgMBAAGjggQ3MIIEMzBnBggrBgEFBQcB -AQRbMFkwKAYIKwYBBQUHMAGGHGh0dHBzOi8vcmNhLmUtc3ppZ25vLmh1L29jc3Aw -LQYIKwYBBQUHMAKGIWh0dHA6Ly93d3cuZS1zemlnbm8uaHUvUm9vdENBLmNydDAP -BgNVHRMBAf8EBTADAQH/MIIBcwYDVR0gBIIBajCCAWYwggFiBgwrBgEEAYGoGAIB -AQEwggFQMCgGCCsGAQUFBwIBFhxodHRwOi8vd3d3LmUtc3ppZ25vLmh1L1NaU1ov -MIIBIgYIKwYBBQUHAgIwggEUHoIBEABBACAAdABhAG4A+gBzAO0AdAB2AOEAbgB5 -ACAA6QByAHQAZQBsAG0AZQB6AOkAcwDpAGgAZQB6ACAA6QBzACAAZQBsAGYAbwBn -AGEAZADhAHMA4QBoAG8AegAgAGEAIABTAHoAbwBsAGcA4QBsAHQAYQB0APMAIABT -AHoAbwBsAGcA4QBsAHQAYQB0AOEAcwBpACAAUwB6AGEAYgDhAGwAeQB6AGEAdABh -ACAAcwB6AGUAcgBpAG4AdAAgAGsAZQBsAGwAIABlAGwAagDhAHIAbgBpADoAIABo -AHQAdABwADoALwAvAHcAdwB3AC4AZQAtAHMAegBpAGcAbgBvAC4AaAB1AC8AUwBa -AFMAWgAvMIHIBgNVHR8EgcAwgb0wgbqggbeggbSGIWh0dHA6Ly93d3cuZS1zemln 
-bm8uaHUvUm9vdENBLmNybIaBjmxkYXA6Ly9sZGFwLmUtc3ppZ25vLmh1L0NOPU1p -Y3Jvc2VjJTIwZS1Temlnbm8lMjBSb290JTIwQ0EsT1U9ZS1Temlnbm8lMjBDQSxP -PU1pY3Jvc2VjJTIwTHRkLixMPUJ1ZGFwZXN0LEM9SFU/Y2VydGlmaWNhdGVSZXZv -Y2F0aW9uTGlzdDtiaW5hcnkwDgYDVR0PAQH/BAQDAgEGMIGWBgNVHREEgY4wgYuB -EGluZm9AZS1zemlnbm8uaHWkdzB1MSMwIQYDVQQDDBpNaWNyb3NlYyBlLVN6aWdu -w7MgUm9vdCBDQTEWMBQGA1UECwwNZS1TemlnbsOzIEhTWjEWMBQGA1UEChMNTWlj -cm9zZWMgS2Z0LjERMA8GA1UEBxMIQnVkYXBlc3QxCzAJBgNVBAYTAkhVMIGsBgNV -HSMEgaQwgaGAFMegSXUWYYTbMUuE0vE3QJDvTtz3oXakdDByMQswCQYDVQQGEwJI -VTERMA8GA1UEBxMIQnVkYXBlc3QxFjAUBgNVBAoTDU1pY3Jvc2VjIEx0ZC4xFDAS -BgNVBAsTC2UtU3ppZ25vIENBMSIwIAYDVQQDExlNaWNyb3NlYyBlLVN6aWdubyBS -b290IENBghEAzLjnv04pGv2i3GalHCwPETAdBgNVHQ4EFgQUx6BJdRZhhNsxS4TS -8TdAkO9O3PcwDQYJKoZIhvcNAQEFBQADggEBANMTnGZjWS7KXHAM/IO8VbH0jgds -ZifOwTsgqRy7RlRw7lrMoHfqaEQn6/Ip3Xep1fvj1KcExJW4C+FEaGAHQzAxQmHl -7tnlJNUb3+FKG6qfx1/4ehHqE5MAyopYse7tDk2016g2JnzgOsHVV4Lxdbb9iV/a -86g4nzUGCM4ilb7N1fy+W955a9x6qWVmvrElWl/tftOsRm1M9DKHtCAE4Gx4sHfR -hUZLphK3dehKyVZs15KrnfVJONJPU+NVkBHbmJbGSfI+9J8b4PeI3CVimUTYc78/ -MPMMNz7UwiiAc7EBt51alhQBS6kRnSlqLtBdgcDPsiBDxwPgN05dCtxZICU= ------END CERTIFICATE----- - # Issuer: CN=Certigna O=Dhimyotis # Subject: CN=Certigna O=Dhimyotis # Label: "Certigna" @@ -1946,8 +1830,8 @@ W9c3rkIO3aQab3yIVMUWbuF6aC74Or8NpDyJO3inTmODBCEIZ43ygknQW/2xzQ+D hNQ+IIX3Sj0rnP0qCglN6oH4EZw= -----END CERTIFICATE----- -# Issuer: CN=TÜBİTAK UEKAE Kök Sertifika Hizmet Sağlayıcısı - Sürüm 3 O=Türkiye Bilimsel ve Teknolojik Araştırma Kurumu - TÜBİTAK OU=Ulusal Elektronik ve Kriptoloji Araştırma Enstitüsü - UEKAE/Kamu Sertifikasyon Merkezi -# Subject: CN=TÜBİTAK UEKAE Kök Sertifika Hizmet Sağlayıcısı - Sürüm 3 O=Türkiye Bilimsel ve Teknolojik Araştırma Kurumu - TÜBİTAK OU=Ulusal Elektronik ve Kriptoloji Araştırma Enstitüsü - UEKAE/Kamu Sertifikasyon Merkezi +# Issuer: CN=T\xdcB\u0130TAK UEKAE K\xf6k Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 - S\xfcr\xfcm 3 O=T\xfcrkiye Bilimsel ve Teknolojik Ara\u015ft\u0131rma Kurumu - 
T\xdcB\u0130TAK OU=Ulusal Elektronik ve Kriptoloji Ara\u015ft\u0131rma Enstit\xfcs\xfc - UEKAE/Kamu Sertifikasyon Merkezi +# Subject: CN=T\xdcB\u0130TAK UEKAE K\xf6k Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 - S\xfcr\xfcm 3 O=T\xfcrkiye Bilimsel ve Teknolojik Ara\u015ft\u0131rma Kurumu - T\xdcB\u0130TAK OU=Ulusal Elektronik ve Kriptoloji Ara\u015ft\u0131rma Enstit\xfcs\xfc - UEKAE/Kamu Sertifikasyon Merkezi # Label: "T\xc3\x9c\x42\xC4\xB0TAK UEKAE K\xC3\xB6k Sertifika Hizmet Sa\xC4\x9Flay\xc4\xb1\x63\xc4\xb1s\xc4\xb1 - S\xC3\xBCr\xC3\xBCm 3" # Serial: 17 # MD5 Fingerprint: ed:41:f5:8c:50:c5:2b:9c:73:e6:ee:6c:eb:c2:a8:26 @@ -1984,34 +1868,6 @@ oN+J1q2MdqMTw5RhK2vZbMEHCiIHhWyFJEapvj+LeISCfiQMnf2BN+MlqO02TpUs yZyQ2uypQjyttgI= -----END CERTIFICATE----- -# Issuer: CN=Buypass Class 2 CA 1 O=Buypass AS-983163327 -# Subject: CN=Buypass Class 2 CA 1 O=Buypass AS-983163327 -# Label: "Buypass Class 2 CA 1" -# Serial: 1 -# MD5 Fingerprint: b8:08:9a:f0:03:cc:1b:0d:c8:6c:0b:76:a1:75:64:23 -# SHA1 Fingerprint: a0:a1:ab:90:c9:fc:84:7b:3b:12:61:e8:97:7d:5f:d3:22:61:d3:cc -# SHA256 Fingerprint: 0f:4e:9c:dd:26:4b:02:55:50:d1:70:80:63:40:21:4f:e9:44:34:c9:b0:2f:69:7e:c7:10:fc:5f:ea:fb:5e:38 ------BEGIN CERTIFICATE----- -MIIDUzCCAjugAwIBAgIBATANBgkqhkiG9w0BAQUFADBLMQswCQYDVQQGEwJOTzEd -MBsGA1UECgwUQnV5cGFzcyBBUy05ODMxNjMzMjcxHTAbBgNVBAMMFEJ1eXBhc3Mg -Q2xhc3MgMiBDQSAxMB4XDTA2MTAxMzEwMjUwOVoXDTE2MTAxMzEwMjUwOVowSzEL -MAkGA1UEBhMCTk8xHTAbBgNVBAoMFEJ1eXBhc3MgQVMtOTgzMTYzMzI3MR0wGwYD -VQQDDBRCdXlwYXNzIENsYXNzIDIgQ0EgMTCCASIwDQYJKoZIhvcNAQEBBQADggEP -ADCCAQoCggEBAIs8B0XY9t/mx8q6jUPFR42wWsE425KEHK8T1A9vNkYgxC7McXA0 -ojTTNy7Y3Tp3L8DrKehc0rWpkTSHIln+zNvnma+WwajHQN2lFYxuyHyXA8vmIPLX -l18xoS830r7uvqmtqEyeIWZDO6i88wmjONVZJMHCR3axiFyCO7srpgTXjAePzdVB -HfCuuCkslFJgNJQ72uA40Z0zPhX0kzLFANq1KWYOOngPIVJfAuWSeyXTkh4vFZ2B -5J2O6O+JzhRMVB0cgRJNcKi+EAUXfh/RuFdV7c27UsKwHnjCTTZoy1YmwVLBvXb3 -WNVyfh9EdrsAiR0WnVE1703CVu9r4Iw7DekCAwEAAaNCMEAwDwYDVR0TAQH/BAUw 
-AwEB/zAdBgNVHQ4EFgQUP42aWYv8e3uco684sDntkHGA1sgwDgYDVR0PAQH/BAQD -AgEGMA0GCSqGSIb3DQEBBQUAA4IBAQAVGn4TirnoB6NLJzKyQJHyIdFkhb5jatLP -gcIV1Xp+DCmsNx4cfHZSldq1fyOhKXdlyTKdqC5Wq2B2zha0jX94wNWZUYN/Xtm+ -DKhQ7SLHrQVMdvvt7h5HZPb3J31cKA9FxVxiXqaakZG3Uxcu3K1gnZZkOb1naLKu -BctN518fV4bVIJwo+28TOPX2EZL2fZleHwzoq0QkKXJAPTZSr4xYkHPB7GEseaHs -h7U/2k3ZIQAw3pDaDtMaSKk+hQsUi4y8QZ5q9w5wwDX3OaJdZtB7WZ+oRxKaJyOk -LY4ng5IgodcVf/EuGO70SH8vf/GhGLWhC5SgYiAynB321O+/TIho ------END CERTIFICATE----- - # Issuer: O=certSIGN OU=certSIGN ROOT CA # Subject: O=certSIGN OU=certSIGN ROOT CA # Label: "certSIGN ROOT CA" @@ -2068,36 +1924,6 @@ buXf6iFViZx9fX+Y9QCJ7uOEwFyWtcVG6kbghVW2G8kS1sHNzYDzAgE8yGnLRUhj 2JTQ7IUOO04RZfSCjKY9ri4ilAnIXOo8gV0WKgOXFlUJ24pBgp5mmxE= -----END CERTIFICATE----- -# Issuer: O=Japanese Government OU=ApplicationCA -# Subject: O=Japanese Government OU=ApplicationCA -# Label: "ApplicationCA - Japanese Government" -# Serial: 49 -# MD5 Fingerprint: 7e:23:4e:5b:a7:a5:b4:25:e9:00:07:74:11:62:ae:d6 -# SHA1 Fingerprint: 7f:8a:b0:cf:d0:51:87:6a:66:f3:36:0f:47:c8:8d:8c:d3:35:fc:74 -# SHA256 Fingerprint: 2d:47:43:7d:e1:79:51:21:5a:12:f3:c5:8e:51:c7:29:a5:80:26:ef:1f:cc:0a:5f:b3:d9:dc:01:2f:60:0d:19 ------BEGIN CERTIFICATE----- -MIIDoDCCAoigAwIBAgIBMTANBgkqhkiG9w0BAQUFADBDMQswCQYDVQQGEwJKUDEc -MBoGA1UEChMTSmFwYW5lc2UgR292ZXJubWVudDEWMBQGA1UECxMNQXBwbGljYXRp -b25DQTAeFw0wNzEyMTIxNTAwMDBaFw0xNzEyMTIxNTAwMDBaMEMxCzAJBgNVBAYT -AkpQMRwwGgYDVQQKExNKYXBhbmVzZSBHb3Zlcm5tZW50MRYwFAYDVQQLEw1BcHBs -aWNhdGlvbkNBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAp23gdE6H -j6UG3mii24aZS2QNcfAKBZuOquHMLtJqO8F6tJdhjYq+xpqcBrSGUeQ3DnR4fl+K -f5Sk10cI/VBaVuRorChzoHvpfxiSQE8tnfWuREhzNgaeZCw7NCPbXCbkcXmP1G55 -IrmTwcrNwVbtiGrXoDkhBFcsovW8R0FPXjQilbUfKW1eSvNNcr5BViCH/OlQR9cw -FO5cjFW6WY2H/CPek9AEjP3vbb3QesmlOmpyM8ZKDQUXKi17safY1vC+9D/qDiht -QWEjdnjDuGWk81quzMKq2edY3rZ+nYVunyoKb58DKTCXKB28t89UKU5RMfkntigm -/qJj5kEW8DOYRwIDAQABo4GeMIGbMB0GA1UdDgQWBBRUWssmP3HMlEYNllPqa0jQ 
-k/5CdTAOBgNVHQ8BAf8EBAMCAQYwWQYDVR0RBFIwUKROMEwxCzAJBgNVBAYTAkpQ -MRgwFgYDVQQKDA/ml6XmnKzlm73mlL/lupwxIzAhBgNVBAsMGuOCouODl+ODquOC -seODvOOCt+ODp+ODs0NBMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD -ggEBADlqRHZ3ODrso2dGD/mLBqj7apAxzn7s2tGJfHrrLgy9mTLnsCTWw//1sogJ -hyzjVOGjprIIC8CFqMjSnHH2HZ9g/DgzE+Ge3Atf2hZQKXsvcJEPmbo0NI2VdMV+ -eKlmXb3KIXdCEKxmJj3ekav9FfBv7WxfEPjzFvYDio+nEhEMy/0/ecGc/WLuo89U -DNErXxc+4z6/wCs+CZv+iKZ+tJIX/COUgb1up8WMwusRRdv4QcmWdupwX3kSa+Sj -B1oF7ydJzyGfikwJcGapJsErEU4z0g781mzSDjJkaP+tBXhfAx2o45CsJOAPQKdL -rosot4LKGAfmt1t06SAZf7IbiVQ= ------END CERTIFICATE----- - # Issuer: CN=GeoTrust Primary Certification Authority - G3 O=GeoTrust Inc. OU=(c) 2008 GeoTrust Inc. - For authorized use only # Subject: CN=GeoTrust Primary Certification Authority - G3 O=GeoTrust Inc. OU=(c) 2008 GeoTrust Inc. - For authorized use only # Label: "GeoTrust Primary Certification Authority - G3" @@ -2277,9 +2103,9 @@ kf3upm7ktS5Jj4d4gYDs5bG1MAoGCCqGSM49BAMDA2gAMGUCMGYhDBgmYFo4e1ZC FRJZap7v1VmyHVIsmXHNxynfGyphe3HR3vPA5Q06Sqotp9iGKt0uEA== -----END CERTIFICATE----- -# Issuer: CN=NetLock Arany (Class Gold) Főtanúsítvány O=NetLock Kft. OU=Tanúsítványkiadók (Certification Services) -# Subject: CN=NetLock Arany (Class Gold) Főtanúsítvány O=NetLock Kft. OU=Tanúsítványkiadók (Certification Services) -# Label: "NetLock Arany (Class Gold) Főtanúsítvány" +# Issuer: CN=NetLock Arany (Class Gold) F\u0151tan\xfas\xedtv\xe1ny O=NetLock Kft. OU=Tan\xfas\xedtv\xe1nykiad\xf3k (Certification Services) +# Subject: CN=NetLock Arany (Class Gold) F\u0151tan\xfas\xedtv\xe1ny O=NetLock Kft. 
OU=Tan\xfas\xedtv\xe1nykiad\xf3k (Certification Services) +# Label: "NetLock Arany (Class Gold) F\u0151tan\xfas\xedtv\xe1ny" # Serial: 80544274841616 # MD5 Fingerprint: c5:a1:b7:ff:73:dd:d6:d7:34:32:18:df:fc:3c:ad:88 # SHA1 Fingerprint: 06:08:3f:59:3f:15:a1:04:a0:69:a4:6b:a9:03:d0:06:b7:97:09:91 @@ -2933,9 +2759,9 @@ VoNzcOSGGtIxQbovvi0TWnZvTuhOgQ4/WwMioBK+ZlgRSssDxLQqKi2WF+A5VLxI 03YnnZotBqbJ7DnSq9ufmgsnAjUpsUCV5/nonFWIGUbWtzT1fs45mtk48VH3Tyw= -----END CERTIFICATE----- -# Issuer: CN=Certinomis - Autorité Racine O=Certinomis OU=0002 433998903 -# Subject: CN=Certinomis - Autorité Racine O=Certinomis OU=0002 433998903 -# Label: "Certinomis - Autorité Racine" +# Issuer: CN=Certinomis - Autorit\xe9 Racine O=Certinomis OU=0002 433998903 +# Subject: CN=Certinomis - Autorit\xe9 Racine O=Certinomis OU=0002 433998903 +# Label: "Certinomis - Autorit\xe9 Racine" # Serial: 1 # MD5 Fingerprint: 7f:30:78:8c:03:e3:ca:c9:0a:e2:c9:ea:1e:aa:55:1a # SHA1 Fingerprint: 2e:14:da:ec:28:f0:fa:1e:8e:38:9a:4e:ab:eb:26:c0:0a:d3:83:c3 @@ -2973,51 +2799,6 @@ dsLLO7XSAPCjDuGtbkD326C00EauFddEwk01+dIL8hf2rGbVJLJP0RyZwG71fet0 BLj5TXcJ17TPBzAJ8bgAVtkXFhYKK4bfjwEZGuW7gmP/vgt2Fl43N+bYdJeimUV5 -----END CERTIFICATE----- -# Issuer: CN=Root CA Generalitat Valenciana O=Generalitat Valenciana OU=PKIGVA -# Subject: CN=Root CA Generalitat Valenciana O=Generalitat Valenciana OU=PKIGVA -# Label: "Root CA Generalitat Valenciana" -# Serial: 994436456 -# MD5 Fingerprint: 2c:8c:17:5e:b1:54:ab:93:17:b5:36:5a:db:d1:c6:f2 -# SHA1 Fingerprint: a0:73:e5:c5:bd:43:61:0d:86:4c:21:13:0a:85:58:57:cc:9c:ea:46 -# SHA256 Fingerprint: 8c:4e:df:d0:43:48:f3:22:96:9e:7e:29:a4:cd:4d:ca:00:46:55:06:1c:16:e1:b0:76:42:2e:f3:42:ad:63:0e ------BEGIN CERTIFICATE----- -MIIGizCCBXOgAwIBAgIEO0XlaDANBgkqhkiG9w0BAQUFADBoMQswCQYDVQQGEwJF -UzEfMB0GA1UEChMWR2VuZXJhbGl0YXQgVmFsZW5jaWFuYTEPMA0GA1UECxMGUEtJ -R1ZBMScwJQYDVQQDEx5Sb290IENBIEdlbmVyYWxpdGF0IFZhbGVuY2lhbmEwHhcN -MDEwNzA2MTYyMjQ3WhcNMjEwNzAxMTUyMjQ3WjBoMQswCQYDVQQGEwJFUzEfMB0G 
-A1UEChMWR2VuZXJhbGl0YXQgVmFsZW5jaWFuYTEPMA0GA1UECxMGUEtJR1ZBMScw -JQYDVQQDEx5Sb290IENBIEdlbmVyYWxpdGF0IFZhbGVuY2lhbmEwggEiMA0GCSqG -SIb3DQEBAQUAA4IBDwAwggEKAoIBAQDGKqtXETcvIorKA3Qdyu0togu8M1JAJke+ -WmmmO3I2F0zo37i7L3bhQEZ0ZQKQUgi0/6iMweDHiVYQOTPvaLRfX9ptI6GJXiKj -SgbwJ/BXufjpTjJ3Cj9BZPPrZe52/lSqfR0grvPXdMIKX/UIKFIIzFVd0g/bmoGl -u6GzwZTNVOAydTGRGmKy3nXiz0+J2ZGQD0EbtFpKd71ng+CT516nDOeB0/RSrFOy -A8dEJvt55cs0YFAQexvba9dHq198aMpunUEDEO5rmXteJajCq+TA81yc477OMUxk -Hl6AovWDfgzWyoxVjr7gvkkHD6MkQXpYHYTqWBLI4bft75PelAgxAgMBAAGjggM7 -MIIDNzAyBggrBgEFBQcBAQQmMCQwIgYIKwYBBQUHMAGGFmh0dHA6Ly9vY3NwLnBr -aS5ndmEuZXMwEgYDVR0TAQH/BAgwBgEB/wIBAjCCAjQGA1UdIASCAiswggInMIIC -IwYKKwYBBAG/VQIBADCCAhMwggHoBggrBgEFBQcCAjCCAdoeggHWAEEAdQB0AG8A -cgBpAGQAYQBkACAAZABlACAAQwBlAHIAdABpAGYAaQBjAGEAYwBpAPMAbgAgAFIA -YQDtAHoAIABkAGUAIABsAGEAIABHAGUAbgBlAHIAYQBsAGkAdABhAHQAIABWAGEA -bABlAG4AYwBpAGEAbgBhAC4ADQAKAEwAYQAgAEQAZQBjAGwAYQByAGEAYwBpAPMA -bgAgAGQAZQAgAFAAcgDhAGMAdABpAGMAYQBzACAAZABlACAAQwBlAHIAdABpAGYA -aQBjAGEAYwBpAPMAbgAgAHEAdQBlACAAcgBpAGcAZQAgAGUAbAAgAGYAdQBuAGMA -aQBvAG4AYQBtAGkAZQBuAHQAbwAgAGQAZQAgAGwAYQAgAHAAcgBlAHMAZQBuAHQA -ZQAgAEEAdQB0AG8AcgBpAGQAYQBkACAAZABlACAAQwBlAHIAdABpAGYAaQBjAGEA -YwBpAPMAbgAgAHMAZQAgAGUAbgBjAHUAZQBuAHQAcgBhACAAZQBuACAAbABhACAA -ZABpAHIAZQBjAGMAaQDzAG4AIAB3AGUAYgAgAGgAdAB0AHAAOgAvAC8AdwB3AHcA -LgBwAGsAaQAuAGcAdgBhAC4AZQBzAC8AYwBwAHMwJQYIKwYBBQUHAgEWGWh0dHA6 -Ly93d3cucGtpLmd2YS5lcy9jcHMwHQYDVR0OBBYEFHs100DSHHgZZu90ECjcPk+y -eAT8MIGVBgNVHSMEgY0wgYqAFHs100DSHHgZZu90ECjcPk+yeAT8oWykajBoMQsw -CQYDVQQGEwJFUzEfMB0GA1UEChMWR2VuZXJhbGl0YXQgVmFsZW5jaWFuYTEPMA0G -A1UECxMGUEtJR1ZBMScwJQYDVQQDEx5Sb290IENBIEdlbmVyYWxpdGF0IFZhbGVu -Y2lhbmGCBDtF5WgwDQYJKoZIhvcNAQEFBQADggEBACRhTvW1yEICKrNcda3Fbcrn -lD+laJWIwVTAEGmiEi8YPyVQqHxK6sYJ2fR1xkDar1CdPaUWu20xxsdzCkj+IHLt -b8zog2EWRpABlUt9jppSCS/2bxzkoXHPjCpaF3ODR00PNvsETUlR4hTJZGH71BTg -9J63NI8KJr2XXPR5OkowGcytT6CYirQxlyric21+eLj4iIlPsSKRZEv1UN4D2+XF -ducTZnV+ZfsBn5OHiJ35Rld8TWCvmHMTI6QgkYH60GFmuH3Rr9ZvHmw96RH9qfmC 
-IoaZM3Fa6hlXPZHNqcCjbgcTpsnt+GijnsNacgmHKNHEc8RzGF9QdRYxn7fofMM= ------END CERTIFICATE----- - # Issuer: CN=TWCA Root Certification Authority O=TAIWAN-CA OU=Root CA # Subject: CN=TWCA Root Certification Authority O=TAIWAN-CA OU=Root CA # Label: "TWCA Root Certification Authority" @@ -3410,8 +3191,8 @@ iAYLtqZLICjU3j2LrTcFU3T+bsy8QxdxXvnFzBqpYe73dgzzcvRyrc9yAjYHR8/v GVCJYMzpJJUPwssd8m92kMfMdcGWxZ0= -----END CERTIFICATE----- -# Issuer: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. (c) Aralık 2007 -# Subject: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. (c) Aralık 2007 +# Issuer: CN=T\xdcRKTRUST Elektronik Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 O=T\xdcRKTRUST Bilgi \u0130leti\u015fim ve Bili\u015fim G\xfcvenli\u011fi Hizmetleri A.\u015e. (c) Aral\u0131k 2007 +# Subject: CN=T\xdcRKTRUST Elektronik Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 O=T\xdcRKTRUST Bilgi \u0130leti\u015fim ve Bili\u015fim G\xfcvenli\u011fi Hizmetleri A.\u015e. (c) Aral\u0131k 2007 # Label: "TURKTRUST Certificate Services Provider Root 2007" # Serial: 1 # MD5 Fingerprint: 2b:70:20:56:86:82:a0:18:c8:07:53:12:28:70:21:72 @@ -3893,8 +3674,8 @@ HL/EVlP6Y2XQ8xwOFvVrhlhNGNTkDY6lnVuR3HYkUD/GKvvZt5y11ubQ2egZixVx SK236thZiNSQvxaz2emsWWFUyBy6ysHK4bkgTI86k4mloMy/0/Z1pHWWbVY= -----END CERTIFICATE----- -# Issuer: CN=E-Tugra Certification Authority O=E-Tuğra EBG Bilişim Teknolojileri ve Hizmetleri A.Ş. OU=E-Tugra Sertifikasyon Merkezi -# Subject: CN=E-Tugra Certification Authority O=E-Tuğra EBG Bilişim Teknolojileri ve Hizmetleri A.Ş. OU=E-Tugra Sertifikasyon Merkezi +# Issuer: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. OU=E-Tugra Sertifikasyon Merkezi +# Subject: CN=E-Tugra Certification Authority O=E-Tu\u011fra EBG Bili\u015fim Teknolojileri ve Hizmetleri A.\u015e. 
OU=E-Tugra Sertifikasyon Merkezi # Label: "E-Tugra Certification Authority" # Serial: 7667447206703254355 # MD5 Fingerprint: b8:a1:03:63:b0:bd:21:71:70:8a:6f:13:3a:bb:79:49 @@ -4300,8 +4081,8 @@ OtzCWfHjXEa7ZywCRuoeSKbmW9m1vFGikpbbqsY3Iqb+zCB0oy2pLmvLwIIRIbWT ee5Ehr7XHuQe+w== -----END CERTIFICATE----- -# Issuer: CN=CA 沃通根证书 O=WoSign CA Limited -# Subject: CN=CA 沃通根证书 O=WoSign CA Limited +# Issuer: CN=CA \u6c83\u901a\u6839\u8bc1\u4e66 O=WoSign CA Limited +# Subject: CN=CA \u6c83\u901a\u6839\u8bc1\u4e66 O=WoSign CA Limited # Label: "WoSign China" # Serial: 106921963437422998931660691310149453965 # MD5 Fingerprint: 78:83:5b:52:16:76:c4:24:3b:83:78:e8:ac:da:9a:93 @@ -4747,9 +4528,9 @@ AAoACxGV2lZFA4gKn2fQ1XmxqI1AbQ3CekD6819kR5LLU7m7Wc5P/dAVUwHY3+vZ 5nbv0CO7O6l5s9UCKc2Jo5YPSjXnTkLAdc0Hz+Ys63su -----END CERTIFICATE----- -# Issuer: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H5 O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. -# Subject: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H5 O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. -# Label: "TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H5" +# Issuer: CN=T\xdcRKTRUST Elektronik Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 H5 O=T\xdcRKTRUST Bilgi \u0130leti\u015fim ve Bili\u015fim G\xfcvenli\u011fi Hizmetleri A.\u015e. +# Subject: CN=T\xdcRKTRUST Elektronik Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 H5 O=T\xdcRKTRUST Bilgi \u0130leti\u015fim ve Bili\u015fim G\xfcvenli\u011fi Hizmetleri A.\u015e. 
+# Label: "T\xdcRKTRUST Elektronik Sertifika Hizmet Sa\u011flay\u0131c\u0131s\u0131 H5" # Serial: 156233699172481 # MD5 Fingerprint: da:70:8e:f0:22:df:93:26:f6:5f:9f:d3:15:06:52:4e # SHA1 Fingerprint: c4:18:f6:4d:46:d1:df:00:3d:27:30:13:72:43:a9:12:11:c6:75:fb @@ -4780,39 +4561,6 @@ Yv4HAqGEVka+lgqaE9chTLd8B59OTj+RdPsnnRHM3eaxynFNExc5JsUpISuTKWqW +qtB4Uu2NQvAmxU= -----END CERTIFICATE----- -# Issuer: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H6 O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. -# Subject: CN=TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H6 O=TÜRKTRUST Bilgi İletişim ve Bilişim Güvenliği Hizmetleri A.Ş. -# Label: "TÜRKTRUST Elektronik Sertifika Hizmet Sağlayıcısı H6" -# Serial: 138134509972618 -# MD5 Fingerprint: f8:c5:ee:2a:6b:be:95:8d:08:f7:25:4a:ea:71:3e:46 -# SHA1 Fingerprint: 8a:5c:8c:ee:a5:03:e6:05:56:ba:d8:1b:d4:f6:c9:b0:ed:e5:2f:e0 -# SHA256 Fingerprint: 8d:e7:86:55:e1:be:7f:78:47:80:0b:93:f6:94:d2:1d:36:8c:c0:6e:03:3e:7f:ab:04:bb:5e:b9:9d:a6:b7:00 ------BEGIN CERTIFICATE----- -MIIEJjCCAw6gAwIBAgIGfaHyZeyKMA0GCSqGSIb3DQEBCwUAMIGxMQswCQYDVQQG -EwJUUjEPMA0GA1UEBwwGQW5rYXJhMU0wSwYDVQQKDERUw5xSS1RSVVNUIEJpbGdp -IMSwbGV0acWfaW0gdmUgQmlsacWfaW0gR8O8dmVubGnEn2kgSGl6bWV0bGVyaSBB -LsWeLjFCMEAGA1UEAww5VMOcUktUUlVTVCBFbGVrdHJvbmlrIFNlcnRpZmlrYSBI -aXptZXQgU2HEn2xhecSxY8Sxc8SxIEg2MB4XDTEzMTIxODA5MDQxMFoXDTIzMTIx -NjA5MDQxMFowgbExCzAJBgNVBAYTAlRSMQ8wDQYDVQQHDAZBbmthcmExTTBLBgNV -BAoMRFTDnFJLVFJVU1QgQmlsZ2kgxLBsZXRpxZ9pbSB2ZSBCaWxpxZ9pbSBHw7x2 -ZW5sacSfaSBIaXptZXRsZXJpIEEuxZ4uMUIwQAYDVQQDDDlUw5xSS1RSVVNUIEVs -ZWt0cm9uaWsgU2VydGlmaWthIEhpem1ldCBTYcSfbGF5xLFjxLFzxLEgSDYwggEi -MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCdsGjW6L0UlqMACprx9MfMkU1x -eHe59yEmFXNRFpQJRwXiM/VomjX/3EsvMsew7eKC5W/a2uqsxgbPJQ1BgfbBOCK9 -+bGlprMBvD9QFyv26WZV1DOzXPhDIHiTVRZwGTLmiddk671IUP320EEDwnS3/faA -z1vFq6TWlRKb55cTMgPp1KtDWxbtMyJkKbbSk60vbNg9tvYdDjTu0n2pVQ8g9P0p -u5FbHH3GQjhtQiht1AH7zYiXSX6484P4tZgvsycLSF5W506jM7NE1qXyGJTtHB6p 
-lVxiSvgNZ1GpryHV+DKdeboaX+UEVU0TRv/yz3THGmNtwx8XEsMeED5gCLMxAgMB -AAGjQjBAMB0GA1UdDgQWBBTdVRcT9qzoSCHK77Wv0QAy7Z6MtTAOBgNVHQ8BAf8E -BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAb1gNl0Oq -FlQ+v6nfkkU/hQu7VtMMUszIv3ZnXuaqs6fvuay0EBQNdH49ba3RfdCaqaXKGDsC -QC4qnFAUi/5XfldcEQlLNkVS9z2sFP1E34uXI9TDwe7UU5X+LEr+DXCqu4svLcsy -o4LyVN/Y8t3XSHLuSqMplsNEzm61kod2pLv0kmzOLBQJZo6NrRa1xxsJYTvjIKID -gI6tflEATseWhvtDmHd9KMeP2Cpu54Rvl0EpABZeTeIT6lnAY2c6RPuY/ATTMHKm -9ocJV612ph1jmv3XZch4gyt1O6VbuA1df74jrlZVlFjvH4GMKrLN5ptjnhi85WsG -tAuYSyher4hYyw== ------END CERTIFICATE----- - # Issuer: CN=Certinomis - Root CA O=Certinomis OU=0002 433998903 # Subject: CN=Certinomis - Root CA O=Certinomis OU=0002 433998903 # Label: "Certinomis - Root CA" @@ -5273,6 +5021,229 @@ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc= -----END CERTIFICATE----- + +# Issuer: O=FNMT-RCM OU=AC RAIZ FNMT-RCM +# Subject: O=FNMT-RCM OU=AC RAIZ FNMT-RCM +# Label: "AC RAIZ FNMT-RCM" +# Serial: 485876308206448804701554682760554759 +# MD5 Fingerprint: e2:09:04:b4:d3:bd:d1:a0:14:fd:1a:d2:47:c4:57:1d +# SHA1 Fingerprint: ec:50:35:07:b2:15:c4:95:62:19:e2:a8:9a:5b:42:99:2c:4c:2c:20 +# SHA256 Fingerprint: eb:c5:57:0c:29:01:8c:4d:67:b1:aa:12:7b:af:12:f7:03:b4:61:1e:bc:17:b7:da:b5:57:38:94:17:9b:93:fa +-----BEGIN CERTIFICATE----- +MIIFgzCCA2ugAwIBAgIPXZONMGc2yAYdGsdUhGkHMA0GCSqGSIb3DQEBCwUAMDsx +CzAJBgNVBAYTAkVTMREwDwYDVQQKDAhGTk1ULVJDTTEZMBcGA1UECwwQQUMgUkFJ +WiBGTk1ULVJDTTAeFw0wODEwMjkxNTU5NTZaFw0zMDAxMDEwMDAwMDBaMDsxCzAJ +BgNVBAYTAkVTMREwDwYDVQQKDAhGTk1ULVJDTTEZMBcGA1UECwwQQUMgUkFJWiBG +Tk1ULVJDTTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALpxgHpMhm5/ +yBNtwMZ9HACXjywMI7sQmkCpGreHiPibVmr75nuOi5KOpyVdWRHbNi63URcfqQgf +BBckWKo3Shjf5TnUV/3XwSyRAZHiItQDwFj8d0fsjz50Q7qsNI1NOHZnjrDIbzAz +WHFctPVrbtQBULgTfmxKo0nRIBnuvMApGGWn3v7v3QqQIecaZ5JCEJhfTzC8PhxF 
+tBDXaEAUwED653cXeuYLj2VbPNmaUtu1vZ5Gzz3rkQUCwJaydkxNEJY7kvqcfw+Z +374jNUUeAlz+taibmSXaXvMiwzn15Cou08YfxGyqxRxqAQVKL9LFwag0Jl1mpdIC +IfkYtwb1TplvqKtMUejPUBjFd8g5CSxJkjKZqLsXF3mwWsXmo8RZZUc1g16p6DUL +mbvkzSDGm0oGObVo/CK67lWMK07q87Hj/LaZmtVC+nFNCM+HHmpxffnTtOmlcYF7 +wk5HlqX2doWjKI/pgG6BU6VtX7hI+cL5NqYuSf+4lsKMB7ObiFj86xsc3i1w4peS +MKGJ47xVqCfWS+2QrYv6YyVZLag13cqXM7zlzced0ezvXg5KkAYmY6252TUtB7p2 +ZSysV4999AeU14ECll2jB0nVetBX+RvnU0Z1qrB5QstocQjpYL05ac70r8NWQMet +UqIJ5G+GR4of6ygnXYMgrwTJbFaai0b1AgMBAAGjgYMwgYAwDwYDVR0TAQH/BAUw +AwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFPd9xf3E6Jobd2Sn9R2gzL+H +YJptMD4GA1UdIAQ3MDUwMwYEVR0gADArMCkGCCsGAQUFBwIBFh1odHRwOi8vd3d3 +LmNlcnQuZm5tdC5lcy9kcGNzLzANBgkqhkiG9w0BAQsFAAOCAgEAB5BK3/MjTvDD +nFFlm5wioooMhfNzKWtN/gHiqQxjAb8EZ6WdmF/9ARP67Jpi6Yb+tmLSbkyU+8B1 +RXxlDPiyN8+sD8+Nb/kZ94/sHvJwnvDKuO+3/3Y3dlv2bojzr2IyIpMNOmqOFGYM +LVN0V2Ue1bLdI4E7pWYjJ2cJj+F3qkPNZVEI7VFY/uY5+ctHhKQV8Xa7pO6kO8Rf +77IzlhEYt8llvhjho6Tc+hj507wTmzl6NLrTQfv6MooqtyuGC2mDOL7Nii4LcK2N +JpLuHvUBKwrZ1pebbuCoGRw6IYsMHkCtA+fdZn71uSANA+iW+YJF1DngoABd15jm +fZ5nc8OaKveri6E6FO80vFIOiZiaBECEHX5FaZNXzuvO+FB8TxxuBEOb+dY7Ixjp +6o7RTUaN8Tvkasq6+yO3m/qZASlaWFot4/nUbQ4mrcFuNLwy+AwF+mWj2zs3gyLp +1txyM/1d8iC9djwj2ij3+RvrWWTV3F9yfiD8zYm1kGdNYno/Tq0dwzn+evQoFt9B +9kiABdcPUXmsEKvU7ANm5mqwujGSQkBqvjrTcuFqN1W8rB2Vt2lh8kORdOag0wok +RqEIr9baRRmW1FMdW4R58MD3R++Lj8UGrp1MYp3/RgT408m2ECVAdf4WqslKYIYv +uu8wd+RU4riEmViAqhOLUTpPSPaLtrM= +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 1 O=Amazon +# Subject: CN=Amazon Root CA 1 O=Amazon +# Label: "Amazon Root CA 1" +# Serial: 143266978916655856878034712317230054538369994 +# MD5 Fingerprint: 43:c6:bf:ae:ec:fe:ad:2f:18:c6:88:68:30:fc:c8:e6 +# SHA1 Fingerprint: 8d:a7:f9:65:ec:5e:fc:37:91:0f:1c:6e:59:fd:c1:cc:6a:6e:de:16 +# SHA256 Fingerprint: 8e:cd:e6:88:4f:3d:87:b1:12:5b:a3:1a:c3:fc:b1:3d:70:16:de:7f:57:cc:90:4f:e1:cb:97:c6:ae:98:19:6e +-----BEGIN CERTIFICATE----- +MIIDQTCCAimgAwIBAgITBmyfz5m/jAo54vB4ikPmljZbyjANBgkqhkiG9w0BAQsF 
+ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6 +b24gUm9vdCBDQSAxMB4XDTE1MDUyNjAwMDAwMFoXDTM4MDExNzAwMDAwMFowOTEL +MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv +b3QgQ0EgMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALJ4gHHKeNXj +ca9HgFB0fW7Y14h29Jlo91ghYPl0hAEvrAIthtOgQ3pOsqTQNroBvo3bSMgHFzZM +9O6II8c+6zf1tRn4SWiw3te5djgdYZ6k/oI2peVKVuRF4fn9tBb6dNqcmzU5L/qw +IFAGbHrQgLKm+a/sRxmPUDgH3KKHOVj4utWp+UhnMJbulHheb4mjUcAwhmahRWa6 +VOujw5H5SNz/0egwLX0tdHA114gk957EWW67c4cX8jJGKLhD+rcdqsq08p8kDi1L +93FcXmn/6pUCyziKrlA4b9v7LWIbxcceVOF34GfID5yHI9Y/QCB/IIDEgEw+OyQm +jgSubJrIqg0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMC +AYYwHQYDVR0OBBYEFIQYzIU07LwMlJQuCFmcx7IQTgoIMA0GCSqGSIb3DQEBCwUA +A4IBAQCY8jdaQZChGsV2USggNiMOruYou6r4lK5IpDB/G/wkjUu0yKGX9rbxenDI +U5PMCCjjmCXPI6T53iHTfIUJrU6adTrCC2qJeHZERxhlbI1Bjjt/msv0tadQ1wUs +N+gDS63pYaACbvXy8MWy7Vu33PqUXHeeE6V/Uq2V8viTO96LXFvKWlJbYK8U90vv +o/ufQJVtMVT8QtPHRh8jrdkPSHCa2XV4cdFyQzR1bldZwgJcJmApzyMZFo6IQ6XU +5MsI+yMRQ+hDKXJioaldXgjUkK642M4UwtBV8ob2xJNDd2ZhwLnoQdeXeGADbkpy +rqXRfboQnoZsG4q5WTP468SQvvG5 +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 2 O=Amazon +# Subject: CN=Amazon Root CA 2 O=Amazon +# Label: "Amazon Root CA 2" +# Serial: 143266982885963551818349160658925006970653239 +# MD5 Fingerprint: c8:e5:8d:ce:a8:42:e2:7a:c0:2a:5c:7c:9e:26:bf:66 +# SHA1 Fingerprint: 5a:8c:ef:45:d7:a6:98:59:76:7a:8c:8b:44:96:b5:78:cf:47:4b:1a +# SHA256 Fingerprint: 1b:a5:b2:aa:8c:65:40:1a:82:96:01:18:f8:0b:ec:4f:62:30:4d:83:ce:c4:71:3a:19:c3:9c:01:1e:a4:6d:b4 +-----BEGIN CERTIFICATE----- +MIIFQTCCAymgAwIBAgITBmyf0pY1hp8KD+WGePhbJruKNzANBgkqhkiG9w0BAQwF +ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6 +b24gUm9vdCBDQSAyMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTEL +MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv +b3QgQ0EgMjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK2Wny2cSkxK +gXlRmeyKy2tgURO8TW0G/LAIjd0ZEGrHJgw12MBvIITplLGbhQPDW9tK6Mj4kHbZ 
+W0/jTOgGNk3Mmqw9DJArktQGGWCsN0R5hYGCrVo34A3MnaZMUnbqQ523BNFQ9lXg +1dKmSYXpN+nKfq5clU1Imj+uIFptiJXZNLhSGkOQsL9sBbm2eLfq0OQ6PBJTYv9K +8nu+NQWpEjTj82R0Yiw9AElaKP4yRLuH3WUnAnE72kr3H9rN9yFVkE8P7K6C4Z9r +2UXTu/Bfh+08LDmG2j/e7HJV63mjrdvdfLC6HM783k81ds8P+HgfajZRRidhW+me +z/CiVX18JYpvL7TFz4QuK/0NURBs+18bvBt+xa47mAExkv8LV/SasrlX6avvDXbR +8O70zoan4G7ptGmh32n2M8ZpLpcTnqWHsFcQgTfJU7O7f/aS0ZzQGPSSbtqDT6Zj +mUyl+17vIWR6IF9sZIUVyzfpYgwLKhbcAS4y2j5L9Z469hdAlO+ekQiG+r5jqFoz +7Mt0Q5X5bGlSNscpb/xVA1wf+5+9R+vnSUeVC06JIglJ4PVhHvG/LopyboBZ/1c6 ++XUyo05f7O0oYtlNc/LMgRdg7c3r3NunysV+Ar3yVAhU/bQtCSwXVEqY0VThUWcI +0u1ufm8/0i2BWSlmy5A5lREedCf+3euvAgMBAAGjQjBAMA8GA1UdEwEB/wQFMAMB +Af8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSwDPBMMPQFWAJI/TPlUq9LhONm +UjANBgkqhkiG9w0BAQwFAAOCAgEAqqiAjw54o+Ci1M3m9Zh6O+oAA7CXDpO8Wqj2 +LIxyh6mx/H9z/WNxeKWHWc8w4Q0QshNabYL1auaAn6AFC2jkR2vHat+2/XcycuUY ++gn0oJMsXdKMdYV2ZZAMA3m3MSNjrXiDCYZohMr/+c8mmpJ5581LxedhpxfL86kS +k5Nrp+gvU5LEYFiwzAJRGFuFjWJZY7attN6a+yb3ACfAXVU3dJnJUH/jWS5E4ywl +7uxMMne0nxrpS10gxdr9HIcWxkPo1LsmmkVwXqkLN1PiRnsn/eBG8om3zEK2yygm +btmlyTrIQRNg91CMFa6ybRoVGld45pIq2WWQgj9sAq+uEjonljYE1x2igGOpm/Hl +urR8FLBOybEfdF849lHqm/osohHUqS0nGkWxr7JOcQ3AWEbWaQbLU8uz/mtBzUF+ +fUwPfHJ5elnNXkoOrJupmHN5fLT0zLm4BwyydFy4x2+IoZCn9Kr5v2c69BoVYh63 +n749sSmvZ6ES8lgQGVMDMBu4Gon2nL2XA46jCfMdiyHxtN/kHNGfZQIG6lzWE7OE +76KlXIx3KadowGuuQNKotOrN8I1LOJwZmhsoVLiJkO/KdYE+HvJkJMcYr07/R54H +9jVlpNMKVv/1F2Rs76giJUmTtt8AF9pYfl3uxRuw0dFfIRDH+fO6AgonB8Xx1sfT +4PsJYGw= +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 3 O=Amazon +# Subject: CN=Amazon Root CA 3 O=Amazon +# Label: "Amazon Root CA 3" +# Serial: 143266986699090766294700635381230934788665930 +# MD5 Fingerprint: a0:d4:ef:0b:f7:b5:d8:49:95:2a:ec:f5:c4:fc:81:87 +# SHA1 Fingerprint: 0d:44:dd:8c:3c:8c:1a:1a:58:75:64:81:e9:0f:2e:2a:ff:b3:d2:6e +# SHA256 Fingerprint: 18:ce:6c:fe:7b:f1:4e:60:b2:e3:47:b8:df:e8:68:cb:31:d0:2e:bb:3a:da:27:15:69:f5:03:43:b4:6d:b3:a4 +-----BEGIN CERTIFICATE----- 
+MIIBtjCCAVugAwIBAgITBmyf1XSXNmY/Owua2eiedgPySjAKBggqhkjOPQQDAjA5 +MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g +Um9vdCBDQSAzMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG +A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg +Q0EgMzBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABCmXp8ZBf8ANm+gBG1bG8lKl +ui2yEujSLtf6ycXYqm0fc4E7O5hrOXwzpcVOho6AF2hiRVd9RFgdszflZwjrZt6j +QjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSr +ttvXBp43rDCGB5Fwx5zEGbF4wDAKBggqhkjOPQQDAgNJADBGAiEA4IWSoxe3jfkr +BqWTrBqYaGFy+uGh0PsceGCmQ5nFuMQCIQCcAu/xlJyzlvnrxir4tiz+OpAUFteM +YyRIHN8wfdVoOw== +-----END CERTIFICATE----- + +# Issuer: CN=Amazon Root CA 4 O=Amazon +# Subject: CN=Amazon Root CA 4 O=Amazon +# Label: "Amazon Root CA 4" +# Serial: 143266989758080763974105200630763877849284878 +# MD5 Fingerprint: 89:bc:27:d5:eb:17:8d:06:6a:69:d5:fd:89:47:b4:cd +# SHA1 Fingerprint: f6:10:84:07:d6:f8:bb:67:98:0c:c2:e2:44:c2:eb:ae:1c:ef:63:be +# SHA256 Fingerprint: e3:5d:28:41:9e:d0:20:25:cf:a6:90:38:cd:62:39:62:45:8d:a5:c6:95:fb:de:a3:c2:2b:0b:fb:25:89:70:92 +-----BEGIN CERTIFICATE----- +MIIB8jCCAXigAwIBAgITBmyf18G7EEwpQ+Vxe3ssyBrBDjAKBggqhkjOPQQDAzA5 +MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g +Um9vdCBDQSA0MB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG +A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg +Q0EgNDB2MBAGByqGSM49AgEGBSuBBAAiA2IABNKrijdPo1MN/sGKe0uoe0ZLY7Bi +9i0b2whxIdIA6GO9mif78DluXeo9pcmBqqNbIJhFXRbb/egQbeOc4OO9X4Ri83Bk +M6DLJC9wuoihKqB1+IGuYgbEgds5bimwHvouXKNCMEAwDwYDVR0TAQH/BAUwAwEB +/zAOBgNVHQ8BAf8EBAMCAYYwHQYDVR0OBBYEFNPsxzplbszh2naaVvuc84ZtV+WB +MAoGCCqGSM49BAMDA2gAMGUCMDqLIfG9fhGt0O9Yli/W651+kI0rz2ZVwyzjKKlw +CkcO8DdZEv8tmZQoTipPNU0zWgIxAOp1AE47xDqUEpHJWEadIRNyp4iciuRMStuW +1KyLa2tJElMzrdfkviT8tQp21KW8EA== +-----END CERTIFICATE----- + +# Issuer: CN=LuxTrust Global Root 2 O=LuxTrust S.A. +# Subject: CN=LuxTrust Global Root 2 O=LuxTrust S.A. 
+# Label: "LuxTrust Global Root 2" +# Serial: 59914338225734147123941058376788110305822489521 +# MD5 Fingerprint: b2:e1:09:00:61:af:f7:f1:91:6f:c4:ad:8d:5e:3b:7c +# SHA1 Fingerprint: 1e:0e:56:19:0a:d1:8b:25:98:b2:04:44:ff:66:8a:04:17:99:5f:3f +# SHA256 Fingerprint: 54:45:5f:71:29:c2:0b:14:47:c4:18:f9:97:16:8f:24:c5:8f:c5:02:3b:f5:da:5b:e2:eb:6e:1d:d8:90:2e:d5 +-----BEGIN CERTIFICATE----- +MIIFwzCCA6ugAwIBAgIUCn6m30tEntpqJIWe5rgV0xZ/u7EwDQYJKoZIhvcNAQEL +BQAwRjELMAkGA1UEBhMCTFUxFjAUBgNVBAoMDUx1eFRydXN0IFMuQS4xHzAdBgNV +BAMMFkx1eFRydXN0IEdsb2JhbCBSb290IDIwHhcNMTUwMzA1MTMyMTU3WhcNMzUw +MzA1MTMyMTU3WjBGMQswCQYDVQQGEwJMVTEWMBQGA1UECgwNTHV4VHJ1c3QgUy5B +LjEfMB0GA1UEAwwWTHV4VHJ1c3QgR2xvYmFsIFJvb3QgMjCCAiIwDQYJKoZIhvcN +AQEBBQADggIPADCCAgoCggIBANeFl78RmOnwYoNMPIf5U2o3C/IPPIfOb9wmKb3F +ibrJgz337spbxm1Jc7TJRqMbNBM/wYlFV/TZsfs2ZUv7COJIcRHIbjuend+JZTem +hfY7RBi2xjcwYkSSl2l9QjAk5A0MiWtj3sXh306pFGxT4GHO9hcvHTy95iJMHZP1 +EMShduxq3sVs35a0VkBCwGKSMKEtFZSg0iAGCW5qbeXrt77U8PEVfIvmTroTzEsn +Xpk8F12PgX8zPU/TPxvsXD/wPEx1bvKm1Z3aLQdjAsZy6ZS8TEmVT4hSyNvoaYL4 +zDRbIvCGp4m9SAptZoFtyMhk+wHh9OHe2Z7d21vUKpkmFRseTJIpgp7VkoGSQXAZ +96Tlk0u8d2cx3Rz9MXANF5kM+Qw5GSoXtTBxVdUPrljhPS80m8+f9niFwpN6cj5m +j5wWEWCPnolvZ77gR1o7DJpni89Gxq44o/KnvObWhWszJHAiS8sIm7vI+AIpHb4g +DEa/a4ebsypmQjVGbKq6rfmYe+lQVRQxv7HaLe2ArWgk+2mr2HETMOZns4dA/Yl+ +8kPREd8vZS9kzl8UubG/Mb2HeFpZZYiq/FkySIbWTLkpS5XTdvN3JW1CHDiDTf2j +X5t/Lax5Gw5CMZdjpPuKadUiDTSQMC6otOBttpSsvItO13D8xTiOZCXhTTmQzsmH +hFhxAgMBAAGjgagwgaUwDwYDVR0TAQH/BAUwAwEB/zBCBgNVHSAEOzA5MDcGByuB +KwEBAQowLDAqBggrBgEFBQcCARYeaHR0cHM6Ly9yZXBvc2l0b3J5Lmx1eHRydXN0 +Lmx1MA4GA1UdDwEB/wQEAwIBBjAfBgNVHSMEGDAWgBT/GCh2+UgFLKGu8SsbK7JT ++Et8szAdBgNVHQ4EFgQU/xgodvlIBSyhrvErGyuyU/hLfLMwDQYJKoZIhvcNAQEL +BQADggIBAGoZFO1uecEsh9QNcH7X9njJCwROxLHOk3D+sFTAMs2ZMGQXvw/l4jP9 +BzZAcg4atmpZ1gDlaCDdLnINH2pkMSCEfUmmWjfrRcmF9dTHF5kH5ptV5AzoqbTO +jFu1EVzPig4N1qx3gf4ynCSecs5U89BvolbW7MM3LGVYvlcAGvI1+ut7MV3CwRI9 +loGIlonBWVx65n9wNOeD4rHh4bhY79SV5GCc8JaXcozrhAIuZY+kt9J/Z93I055c 
+qqmkoCUUBpvsT34tC38ddfEz2O3OuHVtPlu5mB0xDVbYQw8wkbIEa91WvpWAVWe+ +2M2D2RjuLg+GLZKecBPs3lHJQ3gCpU3I+V/EkVhGFndadKpAvAefMLmx9xIX3eP/ +JEAdemrRTxgKqpAd60Ae36EeRJIQmvKN4dFLRp7oRUKX6kWZ8+xm1QL68qZKJKre +zrnK+T+Tb/mjuuqlPpmt/f97mfVl7vBZKGfXkJWkE4SphMHozs51k2MavDzq1WQf +LSoSOcbDWjLtR5EWDrw4wVDej8oqkDQc7kGUnF4ZLvhFSZl0kbAEb+MEWrGrKqv+ +x9CWttrhSmQGbmBNvUJO/3jaJMobtNeWOWyu8Q6qp31IiyBMz2TWuJdGsE7RKlY6 +oJO9r4Ak4Ap+58rVyuiFVdw2KuGUaJPHZnJED4AhMmwlxyOAgwrr +-----END CERTIFICATE----- + +# Issuer: CN=TUBITAK Kamu SM SSL Kok Sertifikasi - Surum 1 O=Turkiye Bilimsel ve Teknolojik Arastirma Kurumu - TUBITAK OU=Kamu Sertifikasyon Merkezi - Kamu SM +# Subject: CN=TUBITAK Kamu SM SSL Kok Sertifikasi - Surum 1 O=Turkiye Bilimsel ve Teknolojik Arastirma Kurumu - TUBITAK OU=Kamu Sertifikasyon Merkezi - Kamu SM +# Label: "TUBITAK Kamu SM SSL Kok Sertifikasi - Surum 1" +# Serial: 1 +# MD5 Fingerprint: dc:00:81:dc:69:2f:3e:2f:b0:3b:f6:3d:5a:91:8e:49 +# SHA1 Fingerprint: 31:43:64:9b:ec:ce:27:ec:ed:3a:3f:0b:8f:0d:e4:e8:91:dd:ee:ca +# SHA256 Fingerprint: 46:ed:c3:68:90:46:d5:3a:45:3f:b3:10:4a:b8:0d:ca:ec:65:8b:26:60:ea:16:29:dd:7e:86:79:90:64:87:16 +-----BEGIN CERTIFICATE----- +MIIEYzCCA0ugAwIBAgIBATANBgkqhkiG9w0BAQsFADCB0jELMAkGA1UEBhMCVFIx +GDAWBgNVBAcTD0dlYnplIC0gS29jYWVsaTFCMEAGA1UEChM5VHVya2l5ZSBCaWxp +bXNlbCB2ZSBUZWtub2xvamlrIEFyYXN0aXJtYSBLdXJ1bXUgLSBUVUJJVEFLMS0w +KwYDVQQLEyRLYW11IFNlcnRpZmlrYXN5b24gTWVya2V6aSAtIEthbXUgU00xNjA0 +BgNVBAMTLVRVQklUQUsgS2FtdSBTTSBTU0wgS29rIFNlcnRpZmlrYXNpIC0gU3Vy +dW0gMTAeFw0xMzExMjUwODI1NTVaFw00MzEwMjUwODI1NTVaMIHSMQswCQYDVQQG +EwJUUjEYMBYGA1UEBxMPR2ViemUgLSBLb2NhZWxpMUIwQAYDVQQKEzlUdXJraXll +IEJpbGltc2VsIHZlIFRla25vbG9qaWsgQXJhc3Rpcm1hIEt1cnVtdSAtIFRVQklU +QUsxLTArBgNVBAsTJEthbXUgU2VydGlmaWthc3lvbiBNZXJrZXppIC0gS2FtdSBT +TTE2MDQGA1UEAxMtVFVCSVRBSyBLYW11IFNNIFNTTCBLb2sgU2VydGlmaWthc2kg +LSBTdXJ1bSAxMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAr3UwM6q7 +a9OZLBI3hNmNe5eA027n/5tQlT6QlVZC1xl8JoSNkvoBHToP4mQ4t4y86Ij5iySr 
+LqP1N+RAjhgleYN1Hzv/bKjFxlb4tO2KRKOrbEz8HdDc72i9z+SqzvBV96I01INr +N3wcwv61A+xXzry0tcXtAA9TNypN9E8Mg/uGz8v+jE69h/mniyFXnHrfA2eJLJ2X +YacQuFWQfw4tJzh03+f92k4S400VIgLI4OD8D62K18lUUMw7D8oWgITQUVbDjlZ/ +iSIzL+aFCr2lqBs23tPcLG07xxO9WSMs5uWk99gL7eqQQESolbuT1dCANLZGeA4f +AJNG4e7p+exPFwIDAQABo0IwQDAdBgNVHQ4EFgQUZT/HiobGPN08VFw1+DrtUgxH +V8gwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEL +BQADggEBACo/4fEyjq7hmFxLXs9rHmoJ0iKpEsdeV31zVmSAhHqT5Am5EM2fKifh +AHe+SMg1qIGf5LgsyX8OsNJLN13qudULXjS99HMpw+0mFZx+CFOKWI3QSyjfwbPf +IPP54+M638yclNhOT8NrF7f3cuitZjO1JVOr4PhMqZ398g26rrnZqsZr+ZO7rqu4 +lzwDGrpDxpa5RXI4s6ehlj2Re37AIVNMh+3yC1SVUZPVIqUNivGTDj5UDrDYyU7c +8jEyVupk+eq1nRZmQnLzf9OxMUP8pI4X8W0jq5Rm+K37DwhuJi1/FwcJsoz7UMCf +lo3Ptv0AnVoUmr8CRPXBwp8iXqIPoeM= +-----END CERTIFICATE----- # Issuer: CN=Entrust.net Secure Server Certification Authority O=Entrust.net OU=www.entrust.net/CPS incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited # Subject: CN=Entrust.net Secure Server Certification Authority O=Entrust.net OU=www.entrust.net/CPS incorp. by ref. 
(limits liab.)/(c) 1999 Entrust.net Limited # Label: "Entrust.net Secure Server CA" diff --git a/lib/html5lib/__init__.py b/lib/html5lib/__init__.py index 19a4b7d6927d965dadd94494c18fb500c30ae470..8ee9b53e1215f0dacc740acbb2bacba4e085267d 100644 --- a/lib/html5lib/__init__.py +++ b/lib/html5lib/__init__.py @@ -20,4 +20,6 @@ from .serializer import serialize __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", "getTreeWalker", "serialize"] -__version__ = "0.999" + +# this has to be at the top level, see how setup.py parses this +__version__ = "0.999999999" diff --git a/lib/html5lib/ihatexml.py b/lib/html5lib/_ihatexml.py similarity index 97% rename from lib/html5lib/ihatexml.py rename to lib/html5lib/_ihatexml.py index 0fc79308ef4c94ab8f80af9e88a180bc998b8919..d6d1d6fb7d382493ef4166ee4a999ec7c65d2d8e 100644 --- a/lib/html5lib/ihatexml.py +++ b/lib/html5lib/_ihatexml.py @@ -175,9 +175,9 @@ def escapeRegexp(string): return string # output from the above -nonXmlNameBMPRegexp = 
re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48
\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') +nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b2
9\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa -nonXmlNameFirstBMPRegexp = 
re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u31
04\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') +nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u2
12f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa # Simpler things nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]") @@ -186,7 +186,7 @@ nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]") class InfosetFilter(object): replacementRegexp = re.compile(r"U[\dA-F]{5,5}") - def __init__(self, replaceChars=None, + def __init__(self, dropXmlnsLocalName=False, dropXmlnsAttrNs=False, preventDoubleDashComments=False, @@ -217,7 +217,7 @@ class InfosetFilter(object): else: return self.toXmlName(name) - def coerceElement(self, name, namespace=None): + def coerceElement(self, name): return self.toXmlName(name) def coerceComment(self, data): @@ -225,11 +225,14 @@ class InfosetFilter(object): while "--" in data: warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) data = data.replace("--", "- -") + if data.endswith("-"): + warnings.warn("Comments cannot end in a dash", DataLossWarning) + data += " " return data def coerceCharacters(self, data): if self.replaceFormFeedCharacters: - for i in range(data.count("\x0C")): + for _ in range(data.count("\x0C")): warnings.warn("Text cannot contain U+000C", DataLossWarning) data = data.replace("\x0C", " ") # Other non-xml characters diff --git a/lib/html5lib/inputstream.py b/lib/html5lib/_inputstream.py similarity index 80% rename from lib/html5lib/inputstream.py rename to lib/html5lib/_inputstream.py index 9e03b9313d7f3ab16da20f6ba6a33b53f83608c3..79f2331e77a04d0106041411a68650ee6d64e8a6 100644 --- a/lib/html5lib/inputstream.py +++ b/lib/html5lib/_inputstream.py @@ -1,13 +1,16 @@ from __future__ import absolute_import, division, unicode_literals -from six import text_type -from six.moves import http_client + +from six import text_type, binary_type +from six.moves import http_client, urllib import codecs import re +import webencodings + from .constants import EOF, spaceCharacters, 
asciiLetters, asciiUppercase -from .constants import encodings, ReparseException -from . import utils +from .constants import ReparseException +from . import _utils from io import StringIO @@ -16,19 +19,26 @@ try: except ImportError: BytesIO = StringIO -try: - from io import BufferedIOBase -except ImportError: - class BufferedIOBase(object): - pass - # Non-unicode versions of constants for use in the pre-parser spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) -invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]") + +invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa + +if _utils.supports_lone_surrogates: + # Use one extra step of indirection and create surrogates with + # eval. Not using this indirection would introduce an illegal + # unicode literal on platforms not supporting such lone + # surrogates. 
+ assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 + invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + + eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used + "]") +else: + invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, @@ -118,10 +128,13 @@ class BufferedStream(object): return b"".join(rv) -def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): - if isinstance(source, http_client.HTTPResponse): - # Work around Python bug #20007: read(0) closes the connection. - # http://bugs.python.org/issue20007 +def HTMLInputStream(source, **kwargs): + # Work around Python bug #20007: read(0) closes the connection. + # http://bugs.python.org/issue20007 + if (isinstance(source, http_client.HTTPResponse) or + # Also check for addinfourl wrapping HTTPResponse + (isinstance(source, urllib.response.addbase) and + isinstance(source.fp, http_client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): isUnicode = isinstance(source.read(0), text_type) @@ -129,12 +142,13 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): isUnicode = isinstance(source, text_type) if isUnicode: - if encoding is not None: - raise TypeError("Cannot explicitly set an encoding with a unicode string") + encodings = [x for x in kwargs if x.endswith("_encoding")] + if encodings: + raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) - return HTMLUnicodeInputStream(source) + return HTMLUnicodeInputStream(source, **kwargs) else: - return HTMLBinaryInputStream(source, encoding, parseMeta, chardet) + return HTMLBinaryInputStream(source, **kwargs) class HTMLUnicodeInputStream(object): @@ -160,22 +174,21 @@ class HTMLUnicodeInputStream(object): regardless of any BOM or later declaration (such as in a meta element) - parseMeta - 
Look for a <meta> element containing encoding information - """ - # Craziness - if len("\U0010FFFF") == 1: + if not _utils.supports_lone_surrogates: + # Such platforms will have already checked for such + # surrogate errors, so no need to do this checking. + self.reportCharacterErrors = None + elif len("\U0010FFFF") == 1: self.reportCharacterErrors = self.characterErrorsUCS4 - self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]") else: self.reportCharacterErrors = self.characterErrorsUCS2 - self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])") # List of where new lines occur self.newLines = [0] - self.charEncoding = ("utf-8", "certain") + self.charEncoding = (lookupEncoding("utf-8"), "certain") self.dataStream = self.openStream(source) self.reset() @@ -265,12 +278,10 @@ class HTMLUnicodeInputStream(object): self._bufferedCharacter = data[-1] data = data[:-1] - self.reportCharacterErrors(data) + if self.reportCharacterErrors: + self.reportCharacterErrors(data) # Replace invalid characters - # Note U+0000 is dealt with in the tokenizer - data = self.replaceCharactersRegexp.sub("\ufffd", data) - data = data.replace("\r\n", "\n") data = data.replace("\r", "\n") @@ -280,7 +291,7 @@ class HTMLUnicodeInputStream(object): return True def characterErrorsUCS4(self, data): - for i in range(len(invalid_unicode_re.findall(data))): + for _ in range(len(invalid_unicode_re.findall(data))): self.errors.append("invalid-codepoint") def characterErrorsUCS2(self, data): @@ -293,9 +304,9 @@ class HTMLUnicodeInputStream(object): codepoint = ord(match.group()) pos = match.start() # Pretty sure there should be endianness issues here - if utils.isSurrogatePair(data[pos:pos + 2]): + if _utils.isSurrogatePair(data[pos:pos + 2]): # We have a surrogate pair! 
- char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2]) + char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) if char_val in non_bmp_invalid_codepoints: self.errors.append("invalid-codepoint") skip = True @@ -378,7 +389,9 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): """ - def __init__(self, source, encoding=None, parseMeta=True, chardet=True): + def __init__(self, source, override_encoding=None, transport_encoding=None, + same_origin_parent_encoding=None, likely_encoding=None, + default_encoding="windows-1252", useChardet=True): """Initialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source @@ -391,8 +404,6 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): regardless of any BOM or later declaration (such as in a meta element) - parseMeta - Look for a <meta> element containing encoding information - """ # Raw Stream - for unicode objects this will encode to utf-8 and set # self.charEncoding as appropriate @@ -400,27 +411,28 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): HTMLUnicodeInputStream.__init__(self, self.rawStream) - self.charEncoding = (codecName(encoding), "certain") - # Encoding Information # Number of bytes to use when looking for a meta element with # encoding information - self.numBytesMeta = 512 + self.numBytesMeta = 1024 # Number of bytes to use when using detecting encoding using chardet self.numBytesChardet = 100 - # Encoding to use if no other information can be found - self.defaultEncoding = "windows-1252" + # Things from args + self.override_encoding = override_encoding + self.transport_encoding = transport_encoding + self.same_origin_parent_encoding = same_origin_parent_encoding + self.likely_encoding = likely_encoding + self.default_encoding = default_encoding - # Detect encoding iff no explicit "transport level" encoding is supplied - if (self.charEncoding[0] is None): - self.charEncoding = self.detectEncoding(parseMeta, chardet) + # Determine encoding + 
self.charEncoding = self.determineEncoding(useChardet) + assert self.charEncoding[0] is not None # Call superclass self.reset() def reset(self): - self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream, - 'replace') + self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace') HTMLUnicodeInputStream.reset(self) def openStream(self, source): @@ -437,29 +449,50 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): try: stream.seek(stream.tell()) - except: + except: # pylint:disable=bare-except stream = BufferedStream(stream) return stream - def detectEncoding(self, parseMeta=True, chardet=True): - # First look for a BOM + def determineEncoding(self, chardet=True): + # BOMs take precedence over everything # This will also read past the BOM if present - encoding = self.detectBOM() - confidence = "certain" - # If there is no BOM need to look for meta elements with encoding - # information - if encoding is None and parseMeta: - encoding = self.detectEncodingMeta() - confidence = "tentative" - # Guess with chardet, if avaliable - if encoding is None and chardet: - confidence = "tentative" + charEncoding = self.detectBOM(), "certain" + if charEncoding[0] is not None: + return charEncoding + + # If we've been overriden, we've been overriden + charEncoding = lookupEncoding(self.override_encoding), "certain" + if charEncoding[0] is not None: + return charEncoding + + # Now check the transport layer + charEncoding = lookupEncoding(self.transport_encoding), "certain" + if charEncoding[0] is not None: + return charEncoding + + # Look for meta elements with encoding information + charEncoding = self.detectEncodingMeta(), "tentative" + if charEncoding[0] is not None: + return charEncoding + + # Parent document encoding + charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative" + if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"): + return charEncoding + + # "likely" encoding + 
charEncoding = lookupEncoding(self.likely_encoding), "tentative" + if charEncoding[0] is not None: + return charEncoding + + # Guess with chardet, if available + if chardet: try: - try: - from charade.universaldetector import UniversalDetector - except ImportError: - from chardet.universaldetector import UniversalDetector + from chardet.universaldetector import UniversalDetector + except ImportError: + pass + else: buffers = [] detector = UniversalDetector() while not detector.done: @@ -470,36 +503,33 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): buffers.append(buffer) detector.feed(buffer) detector.close() - encoding = detector.result['encoding'] + encoding = lookupEncoding(detector.result['encoding']) self.rawStream.seek(0) - except ImportError: - pass - # If all else fails use the default encoding - if encoding is None: - confidence = "tentative" - encoding = self.defaultEncoding - - # Substitute for equivalent encodings: - encodingSub = {"iso-8859-1": "windows-1252"} + if encoding is not None: + return encoding, "tentative" - if encoding.lower() in encodingSub: - encoding = encodingSub[encoding.lower()] + # Try the default encoding + charEncoding = lookupEncoding(self.default_encoding), "tentative" + if charEncoding[0] is not None: + return charEncoding - return encoding, confidence + # Fallback to html5lib's default if even that hasn't worked + return lookupEncoding("windows-1252"), "tentative" def changeEncoding(self, newEncoding): assert self.charEncoding[1] != "certain" - newEncoding = codecName(newEncoding) - if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"): - newEncoding = "utf-8" + newEncoding = lookupEncoding(newEncoding) if newEncoding is None: return + if newEncoding.name in ("utf-16be", "utf-16le"): + newEncoding = lookupEncoding("utf-8") + assert newEncoding is not None elif newEncoding == self.charEncoding[0]: self.charEncoding = (self.charEncoding[0], "certain") else: self.rawStream.seek(0) - self.reset() self.charEncoding = 
(newEncoding, "certain") + self.reset() raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) def detectBOM(self): @@ -508,8 +538,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): encoding otherwise return None""" bomDict = { codecs.BOM_UTF8: 'utf-8', - codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be', - codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be' + codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be', + codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be' } # Go to beginning of file and read in 4 bytes @@ -529,9 +559,12 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): # Set the read position past the BOM if one was found, otherwise # set it to the start of the stream - self.rawStream.seek(encoding and seek or 0) - - return encoding + if encoding: + self.rawStream.seek(seek) + return lookupEncoding(encoding) + else: + self.rawStream.seek(0) + return None def detectEncodingMeta(self): """Report the encoding declared by the meta element @@ -542,8 +575,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream): self.rawStream.seek(0) encoding = parser.getEncoding() - if encoding in ("utf-16", "utf-16-be", "utf-16-le"): - encoding = "utf-8" + if encoding is not None and encoding.name in ("utf-16be", "utf-16le"): + encoding = lookupEncoding("utf-8") return encoding @@ -557,6 +590,7 @@ class EncodingBytes(bytes): return bytes.__new__(self, value.lower()) def __init__(self, value): + # pylint:disable=unused-argument self._position = -1 def __iter__(self): @@ -667,7 +701,7 @@ class EncodingParser(object): (b"<!", self.handleOther), (b"<?", self.handleOther), (b"<", self.handlePossibleStartTag)) - for byte in self.data: + for _ in self.data: keepParsing = True for key, method in methodDispatch: if self.data.matchBytes(key): @@ -706,7 +740,7 @@ class EncodingParser(object): return False elif attr[0] == b"charset": tentativeEncoding = attr[1] - codec = 
codecName(tentativeEncoding) + codec = lookupEncoding(tentativeEncoding) if codec is not None: self.encoding = codec return False @@ -714,7 +748,7 @@ class EncodingParser(object): contentParser = ContentAttrParser(EncodingBytes(attr[1])) tentativeEncoding = contentParser.parse() if tentativeEncoding is not None: - codec = codecName(tentativeEncoding) + codec = lookupEncoding(tentativeEncoding) if codec is not None: if hasPragma: self.encoding = codec @@ -871,16 +905,19 @@ class ContentAttrParser(object): return None -def codecName(encoding): +def lookupEncoding(encoding): """Return the python codec name corresponding to an encoding or None if the string doesn't correspond to a valid encoding.""" - if isinstance(encoding, bytes): + if isinstance(encoding, binary_type): try: encoding = encoding.decode("ascii") except UnicodeDecodeError: return None - if encoding: - canonicalName = ascii_punctuation_re.sub("", encoding).lower() - return encodings.get(canonicalName, None) + + if encoding is not None: + try: + return webencodings.lookup(encoding) + except AttributeError: + return None else: return None diff --git a/lib/html5lib/tokenizer.py b/lib/html5lib/_tokenizer.py similarity index 98% rename from lib/html5lib/tokenizer.py rename to lib/html5lib/_tokenizer.py index 797745787a49fefe7e64667b4b0b0355275bf8fa..6078f66aa0d3195c45668fc65efbf19217b3185e 100644 --- a/lib/html5lib/tokenizer.py +++ b/lib/html5lib/_tokenizer.py @@ -1,9 +1,6 @@ from __future__ import absolute_import, division, unicode_literals -try: - chr = unichr # flake8: noqa -except NameError: - pass +from six import unichr as chr from collections import deque @@ -14,9 +11,9 @@ from .constants import digits, hexDigits, EOF from .constants import tokenTypes, tagTokenTypes from .constants import replacementCharacters -from .inputstream import HTMLInputStream +from ._inputstream import HTMLInputStream -from .trie import Trie +from ._trie import Trie entitiesTrie = Trie(entities) @@ -34,16 +31,11 @@ class 
HTMLTokenizer(object): Points to HTMLInputStream object. """ - def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True, - lowercaseElementName=True, lowercaseAttrName=True, parser=None): + def __init__(self, stream, parser=None, **kwargs): - self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet) + self.stream = HTMLInputStream(stream, **kwargs) self.parser = parser - # Perform case conversions? - self.lowercaseElementName = lowercaseElementName - self.lowercaseAttrName = lowercaseAttrName - # Setup the initial tokenizer state self.escapeFlag = False self.lastFourChars = [] @@ -147,8 +139,8 @@ class HTMLTokenizer(object): output = "&" charStack = [self.stream.char()] - if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") - or (allowedChar is not None and allowedChar == charStack[0])): + if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or + (allowedChar is not None and allowedChar == charStack[0])): self.stream.unget(charStack[0]) elif charStack[0] == "#": @@ -235,8 +227,7 @@ class HTMLTokenizer(object): token = self.currentToken # Add token to the queue to be yielded if (token["type"] in tagTokenTypes): - if self.lowercaseElementName: - token["name"] = token["name"].translate(asciiUpper2Lower) + token["name"] = token["name"].translate(asciiUpper2Lower) if token["type"] == tokenTypes["EndTag"]: if token["data"]: self.tokenQueue.append({"type": tokenTypes["ParseError"], @@ -921,10 +912,9 @@ class HTMLTokenizer(object): # Attributes are not dropped at this stage. That happens when the # start tag token is emitted so values can still be safely appended # to attributes, but we do want to report the parse error in time. 
- if self.lowercaseAttrName: - self.currentToken["data"][-1][0] = ( - self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) - for name, value in self.currentToken["data"][:-1]: + self.currentToken["data"][-1][0] = ( + self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) + for name, _ in self.currentToken["data"][:-1]: if self.currentToken["data"][-1][0] == name: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "duplicate-attribute"}) @@ -1716,11 +1706,11 @@ class HTMLTokenizer(object): else: data.append(char) - data = "".join(data) + data = "".join(data) # pylint:disable=redefined-variable-type # Deal with null here rather than in the parser nullCount = data.count("\u0000") if nullCount > 0: - for i in range(nullCount): + for _ in range(nullCount): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) data = data.replace("\u0000", "\uFFFD") diff --git a/lib/html5lib/trie/__init__.py b/lib/html5lib/_trie/__init__.py similarity index 73% rename from lib/html5lib/trie/__init__.py rename to lib/html5lib/_trie/__init__.py index a8cca8a9acf8069817391bd7b61e09fb7e97cb97..a5ba4bf123aa585ec8a47dce6838b09b4dfa0b1a 100644 --- a/lib/html5lib/trie/__init__.py +++ b/lib/html5lib/_trie/__init__.py @@ -4,9 +4,11 @@ from .py import Trie as PyTrie Trie = PyTrie +# pylint:disable=wrong-import-position try: from .datrie import Trie as DATrie except ImportError: pass else: Trie = DATrie +# pylint:enable=wrong-import-position diff --git a/lib/html5lib/trie/_base.py b/lib/html5lib/_trie/_base.py similarity index 91% rename from lib/html5lib/trie/_base.py rename to lib/html5lib/_trie/_base.py index 724486b16eb707d26cdbb3c52b61c86cc1ab3a01..25eece46ed2da28c4fc22b888f4945e2b07a828f 100644 --- a/lib/html5lib/trie/_base.py +++ b/lib/html5lib/_trie/_base.py @@ -7,7 +7,8 @@ class Trie(Mapping): """Abstract base class for tries""" def keys(self, prefix=None): - keys = super().keys() + # pylint:disable=arguments-differ + keys = 
super(Trie, self).keys() if prefix is None: return set(keys) diff --git a/lib/html5lib/trie/datrie.py b/lib/html5lib/_trie/datrie.py similarity index 100% rename from lib/html5lib/trie/datrie.py rename to lib/html5lib/_trie/datrie.py diff --git a/lib/html5lib/trie/py.py b/lib/html5lib/_trie/py.py similarity index 100% rename from lib/html5lib/trie/py.py rename to lib/html5lib/_trie/py.py diff --git a/lib/html5lib/utils.py b/lib/html5lib/_utils.py similarity index 54% rename from lib/html5lib/utils.py rename to lib/html5lib/_utils.py index 2f41f4dfa608b7387cb3fec2a4c36e0080e13c9b..03f0dab7c037ce6df2d01172da0d501f21bec6ad 100644 --- a/lib/html5lib/utils.py +++ b/lib/html5lib/_utils.py @@ -1,7 +1,10 @@ from __future__ import absolute_import, division, unicode_literals +import sys from types import ModuleType +from six import text_type + try: import xml.etree.cElementTree as default_etree except ImportError: @@ -9,7 +12,28 @@ except ImportError: __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", - "surrogatePairToCodepoint", "moduleFactoryFactory"] + "surrogatePairToCodepoint", "moduleFactoryFactory", + "supports_lone_surrogates", "PY27"] + + +PY27 = sys.version_info[0] == 2 and sys.version_info[1] >= 7 + +# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be +# caught by the below test. In general this would be any platform +# using UTF-16 as its encoding of unicode strings, such as +# Jython. This is because UTF-16 itself is based on the use of such +# surrogates, and there is no mechanism to further escape such +# escapes. 
+try: + _x = eval('"\\uD800"') # pylint:disable=eval-used + if not isinstance(_x, text_type): + # We need this with u"" because of http://bugs.jython.org/issue2039 + _x = eval('u"\\uD800"') # pylint:disable=eval-used + assert isinstance(_x, text_type) +except: # pylint:disable=bare-except + supports_lone_surrogates = False +else: + supports_lone_surrogates = True class MethodDispatcher(dict): @@ -31,19 +55,20 @@ class MethodDispatcher(dict): # anything here. _dictEntries = [] for name, value in items: - if type(name) in (list, tuple, frozenset, set): + if isinstance(name, (list, tuple, frozenset, set)): for item in name: _dictEntries.append((item, value)) else: _dictEntries.append((name, value)) dict.__init__(self, _dictEntries) + assert len(self) == len(_dictEntries) self.default = None def __getitem__(self, key): return dict.get(self, key, self.default) -# Some utility functions to dal with weirdness around UCS2 vs UCS4 +# Some utility functions to deal with weirdness around UCS2 vs UCS4 # python builds def isSurrogatePair(data): @@ -70,13 +95,33 @@ def moduleFactoryFactory(factory): else: name = b"_%s_factory" % baseModule.__name__ - if name in moduleCache: - return moduleCache[name] - else: + kwargs_tuple = tuple(kwargs.items()) + + try: + return moduleCache[name][args][kwargs_tuple] + except KeyError: mod = ModuleType(name) objs = factory(baseModule, *args, **kwargs) mod.__dict__.update(objs) - moduleCache[name] = mod + if "name" not in moduleCache: + moduleCache[name] = {} + if "args" not in moduleCache[name]: + moduleCache[name][args] = {} + if "kwargs" not in moduleCache[name][args]: + moduleCache[name][args][kwargs_tuple] = {} + moduleCache[name][args][kwargs_tuple] = mod return mod return moduleFactory + + +def memoize(func): + cache = {} + + def wrapped(*args, **kwargs): + key = (tuple(args), tuple(kwargs.items())) + if key not in cache: + cache[key] = func(*args, **kwargs) + return cache[key] + + return wrapped diff --git a/lib/html5lib/constants.py 
b/lib/html5lib/constants.py index e7089846d59f71a9241314463e845f118cc3476f..9e7541d3826e8ded149f116fb8459e41e2224754 100644 --- a/lib/html5lib/constants.py +++ b/lib/html5lib/constants.py @@ -1,292 +1,296 @@ from __future__ import absolute_import, division, unicode_literals import string -import gettext -_ = gettext.gettext EOF = None E = { "null-character": - _("Null character in input stream, replaced with U+FFFD."), + "Null character in input stream, replaced with U+FFFD.", "invalid-codepoint": - _("Invalid codepoint in stream."), + "Invalid codepoint in stream.", "incorrectly-placed-solidus": - _("Solidus (/) incorrectly placed in tag."), + "Solidus (/) incorrectly placed in tag.", "incorrect-cr-newline-entity": - _("Incorrect CR newline entity, replaced with LF."), + "Incorrect CR newline entity, replaced with LF.", "illegal-windows-1252-entity": - _("Entity used with illegal number (windows-1252 reference)."), + "Entity used with illegal number (windows-1252 reference).", "cant-convert-numeric-entity": - _("Numeric entity couldn't be converted to character " - "(codepoint U+%(charAsInt)08x)."), + "Numeric entity couldn't be converted to character " + "(codepoint U+%(charAsInt)08x).", "illegal-codepoint-for-numeric-entity": - _("Numeric entity represents an illegal codepoint: " - "U+%(charAsInt)08x."), + "Numeric entity represents an illegal codepoint: " + "U+%(charAsInt)08x.", "numeric-entity-without-semicolon": - _("Numeric entity didn't end with ';'."), + "Numeric entity didn't end with ';'.", "expected-numeric-entity-but-got-eof": - _("Numeric entity expected. Got end of file instead."), + "Numeric entity expected. Got end of file instead.", "expected-numeric-entity": - _("Numeric entity expected but none found."), + "Numeric entity expected but none found.", "named-entity-without-semicolon": - _("Named entity didn't end with ';'."), + "Named entity didn't end with ';'.", "expected-named-entity": - _("Named entity expected. 
Got none."), + "Named entity expected. Got none.", "attributes-in-end-tag": - _("End tag contains unexpected attributes."), + "End tag contains unexpected attributes.", 'self-closing-flag-on-end-tag': - _("End tag contains unexpected self-closing flag."), + "End tag contains unexpected self-closing flag.", "expected-tag-name-but-got-right-bracket": - _("Expected tag name. Got '>' instead."), + "Expected tag name. Got '>' instead.", "expected-tag-name-but-got-question-mark": - _("Expected tag name. Got '?' instead. (HTML doesn't " - "support processing instructions.)"), + "Expected tag name. Got '?' instead. (HTML doesn't " + "support processing instructions.)", "expected-tag-name": - _("Expected tag name. Got something else instead"), + "Expected tag name. Got something else instead", "expected-closing-tag-but-got-right-bracket": - _("Expected closing tag. Got '>' instead. Ignoring '</>'."), + "Expected closing tag. Got '>' instead. Ignoring '</>'.", "expected-closing-tag-but-got-eof": - _("Expected closing tag. Unexpected end of file."), + "Expected closing tag. Unexpected end of file.", "expected-closing-tag-but-got-char": - _("Expected closing tag. Unexpected character '%(data)s' found."), + "Expected closing tag. Unexpected character '%(data)s' found.", "eof-in-tag-name": - _("Unexpected end of file in the tag name."), + "Unexpected end of file in the tag name.", "expected-attribute-name-but-got-eof": - _("Unexpected end of file. Expected attribute name instead."), + "Unexpected end of file. Expected attribute name instead.", "eof-in-attribute-name": - _("Unexpected end of file in attribute name."), + "Unexpected end of file in attribute name.", "invalid-character-in-attribute-name": - _("Invalid character in attribute name"), + "Invalid character in attribute name", "duplicate-attribute": - _("Dropped duplicate attribute on tag."), + "Dropped duplicate attribute on tag.", "expected-end-of-tag-name-but-got-eof": - _("Unexpected end of file. 
Expected = or end of tag."), + "Unexpected end of file. Expected = or end of tag.", "expected-attribute-value-but-got-eof": - _("Unexpected end of file. Expected attribute value."), + "Unexpected end of file. Expected attribute value.", "expected-attribute-value-but-got-right-bracket": - _("Expected attribute value. Got '>' instead."), + "Expected attribute value. Got '>' instead.", 'equals-in-unquoted-attribute-value': - _("Unexpected = in unquoted attribute"), + "Unexpected = in unquoted attribute", 'unexpected-character-in-unquoted-attribute-value': - _("Unexpected character in unquoted attribute"), + "Unexpected character in unquoted attribute", "invalid-character-after-attribute-name": - _("Unexpected character after attribute name."), + "Unexpected character after attribute name.", "unexpected-character-after-attribute-value": - _("Unexpected character after attribute value."), + "Unexpected character after attribute value.", "eof-in-attribute-value-double-quote": - _("Unexpected end of file in attribute value (\")."), + "Unexpected end of file in attribute value (\").", "eof-in-attribute-value-single-quote": - _("Unexpected end of file in attribute value (')."), + "Unexpected end of file in attribute value (').", "eof-in-attribute-value-no-quotes": - _("Unexpected end of file in attribute value."), + "Unexpected end of file in attribute value.", "unexpected-EOF-after-solidus-in-tag": - _("Unexpected end of file in tag. Expected >"), + "Unexpected end of file in tag. Expected >", "unexpected-character-after-solidus-in-tag": - _("Unexpected character after / in tag. Expected >"), + "Unexpected character after / in tag. Expected >", "expected-dashes-or-doctype": - _("Expected '--' or 'DOCTYPE'. Not found."), + "Expected '--' or 'DOCTYPE'. Not found.", "unexpected-bang-after-double-dash-in-comment": - _("Unexpected ! after -- in comment"), + "Unexpected ! 
after -- in comment", "unexpected-space-after-double-dash-in-comment": - _("Unexpected space after -- in comment"), + "Unexpected space after -- in comment", "incorrect-comment": - _("Incorrect comment."), + "Incorrect comment.", "eof-in-comment": - _("Unexpected end of file in comment."), + "Unexpected end of file in comment.", "eof-in-comment-end-dash": - _("Unexpected end of file in comment (-)"), + "Unexpected end of file in comment (-)", "unexpected-dash-after-double-dash-in-comment": - _("Unexpected '-' after '--' found in comment."), + "Unexpected '-' after '--' found in comment.", "eof-in-comment-double-dash": - _("Unexpected end of file in comment (--)."), + "Unexpected end of file in comment (--).", "eof-in-comment-end-space-state": - _("Unexpected end of file in comment."), + "Unexpected end of file in comment.", "eof-in-comment-end-bang-state": - _("Unexpected end of file in comment."), + "Unexpected end of file in comment.", "unexpected-char-in-comment": - _("Unexpected character in comment found."), + "Unexpected character in comment found.", "need-space-after-doctype": - _("No space after literal string 'DOCTYPE'."), + "No space after literal string 'DOCTYPE'.", "expected-doctype-name-but-got-right-bracket": - _("Unexpected > character. Expected DOCTYPE name."), + "Unexpected > character. Expected DOCTYPE name.", "expected-doctype-name-but-got-eof": - _("Unexpected end of file. Expected DOCTYPE name."), + "Unexpected end of file. Expected DOCTYPE name.", "eof-in-doctype-name": - _("Unexpected end of file in DOCTYPE name."), + "Unexpected end of file in DOCTYPE name.", "eof-in-doctype": - _("Unexpected end of file in DOCTYPE."), + "Unexpected end of file in DOCTYPE.", "expected-space-or-right-bracket-in-doctype": - _("Expected space or '>'. Got '%(data)s'"), + "Expected space or '>'. 
Got '%(data)s'", "unexpected-end-of-doctype": - _("Unexpected end of DOCTYPE."), + "Unexpected end of DOCTYPE.", "unexpected-char-in-doctype": - _("Unexpected character in DOCTYPE."), + "Unexpected character in DOCTYPE.", "eof-in-innerhtml": - _("XXX innerHTML EOF"), + "XXX innerHTML EOF", "unexpected-doctype": - _("Unexpected DOCTYPE. Ignored."), + "Unexpected DOCTYPE. Ignored.", "non-html-root": - _("html needs to be the first start tag."), + "html needs to be the first start tag.", "expected-doctype-but-got-eof": - _("Unexpected End of file. Expected DOCTYPE."), + "Unexpected End of file. Expected DOCTYPE.", "unknown-doctype": - _("Erroneous DOCTYPE."), + "Erroneous DOCTYPE.", "expected-doctype-but-got-chars": - _("Unexpected non-space characters. Expected DOCTYPE."), + "Unexpected non-space characters. Expected DOCTYPE.", "expected-doctype-but-got-start-tag": - _("Unexpected start tag (%(name)s). Expected DOCTYPE."), + "Unexpected start tag (%(name)s). Expected DOCTYPE.", "expected-doctype-but-got-end-tag": - _("Unexpected end tag (%(name)s). Expected DOCTYPE."), + "Unexpected end tag (%(name)s). Expected DOCTYPE.", "end-tag-after-implied-root": - _("Unexpected end tag (%(name)s) after the (implied) root element."), + "Unexpected end tag (%(name)s) after the (implied) root element.", "expected-named-closing-tag-but-got-eof": - _("Unexpected end of file. Expected end tag (%(name)s)."), + "Unexpected end of file. Expected end tag (%(name)s).", "two-heads-are-not-better-than-one": - _("Unexpected start tag head in existing head. Ignored."), + "Unexpected start tag head in existing head. Ignored.", "unexpected-end-tag": - _("Unexpected end tag (%(name)s). Ignored."), + "Unexpected end tag (%(name)s). Ignored.", "unexpected-start-tag-out-of-my-head": - _("Unexpected start tag (%(name)s) that can be in head. Moved."), + "Unexpected start tag (%(name)s) that can be in head. 
Moved.", "unexpected-start-tag": - _("Unexpected start tag (%(name)s)."), + "Unexpected start tag (%(name)s).", "missing-end-tag": - _("Missing end tag (%(name)s)."), + "Missing end tag (%(name)s).", "missing-end-tags": - _("Missing end tags (%(name)s)."), + "Missing end tags (%(name)s).", "unexpected-start-tag-implies-end-tag": - _("Unexpected start tag (%(startName)s) " - "implies end tag (%(endName)s)."), + "Unexpected start tag (%(startName)s) " + "implies end tag (%(endName)s).", "unexpected-start-tag-treated-as": - _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."), + "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", "deprecated-tag": - _("Unexpected start tag %(name)s. Don't use it!"), + "Unexpected start tag %(name)s. Don't use it!", "unexpected-start-tag-ignored": - _("Unexpected start tag %(name)s. Ignored."), + "Unexpected start tag %(name)s. Ignored.", "expected-one-end-tag-but-got-another": - _("Unexpected end tag (%(gotName)s). " - "Missing end tag (%(expectedName)s)."), + "Unexpected end tag (%(gotName)s). " + "Missing end tag (%(expectedName)s).", "end-tag-too-early": - _("End tag (%(name)s) seen too early. Expected other end tag."), + "End tag (%(name)s) seen too early. Expected other end tag.", "end-tag-too-early-named": - _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."), + "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", "end-tag-too-early-ignored": - _("End tag (%(name)s) seen too early. Ignored."), + "End tag (%(name)s) seen too early. 
Ignored.", "adoption-agency-1.1": - _("End tag (%(name)s) violates step 1, " - "paragraph 1 of the adoption agency algorithm."), + "End tag (%(name)s) violates step 1, " + "paragraph 1 of the adoption agency algorithm.", "adoption-agency-1.2": - _("End tag (%(name)s) violates step 1, " - "paragraph 2 of the adoption agency algorithm."), + "End tag (%(name)s) violates step 1, " + "paragraph 2 of the adoption agency algorithm.", "adoption-agency-1.3": - _("End tag (%(name)s) violates step 1, " - "paragraph 3 of the adoption agency algorithm."), + "End tag (%(name)s) violates step 1, " + "paragraph 3 of the adoption agency algorithm.", "adoption-agency-4.4": - _("End tag (%(name)s) violates step 4, " - "paragraph 4 of the adoption agency algorithm."), + "End tag (%(name)s) violates step 4, " + "paragraph 4 of the adoption agency algorithm.", "unexpected-end-tag-treated-as": - _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."), + "Unexpected end tag (%(originalName)s). Treated as %(newName)s.", "no-end-tag": - _("This element (%(name)s) has no end tag."), + "This element (%(name)s) has no end tag.", "unexpected-implied-end-tag-in-table": - _("Unexpected implied end tag (%(name)s) in the table phase."), + "Unexpected implied end tag (%(name)s) in the table phase.", "unexpected-implied-end-tag-in-table-body": - _("Unexpected implied end tag (%(name)s) in the table body phase."), + "Unexpected implied end tag (%(name)s) in the table body phase.", "unexpected-char-implies-table-voodoo": - _("Unexpected non-space characters in " - "table context caused voodoo mode."), + "Unexpected non-space characters in " + "table context caused voodoo mode.", "unexpected-hidden-input-in-table": - _("Unexpected input with type hidden in table context."), + "Unexpected input with type hidden in table context.", "unexpected-form-in-table": - _("Unexpected form in table context."), + "Unexpected form in table context.", "unexpected-start-tag-implies-table-voodoo": - 
_("Unexpected start tag (%(name)s) in " - "table context caused voodoo mode."), + "Unexpected start tag (%(name)s) in " + "table context caused voodoo mode.", "unexpected-end-tag-implies-table-voodoo": - _("Unexpected end tag (%(name)s) in " - "table context caused voodoo mode."), + "Unexpected end tag (%(name)s) in " + "table context caused voodoo mode.", "unexpected-cell-in-table-body": - _("Unexpected table cell start tag (%(name)s) " - "in the table body phase."), + "Unexpected table cell start tag (%(name)s) " + "in the table body phase.", "unexpected-cell-end-tag": - _("Got table cell end tag (%(name)s) " - "while required end tags are missing."), + "Got table cell end tag (%(name)s) " + "while required end tags are missing.", "unexpected-end-tag-in-table-body": - _("Unexpected end tag (%(name)s) in the table body phase. Ignored."), + "Unexpected end tag (%(name)s) in the table body phase. Ignored.", "unexpected-implied-end-tag-in-table-row": - _("Unexpected implied end tag (%(name)s) in the table row phase."), + "Unexpected implied end tag (%(name)s) in the table row phase.", "unexpected-end-tag-in-table-row": - _("Unexpected end tag (%(name)s) in the table row phase. Ignored."), + "Unexpected end tag (%(name)s) in the table row phase. Ignored.", "unexpected-select-in-select": - _("Unexpected select start tag in the select phase " - "treated as select end tag."), + "Unexpected select start tag in the select phase " + "treated as select end tag.", "unexpected-input-in-select": - _("Unexpected input start tag in the select phase."), + "Unexpected input start tag in the select phase.", "unexpected-start-tag-in-select": - _("Unexpected start tag token (%(name)s in the select phase. " - "Ignored."), + "Unexpected start tag token (%(name)s in the select phase. " + "Ignored.", "unexpected-end-tag-in-select": - _("Unexpected end tag (%(name)s) in the select phase. Ignored."), + "Unexpected end tag (%(name)s) in the select phase. 
Ignored.", "unexpected-table-element-start-tag-in-select-in-table": - _("Unexpected table element start tag (%(name)s) in the select in table phase."), + "Unexpected table element start tag (%(name)s) in the select in table phase.", "unexpected-table-element-end-tag-in-select-in-table": - _("Unexpected table element end tag (%(name)s) in the select in table phase."), + "Unexpected table element end tag (%(name)s) in the select in table phase.", "unexpected-char-after-body": - _("Unexpected non-space characters in the after body phase."), + "Unexpected non-space characters in the after body phase.", "unexpected-start-tag-after-body": - _("Unexpected start tag token (%(name)s)" - " in the after body phase."), + "Unexpected start tag token (%(name)s)" + " in the after body phase.", "unexpected-end-tag-after-body": - _("Unexpected end tag token (%(name)s)" - " in the after body phase."), + "Unexpected end tag token (%(name)s)" + " in the after body phase.", "unexpected-char-in-frameset": - _("Unexpected characters in the frameset phase. Characters ignored."), + "Unexpected characters in the frameset phase. Characters ignored.", "unexpected-start-tag-in-frameset": - _("Unexpected start tag token (%(name)s)" - " in the frameset phase. Ignored."), + "Unexpected start tag token (%(name)s)" + " in the frameset phase. Ignored.", "unexpected-frameset-in-frameset-innerhtml": - _("Unexpected end tag token (frameset) " - "in the frameset phase (innerHTML)."), + "Unexpected end tag token (frameset) " + "in the frameset phase (innerHTML).", "unexpected-end-tag-in-frameset": - _("Unexpected end tag token (%(name)s)" - " in the frameset phase. Ignored."), + "Unexpected end tag token (%(name)s)" + " in the frameset phase. Ignored.", "unexpected-char-after-frameset": - _("Unexpected non-space characters in the " - "after frameset phase. Ignored."), + "Unexpected non-space characters in the " + "after frameset phase. 
Ignored.", "unexpected-start-tag-after-frameset": - _("Unexpected start tag (%(name)s)" - " in the after frameset phase. Ignored."), + "Unexpected start tag (%(name)s)" + " in the after frameset phase. Ignored.", "unexpected-end-tag-after-frameset": - _("Unexpected end tag (%(name)s)" - " in the after frameset phase. Ignored."), + "Unexpected end tag (%(name)s)" + " in the after frameset phase. Ignored.", "unexpected-end-tag-after-body-innerhtml": - _("Unexpected end tag after body(innerHtml)"), + "Unexpected end tag after body(innerHtml)", "expected-eof-but-got-char": - _("Unexpected non-space characters. Expected end of file."), + "Unexpected non-space characters. Expected end of file.", "expected-eof-but-got-start-tag": - _("Unexpected start tag (%(name)s)" - ". Expected end of file."), + "Unexpected start tag (%(name)s)" + ". Expected end of file.", "expected-eof-but-got-end-tag": - _("Unexpected end tag (%(name)s)" - ". Expected end of file."), + "Unexpected end tag (%(name)s)" + ". Expected end of file.", "eof-in-table": - _("Unexpected end of file. Expected table content."), + "Unexpected end of file. Expected table content.", "eof-in-select": - _("Unexpected end of file. Expected select content."), + "Unexpected end of file. Expected select content.", "eof-in-frameset": - _("Unexpected end of file. Expected frameset content."), + "Unexpected end of file. Expected frameset content.", "eof-in-script-in-script": - _("Unexpected end of file. Expected script content."), + "Unexpected end of file. Expected script content.", "eof-in-foreign-lands": - _("Unexpected end of file. Expected foreign content"), + "Unexpected end of file. 
Expected foreign content", "non-void-element-with-trailing-solidus": - _("Trailing solidus not allowed on element %(name)s"), + "Trailing solidus not allowed on element %(name)s", "unexpected-html-element-in-foreign-content": - _("Element %(name)s not allowed in a non-html context"), + "Element %(name)s not allowed in a non-html context", "unexpected-end-tag-before-html": - _("Unexpected end tag (%(name)s) before html."), + "Unexpected end tag (%(name)s) before html.", + "unexpected-inhead-noscript-tag": + "Element %(name)s not allowed in a inhead-noscript context", + "eof-in-head-noscript": + "Unexpected end of file. Expected inhead-noscript content", + "char-in-head-noscript": + "Unexpected non-space character. Expected inhead-noscript content", "XXX-undefined-error": - _("Undefined error (this sucks and should be fixed)"), + "Undefined error (this sucks and should be fixed)", } namespaces = { @@ -298,7 +302,7 @@ namespaces = { "xmlns": "http://www.w3.org/2000/xmlns/" } -scopingElements = frozenset(( +scopingElements = frozenset([ (namespaces["html"], "applet"), (namespaces["html"], "caption"), (namespaces["html"], "html"), @@ -316,9 +320,9 @@ scopingElements = frozenset(( (namespaces["svg"], "foreignObject"), (namespaces["svg"], "desc"), (namespaces["svg"], "title"), -)) +]) -formattingElements = frozenset(( +formattingElements = frozenset([ (namespaces["html"], "a"), (namespaces["html"], "b"), (namespaces["html"], "big"), @@ -333,9 +337,9 @@ formattingElements = frozenset(( (namespaces["html"], "strong"), (namespaces["html"], "tt"), (namespaces["html"], "u") -)) +]) -specialElements = frozenset(( +specialElements = frozenset([ (namespaces["html"], "address"), (namespaces["html"], "applet"), (namespaces["html"], "area"), @@ -416,22 +420,89 @@ specialElements = frozenset(( (namespaces["html"], "wbr"), (namespaces["html"], "xmp"), (namespaces["svg"], "foreignObject") -)) +]) -htmlIntegrationPointElements = frozenset(( +htmlIntegrationPointElements = frozenset([ 
(namespaces["mathml"], "annotaion-xml"), (namespaces["svg"], "foreignObject"), (namespaces["svg"], "desc"), (namespaces["svg"], "title") -)) +]) -mathmlTextIntegrationPointElements = frozenset(( +mathmlTextIntegrationPointElements = frozenset([ (namespaces["mathml"], "mi"), (namespaces["mathml"], "mo"), (namespaces["mathml"], "mn"), (namespaces["mathml"], "ms"), (namespaces["mathml"], "mtext") -)) +]) + +adjustSVGAttributes = { + "attributename": "attributeName", + "attributetype": "attributeType", + "basefrequency": "baseFrequency", + "baseprofile": "baseProfile", + "calcmode": "calcMode", + "clippathunits": "clipPathUnits", + "contentscripttype": "contentScriptType", + "contentstyletype": "contentStyleType", + "diffuseconstant": "diffuseConstant", + "edgemode": "edgeMode", + "externalresourcesrequired": "externalResourcesRequired", + "filterres": "filterRes", + "filterunits": "filterUnits", + "glyphref": "glyphRef", + "gradienttransform": "gradientTransform", + "gradientunits": "gradientUnits", + "kernelmatrix": "kernelMatrix", + "kernelunitlength": "kernelUnitLength", + "keypoints": "keyPoints", + "keysplines": "keySplines", + "keytimes": "keyTimes", + "lengthadjust": "lengthAdjust", + "limitingconeangle": "limitingConeAngle", + "markerheight": "markerHeight", + "markerunits": "markerUnits", + "markerwidth": "markerWidth", + "maskcontentunits": "maskContentUnits", + "maskunits": "maskUnits", + "numoctaves": "numOctaves", + "pathlength": "pathLength", + "patterncontentunits": "patternContentUnits", + "patterntransform": "patternTransform", + "patternunits": "patternUnits", + "pointsatx": "pointsAtX", + "pointsaty": "pointsAtY", + "pointsatz": "pointsAtZ", + "preservealpha": "preserveAlpha", + "preserveaspectratio": "preserveAspectRatio", + "primitiveunits": "primitiveUnits", + "refx": "refX", + "refy": "refY", + "repeatcount": "repeatCount", + "repeatdur": "repeatDur", + "requiredextensions": "requiredExtensions", + "requiredfeatures": "requiredFeatures", + 
"specularconstant": "specularConstant", + "specularexponent": "specularExponent", + "spreadmethod": "spreadMethod", + "startoffset": "startOffset", + "stddeviation": "stdDeviation", + "stitchtiles": "stitchTiles", + "surfacescale": "surfaceScale", + "systemlanguage": "systemLanguage", + "tablevalues": "tableValues", + "targetx": "targetX", + "targety": "targetY", + "textlength": "textLength", + "viewbox": "viewBox", + "viewtarget": "viewTarget", + "xchannelselector": "xChannelSelector", + "ychannelselector": "yChannelSelector", + "zoomandpan": "zoomAndPan" +} + +adjustMathMLAttributes = {"definitionurl": "definitionURL"} adjustForeignAttributes = { "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), @@ -451,21 +522,21 @@ adjustForeignAttributes = { unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in adjustForeignAttributes.items()]) -spaceCharacters = frozenset(( +spaceCharacters = frozenset([ "\t", "\n", "\u000C", " ", "\r" -)) +]) -tableInsertModeElements = frozenset(( +tableInsertModeElements = frozenset([ "table", "tbody", "tfoot", "thead", "tr" -)) +]) asciiLowercase = frozenset(string.ascii_lowercase) asciiUppercase = frozenset(string.ascii_uppercase) @@ -486,7 +557,7 @@ headingElements = ( "h6" ) -voidElements = frozenset(( +voidElements = frozenset([ "base", "command", "event-source", @@ -502,11 +573,11 @@ voidElements = frozenset(( "input", "source", "track" -)) +]) -cdataElements = frozenset(('title', 'textarea')) +cdataElements = frozenset(['title', 'textarea']) -rcdataElements = frozenset(( +rcdataElements = frozenset([ 'style', 'script', 'xmp', @@ -514,27 +585,27 @@ rcdataElements = frozenset(( 'noembed', 'noframes', 'noscript' -)) +]) booleanAttributes = { - "": frozenset(("irrelevant",)), - "style": frozenset(("scoped",)), - "img": frozenset(("ismap",)), - "audio": frozenset(("autoplay", "controls")), - "video": frozenset(("autoplay", "controls")), - "script": frozenset(("defer", "async")), - "details": 
frozenset(("open",)), - "datagrid": frozenset(("multiple", "disabled")), - "command": frozenset(("hidden", "disabled", "checked", "default")), - "hr": frozenset(("noshade")), - "menu": frozenset(("autosubmit",)), - "fieldset": frozenset(("disabled", "readonly")), - "option": frozenset(("disabled", "readonly", "selected")), - "optgroup": frozenset(("disabled", "readonly")), - "button": frozenset(("disabled", "autofocus")), - "input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")), - "select": frozenset(("disabled", "readonly", "autofocus", "multiple")), - "output": frozenset(("disabled", "readonly")), + "": frozenset(["irrelevant"]), + "style": frozenset(["scoped"]), + "img": frozenset(["ismap"]), + "audio": frozenset(["autoplay", "controls"]), + "video": frozenset(["autoplay", "controls"]), + "script": frozenset(["defer", "async"]), + "details": frozenset(["open"]), + "datagrid": frozenset(["multiple", "disabled"]), + "command": frozenset(["hidden", "disabled", "checked", "default"]), + "hr": frozenset(["noshade"]), + "menu": frozenset(["autosubmit"]), + "fieldset": frozenset(["disabled", "readonly"]), + "option": frozenset(["disabled", "readonly", "selected"]), + "optgroup": frozenset(["disabled", "readonly"]), + "button": frozenset(["disabled", "autofocus"]), + "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), + "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), + "output": frozenset(["disabled", "readonly"]), } # entitiesWindows1252 has to be _ordered_ and needs to have an index. 
It @@ -574,7 +645,7 @@ entitiesWindows1252 = ( 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS ) -xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;')) +xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) entities = { "AElig": "\xc6", @@ -2815,7 +2886,6 @@ replacementCharacters = { 0x0d: "\u000D", 0x80: "\u20AC", 0x81: "\u0081", - 0x81: "\u0081", 0x82: "\u201A", 0x83: "\u0192", 0x84: "\u201E", @@ -2848,235 +2918,6 @@ replacementCharacters = { 0x9F: "\u0178", } -encodings = { - '437': 'cp437', - '850': 'cp850', - '852': 'cp852', - '855': 'cp855', - '857': 'cp857', - '860': 'cp860', - '861': 'cp861', - '862': 'cp862', - '863': 'cp863', - '865': 'cp865', - '866': 'cp866', - '869': 'cp869', - 'ansix341968': 'ascii', - 'ansix341986': 'ascii', - 'arabic': 'iso8859-6', - 'ascii': 'ascii', - 'asmo708': 'iso8859-6', - 'big5': 'big5', - 'big5hkscs': 'big5hkscs', - 'chinese': 'gbk', - 'cp037': 'cp037', - 'cp1026': 'cp1026', - 'cp154': 'ptcp154', - 'cp367': 'ascii', - 'cp424': 'cp424', - 'cp437': 'cp437', - 'cp500': 'cp500', - 'cp775': 'cp775', - 'cp819': 'windows-1252', - 'cp850': 'cp850', - 'cp852': 'cp852', - 'cp855': 'cp855', - 'cp857': 'cp857', - 'cp860': 'cp860', - 'cp861': 'cp861', - 'cp862': 'cp862', - 'cp863': 'cp863', - 'cp864': 'cp864', - 'cp865': 'cp865', - 'cp866': 'cp866', - 'cp869': 'cp869', - 'cp936': 'gbk', - 'cpgr': 'cp869', - 'cpis': 'cp861', - 'csascii': 'ascii', - 'csbig5': 'big5', - 'cseuckr': 'cp949', - 'cseucpkdfmtjapanese': 'euc_jp', - 'csgb2312': 'gbk', - 'cshproman8': 'hp-roman8', - 'csibm037': 'cp037', - 'csibm1026': 'cp1026', - 'csibm424': 'cp424', - 'csibm500': 'cp500', - 'csibm855': 'cp855', - 'csibm857': 'cp857', - 'csibm860': 'cp860', - 'csibm861': 'cp861', - 'csibm863': 'cp863', - 'csibm864': 'cp864', - 'csibm865': 'cp865', - 'csibm866': 'cp866', - 'csibm869': 'cp869', - 'csiso2022jp': 'iso2022_jp', - 'csiso2022jp2': 'iso2022_jp_2', - 'csiso2022kr': 'iso2022_kr', - 'csiso58gb231280': 'gbk', - 'csisolatin1': 
'windows-1252', - 'csisolatin2': 'iso8859-2', - 'csisolatin3': 'iso8859-3', - 'csisolatin4': 'iso8859-4', - 'csisolatin5': 'windows-1254', - 'csisolatin6': 'iso8859-10', - 'csisolatinarabic': 'iso8859-6', - 'csisolatincyrillic': 'iso8859-5', - 'csisolatingreek': 'iso8859-7', - 'csisolatinhebrew': 'iso8859-8', - 'cskoi8r': 'koi8-r', - 'csksc56011987': 'cp949', - 'cspc775baltic': 'cp775', - 'cspc850multilingual': 'cp850', - 'cspc862latinhebrew': 'cp862', - 'cspc8codepage437': 'cp437', - 'cspcp852': 'cp852', - 'csptcp154': 'ptcp154', - 'csshiftjis': 'shift_jis', - 'csunicode11utf7': 'utf-7', - 'cyrillic': 'iso8859-5', - 'cyrillicasian': 'ptcp154', - 'ebcdiccpbe': 'cp500', - 'ebcdiccpca': 'cp037', - 'ebcdiccpch': 'cp500', - 'ebcdiccphe': 'cp424', - 'ebcdiccpnl': 'cp037', - 'ebcdiccpus': 'cp037', - 'ebcdiccpwt': 'cp037', - 'ecma114': 'iso8859-6', - 'ecma118': 'iso8859-7', - 'elot928': 'iso8859-7', - 'eucjp': 'euc_jp', - 'euckr': 'cp949', - 'extendedunixcodepackedformatforjapanese': 'euc_jp', - 'gb18030': 'gb18030', - 'gb2312': 'gbk', - 'gb231280': 'gbk', - 'gbk': 'gbk', - 'greek': 'iso8859-7', - 'greek8': 'iso8859-7', - 'hebrew': 'iso8859-8', - 'hproman8': 'hp-roman8', - 'hzgb2312': 'hz', - 'ibm037': 'cp037', - 'ibm1026': 'cp1026', - 'ibm367': 'ascii', - 'ibm424': 'cp424', - 'ibm437': 'cp437', - 'ibm500': 'cp500', - 'ibm775': 'cp775', - 'ibm819': 'windows-1252', - 'ibm850': 'cp850', - 'ibm852': 'cp852', - 'ibm855': 'cp855', - 'ibm857': 'cp857', - 'ibm860': 'cp860', - 'ibm861': 'cp861', - 'ibm862': 'cp862', - 'ibm863': 'cp863', - 'ibm864': 'cp864', - 'ibm865': 'cp865', - 'ibm866': 'cp866', - 'ibm869': 'cp869', - 'iso2022jp': 'iso2022_jp', - 'iso2022jp2': 'iso2022_jp_2', - 'iso2022kr': 'iso2022_kr', - 'iso646irv1991': 'ascii', - 'iso646us': 'ascii', - 'iso88591': 'windows-1252', - 'iso885910': 'iso8859-10', - 'iso8859101992': 'iso8859-10', - 'iso885911987': 'windows-1252', - 'iso885913': 'iso8859-13', - 'iso885914': 'iso8859-14', - 'iso8859141998': 'iso8859-14', - 
'iso885915': 'iso8859-15', - 'iso885916': 'iso8859-16', - 'iso8859162001': 'iso8859-16', - 'iso88592': 'iso8859-2', - 'iso885921987': 'iso8859-2', - 'iso88593': 'iso8859-3', - 'iso885931988': 'iso8859-3', - 'iso88594': 'iso8859-4', - 'iso885941988': 'iso8859-4', - 'iso88595': 'iso8859-5', - 'iso885951988': 'iso8859-5', - 'iso88596': 'iso8859-6', - 'iso885961987': 'iso8859-6', - 'iso88597': 'iso8859-7', - 'iso885971987': 'iso8859-7', - 'iso88598': 'iso8859-8', - 'iso885981988': 'iso8859-8', - 'iso88599': 'windows-1254', - 'iso885991989': 'windows-1254', - 'isoceltic': 'iso8859-14', - 'isoir100': 'windows-1252', - 'isoir101': 'iso8859-2', - 'isoir109': 'iso8859-3', - 'isoir110': 'iso8859-4', - 'isoir126': 'iso8859-7', - 'isoir127': 'iso8859-6', - 'isoir138': 'iso8859-8', - 'isoir144': 'iso8859-5', - 'isoir148': 'windows-1254', - 'isoir149': 'cp949', - 'isoir157': 'iso8859-10', - 'isoir199': 'iso8859-14', - 'isoir226': 'iso8859-16', - 'isoir58': 'gbk', - 'isoir6': 'ascii', - 'koi8r': 'koi8-r', - 'koi8u': 'koi8-u', - 'korean': 'cp949', - 'ksc5601': 'cp949', - 'ksc56011987': 'cp949', - 'ksc56011989': 'cp949', - 'l1': 'windows-1252', - 'l10': 'iso8859-16', - 'l2': 'iso8859-2', - 'l3': 'iso8859-3', - 'l4': 'iso8859-4', - 'l5': 'windows-1254', - 'l6': 'iso8859-10', - 'l8': 'iso8859-14', - 'latin1': 'windows-1252', - 'latin10': 'iso8859-16', - 'latin2': 'iso8859-2', - 'latin3': 'iso8859-3', - 'latin4': 'iso8859-4', - 'latin5': 'windows-1254', - 'latin6': 'iso8859-10', - 'latin8': 'iso8859-14', - 'latin9': 'iso8859-15', - 'ms936': 'gbk', - 'mskanji': 'shift_jis', - 'pt154': 'ptcp154', - 'ptcp154': 'ptcp154', - 'r8': 'hp-roman8', - 'roman8': 'hp-roman8', - 'shiftjis': 'shift_jis', - 'tis620': 'cp874', - 'unicode11utf7': 'utf-7', - 'us': 'ascii', - 'usascii': 'ascii', - 'utf16': 'utf-16', - 'utf16be': 'utf-16-be', - 'utf16le': 'utf-16-le', - 'utf8': 'utf-8', - 'windows1250': 'cp1250', - 'windows1251': 'cp1251', - 'windows1252': 'cp1252', - 'windows1253': 'cp1253', - 
'windows1254': 'cp1254', - 'windows1255': 'cp1255', - 'windows1256': 'cp1256', - 'windows1257': 'cp1257', - 'windows1258': 'cp1258', - 'windows936': 'gbk', - 'x-x-big5': 'big5'} - tokenTypes = { "Doctype": 0, "Characters": 1, @@ -3088,8 +2929,8 @@ tokenTypes = { "ParseError": 7 } -tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], - tokenTypes["EmptyTag"])) +tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"], + tokenTypes["EmptyTag"]]) prefixes = dict([(v, k) for k, v in namespaces.items()]) diff --git a/lib/html5lib/filters/alphabeticalattributes.py b/lib/html5lib/filters/alphabeticalattributes.py index fed6996c1d9b0dcc5462f3e1cc8e4e153cb414ce..4795baecc9949b5fb076fb07107c472f7fcff1d9 100644 --- a/lib/html5lib/filters/alphabeticalattributes.py +++ b/lib/html5lib/filters/alphabeticalattributes.py @@ -1,6 +1,6 @@ from __future__ import absolute_import, division, unicode_literals -from . import _base +from . import base try: from collections import OrderedDict @@ -8,9 +8,9 @@ except ImportError: from ordereddict import OrderedDict -class Filter(_base.Filter): +class Filter(base.Filter): def __iter__(self): - for token in _base.Filter.__iter__(self): + for token in base.Filter.__iter__(self): if token["type"] in ("StartTag", "EmptyTag"): attrs = OrderedDict() for name, value in sorted(token["data"].items(), diff --git a/lib/html5lib/filters/_base.py b/lib/html5lib/filters/base.py similarity index 100% rename from lib/html5lib/filters/_base.py rename to lib/html5lib/filters/base.py diff --git a/lib/html5lib/filters/inject_meta_charset.py b/lib/html5lib/filters/inject_meta_charset.py index ca33b70b5307e69ddcaf1d0d63f43ea64fa6528a..2059ec861871c4a8d73ac8c736169d3bd67acc94 100644 --- a/lib/html5lib/filters/inject_meta_charset.py +++ b/lib/html5lib/filters/inject_meta_charset.py @@ -1,11 +1,11 @@ from __future__ import absolute_import, division, unicode_literals -from . import _base +from . 
import base -class Filter(_base.Filter): +class Filter(base.Filter): def __init__(self, source, encoding): - _base.Filter.__init__(self, source) + base.Filter.__init__(self, source) self.encoding = encoding def __iter__(self): @@ -13,7 +13,7 @@ class Filter(_base.Filter): meta_found = (self.encoding is None) pending = [] - for token in _base.Filter.__iter__(self): + for token in base.Filter.__iter__(self): type = token["type"] if type == "StartTag": if token["name"].lower() == "head": diff --git a/lib/html5lib/filters/lint.py b/lib/html5lib/filters/lint.py index 7cc99a4ba7c010fc42df30acce71598711dbcc5f..a9c0831a9f5544d15000ad5e7a3ca14fa0d00e38 100644 --- a/lib/html5lib/filters/lint.py +++ b/lib/html5lib/filters/lint.py @@ -1,93 +1,81 @@ from __future__ import absolute_import, division, unicode_literals -from gettext import gettext -_ = gettext +from six import text_type -from . import _base -from ..constants import cdataElements, rcdataElements, voidElements +from . import base +from ..constants import namespaces, voidElements from ..constants import spaceCharacters spaceCharacters = "".join(spaceCharacters) -class LintError(Exception): - pass +class Filter(base.Filter): + def __init__(self, source, require_matching_tags=True): + super(Filter, self).__init__(source) + self.require_matching_tags = require_matching_tags - -class Filter(_base.Filter): def __iter__(self): open_elements = [] - contentModelFlag = "PCDATA" - for token in _base.Filter.__iter__(self): + for token in base.Filter.__iter__(self): type = token["type"] if type in ("StartTag", "EmptyTag"): + namespace = token["namespace"] name = token["name"] - if contentModelFlag != "PCDATA": - raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name}) - if not isinstance(name, str): - raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) - if not name: - raise LintError(_("Empty tag name")) - if type == "StartTag" and name in voidElements: - raise 
LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name}) - elif type == "EmptyTag" and name not in voidElements: - raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]}) - if type == "StartTag": - open_elements.append(name) - for name, value in token["data"]: - if not isinstance(name, str): - raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name}) - if not name: - raise LintError(_("Empty attribute name")) - if not isinstance(value, str): - raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value}) - if name in cdataElements: - contentModelFlag = "CDATA" - elif name in rcdataElements: - contentModelFlag = "RCDATA" - elif name == "plaintext": - contentModelFlag = "PLAINTEXT" + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + assert isinstance(token["data"], dict) + if (not namespace or namespace == namespaces["html"]) and name in voidElements: + assert type == "EmptyTag" + else: + assert type == "StartTag" + if type == "StartTag" and self.require_matching_tags: + open_elements.append((namespace, name)) + for (namespace, name), value in token["data"].items(): + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + assert isinstance(value, text_type) elif type == "EndTag": + namespace = token["namespace"] name = token["name"] - if not isinstance(name, str): - raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) - if not name: - raise LintError(_("Empty tag name")) - if name in voidElements: - raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name}) - start_name = open_elements.pop() - if start_name != name: - raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name}) - 
contentModelFlag = "PCDATA" + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + if (not namespace or namespace == namespaces["html"]) and name in voidElements: + assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} + elif self.require_matching_tags: + start = open_elements.pop() + assert start == (namespace, name) elif type == "Comment": - if contentModelFlag != "PCDATA": - raise LintError(_("Comment not in PCDATA content model flag")) + data = token["data"] + assert isinstance(data, text_type) elif type in ("Characters", "SpaceCharacters"): data = token["data"] - if not isinstance(data, str): - raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data}) - if not data: - raise LintError(_("%(type)s token with empty data") % {"type": type}) + assert isinstance(data, text_type) + assert data != "" if type == "SpaceCharacters": - data = data.strip(spaceCharacters) - if data: - raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data}) + assert data.strip(spaceCharacters) == "" elif type == "Doctype": name = token["name"] - if contentModelFlag != "PCDATA": - raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name}) - if not isinstance(name, str): - raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) - # XXX: what to do with token["data"] ? 
+ assert name is None or isinstance(name, text_type) + assert token["publicId"] is None or isinstance(name, text_type) + assert token["systemId"] is None or isinstance(name, text_type) + + elif type == "Entity": + assert isinstance(token["name"], text_type) - elif type in ("ParseError", "SerializeError"): - pass + elif type == "SerializerError": + assert isinstance(token["data"], text_type) else: - raise LintError(_("Unknown token type: %(type)s") % {"type": type}) + assert False, "Unknown token type: %(type)s" % {"type": type} yield token diff --git a/lib/html5lib/filters/optionaltags.py b/lib/html5lib/filters/optionaltags.py index fefe0b3097bd9646b43c4c3f3bd8ba8d0bbb01a0..f6edb7341c535edbf67b9c0d9e9d13c1781abc47 100644 --- a/lib/html5lib/filters/optionaltags.py +++ b/lib/html5lib/filters/optionaltags.py @@ -1,9 +1,9 @@ from __future__ import absolute_import, division, unicode_literals -from . import _base +from . import base -class Filter(_base.Filter): +class Filter(base.Filter): def slider(self): previous1 = previous2 = None for token in self.source: @@ -11,7 +11,8 @@ class Filter(_base.Filter): yield previous2, previous1, token previous2 = previous1 previous1 = token - yield previous2, previous1, None + if previous1 is not None: + yield previous2, previous1, None def __iter__(self): for previous, token, next in self.slider(): @@ -58,7 +59,7 @@ class Filter(_base.Filter): elif tagname == 'colgroup': # A colgroup element's start tag may be omitted if the first thing # inside the colgroup element is a col element, and if the element - # is not immediately preceeded by another colgroup element whose + # is not immediately preceded by another colgroup element whose # end tag has been omitted. 
if type in ("StartTag", "EmptyTag"): # XXX: we do not look at the preceding event, so instead we never @@ -70,7 +71,7 @@ class Filter(_base.Filter): elif tagname == 'tbody': # A tbody element's start tag may be omitted if the first thing # inside the tbody element is a tr element, and if the element is - # not immediately preceeded by a tbody, thead, or tfoot element + # not immediately preceded by a tbody, thead, or tfoot element # whose end tag has been omitted. if type == "StartTag": # omit the thead and tfoot elements' end tag when they are diff --git a/lib/html5lib/filters/sanitizer.py b/lib/html5lib/filters/sanitizer.py index b206b54e7a74fbcfc0ebd580dc3d255fd4e75456..b5ddcb93124d1cd5929f8d06c7f47395af3400be 100644 --- a/lib/html5lib/filters/sanitizer.py +++ b/lib/html5lib/filters/sanitizer.py @@ -1,12 +1,865 @@ from __future__ import absolute_import, division, unicode_literals -from . import _base -from ..sanitizer import HTMLSanitizerMixin +import re +from xml.sax.saxutils import escape, unescape +from six.moves import urllib_parse as urlparse + +from . 
import base +from ..constants import namespaces, prefixes + +__all__ = ["Filter"] + + +allowed_elements = frozenset(( + (namespaces['html'], 'a'), + (namespaces['html'], 'abbr'), + (namespaces['html'], 'acronym'), + (namespaces['html'], 'address'), + (namespaces['html'], 'area'), + (namespaces['html'], 'article'), + (namespaces['html'], 'aside'), + (namespaces['html'], 'audio'), + (namespaces['html'], 'b'), + (namespaces['html'], 'big'), + (namespaces['html'], 'blockquote'), + (namespaces['html'], 'br'), + (namespaces['html'], 'button'), + (namespaces['html'], 'canvas'), + (namespaces['html'], 'caption'), + (namespaces['html'], 'center'), + (namespaces['html'], 'cite'), + (namespaces['html'], 'code'), + (namespaces['html'], 'col'), + (namespaces['html'], 'colgroup'), + (namespaces['html'], 'command'), + (namespaces['html'], 'datagrid'), + (namespaces['html'], 'datalist'), + (namespaces['html'], 'dd'), + (namespaces['html'], 'del'), + (namespaces['html'], 'details'), + (namespaces['html'], 'dfn'), + (namespaces['html'], 'dialog'), + (namespaces['html'], 'dir'), + (namespaces['html'], 'div'), + (namespaces['html'], 'dl'), + (namespaces['html'], 'dt'), + (namespaces['html'], 'em'), + (namespaces['html'], 'event-source'), + (namespaces['html'], 'fieldset'), + (namespaces['html'], 'figcaption'), + (namespaces['html'], 'figure'), + (namespaces['html'], 'footer'), + (namespaces['html'], 'font'), + (namespaces['html'], 'form'), + (namespaces['html'], 'header'), + (namespaces['html'], 'h1'), + (namespaces['html'], 'h2'), + (namespaces['html'], 'h3'), + (namespaces['html'], 'h4'), + (namespaces['html'], 'h5'), + (namespaces['html'], 'h6'), + (namespaces['html'], 'hr'), + (namespaces['html'], 'i'), + (namespaces['html'], 'img'), + (namespaces['html'], 'input'), + (namespaces['html'], 'ins'), + (namespaces['html'], 'keygen'), + (namespaces['html'], 'kbd'), + (namespaces['html'], 'label'), + (namespaces['html'], 'legend'), + (namespaces['html'], 'li'), + (namespaces['html'], 
'm'), + (namespaces['html'], 'map'), + (namespaces['html'], 'menu'), + (namespaces['html'], 'meter'), + (namespaces['html'], 'multicol'), + (namespaces['html'], 'nav'), + (namespaces['html'], 'nextid'), + (namespaces['html'], 'ol'), + (namespaces['html'], 'output'), + (namespaces['html'], 'optgroup'), + (namespaces['html'], 'option'), + (namespaces['html'], 'p'), + (namespaces['html'], 'pre'), + (namespaces['html'], 'progress'), + (namespaces['html'], 'q'), + (namespaces['html'], 's'), + (namespaces['html'], 'samp'), + (namespaces['html'], 'section'), + (namespaces['html'], 'select'), + (namespaces['html'], 'small'), + (namespaces['html'], 'sound'), + (namespaces['html'], 'source'), + (namespaces['html'], 'spacer'), + (namespaces['html'], 'span'), + (namespaces['html'], 'strike'), + (namespaces['html'], 'strong'), + (namespaces['html'], 'sub'), + (namespaces['html'], 'sup'), + (namespaces['html'], 'table'), + (namespaces['html'], 'tbody'), + (namespaces['html'], 'td'), + (namespaces['html'], 'textarea'), + (namespaces['html'], 'time'), + (namespaces['html'], 'tfoot'), + (namespaces['html'], 'th'), + (namespaces['html'], 'thead'), + (namespaces['html'], 'tr'), + (namespaces['html'], 'tt'), + (namespaces['html'], 'u'), + (namespaces['html'], 'ul'), + (namespaces['html'], 'var'), + (namespaces['html'], 'video'), + (namespaces['mathml'], 'maction'), + (namespaces['mathml'], 'math'), + (namespaces['mathml'], 'merror'), + (namespaces['mathml'], 'mfrac'), + (namespaces['mathml'], 'mi'), + (namespaces['mathml'], 'mmultiscripts'), + (namespaces['mathml'], 'mn'), + (namespaces['mathml'], 'mo'), + (namespaces['mathml'], 'mover'), + (namespaces['mathml'], 'mpadded'), + (namespaces['mathml'], 'mphantom'), + (namespaces['mathml'], 'mprescripts'), + (namespaces['mathml'], 'mroot'), + (namespaces['mathml'], 'mrow'), + (namespaces['mathml'], 'mspace'), + (namespaces['mathml'], 'msqrt'), + (namespaces['mathml'], 'mstyle'), + (namespaces['mathml'], 'msub'), + (namespaces['mathml'], 
'msubsup'), + (namespaces['mathml'], 'msup'), + (namespaces['mathml'], 'mtable'), + (namespaces['mathml'], 'mtd'), + (namespaces['mathml'], 'mtext'), + (namespaces['mathml'], 'mtr'), + (namespaces['mathml'], 'munder'), + (namespaces['mathml'], 'munderover'), + (namespaces['mathml'], 'none'), + (namespaces['svg'], 'a'), + (namespaces['svg'], 'animate'), + (namespaces['svg'], 'animateColor'), + (namespaces['svg'], 'animateMotion'), + (namespaces['svg'], 'animateTransform'), + (namespaces['svg'], 'clipPath'), + (namespaces['svg'], 'circle'), + (namespaces['svg'], 'defs'), + (namespaces['svg'], 'desc'), + (namespaces['svg'], 'ellipse'), + (namespaces['svg'], 'font-face'), + (namespaces['svg'], 'font-face-name'), + (namespaces['svg'], 'font-face-src'), + (namespaces['svg'], 'g'), + (namespaces['svg'], 'glyph'), + (namespaces['svg'], 'hkern'), + (namespaces['svg'], 'linearGradient'), + (namespaces['svg'], 'line'), + (namespaces['svg'], 'marker'), + (namespaces['svg'], 'metadata'), + (namespaces['svg'], 'missing-glyph'), + (namespaces['svg'], 'mpath'), + (namespaces['svg'], 'path'), + (namespaces['svg'], 'polygon'), + (namespaces['svg'], 'polyline'), + (namespaces['svg'], 'radialGradient'), + (namespaces['svg'], 'rect'), + (namespaces['svg'], 'set'), + (namespaces['svg'], 'stop'), + (namespaces['svg'], 'svg'), + (namespaces['svg'], 'switch'), + (namespaces['svg'], 'text'), + (namespaces['svg'], 'title'), + (namespaces['svg'], 'tspan'), + (namespaces['svg'], 'use'), +)) + +allowed_attributes = frozenset(( + # HTML attributes + (None, 'abbr'), + (None, 'accept'), + (None, 'accept-charset'), + (None, 'accesskey'), + (None, 'action'), + (None, 'align'), + (None, 'alt'), + (None, 'autocomplete'), + (None, 'autofocus'), + (None, 'axis'), + (None, 'background'), + (None, 'balance'), + (None, 'bgcolor'), + (None, 'bgproperties'), + (None, 'border'), + (None, 'bordercolor'), + (None, 'bordercolordark'), + (None, 'bordercolorlight'), + (None, 'bottompadding'), + (None, 
'cellpadding'), + (None, 'cellspacing'), + (None, 'ch'), + (None, 'challenge'), + (None, 'char'), + (None, 'charoff'), + (None, 'choff'), + (None, 'charset'), + (None, 'checked'), + (None, 'cite'), + (None, 'class'), + (None, 'clear'), + (None, 'color'), + (None, 'cols'), + (None, 'colspan'), + (None, 'compact'), + (None, 'contenteditable'), + (None, 'controls'), + (None, 'coords'), + (None, 'data'), + (None, 'datafld'), + (None, 'datapagesize'), + (None, 'datasrc'), + (None, 'datetime'), + (None, 'default'), + (None, 'delay'), + (None, 'dir'), + (None, 'disabled'), + (None, 'draggable'), + (None, 'dynsrc'), + (None, 'enctype'), + (None, 'end'), + (None, 'face'), + (None, 'for'), + (None, 'form'), + (None, 'frame'), + (None, 'galleryimg'), + (None, 'gutter'), + (None, 'headers'), + (None, 'height'), + (None, 'hidefocus'), + (None, 'hidden'), + (None, 'high'), + (None, 'href'), + (None, 'hreflang'), + (None, 'hspace'), + (None, 'icon'), + (None, 'id'), + (None, 'inputmode'), + (None, 'ismap'), + (None, 'keytype'), + (None, 'label'), + (None, 'leftspacing'), + (None, 'lang'), + (None, 'list'), + (None, 'longdesc'), + (None, 'loop'), + (None, 'loopcount'), + (None, 'loopend'), + (None, 'loopstart'), + (None, 'low'), + (None, 'lowsrc'), + (None, 'max'), + (None, 'maxlength'), + (None, 'media'), + (None, 'method'), + (None, 'min'), + (None, 'multiple'), + (None, 'name'), + (None, 'nohref'), + (None, 'noshade'), + (None, 'nowrap'), + (None, 'open'), + (None, 'optimum'), + (None, 'pattern'), + (None, 'ping'), + (None, 'point-size'), + (None, 'poster'), + (None, 'pqg'), + (None, 'preload'), + (None, 'prompt'), + (None, 'radiogroup'), + (None, 'readonly'), + (None, 'rel'), + (None, 'repeat-max'), + (None, 'repeat-min'), + (None, 'replace'), + (None, 'required'), + (None, 'rev'), + (None, 'rightspacing'), + (None, 'rows'), + (None, 'rowspan'), + (None, 'rules'), + (None, 'scope'), + (None, 'selected'), + (None, 'shape'), + (None, 'size'), + (None, 'span'), + (None, 'src'), + 
(None, 'start'), + (None, 'step'), + (None, 'style'), + (None, 'summary'), + (None, 'suppress'), + (None, 'tabindex'), + (None, 'target'), + (None, 'template'), + (None, 'title'), + (None, 'toppadding'), + (None, 'type'), + (None, 'unselectable'), + (None, 'usemap'), + (None, 'urn'), + (None, 'valign'), + (None, 'value'), + (None, 'variable'), + (None, 'volume'), + (None, 'vspace'), + (None, 'vrml'), + (None, 'width'), + (None, 'wrap'), + (namespaces['xml'], 'lang'), + # MathML attributes + (None, 'actiontype'), + (None, 'align'), + (None, 'columnalign'), + (None, 'columnalign'), + (None, 'columnalign'), + (None, 'columnlines'), + (None, 'columnspacing'), + (None, 'columnspan'), + (None, 'depth'), + (None, 'display'), + (None, 'displaystyle'), + (None, 'equalcolumns'), + (None, 'equalrows'), + (None, 'fence'), + (None, 'fontstyle'), + (None, 'fontweight'), + (None, 'frame'), + (None, 'height'), + (None, 'linethickness'), + (None, 'lspace'), + (None, 'mathbackground'), + (None, 'mathcolor'), + (None, 'mathvariant'), + (None, 'mathvariant'), + (None, 'maxsize'), + (None, 'minsize'), + (None, 'other'), + (None, 'rowalign'), + (None, 'rowalign'), + (None, 'rowalign'), + (None, 'rowlines'), + (None, 'rowspacing'), + (None, 'rowspan'), + (None, 'rspace'), + (None, 'scriptlevel'), + (None, 'selection'), + (None, 'separator'), + (None, 'stretchy'), + (None, 'width'), + (None, 'width'), + (namespaces['xlink'], 'href'), + (namespaces['xlink'], 'show'), + (namespaces['xlink'], 'type'), + # SVG attributes + (None, 'accent-height'), + (None, 'accumulate'), + (None, 'additive'), + (None, 'alphabetic'), + (None, 'arabic-form'), + (None, 'ascent'), + (None, 'attributeName'), + (None, 'attributeType'), + (None, 'baseProfile'), + (None, 'bbox'), + (None, 'begin'), + (None, 'by'), + (None, 'calcMode'), + (None, 'cap-height'), + (None, 'class'), + (None, 'clip-path'), + (None, 'color'), + (None, 'color-rendering'), + (None, 'content'), + (None, 'cx'), + (None, 'cy'), + (None, 'd'), + 
(None, 'dx'), + (None, 'dy'), + (None, 'descent'), + (None, 'display'), + (None, 'dur'), + (None, 'end'), + (None, 'fill'), + (None, 'fill-opacity'), + (None, 'fill-rule'), + (None, 'font-family'), + (None, 'font-size'), + (None, 'font-stretch'), + (None, 'font-style'), + (None, 'font-variant'), + (None, 'font-weight'), + (None, 'from'), + (None, 'fx'), + (None, 'fy'), + (None, 'g1'), + (None, 'g2'), + (None, 'glyph-name'), + (None, 'gradientUnits'), + (None, 'hanging'), + (None, 'height'), + (None, 'horiz-adv-x'), + (None, 'horiz-origin-x'), + (None, 'id'), + (None, 'ideographic'), + (None, 'k'), + (None, 'keyPoints'), + (None, 'keySplines'), + (None, 'keyTimes'), + (None, 'lang'), + (None, 'marker-end'), + (None, 'marker-mid'), + (None, 'marker-start'), + (None, 'markerHeight'), + (None, 'markerUnits'), + (None, 'markerWidth'), + (None, 'mathematical'), + (None, 'max'), + (None, 'min'), + (None, 'name'), + (None, 'offset'), + (None, 'opacity'), + (None, 'orient'), + (None, 'origin'), + (None, 'overline-position'), + (None, 'overline-thickness'), + (None, 'panose-1'), + (None, 'path'), + (None, 'pathLength'), + (None, 'points'), + (None, 'preserveAspectRatio'), + (None, 'r'), + (None, 'refX'), + (None, 'refY'), + (None, 'repeatCount'), + (None, 'repeatDur'), + (None, 'requiredExtensions'), + (None, 'requiredFeatures'), + (None, 'restart'), + (None, 'rotate'), + (None, 'rx'), + (None, 'ry'), + (None, 'slope'), + (None, 'stemh'), + (None, 'stemv'), + (None, 'stop-color'), + (None, 'stop-opacity'), + (None, 'strikethrough-position'), + (None, 'strikethrough-thickness'), + (None, 'stroke'), + (None, 'stroke-dasharray'), + (None, 'stroke-dashoffset'), + (None, 'stroke-linecap'), + (None, 'stroke-linejoin'), + (None, 'stroke-miterlimit'), + (None, 'stroke-opacity'), + (None, 'stroke-width'), + (None, 'systemLanguage'), + (None, 'target'), + (None, 'text-anchor'), + (None, 'to'), + (None, 'transform'), + (None, 'type'), + (None, 'u1'), + (None, 'u2'), + (None, 
'underline-position'), + (None, 'underline-thickness'), + (None, 'unicode'), + (None, 'unicode-range'), + (None, 'units-per-em'), + (None, 'values'), + (None, 'version'), + (None, 'viewBox'), + (None, 'visibility'), + (None, 'width'), + (None, 'widths'), + (None, 'x'), + (None, 'x-height'), + (None, 'x1'), + (None, 'x2'), + (namespaces['xlink'], 'actuate'), + (namespaces['xlink'], 'arcrole'), + (namespaces['xlink'], 'href'), + (namespaces['xlink'], 'role'), + (namespaces['xlink'], 'show'), + (namespaces['xlink'], 'title'), + (namespaces['xlink'], 'type'), + (namespaces['xml'], 'base'), + (namespaces['xml'], 'lang'), + (namespaces['xml'], 'space'), + (None, 'y'), + (None, 'y1'), + (None, 'y2'), + (None, 'zoomAndPan'), +)) + +attr_val_is_uri = frozenset(( + (None, 'href'), + (None, 'src'), + (None, 'cite'), + (None, 'action'), + (None, 'longdesc'), + (None, 'poster'), + (None, 'background'), + (None, 'datasrc'), + (None, 'dynsrc'), + (None, 'lowsrc'), + (None, 'ping'), + (namespaces['xlink'], 'href'), + (namespaces['xml'], 'base'), +)) + +svg_attr_val_allows_ref = frozenset(( + (None, 'clip-path'), + (None, 'color-profile'), + (None, 'cursor'), + (None, 'fill'), + (None, 'filter'), + (None, 'marker'), + (None, 'marker-start'), + (None, 'marker-mid'), + (None, 'marker-end'), + (None, 'mask'), + (None, 'stroke'), +)) + +svg_allow_local_href = frozenset(( + (None, 'altGlyph'), + (None, 'animate'), + (None, 'animateColor'), + (None, 'animateMotion'), + (None, 'animateTransform'), + (None, 'cursor'), + (None, 'feImage'), + (None, 'filter'), + (None, 'linearGradient'), + (None, 'pattern'), + (None, 'radialGradient'), + (None, 'textpath'), + (None, 'tref'), + (None, 'set'), + (None, 'use') +)) + +allowed_css_properties = frozenset(( + 'azimuth', + 'background-color', + 'border-bottom-color', + 'border-collapse', + 'border-color', + 'border-left-color', + 'border-right-color', + 'border-top-color', + 'clear', + 'color', + 'cursor', + 'direction', + 'display', + 'elevation', 
+ 'float', + 'font', + 'font-family', + 'font-size', + 'font-style', + 'font-variant', + 'font-weight', + 'height', + 'letter-spacing', + 'line-height', + 'overflow', + 'pause', + 'pause-after', + 'pause-before', + 'pitch', + 'pitch-range', + 'richness', + 'speak', + 'speak-header', + 'speak-numeral', + 'speak-punctuation', + 'speech-rate', + 'stress', + 'text-align', + 'text-decoration', + 'text-indent', + 'unicode-bidi', + 'vertical-align', + 'voice-family', + 'volume', + 'white-space', + 'width', +)) + +allowed_css_keywords = frozenset(( + 'auto', + 'aqua', + 'black', + 'block', + 'blue', + 'bold', + 'both', + 'bottom', + 'brown', + 'center', + 'collapse', + 'dashed', + 'dotted', + 'fuchsia', + 'gray', + 'green', + '!important', + 'italic', + 'left', + 'lime', + 'maroon', + 'medium', + 'none', + 'navy', + 'normal', + 'nowrap', + 'olive', + 'pointer', + 'purple', + 'red', + 'right', + 'solid', + 'silver', + 'teal', + 'top', + 'transparent', + 'underline', + 'white', + 'yellow', +)) + +allowed_svg_properties = frozenset(( + 'fill', + 'fill-opacity', + 'fill-rule', + 'stroke', + 'stroke-width', + 'stroke-linecap', + 'stroke-linejoin', + 'stroke-opacity', +)) + +allowed_protocols = frozenset(( + 'ed2k', + 'ftp', + 'http', + 'https', + 'irc', + 'mailto', + 'news', + 'gopher', + 'nntp', + 'telnet', + 'webcal', + 'xmpp', + 'callto', + 'feed', + 'urn', + 'aim', + 'rsync', + 'tag', + 'ssh', + 'sftp', + 'rtsp', + 'afs', + 'data', +)) + +allowed_content_types = frozenset(( + 'image/png', + 'image/jpeg', + 'image/gif', + 'image/webp', + 'image/bmp', + 'text/plain', +)) + + +data_content_type = re.compile(r''' + ^ + # Match a content type <application>/<type> + (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) + # Match any character set and encoding + (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) + |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) 
+ # Assume the rest is data + ,.* + $ + ''', + re.VERBOSE) + + +class Filter(base.Filter): + """ sanitization of XHTML+MathML+SVG and of inline style attributes.""" + def __init__(self, + source, + allowed_elements=allowed_elements, + allowed_attributes=allowed_attributes, + allowed_css_properties=allowed_css_properties, + allowed_css_keywords=allowed_css_keywords, + allowed_svg_properties=allowed_svg_properties, + allowed_protocols=allowed_protocols, + allowed_content_types=allowed_content_types, + attr_val_is_uri=attr_val_is_uri, + svg_attr_val_allows_ref=svg_attr_val_allows_ref, + svg_allow_local_href=svg_allow_local_href): + super(Filter, self).__init__(source) + self.allowed_elements = allowed_elements + self.allowed_attributes = allowed_attributes + self.allowed_css_properties = allowed_css_properties + self.allowed_css_keywords = allowed_css_keywords + self.allowed_svg_properties = allowed_svg_properties + self.allowed_protocols = allowed_protocols + self.allowed_content_types = allowed_content_types + self.attr_val_is_uri = attr_val_is_uri + self.svg_attr_val_allows_ref = svg_attr_val_allows_ref + self.svg_allow_local_href = svg_allow_local_href -class Filter(_base.Filter, HTMLSanitizerMixin): def __iter__(self): - for token in _base.Filter.__iter__(self): + for token in base.Filter.__iter__(self): token = self.sanitize_token(token) if token: yield token + + # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and + # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style + # attributes are parsed, and a restricted set, # specified by + # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through. + # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified + # in ALLOWED_PROTOCOLS are allowed. 
+ # + # sanitize_html('<script> do_nasty_stuff() </script>') + # => <script> do_nasty_stuff() </script> + # sanitize_html('<a href="javascript: sucker();">Click here for $100</a>') + # => <a>Click here for $100</a> + def sanitize_token(self, token): + + # accommodate filters which use token_type differently + token_type = token["type"] + if token_type in ("StartTag", "EndTag", "EmptyTag"): + name = token["name"] + namespace = token["namespace"] + if ((namespace, name) in self.allowed_elements or + (namespace is None and + (namespaces["html"], name) in self.allowed_elements)): + return self.allowed_token(token) + else: + return self.disallowed_token(token) + elif token_type == "Comment": + pass + else: + return token + + def allowed_token(self, token): + if "data" in token: + attrs = token["data"] + attr_names = set(attrs.keys()) + + # Remove forbidden attributes + for to_remove in (attr_names - self.allowed_attributes): + del token["data"][to_remove] + attr_names.remove(to_remove) + + # Remove attributes with disallowed URL values + for attr in (attr_names & self.attr_val_is_uri): + assert attr in attrs + # I don't have a clue where this regexp comes from or why it matches those + # characters, nor why we call unescape. I just know it's always been here. + # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all + # this will do is remove *more* than it otherwise would. 
+ val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\s]+", '', + unescape(attrs[attr])).lower() + # remove replacement characters from unescaped characters + val_unescaped = val_unescaped.replace("\ufffd", "") + try: + uri = urlparse.urlparse(val_unescaped) + except ValueError: + uri = None + del attrs[attr] + if uri and uri.scheme: + if uri.scheme not in self.allowed_protocols: + del attrs[attr] + if uri.scheme == 'data': + m = data_content_type.match(uri.path) + if not m: + del attrs[attr] + elif m.group('content_type') not in self.allowed_content_types: + del attrs[attr] + + for attr in self.svg_attr_val_allows_ref: + if attr in attrs: + attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', + ' ', + unescape(attrs[attr])) + if (token["name"] in self.svg_allow_local_href and + (namespaces['xlink'], 'href') in attrs and re.search('^\s*[^#\s].*', + attrs[(namespaces['xlink'], 'href')])): + del attrs[(namespaces['xlink'], 'href')] + if (None, 'style') in attrs: + attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) + token["data"] = attrs + return token + + def disallowed_token(self, token): + token_type = token["type"] + if token_type == "EndTag": + token["data"] = "</%s>" % token["name"] + elif token["data"]: + assert token_type in ("StartTag", "EmptyTag") + attrs = [] + for (ns, name), v in token["data"].items(): + attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) + token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) + else: + token["data"] = "<%s>" % token["name"] + if token.get("selfClosing"): + token["data"] = token["data"][:-1] + "/>" + + token["type"] = "Characters" + + del token["name"] + return token + + def sanitize_css(self, style): + # disallow urls + style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) + + # gauntlet + if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): + return '' + if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", 
style): + return '' + + clean = [] + for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style): + if not value: + continue + if prop.lower() in self.allowed_css_properties: + clean.append(prop + ': ' + value + ';') + elif prop.split('-')[0].lower() in ['background', 'border', 'margin', + 'padding']: + for keyword in value.split(): + if keyword not in self.allowed_css_keywords and \ + not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa + break + else: + clean.append(prop + ': ' + value + ';') + elif prop.lower() in self.allowed_svg_properties: + clean.append(prop + ': ' + value + ';') + + return ' '.join(clean) diff --git a/lib/html5lib/filters/whitespace.py b/lib/html5lib/filters/whitespace.py index dfc60eebd35bd6450d82cf65abcbedca8f8fb9da..89210528717dcd653e5313c62bf6b83e8445309f 100644 --- a/lib/html5lib/filters/whitespace.py +++ b/lib/html5lib/filters/whitespace.py @@ -2,20 +2,20 @@ from __future__ import absolute_import, division, unicode_literals import re -from . import _base +from . 
import base from ..constants import rcdataElements, spaceCharacters spaceCharacters = "".join(spaceCharacters) SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) -class Filter(_base.Filter): +class Filter(base.Filter): spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) def __iter__(self): preserve = 0 - for token in _base.Filter.__iter__(self): + for token in base.Filter.__iter__(self): type = token["type"] if type == "StartTag" \ and (preserve or token["name"] in self.spacePreserveElements): diff --git a/lib/html5lib/html5parser.py b/lib/html5lib/html5parser.py index b0f14f393504b1cdce3d96c825d6e600fb297d32..2abd63e444dcbfe709f162705ab689cf742aeeb1 100644 --- a/lib/html5lib/html5parser.py +++ b/lib/html5lib/html5parser.py @@ -1,38 +1,44 @@ from __future__ import absolute_import, division, unicode_literals -from six import with_metaclass +from six import with_metaclass, viewkeys, PY3 import types -from . import inputstream -from . import tokenizer - -from . import treebuilders -from .treebuilders._base import Marker - -from . import utils -from . import constants -from .constants import spaceCharacters, asciiUpper2Lower -from .constants import specialElements -from .constants import headingElements -from .constants import cdataElements, rcdataElements -from .constants import tokenTypes, ReparseException, namespaces -from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements -from .constants import adjustForeignAttributes as adjustForeignAttributesMap +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict +from . import _inputstream +from . import _tokenizer -def parse(doc, treebuilder="etree", encoding=None, - namespaceHTMLElements=True): +from . import treebuilders +from .treebuilders.base import Marker + +from . 
import _utils +from .constants import ( + spaceCharacters, asciiUpper2Lower, + specialElements, headingElements, cdataElements, rcdataElements, + tokenTypes, tagTokenTypes, + namespaces, + htmlIntegrationPointElements, mathmlTextIntegrationPointElements, + adjustForeignAttributes as adjustForeignAttributesMap, + adjustMathMLAttributes, adjustSVGAttributes, + E, + ReparseException +) + + +def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): """Parse a string or file-like object into a tree""" tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parse(doc, encoding=encoding) + return p.parse(doc, **kwargs) -def parseFragment(doc, container="div", treebuilder="etree", encoding=None, - namespaceHTMLElements=True): +def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parseFragment(doc, container=container, encoding=encoding) + return p.parseFragment(doc, container=container, **kwargs) def method_decorator_metaclass(function): @@ -51,18 +57,13 @@ class HTMLParser(object): """HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML""" - def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer, - strict=False, namespaceHTMLElements=True, debug=False): + def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): """ strict - raise an exception when a parse error is encountered tree - a treebuilder class controlling the type of tree that will be returned. Built in treebuilders can be accessed through html5lib.treebuilders.getTreeBuilder(treeType) - - tokenizer - a class that provides a stream of tokens to the treebuilder. - This may be replaced for e.g. 
a sanitizer which converts some tags to - text """ # Raise an exception on the first error encountered @@ -71,29 +72,24 @@ class HTMLParser(object): if tree is None: tree = treebuilders.getTreeBuilder("etree") self.tree = tree(namespaceHTMLElements) - self.tokenizer_class = tokenizer self.errors = [] self.phases = dict([(name, cls(self, self.tree)) for name, cls in getPhases(debug).items()]) - def _parse(self, stream, innerHTML=False, container="div", - encoding=None, parseMeta=True, useChardet=True, **kwargs): + def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): self.innerHTMLMode = innerHTML self.container = container - self.tokenizer = self.tokenizer_class(stream, encoding=encoding, - parseMeta=parseMeta, - useChardet=useChardet, - parser=self, **kwargs) + self.scripting = scripting + self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) self.reset() - while True: - try: - self.mainLoop() - break - except ReparseException: - self.reset() + try: + self.mainLoop() + except ReparseException: + self.reset() + self.mainLoop() def reset(self): self.tree.reset() @@ -120,7 +116,7 @@ class HTMLParser(object): self.phase.insertHtmlElement() self.resetInsertionMode() else: - self.innerHTML = False + self.innerHTML = False # pylint:disable=redefined-variable-type self.phase = self.phases["initial"] self.lastPhase = None @@ -129,6 +125,17 @@ class HTMLParser(object): self.framesetOK = True + @property + def documentEncoding(self): + """The name of the character encoding + that was used to decode the input stream, + or :obj:`None` if that is not determined yet. 
+ + """ + if not hasattr(self, 'tokenizer'): + return None + return self.tokenizer.stream.charEncoding[0].name + def isHTMLIntegrationPoint(self, element): if (element.name == "annotation-xml" and element.namespace == namespaces["mathml"]): @@ -152,8 +159,10 @@ class HTMLParser(object): ParseErrorToken = tokenTypes["ParseError"] for token in self.normalizedTokens(): + prev_token = None new_token = token while new_token is not None: + prev_token = new_token currentNode = self.tree.openElements[-1] if self.tree.openElements else None currentNodeNamespace = currentNode.namespace if currentNode else None currentNodeName = currentNode.name if currentNode else None @@ -172,6 +181,7 @@ class HTMLParser(object): type in (CharactersToken, SpaceCharactersToken))) or (currentNodeNamespace == namespaces["mathml"] and currentNodeName == "annotation-xml" and + type == StartTagToken and token["name"] == "svg") or (self.isHTMLIntegrationPoint(currentNode) and type in (StartTagToken, CharactersToken, SpaceCharactersToken))): @@ -192,10 +202,10 @@ class HTMLParser(object): elif type == DoctypeToken: new_token = phase.processDoctype(new_token) - if (type == StartTagToken and token["selfClosing"] - and not token["selfClosingAcknowledged"]): + if (type == StartTagToken and prev_token["selfClosing"] and + not prev_token["selfClosingAcknowledged"]): self.parseError("non-void-element-with-trailing-solidus", - {"name": token["name"]}) + {"name": prev_token["name"]}) # When the loop finishes it's EOF reprocess = True @@ -210,7 +220,7 @@ class HTMLParser(object): for token in self.tokenizer: yield self.normalizeToken(token) - def parse(self, stream, encoding=None, parseMeta=True, useChardet=True): + def parse(self, stream, *args, **kwargs): """Parse a HTML document into a well-formed tree stream - a filelike object or string containing the HTML to be parsed @@ -219,13 +229,13 @@ class HTMLParser(object): the encoding. 
If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) + + scripting - treat noscript elements as if javascript was turned on """ - self._parse(stream, innerHTML=False, encoding=encoding, - parseMeta=parseMeta, useChardet=useChardet) + self._parse(stream, False, None, *args, **kwargs) return self.tree.getDocument() - def parseFragment(self, stream, container="div", encoding=None, - parseMeta=False, useChardet=True): + def parseFragment(self, stream, *args, **kwargs): """Parse a HTML fragment into a well-formed tree fragment container - name of the element we're setting the innerHTML property @@ -237,112 +247,43 @@ class HTMLParser(object): the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) + + scripting - treat noscript elements as if javascript was turned on """ - self._parse(stream, True, container=container, encoding=encoding) + self._parse(stream, True, *args, **kwargs) return self.tree.getFragment() - def parseError(self, errorcode="XXX-undefined-error", datavars={}): + def parseError(self, errorcode="XXX-undefined-error", datavars=None): # XXX The idea is to make errorcode mandatory. 
+ if datavars is None: + datavars = {} self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) if self.strict: - raise ParseError + raise ParseError(E[errorcode] % datavars) def normalizeToken(self, token): """ HTML5 specific normalizations to the token stream """ if token["type"] == tokenTypes["StartTag"]: - token["data"] = dict(token["data"][::-1]) + raw = token["data"] + token["data"] = OrderedDict(raw) + if len(raw) > len(token["data"]): + # we had some duplicated attribute, fix so first wins + token["data"].update(raw[::-1]) return token def adjustMathMLAttributes(self, token): - replacements = {"definitionurl": "definitionURL"} - for k, v in replacements.items(): - if k in token["data"]: - token["data"][v] = token["data"][k] - del token["data"][k] + adjust_attributes(token, adjustMathMLAttributes) def adjustSVGAttributes(self, token): - replacements = { - "attributename": "attributeName", - "attributetype": "attributeType", - "basefrequency": "baseFrequency", - "baseprofile": "baseProfile", - "calcmode": "calcMode", - "clippathunits": "clipPathUnits", - "contentscripttype": "contentScriptType", - "contentstyletype": "contentStyleType", - "diffuseconstant": "diffuseConstant", - "edgemode": "edgeMode", - "externalresourcesrequired": "externalResourcesRequired", - "filterres": "filterRes", - "filterunits": "filterUnits", - "glyphref": "glyphRef", - "gradienttransform": "gradientTransform", - "gradientunits": "gradientUnits", - "kernelmatrix": "kernelMatrix", - "kernelunitlength": "kernelUnitLength", - "keypoints": "keyPoints", - "keysplines": "keySplines", - "keytimes": "keyTimes", - "lengthadjust": "lengthAdjust", - "limitingconeangle": "limitingConeAngle", - "markerheight": "markerHeight", - "markerunits": "markerUnits", - "markerwidth": "markerWidth", - "maskcontentunits": "maskContentUnits", - "maskunits": "maskUnits", - "numoctaves": "numOctaves", - "pathlength": "pathLength", - "patterncontentunits": "patternContentUnits", - 
"patterntransform": "patternTransform", - "patternunits": "patternUnits", - "pointsatx": "pointsAtX", - "pointsaty": "pointsAtY", - "pointsatz": "pointsAtZ", - "preservealpha": "preserveAlpha", - "preserveaspectratio": "preserveAspectRatio", - "primitiveunits": "primitiveUnits", - "refx": "refX", - "refy": "refY", - "repeatcount": "repeatCount", - "repeatdur": "repeatDur", - "requiredextensions": "requiredExtensions", - "requiredfeatures": "requiredFeatures", - "specularconstant": "specularConstant", - "specularexponent": "specularExponent", - "spreadmethod": "spreadMethod", - "startoffset": "startOffset", - "stddeviation": "stdDeviation", - "stitchtiles": "stitchTiles", - "surfacescale": "surfaceScale", - "systemlanguage": "systemLanguage", - "tablevalues": "tableValues", - "targetx": "targetX", - "targety": "targetY", - "textlength": "textLength", - "viewbox": "viewBox", - "viewtarget": "viewTarget", - "xchannelselector": "xChannelSelector", - "ychannelselector": "yChannelSelector", - "zoomandpan": "zoomAndPan" - } - for originalName in list(token["data"].keys()): - if originalName in replacements: - svgName = replacements[originalName] - token["data"][svgName] = token["data"][originalName] - del token["data"][originalName] + adjust_attributes(token, adjustSVGAttributes) def adjustForeignAttributes(self, token): - replacements = adjustForeignAttributesMap - - for originalName in token["data"].keys(): - if originalName in replacements: - foreignName = replacements[originalName] - token["data"][foreignName] = token["data"][originalName] - del token["data"][originalName] + adjust_attributes(token, adjustForeignAttributesMap) def reparseTokenNormal(self, token): + # pylint:disable=unused-argument self.parser.phase() def resetInsertionMode(self): @@ -407,11 +348,12 @@ class HTMLParser(object): self.phase = self.phases["text"] +@_utils.memoize def getPhases(debug): def log(function): """Logger that records which phase processes each token""" type_names = dict((value, 
key) for key, value in - constants.tokenTypes.items()) + tokenTypes.items()) def wrapped(self, *args, **kwargs): if function.__name__.startswith("process") and len(args) > 0: @@ -420,7 +362,7 @@ def getPhases(debug): info = {"type": type_names[token['type']]} except: raise - if token['type'] in constants.tagTokenTypes: + if token['type'] in tagTokenTypes: info["name"] = token['name'] self.parser.log.append((self.parser.tokenizer.state.__name__, @@ -439,6 +381,7 @@ def getPhases(debug): else: return type + # pylint:disable=unused-argument class Phase(with_metaclass(getMetaclass(debug, log))): """Base class for helper object that implements each phase of processing """ @@ -505,77 +448,76 @@ def getPhases(debug): if publicId != "": publicId = publicId.translate(asciiUpper2Lower) - if (not correct or token["name"] != "html" - or publicId.startswith( - ("+//silmaril//dtd html pro v0r11 19970101//", - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", - "-//as//dtd html 3.0 aswedit + extensions//", - "-//ietf//dtd html 2.0 level 1//", - "-//ietf//dtd html 2.0 level 2//", - "-//ietf//dtd html 2.0 strict level 1//", - "-//ietf//dtd html 2.0 strict level 2//", - "-//ietf//dtd html 2.0 strict//", - "-//ietf//dtd html 2.0//", - "-//ietf//dtd html 2.1e//", - "-//ietf//dtd html 3.0//", - "-//ietf//dtd html 3.2 final//", - "-//ietf//dtd html 3.2//", - "-//ietf//dtd html 3//", - "-//ietf//dtd html level 0//", - "-//ietf//dtd html level 1//", - "-//ietf//dtd html level 2//", - "-//ietf//dtd html level 3//", - "-//ietf//dtd html strict level 0//", - "-//ietf//dtd html strict level 1//", - "-//ietf//dtd html strict level 2//", - "-//ietf//dtd html strict level 3//", - "-//ietf//dtd html strict//", - "-//ietf//dtd html//", - "-//metrius//dtd metrius presentational//", - "-//microsoft//dtd internet explorer 2.0 html strict//", - "-//microsoft//dtd internet explorer 2.0 html//", - "-//microsoft//dtd internet explorer 2.0 tables//", - "-//microsoft//dtd internet explorer 3.0 html 
strict//", - "-//microsoft//dtd internet explorer 3.0 html//", - "-//microsoft//dtd internet explorer 3.0 tables//", - "-//netscape comm. corp.//dtd html//", - "-//netscape comm. corp.//dtd strict html//", - "-//o'reilly and associates//dtd html 2.0//", - "-//o'reilly and associates//dtd html extended 1.0//", - "-//o'reilly and associates//dtd html extended relaxed 1.0//", - "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", - "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", - "-//spyglass//dtd html 2.0 extended//", - "-//sq//dtd html 2.0 hotmetal + extensions//", - "-//sun microsystems corp.//dtd hotjava html//", - "-//sun microsystems corp.//dtd hotjava strict html//", - "-//w3c//dtd html 3 1995-03-24//", - "-//w3c//dtd html 3.2 draft//", - "-//w3c//dtd html 3.2 final//", - "-//w3c//dtd html 3.2//", - "-//w3c//dtd html 3.2s draft//", - "-//w3c//dtd html 4.0 frameset//", - "-//w3c//dtd html 4.0 transitional//", - "-//w3c//dtd html experimental 19960712//", - "-//w3c//dtd html experimental 970421//", - "-//w3c//dtd w3 html//", - "-//w3o//dtd w3 html 3.0//", - "-//webtechs//dtd mozilla html 2.0//", - "-//webtechs//dtd mozilla html//")) - or publicId in - ("-//w3o//dtd w3 html strict 3.0//en//", - "-/w3c/dtd html 4.0 transitional/en", - "html") - or publicId.startswith( - ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId is None - or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): + if (not correct or token["name"] != "html" or + publicId.startswith( + ("+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 
2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//")) or + publicId in 
("-//w3o//dtd w3 html strict 3.0//en//", + "-/w3c/dtd html 4.0 transitional/en", + "html") or + publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is None or + systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): self.parser.compatMode = "quirks" elif (publicId.startswith( ("-//w3c//dtd xhtml 1.0 frameset//", - "-//w3c//dtd xhtml 1.0 transitional//")) - or publicId.startswith( + "-//w3c//dtd xhtml 1.0 transitional//")) or + publicId.startswith( ("-//w3c//dtd html 4.01 frameset//", "-//w3c//dtd html 4.01 transitional//")) and systemId is not None): @@ -648,13 +590,13 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ (("head", "body", "html", "br"), self.endTagImplyHead) ]) self.endTagHandler.default = self.endTagOther @@ -694,10 +636,11 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("title", self.startTagTitle), - (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle), + (("noframes", "style"), self.startTagNoFramesStyle), + ("noscript", self.startTagNoscript), ("script", self.startTagScript), (("base", "basefont", "bgsound", "command", "link"), self.startTagBaseLinkCommand), @@ -706,7 +649,7 @@ def getPhases(debug): ]) self.startTagHandler.default = self.startTagOther - self. 
endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("head", self.endTagHead), (("br", "html", "body"), self.endTagHtmlBodyBr) ]) @@ -748,18 +691,25 @@ def getPhases(debug): # the abstract Unicode string, and just use the # ContentAttrParser on that, but using UTF-8 allows all chars # to be encoded and as a ASCII-superset works. - data = inputstream.EncodingBytes(attributes["content"].encode("utf-8")) - parser = inputstream.ContentAttrParser(data) + data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) + parser = _inputstream.ContentAttrParser(data) codec = parser.parse() self.parser.tokenizer.stream.changeEncoding(codec) def startTagTitle(self, token): self.parser.parseRCDataRawtext(token, "RCDATA") - def startTagNoScriptNoFramesStyle(self, token): + def startTagNoFramesStyle(self, token): # Need to decide whether to implement the scripting-disabled case self.parser.parseRCDataRawtext(token, "RAWTEXT") + def startTagNoscript(self, token): + if self.parser.scripting: + self.parser.parseRCDataRawtext(token, "RAWTEXT") + else: + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inHeadNoscript"] + def startTagScript(self, token): self.tree.insertElement(token) self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState @@ -785,15 +735,75 @@ def getPhases(debug): def anythingElse(self): self.endTagHead(impliedTagToken("head")) - # XXX If we implement a parser for which scripting is disabled we need to - # implement this phase. 
- # - # class InHeadNoScriptPhase(Phase): + class InHeadNoscriptPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand), + (("head", "noscript"), self.startTagHeadNoscript), + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("noscript", self.endTagNoscript), + ("br", self.endTagBr), + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.parser.parseError("eof-in-head-noscript") + self.anythingElse() + return True + + def processComment(self, token): + return self.parser.phases["inHead"].processComment(token) + + def processCharacters(self, token): + self.parser.parseError("char-in-head-noscript") + self.anythingElse() + return token + + def processSpaceCharacters(self, token): + return self.parser.phases["inHead"].processSpaceCharacters(token) + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBaseLinkCommand(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagHeadNoscript(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagNoscript(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "noscript", "Expected noscript got %s" % node.name + self.parser.phase = self.parser.phases["inHead"] + + def endTagBr(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + 
def anythingElse(self): + # Caller must raise parse error first! + self.endTagNoscript(impliedTagToken("noscript")) + class AfterHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("body", self.startTagBody), ("frameset", self.startTagFrameset), @@ -803,8 +813,8 @@ def getPhases(debug): ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"), - self.endTagHtmlBodyBr)]) + self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), + self.endTagHtmlBodyBr)]) self.endTagHandler.default = self.endTagOther def processEOF(self): @@ -862,18 +872,18 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - # Keep a ref to this for special handling of whitespace in <pre> - self.processSpaceCharactersNonPre = self.processSpaceCharacters + # Set this to the default handler + self.processSpaceCharacters = self.processSpaceCharactersNonPre - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("base", "basefont", "bgsound", "command", "link", "meta", - "noframes", "script", "style", "title"), + "script", "style", "title"), self.startTagProcessInHead), ("body", self.startTagBody), ("frameset", self.startTagFrameset), (("address", "article", "aside", "blockquote", "center", "details", - "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", + "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p", "section", "summary", "ul"), self.startTagCloseP), @@ -899,7 +909,8 @@ def getPhases(debug): ("isindex", self.startTagIsIndex), ("textarea", self.startTagTextarea), ("iframe", self.startTagIFrame), - (("noembed", "noframes", 
"noscript"), self.startTagRawtext), + ("noscript", self.startTagNoscript), + (("noembed", "noframes"), self.startTagRawtext), ("select", self.startTagSelect), (("rp", "rt"), self.startTagRpRt), (("option", "optgroup"), self.startTagOpt), @@ -911,7 +922,7 @@ def getPhases(debug): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("body", self.endTagBody), ("html", self.endTagHtml), (("address", "article", "aside", "blockquote", "button", "center", @@ -930,17 +941,9 @@ def getPhases(debug): self.endTagHandler.default = self.endTagOther def isMatchingFormattingElement(self, node1, node2): - if node1.name != node2.name or node1.namespace != node2.namespace: - return False - elif len(node1.attributes) != len(node2.attributes): - return False - else: - attributes1 = sorted(node1.attributes.items()) - attributes2 = sorted(node2.attributes.items()) - for attr1, attr2 in zip(attributes1, attributes2): - if attr1 != attr2: - return False - return True + return (node1.name == node2.name and + node1.namespace == node2.namespace and + node1.attributes == node2.attributes) # helper def addFormattingElement(self, token): @@ -976,8 +979,8 @@ def getPhases(debug): data = token["data"] self.processSpaceCharacters = self.processSpaceCharactersNonPre if (data.startswith("\n") and - self.tree.openElements[-1].name in ("pre", "listing", "textarea") - and not self.tree.openElements[-1].hasContent()): + self.tree.openElements[-1].name in ("pre", "listing", "textarea") and + not self.tree.openElements[-1].hasContent()): data = data[1:] if data: self.tree.reconstructActiveFormattingElements() @@ -995,7 +998,7 @@ def getPhases(debug): for char in token["data"]])): self.parser.framesetOK = False - def processSpaceCharacters(self, token): + def processSpaceCharactersNonPre(self, token): self.tree.reconstructActiveFormattingElements() self.tree.insertText(token["data"]) @@ -1004,8 +1007,8 @@ def 
getPhases(debug): def startTagBody(self, token): self.parser.parseError("unexpected-start-tag", {"name": "body"}) - if (len(self.tree.openElements) == 1 - or self.tree.openElements[1].name != "body"): + if (len(self.tree.openElements) == 1 or + self.tree.openElements[1].name != "body"): assert self.parser.innerHTML else: self.parser.framesetOK = False @@ -1205,8 +1208,7 @@ def getPhases(debug): attributes["name"] = "isindex" self.processStartTag(impliedTagToken("input", "StartTag", attributes=attributes, - selfClosing= - token["selfClosing"])) + selfClosing=token["selfClosing"])) self.processEndTag(impliedTagToken("label")) self.processStartTag(impliedTagToken("hr", "StartTag")) self.processEndTag(impliedTagToken("form")) @@ -1221,6 +1223,12 @@ def getPhases(debug): self.parser.framesetOK = False self.startTagRawtext(token) + def startTagNoscript(self, token): + if self.parser.scripting: + self.startTagRawtext(token) + else: + self.startTagOther(token) + def startTagRawtext(self, token): """iframe, noembed noframes, noscript(if scripting enabled)""" self.parser.parseRCDataRawtext(token, "RAWTEXT") @@ -1316,7 +1324,7 @@ def getPhases(debug): # Not sure this is the correct name for the parse error self.parser.parseError( "expected-one-end-tag-but-got-another", - {"expectedName": "body", "gotName": node.name}) + {"gotName": "body", "expectedName": node.name}) break self.parser.phase = self.parser.phases["afterBody"] @@ -1584,9 +1592,9 @@ def getPhases(debug): class TextPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([]) + self.startTagHandler = _utils.MethodDispatcher([]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("script", self.endTagScript)]) self.endTagHandler.default = self.endTagOther @@ -1618,7 +1626,7 @@ def getPhases(debug): # 
http://www.whatwg.org/specs/web-apps/current-work/#in-table def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("caption", self.startTagCaption), ("colgroup", self.startTagColgroup), @@ -1632,7 +1640,7 @@ def getPhases(debug): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("table", self.endTagTable), (("body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"), self.endTagIgnore) @@ -1809,14 +1817,14 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"), self.startTagTableElement) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("caption", self.endTagCaption), ("table", self.endTagTable), (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", @@ -1881,13 +1889,13 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("col", self.startTagCol) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("colgroup", self.endTagColgroup), ("col", self.endTagCol) ]) @@ -1915,6 +1923,7 @@ def getPhases(debug): def startTagCol(self, token): self.tree.insertElement(token) self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True def startTagOther(self, token): ignoreEndTag 
= self.ignoreEndTagColgroup() @@ -1944,7 +1953,7 @@ def getPhases(debug): # http://www.whatwg.org/specs/web-apps/current-work/#in-table0 def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("tr", self.startTagTr), (("td", "th"), self.startTagTableCell), @@ -1953,7 +1962,7 @@ def getPhases(debug): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ (("tbody", "tfoot", "thead"), self.endTagTableRowGroup), ("table", self.endTagTable), (("body", "caption", "col", "colgroup", "html", "td", "th", @@ -2042,7 +2051,7 @@ def getPhases(debug): # http://www.whatwg.org/specs/web-apps/current-work/#in-row def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("td", "th"), self.startTagTableCell), (("caption", "col", "colgroup", "tbody", "tfoot", "thead", @@ -2050,7 +2059,7 @@ def getPhases(debug): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("tr", self.endTagTr), ("table", self.endTagTable), (("tbody", "tfoot", "thead"), self.endTagTableRowGroup), @@ -2131,14 +2140,14 @@ def getPhases(debug): # http://www.whatwg.org/specs/web-apps/current-work/#in-cell def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"), self.startTagTableOther) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = 
_utils.MethodDispatcher([ (("td", "th"), self.endTagTableCell), (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore), (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply) @@ -2207,7 +2216,7 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("option", self.startTagOption), ("optgroup", self.startTagOptgroup), @@ -2217,7 +2226,7 @@ def getPhases(debug): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("option", self.endTagOption), ("optgroup", self.endTagOptgroup), ("select", self.endTagSelect) @@ -2307,13 +2316,13 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), self.startTagTable) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), self.endTagTable) ]) @@ -2434,7 +2443,7 @@ def getPhases(debug): def processEndTag(self, token): nodeIndex = len(self.tree.openElements) - 1 node = self.tree.openElements[-1] - if node.name != token["name"]: + if node.name.translate(asciiUpper2Lower) != token["name"]: self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) while True: @@ -2461,12 +2470,12 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = 
utils.MethodDispatcher([("html", self.endTagHtml)]) + self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)]) self.endTagHandler.default = self.endTagOther def processEOF(self): @@ -2509,7 +2518,7 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("frameset", self.startTagFrameset), ("frame", self.startTagFrame), @@ -2517,7 +2526,7 @@ def getPhases(debug): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("frameset", self.endTagFrameset) ]) self.endTagHandler.default = self.endTagOther @@ -2553,7 +2562,7 @@ def getPhases(debug): self.tree.openElements.pop() if (not self.parser.innerHTML and self.tree.openElements[-1].name != "frameset"): - # If we're not in innerHTML mode and the the current node is not a + # If we're not in innerHTML mode and the current node is not a # "frameset" element (anymore) then switch. 
self.parser.phase = self.parser.phases["afterFrameset"] @@ -2566,13 +2575,13 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("noframes", self.startTagNoframes) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("html", self.endTagHtml) ]) self.endTagHandler.default = self.endTagOther @@ -2602,7 +2611,7 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml) ]) self.startTagHandler.default = self.startTagOther @@ -2640,7 +2649,7 @@ def getPhases(debug): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("noframes", self.startTagNoFrames) ]) @@ -2671,13 +2680,14 @@ def getPhases(debug): def processEndTag(self, token): self.parser.parseError("expected-eof-but-got-end-tag", {"name": token["name"]}) + # pylint:enable=unused-argument return { "initial": InitialPhase, "beforeHtml": BeforeHtmlPhase, "beforeHead": BeforeHeadPhase, "inHead": InHeadPhase, - # XXX "inHeadNoscript": InHeadNoScriptPhase, + "inHeadNoscript": InHeadNoscriptPhase, "afterHead": AfterHeadPhase, "inBody": InBodyPhase, "text": TextPhase, @@ -2700,6 +2710,16 @@ def getPhases(debug): } +def adjust_attributes(token, replacements): + if PY3 or _utils.PY27: + needs_adjustment = viewkeys(token['data']) & viewkeys(replacements) + else: + needs_adjustment = frozenset(token['data']) & frozenset(replacements) + if needs_adjustment: + token['data'] = OrderedDict((replacements.get(k, k), v) + for k, v in token['data'].items()) + + 
def impliedTagToken(name, type="EndTag", attributes=None, selfClosing=False): if attributes is None: diff --git a/lib/html5lib/sanitizer.py b/lib/html5lib/sanitizer.py deleted file mode 100644 index 71dc5212c1d1f61feda23a087648844b278940e7..0000000000000000000000000000000000000000 --- a/lib/html5lib/sanitizer.py +++ /dev/null @@ -1,271 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import re -from xml.sax.saxutils import escape, unescape - -from .tokenizer import HTMLTokenizer -from .constants import tokenTypes - - -class HTMLSanitizerMixin(object): - """ sanitization of XHTML+MathML+SVG and of inline style attributes.""" - - acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', - 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', - 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', - 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', - 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', - 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1', - 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', - 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', - 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', - 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', - 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', - 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', - 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video'] - - mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi', - 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom', - 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', - 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', - 'munderover', 'none'] - - svg_elements = ['a', 'animate', 'animateColor', 'animateMotion', - 'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse', - 'font-face', 
'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', - 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', - 'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect', - 'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use'] - - acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', - 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis', - 'background', 'balance', 'bgcolor', 'bgproperties', 'border', - 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding', - 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', - 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', - 'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords', - 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', - 'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', - 'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers', - 'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace', - 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing', - 'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend', - 'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method', - 'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open', - 'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload', - 'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min', - 'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan', - 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start', - 'step', 'style', 'summary', 'suppress', 'tabindex', 'target', - 'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap', - 'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml', - 'width', 'wrap', 'xml:lang'] - - mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign', - 'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth', - 'display', 
'displaystyle', 'equalcolumns', 'equalrows', 'fence', - 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace', - 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize', - 'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines', - 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection', - 'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show', - 'xlink:type', 'xmlns', 'xmlns:xlink'] - - svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic', - 'arabic-form', 'ascent', 'attributeName', 'attributeType', - 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height', - 'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx', - 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill', - 'fill-opacity', 'fill-rule', 'font-family', 'font-size', - 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from', - 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging', - 'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k', - 'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end', - 'marker-mid', 'marker-start', 'markerHeight', 'markerUnits', - 'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset', - 'opacity', 'orient', 'origin', 'overline-position', - 'overline-thickness', 'panose-1', 'path', 'pathLength', 'points', - 'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount', - 'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart', - 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color', - 'stop-opacity', 'strikethrough-position', 'strikethrough-thickness', - 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', - 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity', - 'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to', - 'transform', 'type', 'u1', 'u2', 'underline-position', - 'underline-thickness', 'unicode', 'unicode-range', 'units-per-em', - 'values', 'version', 'viewBox', 
'visibility', 'width', 'widths', 'x', - 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole', - 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type', - 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', - 'y1', 'y2', 'zoomAndPan'] - - attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', - 'xlink:href', 'xml:base'] - - svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill', - 'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end', - 'mask', 'stroke'] - - svg_allow_local_href = ['altGlyph', 'animate', 'animateColor', - 'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter', - 'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref', - 'set', 'use'] - - acceptable_css_properties = ['azimuth', 'background-color', - 'border-bottom-color', 'border-collapse', 'border-color', - 'border-left-color', 'border-right-color', 'border-top-color', 'clear', - 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', - 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', - 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', - 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', - 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', - 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', - 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', - 'white-space', 'width'] - - acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue', - 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', - 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', - 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', - 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', - 'transparent', 'underline', 'white', 'yellow'] - - acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule', - 'stroke', 'stroke-width', 'stroke-linecap', 
'stroke-linejoin', - 'stroke-opacity'] - - acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc', - 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal', - 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag', - 'ssh', 'sftp', 'rtsp', 'afs'] - - # subclasses may define their own versions of these constants - allowed_elements = acceptable_elements + mathml_elements + svg_elements - allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes - allowed_css_properties = acceptable_css_properties - allowed_css_keywords = acceptable_css_keywords - allowed_svg_properties = acceptable_svg_properties - allowed_protocols = acceptable_protocols - - # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and - # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style - # attributes are parsed, and a restricted set, # specified by - # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through. - # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified - # in ALLOWED_PROTOCOLS are allowed. 
- # - # sanitize_html('<script> do_nasty_stuff() </script>') - # => <script> do_nasty_stuff() </script> - # sanitize_html('<a href="javascript: sucker();">Click here for $100</a>') - # => <a>Click here for $100</a> - def sanitize_token(self, token): - - # accommodate filters which use token_type differently - token_type = token["type"] - if token_type in list(tokenTypes.keys()): - token_type = tokenTypes[token_type] - - if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], - tokenTypes["EmptyTag"]): - if token["name"] in self.allowed_elements: - return self.allowed_token(token, token_type) - else: - return self.disallowed_token(token, token_type) - elif token_type == tokenTypes["Comment"]: - pass - else: - return token - - def allowed_token(self, token, token_type): - if "data" in token: - attrs = dict([(name, val) for name, val in - token["data"][::-1] - if name in self.allowed_attributes]) - for attr in self.attr_val_is_uri: - if attr not in attrs: - continue - val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', - unescape(attrs[attr])).lower() - # remove replacement characters from unescaped characters - val_unescaped = val_unescaped.replace("\ufffd", "") - if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and - (val_unescaped.split(':')[0] not in - self.allowed_protocols)): - del attrs[attr] - for attr in self.svg_attr_val_allows_ref: - if attr in attrs: - attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', - ' ', - unescape(attrs[attr])) - if (token["name"] in self.svg_allow_local_href and - 'xlink:href' in attrs and re.search('^\s*[^#\s].*', - attrs['xlink:href'])): - del attrs['xlink:href'] - if 'style' in attrs: - attrs['style'] = self.sanitize_css(attrs['style']) - token["data"] = [[name, val] for name, val in list(attrs.items())] - return token - - def disallowed_token(self, token, token_type): - if token_type == tokenTypes["EndTag"]: - token["data"] = "</%s>" % token["name"] - elif token["data"]: - attrs = ''.join([' %s="%s"' % (k, 
escape(v)) for k, v in token["data"]]) - token["data"] = "<%s%s>" % (token["name"], attrs) - else: - token["data"] = "<%s>" % token["name"] - if token.get("selfClosing"): - token["data"] = token["data"][:-1] + "/>" - - if token["type"] in list(tokenTypes.keys()): - token["type"] = "Characters" - else: - token["type"] = tokenTypes["Characters"] - - del token["name"] - return token - - def sanitize_css(self, style): - # disallow urls - style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) - - # gauntlet - if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): - return '' - if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): - return '' - - clean = [] - for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style): - if not value: - continue - if prop.lower() in self.allowed_css_properties: - clean.append(prop + ': ' + value + ';') - elif prop.split('-')[0].lower() in ['background', 'border', 'margin', - 'padding']: - for keyword in value.split(): - if not keyword in self.acceptable_css_keywords and \ - not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): - break - else: - clean.append(prop + ': ' + value + ';') - elif prop.lower() in self.allowed_svg_properties: - clean.append(prop + ': ' + value + ';') - - return ' '.join(clean) - - -class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin): - def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True, - lowercaseElementName=False, lowercaseAttrName=False, parser=None): - # Change case matching defaults as we only output lowercase html anyway - # This solution doesn't seem ideal... 
- HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet, - lowercaseElementName, lowercaseAttrName, parser=parser) - - def __iter__(self): - for token in HTMLTokenizer.__iter__(self): - token = self.sanitize_token(token) - if token: - yield token diff --git a/lib/html5lib/serializer/htmlserializer.py b/lib/html5lib/serializer.py similarity index 64% rename from lib/html5lib/serializer/htmlserializer.py rename to lib/html5lib/serializer.py index 412a5a2209e0a21d18f2a259ad1f2c1ed0cc658d..103dd206ae4677715642a9aca0ef0e25ebe0009c 100644 --- a/lib/html5lib/serializer/htmlserializer.py +++ b/lib/html5lib/serializer.py @@ -1,82 +1,87 @@ from __future__ import absolute_import, division, unicode_literals from six import text_type -import gettext -_ = gettext.gettext +import re -try: - from functools import reduce -except ImportError: - pass +from codecs import register_error, xmlcharrefreplace_errors -from ..constants import voidElements, booleanAttributes, spaceCharacters -from ..constants import rcdataElements, entities, xmlEntities -from .. import utils +from .constants import voidElements, booleanAttributes, spaceCharacters +from .constants import rcdataElements, entities, xmlEntities +from . import treewalkers, _utils from xml.sax.saxutils import escape -spaceCharacters = "".join(spaceCharacters) - -try: - from codecs import register_error, xmlcharrefreplace_errors -except ImportError: - unicode_encode_errors = "strict" -else: - unicode_encode_errors = "htmlentityreplace" - - encode_entity_map = {} - is_ucs4 = len("\U0010FFFF") == 1 - for k, v in list(entities.items()): - # skip multi-character entities - if ((is_ucs4 and len(v) > 1) or - (not is_ucs4 and len(v) > 2)): - continue - if v != "&": - if len(v) == 2: - v = utils.surrogatePairToCodepoint(v) - else: - v = ord(v) - if not v in encode_entity_map or k.islower(): - # prefer < over < and similarly for &, >, etc. 
- encode_entity_map[v] = k - - def htmlentityreplace_errors(exc): - if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): - res = [] - codepoints = [] - skip = False - for i, c in enumerate(exc.object[exc.start:exc.end]): - if skip: - skip = False - continue - index = i + exc.start - if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): - codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2]) - skip = True - else: - codepoint = ord(c) - codepoints.append(codepoint) - for cp in codepoints: - e = encode_entity_map.get(cp) - if e: - res.append("&") - res.append(e) - if not e.endswith(";"): - res.append(";") - else: - res.append("&#x%s;" % (hex(cp)[2:])) - return ("".join(res), exc.end) +_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`" +_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]") +_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars + + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n" + "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15" + "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000" + "\u2001\u2002\u2003\u2004\u2005\u2006\u2007" + "\u2008\u2009\u200a\u2028\u2029\u202f\u205f" + "\u3000]") + + +_encode_entity_map = {} +_is_ucs4 = len("\U0010FFFF") == 1 +for k, v in list(entities.items()): + # skip multi-character entities + if ((_is_ucs4 and len(v) > 1) or + (not _is_ucs4 and len(v) > 2)): + continue + if v != "&": + if len(v) == 2: + v = _utils.surrogatePairToCodepoint(v) else: - return xmlcharrefreplace_errors(exc) + v = ord(v) + if v not in _encode_entity_map or k.islower(): + # prefer < over < and similarly for &, >, etc. 
+ _encode_entity_map[v] = k + + +def htmlentityreplace_errors(exc): + if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): + res = [] + codepoints = [] + skip = False + for i, c in enumerate(exc.object[exc.start:exc.end]): + if skip: + skip = False + continue + index = i + exc.start + if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): + codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2]) + skip = True + else: + codepoint = ord(c) + codepoints.append(codepoint) + for cp in codepoints: + e = _encode_entity_map.get(cp) + if e: + res.append("&") + res.append(e) + if not e.endswith(";"): + res.append(";") + else: + res.append("&#x%s;" % (hex(cp)[2:])) + return ("".join(res), exc.end) + else: + return xmlcharrefreplace_errors(exc) - register_error(unicode_encode_errors, htmlentityreplace_errors) +register_error("htmlentityreplace", htmlentityreplace_errors) - del register_error + +def serialize(input, tree="etree", encoding=None, **serializer_opts): + # XXX: Should we cache this? + walker = treewalkers.getTreeWalker(tree) + s = HTMLSerializer(**serializer_opts) + return s.render(walker(input), encoding) class HTMLSerializer(object): # attribute quoting options - quote_attr_values = False + quote_attr_values = "legacy" # be secure by default quote_char = '"' use_best_quote_char = True @@ -112,9 +117,9 @@ class HTMLSerializer(object): inject_meta_charset=True|False Whether it insert a meta element to define the character set of the document. - quote_attr_values=True|False + quote_attr_values="legacy"|"spec"|"always" Whether to quote attribute values that don't require quoting - per HTML5 parsing rules. + per legacy browser behaviour, when required by the standard, or always. quote_char=u'"'|u"'" Use given quote character for attribute quoting. Default is to use double quote unless attribute value contains a double quote, @@ -150,6 +155,9 @@ class HTMLSerializer(object): .. 
_html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation """ + unexpected_args = frozenset(kwargs) - frozenset(self.options) + if len(unexpected_args) > 0: + raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args))) if 'quote_char' in kwargs: self.use_best_quote_char = False for attr in self.options: @@ -160,7 +168,7 @@ class HTMLSerializer(object): def encode(self, string): assert(isinstance(string, text_type)) if self.encoding: - return string.encode(self.encoding, unicode_encode_errors) + return string.encode(self.encoding, "htmlentityreplace") else: return string @@ -172,28 +180,30 @@ class HTMLSerializer(object): return string def serialize(self, treewalker, encoding=None): + # pylint:disable=too-many-nested-blocks self.encoding = encoding in_cdata = False self.errors = [] if encoding and self.inject_meta_charset: - from ..filters.inject_meta_charset import Filter + from .filters.inject_meta_charset import Filter treewalker = Filter(treewalker, encoding) + # Alphabetical attributes is here under the assumption that none of + # the later filters add or change order of attributes; it needs to be + # before the sanitizer so escaped elements come out correctly + if self.alphabetical_attributes: + from .filters.alphabeticalattributes import Filter + treewalker = Filter(treewalker) # WhitespaceFilter should be used before OptionalTagFilter # for maximum efficiently of this latter filter if self.strip_whitespace: - from ..filters.whitespace import Filter + from .filters.whitespace import Filter treewalker = Filter(treewalker) if self.sanitize: - from ..filters.sanitizer import Filter + from .filters.sanitizer import Filter treewalker = Filter(treewalker) if self.omit_optional_tags: - from ..filters.optionaltags import Filter - treewalker = Filter(treewalker) - # Alphabetical attributes must be last, as other filters - # could add attributes and alter the order - if self.alphabetical_attributes: - 
from ..filters.alphabeticalattributes import Filter + from .filters.optionaltags import Filter treewalker = Filter(treewalker) for token in treewalker: @@ -208,7 +218,7 @@ class HTMLSerializer(object): if token["systemId"]: if token["systemId"].find('"') >= 0: if token["systemId"].find("'") >= 0: - self.serializeError(_("System identifer contains both single and double quote characters")) + self.serializeError("System identifer contains both single and double quote characters") quote_char = "'" else: quote_char = '"' @@ -220,7 +230,7 @@ class HTMLSerializer(object): elif type in ("Characters", "SpaceCharacters"): if type == "SpaceCharacters" or in_cdata: if in_cdata and token["data"].find("</") >= 0: - self.serializeError(_("Unexpected </ in CDATA")) + self.serializeError("Unexpected </ in CDATA") yield self.encode(token["data"]) else: yield self.encode(escape(token["data"])) @@ -231,8 +241,8 @@ class HTMLSerializer(object): if name in rcdataElements and not self.escape_rcdata: in_cdata = True elif in_cdata: - self.serializeError(_("Unexpected child element of a CDATA element")) - for (attr_namespace, attr_name), attr_value in token["data"].items(): + self.serializeError("Unexpected child element of a CDATA element") + for (_, attr_name), attr_value in token["data"].items(): # TODO: Add namespace support here k = attr_name v = attr_value @@ -240,14 +250,18 @@ class HTMLSerializer(object): yield self.encodeStrict(k) if not self.minimize_boolean_attributes or \ - (k not in booleanAttributes.get(name, tuple()) - and k not in booleanAttributes.get("", tuple())): + (k not in booleanAttributes.get(name, tuple()) and + k not in booleanAttributes.get("", tuple())): yield self.encodeStrict("=") - if self.quote_attr_values or not v: + if self.quote_attr_values == "always" or len(v) == 0: quote_attr = True + elif self.quote_attr_values == "spec": + quote_attr = _quoteAttributeSpec.search(v) is not None + elif self.quote_attr_values == "legacy": + quote_attr = 
_quoteAttributeLegacy.search(v) is not None else: - quote_attr = reduce(lambda x, y: x or (y in v), - spaceCharacters + ">\"'=", False) + raise ValueError("quote_attr_values must be one of: " + "'always', 'spec', or 'legacy'") v = v.replace("&", "&") if self.escape_lt_in_attrs: v = v.replace("<", "<") @@ -279,20 +293,20 @@ class HTMLSerializer(object): if name in rcdataElements: in_cdata = False elif in_cdata: - self.serializeError(_("Unexpected child element of a CDATA element")) + self.serializeError("Unexpected child element of a CDATA element") yield self.encodeStrict("</%s>" % name) elif type == "Comment": data = token["data"] if data.find("--") >= 0: - self.serializeError(_("Comment contains --")) + self.serializeError("Comment contains --") yield self.encodeStrict("<!--%s-->" % token["data"]) elif type == "Entity": name = token["name"] key = name + ";" - if not key in entities: - self.serializeError(_("Entity %s not recognized" % name)) + if key not in entities: + self.serializeError("Entity %s not recognized" % name) if self.resolve_entities and key not in xmlEntities: data = entities[key] else: @@ -315,6 +329,6 @@ class HTMLSerializer(object): raise SerializeError -def SerializeError(Exception): +class SerializeError(Exception): """Error in serialized tree""" pass diff --git a/lib/html5lib/serializer/__init__.py b/lib/html5lib/serializer/__init__.py deleted file mode 100644 index 8380839a6d3e130e7ddd0a1e59f4d046fa824e4d..0000000000000000000000000000000000000000 --- a/lib/html5lib/serializer/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from .. import treewalkers - -from .htmlserializer import HTMLSerializer - - -def serialize(input, tree="etree", format="html", encoding=None, - **serializer_opts): - # XXX: Should we cache this? 
- walker = treewalkers.getTreeWalker(tree) - if format == "html": - s = HTMLSerializer(**serializer_opts) - else: - raise ValueError("type must be html") - return s.render(walker(input), encoding) diff --git a/lib/html5lib/treeadapters/__init__.py b/lib/html5lib/treeadapters/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4f97846602bae0016a1894f689abcc583b2b71c3 100644 --- a/lib/html5lib/treeadapters/__init__.py +++ b/lib/html5lib/treeadapters/__init__.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import sax + +__all__ = ["sax"] + +try: + from . import genshi # noqa +except ImportError: + pass +else: + __all__.append("genshi") diff --git a/lib/html5lib/treeadapters/genshi.py b/lib/html5lib/treeadapters/genshi.py new file mode 100644 index 0000000000000000000000000000000000000000..04e316df5e3763f345566e28a4e494fd156b567d --- /dev/null +++ b/lib/html5lib/treeadapters/genshi.py @@ -0,0 +1,47 @@ +from __future__ import absolute_import, division, unicode_literals + +from genshi.core import QName, Attrs +from genshi.core import START, END, TEXT, COMMENT, DOCTYPE + + +def to_genshi(walker): + text = [] + for token in walker: + type = token["type"] + if type in ("Characters", "SpaceCharacters"): + text.append(token["data"]) + elif text: + yield TEXT, "".join(text), (None, -1, -1) + text = [] + + if type in ("StartTag", "EmptyTag"): + if token["namespace"]: + name = "{%s}%s" % (token["namespace"], token["name"]) + else: + name = token["name"] + attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value) + for attr, value in token["data"].items()]) + yield (START, (QName(name), attrs), (None, -1, -1)) + if type == "EmptyTag": + type = "EndTag" + + if type == "EndTag": + if token["namespace"]: + name = "{%s}%s" % (token["namespace"], token["name"]) + else: + name = token["name"] + + yield END, QName(name), (None, -1, -1) + + elif type == "Comment": + yield COMMENT, token["data"], (None, -1, 
-1) + + elif type == "Doctype": + yield DOCTYPE, (token["name"], token["publicId"], + token["systemId"]), (None, -1, -1) + + else: + pass # FIXME: What to do? + + if text: + yield TEXT, "".join(text), (None, -1, -1) diff --git a/lib/html5lib/treebuilders/__init__.py b/lib/html5lib/treebuilders/__init__.py index 6a6b2a4c45c04247663bc9155d09c12cd3b346a0..e23288474fc6445e48a0be0e2a554416af5431b6 100644 --- a/lib/html5lib/treebuilders/__init__.py +++ b/lib/html5lib/treebuilders/__init__.py @@ -28,7 +28,7 @@ to the format used in the unittests from __future__ import absolute_import, division, unicode_literals -from ..utils import default_etree +from .._utils import default_etree treeBuilderCache = {} diff --git a/lib/html5lib/treebuilders/_base.py b/lib/html5lib/treebuilders/base.py similarity index 97% rename from lib/html5lib/treebuilders/_base.py rename to lib/html5lib/treebuilders/base.py index 8b97cc11a21a5cd41c7a1ef836a5a2ac9baf873d..a4b2792a0e6080109218c0df6834f5b3652b16a9 100644 --- a/lib/html5lib/treebuilders/_base.py +++ b/lib/html5lib/treebuilders/base.py @@ -126,6 +126,7 @@ class TreeBuilder(object): commentClass - the class to use for comments doctypeClass - the class to use for doctypes """ + # pylint:disable=not-callable # Document class documentClass = None @@ -166,12 +167,17 @@ class TreeBuilder(object): # If we pass a node in we match that. 
if we pass a string # match any node with that name exactNode = hasattr(target, "nameTuple") + if not exactNode: + if isinstance(target, text_type): + target = (namespaces["html"], target) + assert isinstance(target, tuple) listElements, invert = listElementsMap[variant] for node in reversed(self.openElements): - if (node.name == target and not exactNode or - node == target and exactNode): + if exactNode and node == target: + return True + elif not exactNode and node.nameTuple == target: return True elif (invert ^ (node.nameTuple in listElements)): return False @@ -353,8 +359,8 @@ class TreeBuilder(object): def generateImpliedEndTags(self, exclude=None): name = self.openElements[-1].name # XXX td, th and tr are not actually needed - if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) - and name != exclude): + if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and + name != exclude): self.openElements.pop() # XXX This is not entirely what the specification says. We should # investigate it more closely. diff --git a/lib/html5lib/treebuilders/dom.py b/lib/html5lib/treebuilders/dom.py index 61e5ed79edc4d0d2db3a37da69687d258bec96ab..dcfac220bfaf9906b1f57ee747baff2a627cfa58 100644 --- a/lib/html5lib/treebuilders/dom.py +++ b/lib/html5lib/treebuilders/dom.py @@ -1,54 +1,62 @@ from __future__ import absolute_import, division, unicode_literals +from collections import MutableMapping from xml.dom import minidom, Node import weakref -from . import _base +from . import base from .. 
import constants from ..constants import namespaces -from ..utils import moduleFactoryFactory +from .._utils import moduleFactoryFactory def getDomBuilder(DomImplementation): Dom = DomImplementation - class AttrList(object): + class AttrList(MutableMapping): def __init__(self, element): self.element = element def __iter__(self): - return list(self.element.attributes.items()).__iter__() + return iter(self.element.attributes.keys()) def __setitem__(self, name, value): - self.element.setAttribute(name, value) + if isinstance(name, tuple): + raise NotImplementedError + else: + attr = self.element.ownerDocument.createAttribute(name) + attr.value = value + self.element.attributes[name] = attr def __len__(self): - return len(list(self.element.attributes.items())) + return len(self.element.attributes) def items(self): - return [(item[0], item[1]) for item in - list(self.element.attributes.items())] + return list(self.element.attributes.items()) - def keys(self): - return list(self.element.attributes.keys()) + def values(self): + return list(self.element.attributes.values()) def __getitem__(self, name): - return self.element.getAttribute(name) + if isinstance(name, tuple): + raise NotImplementedError + else: + return self.element.attributes[name].value - def __contains__(self, name): + def __delitem__(self, name): if isinstance(name, tuple): raise NotImplementedError else: - return self.element.hasAttribute(name) + del self.element.attributes[name] - class NodeBuilder(_base.Node): + class NodeBuilder(base.Node): def __init__(self, element): - _base.Node.__init__(self, element.nodeName) + base.Node.__init__(self, element.nodeName) self.element = element - namespace = property(lambda self: hasattr(self.element, "namespaceURI") - and self.element.namespaceURI or None) + namespace = property(lambda self: hasattr(self.element, "namespaceURI") and + self.element.namespaceURI or None) def appendChild(self, node): node.parent = self @@ -109,7 +117,7 @@ def 
getDomBuilder(DomImplementation): nameTuple = property(getNameTuple) - class TreeBuilder(_base.TreeBuilder): + class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable def documentClass(self): self.dom = Dom.getDOMImplementation().createDocument(None, None, None) return weakref.proxy(self) @@ -149,16 +157,17 @@ def getDomBuilder(DomImplementation): return self.dom def getFragment(self): - return _base.TreeBuilder.getFragment(self).element + return base.TreeBuilder.getFragment(self).element def insertText(self, data, parent=None): data = data if parent != self: - _base.TreeBuilder.insertText(self, data, parent) + base.TreeBuilder.insertText(self, data, parent) else: # HACK: allow text nodes as children of the document node if hasattr(self.dom, '_child_node_types'): - if not Node.TEXT_NODE in self.dom._child_node_types: + # pylint:disable=protected-access + if Node.TEXT_NODE not in self.dom._child_node_types: self.dom._child_node_types = list(self.dom._child_node_types) self.dom._child_node_types.append(Node.TEXT_NODE) self.dom.appendChild(self.dom.createTextNode(data)) diff --git a/lib/html5lib/treebuilders/etree.py b/lib/html5lib/treebuilders/etree.py index 2c8ed19f8fe83c9fc866a9b48a06cd4e0fd46889..cb1d4aef503692d57516bb8f5426a872fe89287d 100644 --- a/lib/html5lib/treebuilders/etree.py +++ b/lib/html5lib/treebuilders/etree.py @@ -1,13 +1,15 @@ from __future__ import absolute_import, division, unicode_literals +# pylint:disable=protected-access + from six import text_type import re -from . import _base -from .. import ihatexml +from . import base +from .. import _ihatexml from .. 
import constants from ..constants import namespaces -from ..utils import moduleFactoryFactory +from .._utils import moduleFactoryFactory tag_regexp = re.compile("{([^}]*)}(.*)") @@ -16,7 +18,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): ElementTree = ElementTreeImplementation ElementTreeCommentType = ElementTree.Comment("asd").tag - class Element(_base.Node): + class Element(base.Node): def __init__(self, name, namespace=None): self._name = name self._namespace = namespace @@ -98,6 +100,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): node.parent = self def removeChild(self, node): + self._childNodes.remove(node) self._element.remove(node._element) node.parent = None @@ -139,7 +142,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): if self._element.text is not None: newParent._element.text += self._element.text self._element.text = "" - _base.Node.reparentChildren(self, newParent) + base.Node.reparentChildren(self, newParent) class Comment(Element): def __init__(self, data): @@ -253,10 +256,10 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): return "\n".join(rv) - def tostring(element): + def tostring(element): # pylint:disable=unused-variable """Serialize an element and its child nodes to a string""" rv = [] - filter = ihatexml.InfosetFilter() + filter = _ihatexml.InfosetFilter() def serializeElement(element): if isinstance(element, ElementTree.ElementTree): @@ -307,7 +310,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): return "".join(rv) - class TreeBuilder(_base.TreeBuilder): + class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable documentClass = Document doctypeClass = DocumentType elementClass = Element @@ -329,7 +332,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): return self.document._element.find("html") def getFragment(self): - return _base.TreeBuilder.getFragment(self)._element + return 
base.TreeBuilder.getFragment(self)._element return locals() diff --git a/lib/html5lib/treebuilders/etree_lxml.py b/lib/html5lib/treebuilders/etree_lxml.py index 35d08efaa6145719f8daad533d03df6188e2d2e4..908820c08104c24a9f9c7851252f6af8d8c25355 100644 --- a/lib/html5lib/treebuilders/etree_lxml.py +++ b/lib/html5lib/treebuilders/etree_lxml.py @@ -10,16 +10,17 @@ When any of these things occur, we emit a DataLossWarning """ from __future__ import absolute_import, division, unicode_literals +# pylint:disable=protected-access import warnings import re import sys -from . import _base +from . import base from ..constants import DataLossWarning from .. import constants from . import etree as etree_builders -from .. import ihatexml +from .. import _ihatexml import lxml.etree as etree @@ -53,8 +54,7 @@ class Document(object): def testSerializer(element): rv = [] - finalText = None - infosetFilter = ihatexml.InfosetFilter() + infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True) def serializeElement(element, indent=0): if not hasattr(element, "tag"): @@ -79,7 +79,7 @@ def testSerializer(element): next_element = next_element.getnext() elif isinstance(element, str) or isinstance(element, bytes): # Text in a fragment - assert isinstance(element, str) or sys.version_info.major == 2 + assert isinstance(element, str) or sys.version_info[0] == 2 rv.append("|%s\"%s\"" % (' ' * indent, element)) else: # Fragment case @@ -128,16 +128,12 @@ def testSerializer(element): rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) serializeElement(element, 0) - if finalText is not None: - rv.append("|%s\"%s\"" % (' ' * 2, finalText)) - return "\n".join(rv) def tostring(element): """Serialize an element and its child nodes to a string""" rv = [] - finalText = None def serializeElement(element): if not hasattr(element, "tag"): @@ -173,13 +169,10 @@ def tostring(element): serializeElement(element) - if finalText is not None: - rv.append("%s\"" % (' ' * 2, finalText)) - 
return "".join(rv) -class TreeBuilder(_base.TreeBuilder): +class TreeBuilder(base.TreeBuilder): documentClass = Document doctypeClass = DocumentType elementClass = None @@ -189,13 +182,15 @@ class TreeBuilder(_base.TreeBuilder): def __init__(self, namespaceHTMLElements, fullTree=False): builder = etree_builders.getETreeModule(etree, fullTree=fullTree) - infosetFilter = self.infosetFilter = ihatexml.InfosetFilter() + infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True) self.namespaceHTMLElements = namespaceHTMLElements class Attributes(dict): - def __init__(self, element, value={}): + def __init__(self, element, value=None): + if value is None: + value = {} self._element = element - dict.__init__(self, value) + dict.__init__(self, value) # pylint:disable=non-parent-init-called for key, value in self.items(): if isinstance(key, tuple): name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) @@ -257,12 +252,12 @@ class TreeBuilder(_base.TreeBuilder): data = property(_getData, _setData) self.elementClass = Element - self.commentClass = builder.Comment + self.commentClass = Comment # self.fragmentClass = builder.DocumentFragment - _base.TreeBuilder.__init__(self, namespaceHTMLElements) + base.TreeBuilder.__init__(self, namespaceHTMLElements) def reset(self): - _base.TreeBuilder.reset(self) + base.TreeBuilder.reset(self) self.insertComment = self.insertCommentInitial self.initial_comments = [] self.doctype = None @@ -303,19 +298,21 @@ class TreeBuilder(_base.TreeBuilder): self.doctype = doctype def insertCommentInitial(self, data, parent=None): + assert parent is None or parent is self.document + assert self.document._elementTree is None self.initial_comments.append(data) def insertCommentMain(self, data, parent=None): if (parent == self.document and self.document._elementTree.getroot()[-1].tag == comment_type): - warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning) + 
warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning) super(TreeBuilder, self).insertComment(data, parent) def insertRoot(self, token): """Create the document root""" # Because of the way libxml2 works, it doesn't seem to be possible to # alter information like the doctype after the tree has been parsed. - # Therefore we need to use the built-in parser to create our iniial + # Therefore we need to use the built-in parser to create our initial # tree, after which we can add elements like normal docStr = "" if self.doctype: @@ -344,7 +341,8 @@ class TreeBuilder(_base.TreeBuilder): # Append the initial comments: for comment_token in self.initial_comments: - root.addprevious(etree.Comment(comment_token["data"])) + comment = self.commentClass(comment_token["data"]) + root.addprevious(comment._element) # Create the root document and add the ElementTree to it self.document = self.documentClass() diff --git a/lib/html5lib/treewalkers/__init__.py b/lib/html5lib/treewalkers/__init__.py index 18124e75f3b9d6e04c034af2ec36243d14400734..9e19a5595af928738fa0e1ab2bc4513e02ebc026 100644 --- a/lib/html5lib/treewalkers/__init__.py +++ b/lib/html5lib/treewalkers/__init__.py @@ -10,9 +10,10 @@ returning an iterator generating tokens. from __future__ import absolute_import, division, unicode_literals -import sys +from .. import constants +from .._utils import default_etree -from ..utils import default_etree +__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"] treeWalkerCache = {} @@ -20,34 +21,33 @@ treeWalkerCache = {} def getTreeWalker(treeType, implementation=None, **kwargs): """Get a TreeWalker class for various types of tree with built-in support - treeType - the name of the tree type required (case-insensitive). Supported - values are: + Args: + treeType (str): the name of the tree type required (case-insensitive). 
+ Supported values are: - "dom" - The xml.dom.minidom DOM implementation - "pulldom" - The xml.dom.pulldom event stream - "etree" - A generic walker for tree implementations exposing an - elementtree-like interface (known to work with - ElementTree, cElementTree and lxml.etree). - "lxml" - Optimized walker for lxml.etree - "genshi" - a Genshi stream + - "dom": The xml.dom.minidom DOM implementation + - "etree": A generic walker for tree implementations exposing an + elementtree-like interface (known to work with + ElementTree, cElementTree and lxml.etree). + - "lxml": Optimized walker for lxml.etree + - "genshi": a Genshi stream - implementation - (Currently applies to the "etree" tree type only). A module - implementing the tree type e.g. xml.etree.ElementTree or - cElementTree.""" + Implementation: A module implementing the tree type e.g. + xml.etree.ElementTree or cElementTree (Currently applies to the + "etree" tree type only). + """ treeType = treeType.lower() if treeType not in treeWalkerCache: - if treeType in ("dom", "pulldom"): - name = "%s.%s" % (__name__, treeType) - __import__(name) - mod = sys.modules[name] - treeWalkerCache[treeType] = mod.TreeWalker + if treeType == "dom": + from . import dom + treeWalkerCache[treeType] = dom.TreeWalker elif treeType == "genshi": - from . import genshistream - treeWalkerCache[treeType] = genshistream.TreeWalker + from . import genshi + treeWalkerCache[treeType] = genshi.TreeWalker elif treeType == "lxml": - from . import lxmletree - treeWalkerCache[treeType] = lxmletree.TreeWalker + from . import etree_lxml + treeWalkerCache[treeType] = etree_lxml.TreeWalker elif treeType == "etree": from . 
import etree if implementation is None: @@ -55,3 +55,89 @@ def getTreeWalker(treeType, implementation=None, **kwargs): # XXX: NEVER cache here, caching is done in the etree submodule return etree.getETreeModule(implementation, **kwargs).TreeWalker return treeWalkerCache.get(treeType) + + +def concatenateCharacterTokens(tokens): + pendingCharacters = [] + for token in tokens: + type = token["type"] + if type in ("Characters", "SpaceCharacters"): + pendingCharacters.append(token["data"]) + else: + if pendingCharacters: + yield {"type": "Characters", "data": "".join(pendingCharacters)} + pendingCharacters = [] + yield token + if pendingCharacters: + yield {"type": "Characters", "data": "".join(pendingCharacters)} + + +def pprint(walker): + """Pretty printer for tree walkers""" + output = [] + indent = 0 + for token in concatenateCharacterTokens(walker): + type = token["type"] + if type in ("StartTag", "EmptyTag"): + # tag name + if token["namespace"] and token["namespace"] != constants.namespaces["html"]: + if token["namespace"] in constants.prefixes: + ns = constants.prefixes[token["namespace"]] + else: + ns = token["namespace"] + name = "%s %s" % (ns, token["name"]) + else: + name = token["name"] + output.append("%s<%s>" % (" " * indent, name)) + indent += 2 + # attributes (sorted for consistent ordering) + attrs = token["data"] + for (namespace, localname), value in sorted(attrs.items()): + if namespace: + if namespace in constants.prefixes: + ns = constants.prefixes[namespace] + else: + ns = namespace + name = "%s %s" % (ns, localname) + else: + name = localname + output.append("%s%s=\"%s\"" % (" " * indent, name, value)) + # self-closing + if type == "EmptyTag": + indent -= 2 + + elif type == "EndTag": + indent -= 2 + + elif type == "Comment": + output.append("%s<!-- %s -->" % (" " * indent, token["data"])) + + elif type == "Doctype": + if token["name"]: + if token["publicId"]: + output.append("""%s<!DOCTYPE %s "%s" "%s">""" % + (" " * indent, + token["name"], + 
token["publicId"], + token["systemId"] if token["systemId"] else "")) + elif token["systemId"]: + output.append("""%s<!DOCTYPE %s "" "%s">""" % + (" " * indent, + token["name"], + token["systemId"])) + else: + output.append("%s<!DOCTYPE %s>" % (" " * indent, + token["name"])) + else: + output.append("%s<!DOCTYPE >" % (" " * indent,)) + + elif type == "Characters": + output.append("%s\"%s\"" % (" " * indent, token["data"])) + + elif type == "SpaceCharacters": + assert False, "concatenateCharacterTokens should have got rid of all Space tokens" + + else: + raise ValueError("Unknown token type, %s" % type) + + return "\n".join(output) diff --git a/lib/html5lib/treewalkers/_base.py b/lib/html5lib/treewalkers/base.py similarity index 56% rename from lib/html5lib/treewalkers/_base.py rename to lib/html5lib/treewalkers/base.py index 34252e50c0e855b19f80b59be6caba19669789b3..36e1ba2420fb4375adf27f060ab8f15cf138174e 100644 --- a/lib/html5lib/treewalkers/_base.py +++ b/lib/html5lib/treewalkers/base.py @@ -1,10 +1,10 @@ from __future__ import absolute_import, division, unicode_literals -from six import text_type, string_types - -import gettext -_ = gettext.gettext from xml.dom import Node +from ..constants import namespaces, voidElements, spaceCharacters + +__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", + "TreeWalker", "NonRecursiveTreeWalker"] DOCUMENT = Node.DOCUMENT_NODE DOCTYPE = Node.DOCUMENT_TYPE_NODE @@ -14,28 +14,9 @@ COMMENT = Node.COMMENT_NODE ENTITY = Node.ENTITY_NODE UNKNOWN = "<#UNKNOWN#>" -from ..constants import voidElements, spaceCharacters spaceCharacters = "".join(spaceCharacters) -def to_text(s, blank_if_none=True): - """Wrapper around six.text_type to convert None to empty string""" - if s is None: - if blank_if_none: - return "" - else: - return None - elif isinstance(s, text_type): - return s - else: - return text_type(s) - - -def is_text_or_none(string): - """Wrapper around isinstance(string_types) or is None""" - 
return string is None or isinstance(string, string_types) - - class TreeWalker(object): def __init__(self, tree): self.tree = tree @@ -47,47 +28,25 @@ class TreeWalker(object): return {"type": "SerializeError", "data": msg} def emptyTag(self, namespace, name, attrs, hasChildren=False): - assert namespace is None or isinstance(namespace, string_types), type(namespace) - assert isinstance(name, string_types), type(name) - assert all((namespace is None or isinstance(namespace, string_types)) and - isinstance(name, string_types) and - isinstance(value, string_types) - for (namespace, name), value in attrs.items()) - - yield {"type": "EmptyTag", "name": to_text(name, False), - "namespace": to_text(namespace), + yield {"type": "EmptyTag", "name": name, + "namespace": namespace, "data": attrs} if hasChildren: - yield self.error(_("Void element has children")) + yield self.error("Void element has children") def startTag(self, namespace, name, attrs): - assert namespace is None or isinstance(namespace, string_types), type(namespace) - assert isinstance(name, string_types), type(name) - assert all((namespace is None or isinstance(namespace, string_types)) and - isinstance(name, string_types) and - isinstance(value, string_types) - for (namespace, name), value in attrs.items()) - return {"type": "StartTag", - "name": text_type(name), - "namespace": to_text(namespace), - "data": dict(((to_text(namespace, False), to_text(name)), - to_text(value, False)) - for (namespace, name), value in attrs.items())} + "name": name, + "namespace": namespace, + "data": attrs} def endTag(self, namespace, name): - assert namespace is None or isinstance(namespace, string_types), type(namespace) - assert isinstance(name, string_types), type(namespace) - return {"type": "EndTag", - "name": to_text(name, False), - "namespace": to_text(namespace), - "data": {}} + "name": name, + "namespace": namespace} def text(self, data): - assert isinstance(data, string_types), type(data) - - data = to_text(data) 
+ data = data middle = data.lstrip(spaceCharacters) left = data[:len(data) - len(middle)] if left: @@ -101,28 +60,19 @@ class TreeWalker(object): yield {"type": "SpaceCharacters", "data": right} def comment(self, data): - assert isinstance(data, string_types), type(data) - - return {"type": "Comment", "data": text_type(data)} - - def doctype(self, name, publicId=None, systemId=None, correct=True): - assert is_text_or_none(name), type(name) - assert is_text_or_none(publicId), type(publicId) - assert is_text_or_none(systemId), type(systemId) + return {"type": "Comment", "data": data} + def doctype(self, name, publicId=None, systemId=None): return {"type": "Doctype", - "name": to_text(name), - "publicId": to_text(publicId), - "systemId": to_text(systemId), - "correct": to_text(correct)} + "name": name, + "publicId": publicId, + "systemId": systemId} def entity(self, name): - assert isinstance(name, string_types), type(name) - - return {"type": "Entity", "name": text_type(name)} + return {"type": "Entity", "name": name} def unknown(self, nodeType): - return self.error(_("Unknown node type: ") + nodeType) + return self.error("Unknown node type: " + nodeType) class NonRecursiveTreeWalker(TreeWalker): @@ -154,7 +104,7 @@ class NonRecursiveTreeWalker(TreeWalker): elif type == ELEMENT: namespace, name, attributes, hasChildren = details - if name in voidElements: + if (not namespace or namespace == namespaces["html"]) and name in voidElements: for token in self.emptyTag(namespace, name, attributes, hasChildren): yield token @@ -187,7 +137,7 @@ class NonRecursiveTreeWalker(TreeWalker): type, details = details[0], details[1:] if type == ELEMENT: namespace, name, attributes, hasChildren = details - if name not in voidElements: + if (namespace and namespace != namespaces["html"]) or name not in voidElements: yield self.endTag(namespace, name) if self.tree is currentNode: currentNode = None diff --git a/lib/html5lib/treewalkers/dom.py b/lib/html5lib/treewalkers/dom.py index 
a01287a94481d3ca616497fc116a1f3c3e27b704..b0c89b001fd3b60511734c31e452c1d2053468d0 100644 --- a/lib/html5lib/treewalkers/dom.py +++ b/lib/html5lib/treewalkers/dom.py @@ -2,19 +2,16 @@ from __future__ import absolute_import, division, unicode_literals from xml.dom import Node -import gettext -_ = gettext.gettext +from . import base -from . import _base - -class TreeWalker(_base.NonRecursiveTreeWalker): +class TreeWalker(base.NonRecursiveTreeWalker): def getNodeDetails(self, node): if node.nodeType == Node.DOCUMENT_TYPE_NODE: - return _base.DOCTYPE, node.name, node.publicId, node.systemId + return base.DOCTYPE, node.name, node.publicId, node.systemId elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - return _base.TEXT, node.nodeValue + return base.TEXT, node.nodeValue elif node.nodeType == Node.ELEMENT_NODE: attrs = {} @@ -24,17 +21,17 @@ class TreeWalker(_base.NonRecursiveTreeWalker): attrs[(attr.namespaceURI, attr.localName)] = attr.value else: attrs[(None, attr.name)] = attr.value - return (_base.ELEMENT, node.namespaceURI, node.nodeName, + return (base.ELEMENT, node.namespaceURI, node.nodeName, attrs, node.hasChildNodes()) elif node.nodeType == Node.COMMENT_NODE: - return _base.COMMENT, node.nodeValue + return base.COMMENT, node.nodeValue elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE): - return (_base.DOCUMENT,) + return (base.DOCUMENT,) else: - return _base.UNKNOWN, node.nodeType + return base.UNKNOWN, node.nodeType def getFirstChild(self, node): return node.firstChild diff --git a/lib/html5lib/treewalkers/etree.py b/lib/html5lib/treewalkers/etree.py index fd8a9cc9b52c1b391190de4d776ec3ffa76550ba..8f30f0783977026572e958a183d9a5f1740c63b2 100644 --- a/lib/html5lib/treewalkers/etree.py +++ b/lib/html5lib/treewalkers/etree.py @@ -7,15 +7,13 @@ except ImportError: from ordereddict import OrderedDict except ImportError: OrderedDict = dict -import gettext -_ = gettext.gettext import re -from six import text_type +from six 
import string_types -from . import _base -from ..utils import moduleFactoryFactory +from . import base +from .._utils import moduleFactoryFactory tag_regexp = re.compile("{([^}]*)}(.*)") @@ -24,7 +22,7 @@ def getETreeBuilder(ElementTreeImplementation): ElementTree = ElementTreeImplementation ElementTreeCommentType = ElementTree.Comment("asd").tag - class TreeWalker(_base.NonRecursiveTreeWalker): + class TreeWalker(base.NonRecursiveTreeWalker): # pylint:disable=unused-variable """Given the particular ElementTree representation, this implementation, to avoid using recursion, returns "nodes" as tuples with the following content: @@ -40,9 +38,9 @@ def getETreeBuilder(ElementTreeImplementation): """ def getNodeDetails(self, node): if isinstance(node, tuple): # It might be the root Element - elt, key, parents, flag = node + elt, _, _, flag = node if flag in ("text", "tail"): - return _base.TEXT, getattr(elt, flag) + return base.TEXT, getattr(elt, flag) else: node = elt @@ -50,17 +48,17 @@ def getETreeBuilder(ElementTreeImplementation): node = node.getroot() if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"): - return (_base.DOCUMENT,) + return (base.DOCUMENT,) elif node.tag == "<!DOCTYPE>": - return (_base.DOCTYPE, node.text, + return (base.DOCTYPE, node.text, node.get("publicId"), node.get("systemId")) elif node.tag == ElementTreeCommentType: - return _base.COMMENT, node.text + return base.COMMENT, node.text else: - assert type(node.tag) == text_type, type(node.tag) + assert isinstance(node.tag, string_types), type(node.tag) # This is assumed to be an ordinary element match = tag_regexp.match(node.tag) if match: @@ -75,7 +73,7 @@ def getETreeBuilder(ElementTreeImplementation): attrs[(match.group(1), match.group(2))] = value else: attrs[(None, name)] = value - return (_base.ELEMENT, namespace, tag, + return (base.ELEMENT, namespace, tag, attrs, len(node) or node.text) def getFirstChild(self, node): @@ -131,6 +129,7 @@ def getETreeBuilder(ElementTreeImplementation): if 
not parents: return parent else: + assert list(parents[-1]).count(parent) == 1 return parent, list(parents[-1]).index(parent), parents, None return locals() diff --git a/lib/html5lib/treewalkers/lxmletree.py b/lib/html5lib/treewalkers/etree_lxml.py similarity index 72% rename from lib/html5lib/treewalkers/lxmletree.py rename to lib/html5lib/treewalkers/etree_lxml.py index bc934ac05a9a7848b6d93c724bec9ac6bb47e1da..fb2363112b1ea39d4789ef9f1589385f69854ab5 100644 --- a/lib/html5lib/treewalkers/lxmletree.py +++ b/lib/html5lib/treewalkers/etree_lxml.py @@ -4,12 +4,9 @@ from six import text_type from lxml import etree from ..treebuilders.etree import tag_regexp -from gettext import gettext -_ = gettext +from . import base -from . import _base - -from .. import ihatexml +from .. import _ihatexml def ensure_str(s): @@ -18,20 +15,27 @@ def ensure_str(s): elif isinstance(s, text_type): return s else: - return s.decode("utf-8", "strict") + return s.decode("ascii", "strict") class Root(object): def __init__(self, et): self.elementtree = et self.children = [] - if et.docinfo.internalDTD: - self.children.append(Doctype(self, - ensure_str(et.docinfo.root_name), - ensure_str(et.docinfo.public_id), - ensure_str(et.docinfo.system_url))) - root = et.getroot() - node = root + + try: + if et.docinfo.internalDTD: + self.children.append(Doctype(self, + ensure_str(et.docinfo.root_name), + ensure_str(et.docinfo.public_id), + ensure_str(et.docinfo.system_url))) + except AttributeError: + pass + + try: + node = et.getroot() + except AttributeError: + node = et while node.getprevious() is not None: node = node.getprevious() @@ -118,35 +122,38 @@ class FragmentWrapper(object): return len(self.obj) -class TreeWalker(_base.NonRecursiveTreeWalker): +class TreeWalker(base.NonRecursiveTreeWalker): def __init__(self, tree): - if hasattr(tree, "getroot"): - tree = Root(tree) - elif isinstance(tree, list): + # pylint:disable=redefined-variable-type + if isinstance(tree, list): + self.fragmentChildren 
= set(tree) tree = FragmentRoot(tree) - _base.NonRecursiveTreeWalker.__init__(self, tree) - self.filter = ihatexml.InfosetFilter() + else: + self.fragmentChildren = set() + tree = Root(tree) + base.NonRecursiveTreeWalker.__init__(self, tree) + self.filter = _ihatexml.InfosetFilter() def getNodeDetails(self, node): if isinstance(node, tuple): # Text node node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key - return _base.TEXT, ensure_str(getattr(node, key)) + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key + return base.TEXT, ensure_str(getattr(node, key)) elif isinstance(node, Root): - return (_base.DOCUMENT,) + return (base.DOCUMENT,) elif isinstance(node, Doctype): - return _base.DOCTYPE, node.name, node.public_id, node.system_id + return base.DOCTYPE, node.name, node.public_id, node.system_id elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"): - return _base.TEXT, node.obj + return base.TEXT, ensure_str(node.obj) elif node.tag == etree.Comment: - return _base.COMMENT, ensure_str(node.text) + return base.COMMENT, ensure_str(node.text) elif node.tag == etree.Entity: - return _base.ENTITY, ensure_str(node.text)[1:-1] # strip &; + return base.ENTITY, ensure_str(node.text)[1:-1] # strip &; else: # This is assumed to be an ordinary element @@ -165,11 +172,11 @@ class TreeWalker(_base.NonRecursiveTreeWalker): attrs[(match.group(1), match.group(2))] = value else: attrs[(None, name)] = value - return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag), + return (base.ELEMENT, namespace, self.filter.fromXmlName(tag), attrs, len(node) > 0 or node.text) def getFirstChild(self, node): - assert not isinstance(node, tuple), _("Text nodes have no children") + assert not isinstance(node, tuple), "Text nodes have no children" assert len(node) or node.text, "Node has no children" if node.text: @@ -180,7 +187,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker): def getNextSibling(self, 
node): if isinstance(node, tuple): # Text node node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key if key == "text": # XXX: we cannot use a "bool(node) and node[0] or None" construct here # because node[0] might evaluate to False if it has no child element @@ -196,9 +203,11 @@ class TreeWalker(_base.NonRecursiveTreeWalker): def getParentNode(self, node): if isinstance(node, tuple): # Text node node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key if key == "text": return node # else: fallback to "normal" processing + elif node in self.fragmentChildren: + return None return node.getparent() diff --git a/lib/html5lib/treewalkers/genshistream.py b/lib/html5lib/treewalkers/genshi.py similarity index 90% rename from lib/html5lib/treewalkers/genshistream.py rename to lib/html5lib/treewalkers/genshi.py index f559c45d04459035e0cf054e17e1381a81088f2a..7483be27d4d24f845e56b6954ee63eec730c00aa 100644 --- a/lib/html5lib/treewalkers/genshistream.py +++ b/lib/html5lib/treewalkers/genshi.py @@ -4,12 +4,12 @@ from genshi.core import QName from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT -from . import _base +from . 
import base from ..constants import voidElements, namespaces -class TreeWalker(_base.TreeWalker): +class TreeWalker(base.TreeWalker): def __iter__(self): # Buffer the events so we can pass in the following one previous = None @@ -25,7 +25,7 @@ class TreeWalker(_base.TreeWalker): yield token def tokens(self, event, next): - kind, data, pos = event + kind, data, _ = event if kind == START: tag, attribs = data name = tag.localname @@ -39,8 +39,8 @@ class TreeWalker(_base.TreeWalker): if namespace == namespaces["html"] and name in voidElements: for token in self.emptyTag(namespace, name, converted_attribs, - not next or next[0] != END - or next[1] != tag): + not next or next[0] != END or + next[1] != tag): yield token else: yield self.startTag(namespace, name, converted_attribs) @@ -48,7 +48,7 @@ class TreeWalker(_base.TreeWalker): elif kind == END: name = data.localname namespace = data.namespace - if name not in voidElements: + if namespace != namespaces["html"] or name not in voidElements: yield self.endTag(namespace, name) elif kind == COMMENT: diff --git a/lib/html5lib/treewalkers/pulldom.py b/lib/html5lib/treewalkers/pulldom.py deleted file mode 100644 index 0b0f515fec7e48ea063ca2b6ed95e3bf26c2d612..0000000000000000000000000000000000000000 --- a/lib/html5lib/treewalkers/pulldom.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \ - COMMENT, IGNORABLE_WHITESPACE, CHARACTERS - -from . 
import _base - -from ..constants import voidElements - - -class TreeWalker(_base.TreeWalker): - def __iter__(self): - ignore_until = None - previous = None - for event in self.tree: - if previous is not None and \ - (ignore_until is None or previous[1] is ignore_until): - if previous[1] is ignore_until: - ignore_until = None - for token in self.tokens(previous, event): - yield token - if token["type"] == "EmptyTag": - ignore_until = previous[1] - previous = event - if ignore_until is None or previous[1] is ignore_until: - for token in self.tokens(previous, None): - yield token - elif ignore_until is not None: - raise ValueError("Illformed DOM event stream: void element without END_ELEMENT") - - def tokens(self, event, next): - type, node = event - if type == START_ELEMENT: - name = node.nodeName - namespace = node.namespaceURI - attrs = {} - for attr in list(node.attributes.keys()): - attr = node.getAttributeNode(attr) - attrs[(attr.namespaceURI, attr.localName)] = attr.value - if name in voidElements: - for token in self.emptyTag(namespace, - name, - attrs, - not next or next[1] is not node): - yield token - else: - yield self.startTag(namespace, name, attrs) - - elif type == END_ELEMENT: - name = node.nodeName - namespace = node.namespaceURI - if name not in voidElements: - yield self.endTag(namespace, name) - - elif type == COMMENT: - yield self.comment(node.nodeValue) - - elif type in (IGNORABLE_WHITESPACE, CHARACTERS): - for token in self.text(node.nodeValue): - yield token - - else: - yield self.unknown(type) diff --git a/lib/mako/__init__.py b/lib/mako/__init__.py index 74526ecce72d9d63a265ab42a998d5dddb6cc810..eaa24dc81f3caa63295dc996fa1a57cba32e94a2 100644 --- a/lib/mako/__init__.py +++ b/lib/mako/__init__.py @@ -1,8 +1,8 @@ # mako/__init__.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # 
the MIT License: http://www.opensource.org/licenses/mit-license.php -__version__ = '1.0.3' +__version__ = '1.0.6' diff --git a/lib/mako/_ast_util.py b/lib/mako/_ast_util.py index cc298d58662c0d7d3814a9b46c16b3b206e29858..8d19b0d751f8359091c16df35478cb30b35e05c9 100644 --- a/lib/mako/_ast_util.py +++ b/lib/mako/_ast_util.py @@ -1,5 +1,5 @@ # mako/_ast_util.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/ast.py b/lib/mako/ast.py index c55b29c9393020129f5b1ba3f71eb82e6b413b22..8d2d150cd6620a50a8e00c633fbd74dd2d494521 100644 --- a/lib/mako/ast.py +++ b/lib/mako/ast.py @@ -1,5 +1,5 @@ # mako/ast.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/cache.py b/lib/mako/cache.py index c7aabd2b441c8adc5a91b161e21b1c4223032868..94f387018faf9fec9ed8857feb307ce9a979847c 100644 --- a/lib/mako/cache.py +++ b/lib/mako/cache.py @@ -1,5 +1,5 @@ # mako/cache.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/cmd.py b/lib/mako/cmd.py index 50d47fcb4ffa05481827a678803a07840734d77d..dd1f833e77af65ea7f51758ca811181ea50a7037 100755 --- a/lib/mako/cmd.py +++ b/lib/mako/cmd.py @@ -1,5 +1,5 @@ # mako/cmd.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors 
and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/codegen.py b/lib/mako/codegen.py index bf86d795bfd0f5df7a9bb22e3100f61b56600032..d4ecbe8bb3d4ee0d2acedf17a0451e5be7226b05 100644 --- a/lib/mako/codegen.py +++ b/lib/mako/codegen.py @@ -1,5 +1,5 @@ # mako/codegen.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/compat.py b/lib/mako/compat.py index db22b991f67087efe1215861747bd4d7f766b7a6..a2ab243427e4d3775afaf0cbf291bb4e574ee1e0 100644 --- a/lib/mako/compat.py +++ b/lib/mako/compat.py @@ -5,6 +5,7 @@ py3k = sys.version_info >= (3, 0) py33 = sys.version_info >= (3, 3) py2k = sys.version_info < (3,) py26 = sys.version_info >= (2, 6) +py27 = sys.version_info >= (2, 7) jython = sys.platform.startswith('java') win32 = sys.platform.startswith('win') pypy = hasattr(sys, 'pypy_version_info') diff --git a/lib/mako/exceptions.py b/lib/mako/exceptions.py index 84d2297b919f3b36e830952f09a07cd208037eaa..cb6fb3f7084df48b37f86db0f7961813d274ba64 100644 --- a/lib/mako/exceptions.py +++ b/lib/mako/exceptions.py @@ -1,5 +1,5 @@ # mako/exceptions.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/ext/autohandler.py b/lib/mako/ext/autohandler.py index 9ee780a6ae88648ac78d59923a66e36341c1855a..9d1c9114aad08e45a5d6c3f691d8e44ac2e02ac3 100644 --- a/lib/mako/ext/autohandler.py +++ b/lib/mako/ext/autohandler.py @@ -1,5 +1,5 @@ # ext/autohandler.py -# 
Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/ext/babelplugin.py b/lib/mako/ext/babelplugin.py index 53d62baabd9ecdbe56a6c4abe2c8701c9ace0d29..0b5e84facaf7b1d933a01a311b876f68d6f9554f 100644 --- a/lib/mako/ext/babelplugin.py +++ b/lib/mako/ext/babelplugin.py @@ -1,5 +1,5 @@ # ext/babelplugin.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/ext/preprocessors.py b/lib/mako/ext/preprocessors.py index 5624f701ccd10ae5cccd931335ee584cf71dc778..9b700d1da95047675170440b16b960d064d5001f 100644 --- a/lib/mako/ext/preprocessors.py +++ b/lib/mako/ext/preprocessors.py @@ -1,5 +1,5 @@ # ext/preprocessors.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/ext/pygmentplugin.py b/lib/mako/ext/pygmentplugin.py index 1121c5d3d24f1dd3b4ac703f6e7f24479cabeb1f..4057caa670f575b022b2766b1ec19325c4635eb2 100644 --- a/lib/mako/ext/pygmentplugin.py +++ b/lib/mako/ext/pygmentplugin.py @@ -1,5 +1,5 @@ # ext/pygmentplugin.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git 
a/lib/mako/ext/turbogears.py b/lib/mako/ext/turbogears.py index 2e7d03909ab2d45732e49349acfd7979c0296132..eaa2d7800077b57446e84a776a8e759957db3ac2 100644 --- a/lib/mako/ext/turbogears.py +++ b/lib/mako/ext/turbogears.py @@ -1,5 +1,5 @@ # ext/turbogears.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/filters.py b/lib/mako/filters.py index 525aeb8e558855b18bd8f141333be8090ee92db1..c082690c4d6567671a0a9f60677a8d38be40b239 100644 --- a/lib/mako/filters.py +++ b/lib/mako/filters.py @@ -1,5 +1,5 @@ # mako/filters.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/lexer.py b/lib/mako/lexer.py index 2fa08e4ac3c73ace2b44b7a590b33af64038d7a2..cf4187f711679d464113336614187b6f32b8fd30 100644 --- a/lib/mako/lexer.py +++ b/lib/mako/lexer.py @@ -1,5 +1,5 @@ # mako/lexer.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -95,31 +95,37 @@ class Lexer(object): # (match and "TRUE" or "FALSE") return match - def parse_until_text(self, *text): + def parse_until_text(self, watch_nesting, *text): startpos = self.match_position text_re = r'|'.join(text) brace_level = 0 + paren_level = 0 + bracket_level = 0 while True: match = self.match(r'#.*\n') if match: continue - match = self.match(r'(\"\"\"|\'\'\'|\"|\')((?<!\\)\\\1|.)*?\1', + match = 
self.match(r'(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1', re.S) if match: continue match = self.match(r'(%s)' % text_re) - if match: - if match.group(1) == '}' and brace_level > 0: - brace_level -= 1 - continue + if match and not (watch_nesting + and (brace_level > 0 or paren_level > 0 + or bracket_level > 0)): return \ self.text[startpos: self.match_position - len(match.group(1))],\ match.group(1) - match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S) + elif not match: + match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S) if match: brace_level += match.group(1).count('{') brace_level -= match.group(1).count('}') + paren_level += match.group(1).count('(') + paren_level -= match.group(1).count(')') + bracket_level += match.group(1).count('[') + bracket_level -= match.group(1).count(']') continue raise exceptions.SyntaxException( "Expected: %s" % @@ -368,7 +374,7 @@ class Lexer(object): match = self.match(r"<%(!)?") if match: line, pos = self.matched_lineno, self.matched_charpos - text, end = self.parse_until_text(r'%>') + text, end = self.parse_until_text(False, r'%>') # the trailing newline helps # compiler.parse() not complain about indentation text = adjust_whitespace(text) + "\n" @@ -384,9 +390,9 @@ class Lexer(object): match = self.match(r"\${") if match: line, pos = self.matched_lineno, self.matched_charpos - text, end = self.parse_until_text(r'\|', r'}') + text, end = self.parse_until_text(True, r'\|', r'}') if end == '|': - escapes, end = self.parse_until_text(r'}') + escapes, end = self.parse_until_text(True, r'}') else: escapes = "" text = text.replace('\r\n', '\n') diff --git a/lib/mako/lookup.py b/lib/mako/lookup.py index e6dff9d2fc13eb5b2edd3b966e3eaf9c129f461a..0d3f304bb345e25884ddac78fb4d2913be27966a 100644 --- a/lib/mako/lookup.py +++ b/lib/mako/lookup.py @@ -1,5 +1,5 @@ # mako/lookup.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see 
AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -180,7 +180,8 @@ class TemplateLookup(TemplateCollection): enable_loop=True, input_encoding=None, preprocessor=None, - lexer_cls=None): + lexer_cls=None, + include_error_handler=None): self.directories = [posixpath.normpath(d) for d in util.to_list(directories, ()) @@ -203,6 +204,7 @@ class TemplateLookup(TemplateCollection): self.template_args = { 'format_exceptions': format_exceptions, 'error_handler': error_handler, + 'include_error_handler': include_error_handler, 'disable_unicode': disable_unicode, 'bytestring_passthrough': bytestring_passthrough, 'output_encoding': output_encoding, diff --git a/lib/mako/parsetree.py b/lib/mako/parsetree.py index e7af4bc0f3347f3786ab64df3dc677c7c78a483a..879882e675e1acfcbc5f8caf5de49088285efa8e 100644 --- a/lib/mako/parsetree.py +++ b/lib/mako/parsetree.py @@ -1,5 +1,5 @@ # mako/parsetree.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/pygen.py b/lib/mako/pygen.py index 5d87bbd2864e0071ef4e923d76141d89772858e9..8514e0226bc7a22970f4daf8468c30ba87558a4a 100644 --- a/lib/mako/pygen.py +++ b/lib/mako/pygen.py @@ -1,5 +1,5 @@ # mako/pygen.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/pyparser.py b/lib/mako/pyparser.py index 96e533507bb2728478a8d62363dbd0df5c3dcfb1..15d0da6efcaf4e674c0442272fb834c533a60302 100644 --- a/lib/mako/pyparser.py +++ b/lib/mako/pyparser.py @@ -1,5 +1,5 
@@ # mako/pyparser.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/mako/runtime.py b/lib/mako/runtime.py index 8d2f4a9e569c49f5e0544fac6dc56c7715d277a6..769541c6145f9220edf6cf2b4b24627b3f26e96e 100644 --- a/lib/mako/runtime.py +++ b/lib/mako/runtime.py @@ -1,5 +1,5 @@ # mako/runtime.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -749,7 +749,16 @@ def _include_file(context, uri, calling_uri, **kwargs): (callable_, ctx) = _populate_self_namespace( context._clean_inheritance_tokens(), template) - callable_(ctx, **_kwargs_for_include(callable_, context._data, **kwargs)) + kwargs = _kwargs_for_include(callable_, context._data, **kwargs) + if template.include_error_handler: + try: + callable_(ctx, **kwargs) + except Exception: + result = template.include_error_handler(ctx, compat.exception_as()) + if not result: + compat.reraise(*sys.exc_info()) + else: + callable_(ctx, **kwargs) def _inherit_from(context, uri, calling_uri): diff --git a/lib/mako/template.py b/lib/mako/template.py index afb679a5280b62def31e7766de96173941751126..c3e0c252c497305f051ddf439d8c3f2d61e1be4b 100644 --- a/lib/mako/template.py +++ b/lib/mako/template.py @@ -1,5 +1,5 @@ # mako/template.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -109,6 +109,11 @@ class 
Template(object): completes. Is used to provide custom error-rendering functions. + .. seealso:: + + :paramref:`.Template.include_error_handler` - include-specific + error handler function + :param format_exceptions: if ``True``, exceptions which occur during the render phase of this template will be caught and formatted into an HTML error page, which then becomes the @@ -129,6 +134,16 @@ class Template(object): import will not appear as the first executed statement in the generated code and will therefore not have the desired effect. + :param include_error_handler: An error handler that runs when this template + is included within another one via the ``<%include>`` tag, and raises an + error. Compare to the :paramref:`.Template.error_handler` option. + + .. versionadded:: 1.0.6 + + .. seealso:: + + :paramref:`.Template.error_handler` - top-level error handler function + :param input_encoding: Encoding of the template's source code. Can be used in lieu of the coding comment. See :ref:`usage_unicode` as well as :ref:`unicode_toplevel` for @@ -243,7 +258,8 @@ class Template(object): future_imports=None, enable_loop=True, preprocessor=None, - lexer_cls=None): + lexer_cls=None, + include_error_handler=None): if uri: self.module_id = re.sub(r'\W', "_", uri) self.uri = uri @@ -329,6 +345,7 @@ class Template(object): self.callable_ = self.module.render_body self.format_exceptions = format_exceptions self.error_handler = error_handler + self.include_error_handler = include_error_handler self.lookup = lookup self.module_directory = module_directory @@ -475,6 +492,14 @@ class Template(object): return DefTemplate(self, getattr(self.module, "render_%s" % name)) + def list_defs(self): + """return a list of defs in the template. + + .. 
versionadded:: 1.0.4 + + """ + return [i[7:] for i in dir(self.module) if i[:7] == 'render_'] + def _get_def_callable(self, name): return getattr(self.module, "render_%s" % name) @@ -520,6 +545,7 @@ class ModuleTemplate(Template): cache_type=None, cache_dir=None, cache_url=None, + include_error_handler=None, ): self.module_id = re.sub(r'\W', "_", module._template_uri) self.uri = module._template_uri @@ -551,6 +577,7 @@ class ModuleTemplate(Template): self.callable_ = self.module.render_body self.format_exceptions = format_exceptions self.error_handler = error_handler + self.include_error_handler = include_error_handler self.lookup = lookup self._setup_cache_args( cache_impl, cache_enabled, cache_args, @@ -571,6 +598,7 @@ class DefTemplate(Template): self.encoding_errors = parent.encoding_errors self.format_exceptions = parent.format_exceptions self.error_handler = parent.error_handler + self.include_error_handler = parent.include_error_handler self.enable_loop = parent.enable_loop self.lookup = parent.lookup self.bytestring_passthrough = parent.bytestring_passthrough diff --git a/lib/mako/util.py b/lib/mako/util.py index c7dad655a32be4ca9d4d4140810dd9cdc17c79ad..2f089ff4047015e1bbf24ecf55dbc5443d7fa203 100644 --- a/lib/mako/util.py +++ b/lib/mako/util.py @@ -1,5 +1,5 @@ # mako/util.py -# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file> +# Copyright (C) 2006-2016 the Mako authors and contributors <see AUTHORS file> # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/lib/markupsafe/__init__.py b/lib/markupsafe/__init__.py index 275540154ea5b5a7e3031939c952d774e42c29ed..68dc85f61200a025fba7a7b337b20c271fd7c70b 100644 --- a/lib/markupsafe/__init__.py +++ b/lib/markupsafe/__init__.py @@ -14,12 +14,13 @@ from collections import Mapping from markupsafe._compat import text_type, string_types, int_types, \ unichr, iteritems, PY2 +__version__ = "1.0" __all__ = 
['Markup', 'soft_unicode', 'escape', 'escape_silent'] _striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)') -_entity_re = re.compile(r'&([^;]+);') +_entity_re = re.compile(r'&([^& ;]+);') class Markup(text_type): @@ -142,7 +143,8 @@ class Markup(text_type): return unichr(int(name[1:])) except ValueError: pass - return u'' + # Don't modify unexpected input. + return m.group() return _entity_re.sub(handle_match, text_type(self)) def striptags(self): @@ -260,7 +262,12 @@ if hasattr(text_type, 'format'): 'its __html__ method.') rv = value.__html__() else: - rv = string.Formatter.format_field(self, value, format_spec) + # We need to make sure the format spec is unicode here as + # otherwise the wrong callback methods are invoked. For + # instance a byte string there would invoke __str__ and + # not __unicode__. + rv = string.Formatter.format_field( + self, value, text_type(format_spec)) return text_type(self.escape(rv)) diff --git a/lib/markupsafe/_speedups.c b/lib/markupsafe/_speedups.c index f349febf22d59ec7dfe440b65faf5838c1234b90..d779a68cc554fa03aad6ab9980c3aed8a07d8c5b 100644 --- a/lib/markupsafe/_speedups.c +++ b/lib/markupsafe/_speedups.c @@ -29,7 +29,7 @@ static int init_constants(void) { PyObject *module; - /* happing of characters to replace */ + /* mapping of characters to replace */ escaped_chars_repl['"'] = UNICHR("""); escaped_chars_repl['\''] = UNICHR("'"); escaped_chars_repl['&'] = UNICHR("&"); @@ -41,7 +41,7 @@ init_constants(void) escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \ escaped_chars_delta_len['&'] = 4; escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3; - + /* import markup type so that we can mark the return value */ module = PyImport_ImportModule("markupsafe"); if (!module) @@ -94,7 +94,7 @@ escape_unicode(PyUnicodeObject *in) } ++next_escp; } - + if (next_escp > inp) { /* copy unescaped chars between inp and next_escp */ Py_UNICODE_COPY(outp, inp, next_escp-inp); diff --git a/lib/markupsafe/tests.py 
b/lib/markupsafe/tests.py deleted file mode 100644 index 6369936296e2905865b20c7f52170357bc870d8c..0000000000000000000000000000000000000000 --- a/lib/markupsafe/tests.py +++ /dev/null @@ -1,179 +0,0 @@ -# -*- coding: utf-8 -*- -import gc -import sys -import unittest -from markupsafe import Markup, escape, escape_silent -from markupsafe._compat import text_type - - -class MarkupTestCase(unittest.TestCase): - - def test_adding(self): - # adding two strings should escape the unsafe one - unsafe = '<script type="application/x-some-script">alert("foo");</script>' - safe = Markup('<em>username</em>') - assert unsafe + safe == text_type(escape(unsafe)) + text_type(safe) - - def test_string_interpolation(self): - # string interpolations are safe to use too - assert Markup('<em>%s</em>') % '<bad user>' == \ - '<em><bad user></em>' - assert Markup('<em>%(username)s</em>') % { - 'username': '<bad user>' - } == '<em><bad user></em>' - - assert Markup('%i') % 3.14 == '3' - assert Markup('%.2f') % 3.14 == '3.14' - - def test_type_behavior(self): - # an escaped object is markup too - assert type(Markup('foo') + 'bar') is Markup - - # and it implements __html__ by returning itself - x = Markup("foo") - assert x.__html__() is x - - def test_html_interop(self): - # it also knows how to treat __html__ objects - class Foo(object): - def __html__(self): - return '<em>awesome</em>' - def __unicode__(self): - return 'awesome' - __str__ = __unicode__ - assert Markup(Foo()) == '<em>awesome</em>' - assert Markup('<strong>%s</strong>') % Foo() == \ - '<strong><em>awesome</em></strong>' - - def test_tuple_interpol(self): - self.assertEqual(Markup('<em>%s:%s</em>') % ( - '<foo>', - '<bar>', - ), Markup(u'<em><foo>:<bar></em>')) - - def test_dict_interpol(self): - self.assertEqual(Markup('<em>%(foo)s</em>') % { - 'foo': '<foo>', - }, Markup(u'<em><foo></em>')) - self.assertEqual(Markup('<em>%(foo)s:%(bar)s</em>') % { - 'foo': '<foo>', - 'bar': '<bar>', - }, Markup(u'<em><foo>:<bar></em>')) - - 
def test_escaping(self): - # escaping and unescaping - assert escape('"<>&\'') == '"<>&'' - assert Markup("<em>Foo & Bar</em>").striptags() == "Foo & Bar" - assert Markup("<test>").unescape() == "<test>" - - def test_formatting(self): - for actual, expected in ( - (Markup('%i') % 3.14, '3'), - (Markup('%.2f') % 3.14159, '3.14'), - (Markup('%s %s %s') % ('<', 123, '>'), '< 123 >'), - (Markup('<em>{awesome}</em>').format(awesome='<awesome>'), - '<em><awesome></em>'), - (Markup('{0[1][bar]}').format([0, {'bar': '<bar/>'}]), - '<bar/>'), - (Markup('{0[1][bar]}').format([0, {'bar': Markup('<bar/>')}]), - '<bar/>')): - assert actual == expected, "%r should be %r!" % (actual, expected) - - # This is new in 2.7 - if sys.version_info >= (2, 7): - def test_formatting_empty(self): - formatted = Markup('{}').format(0) - assert formatted == Markup('0') - - def test_custom_formatting(self): - class HasHTMLOnly(object): - def __html__(self): - return Markup('<foo>') - - class HasHTMLAndFormat(object): - def __html__(self): - return Markup('<foo>') - def __html_format__(self, spec): - return Markup('<FORMAT>') - - assert Markup('{0}').format(HasHTMLOnly()) == Markup('<foo>') - assert Markup('{0}').format(HasHTMLAndFormat()) == Markup('<FORMAT>') - - def test_complex_custom_formatting(self): - class User(object): - def __init__(self, id, username): - self.id = id - self.username = username - def __html_format__(self, format_spec): - if format_spec == 'link': - return Markup('<a href="/user/{0}">{1}</a>').format( - self.id, - self.__html__(), - ) - elif format_spec: - raise ValueError('Invalid format spec') - return self.__html__() - def __html__(self): - return Markup('<span class=user>{0}</span>').format(self.username) - - user = User(1, 'foo') - assert Markup('<p>User: {0:link}').format(user) == \ - Markup('<p>User: <a href="/user/1"><span class=user>foo</span></a>') - - def test_all_set(self): - import markupsafe as markup - for item in markup.__all__: - getattr(markup, item) - 
- def test_escape_silent(self): - assert escape_silent(None) == Markup() - assert escape(None) == Markup(None) - assert escape_silent('<foo>') == Markup(u'<foo>') - - def test_splitting(self): - self.assertEqual(Markup('a b').split(), [ - Markup('a'), - Markup('b') - ]) - self.assertEqual(Markup('a b').rsplit(), [ - Markup('a'), - Markup('b') - ]) - self.assertEqual(Markup('a\nb').splitlines(), [ - Markup('a'), - Markup('b') - ]) - - def test_mul(self): - self.assertEqual(Markup('a') * 3, Markup('aaa')) - - -class MarkupLeakTestCase(unittest.TestCase): - - def test_markup_leaks(self): - counts = set() - for count in range(20): - for item in range(1000): - escape("foo") - escape("<foo>") - escape(u"foo") - escape(u"<foo>") - counts.add(len(gc.get_objects())) - assert len(counts) == 1, 'ouch, c extension seems to leak objects' - - -def suite(): - suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(MarkupTestCase)) - - # this test only tests the c extension - if not hasattr(escape, 'func_code'): - suite.addTest(unittest.makeSuite(MarkupLeakTestCase)) - - return suite - - -if __name__ == '__main__': - unittest.main(defaultTest='suite') - -# vim:sts=4:sw=4:et: diff --git a/lib/sqliteshelf.py b/lib/sqliteshelf.py deleted file mode 100644 index 4e7f35879df1f8365b1a2770cb3db89c519d81d6..0000000000000000000000000000000000000000 --- a/lib/sqliteshelf.py +++ /dev/null @@ -1,161 +0,0 @@ -""" -by default, things are stored in a "shelf" table - ->>> d = SQLiteShelf("test.sdb") - -you can put multiple shelves into a single SQLite database - ->>> e = SQLiteShelf("test.sdb", "othertable") - -both are empty to start with - ->>> d -{} ->>> e -{} - -adding stuff is as simple as a regular dict ->>> d['a'] = "moo" ->>> e['a'] = "moo" - -regular dict actions work - ->>> d['a'] -'moo' ->>> e['a'] -'moo' ->>> 'a' in d -True ->>> len(d) -1 ->>> del d['a'] ->>> 'a' in d -False ->>> len(d) -0 ->>> del e['a'] - -objects can be stored in shelves - ->> class Test: -.. 
def __init__(self): -.. self.foo = "bar" -.. ->> t = Test() ->> d['t'] = t ->> print d['t'].foo -bar - -errors are as normal for a dict - ->>> d['x'] -Traceback (most recent call last): - ... -KeyError: 'x' ->>> del d['x'] -Traceback (most recent call last): - ... -KeyError: 'x' - -Adding and fetching binary strings - ->>> d[1] = "a\\x00b" ->>> d[1] -'a\\x00b' -""" - -try: - from UserDict import DictMixin -except ImportError: - from collections import MutableMapping as DictMixin - -try: - import cPickle as pickle -except ImportError: - import pickle - -import sys -valtype = str -if sys.version > '3': - buffer = memoryview - valtype = bytes - -import sqlite3 - -class SQLiteDict(DictMixin): - def __init__(self, filename=':memory:', table='shelf', flags='r', mode=None, valtype=valtype): - self.table = table - self.valtype = valtype - MAKE_SHELF = 'CREATE TABLE IF NOT EXISTS '+self.table+' (key TEXT, value BLOB)' - MAKE_INDEX = 'CREATE UNIQUE INDEX IF NOT EXISTS '+self.table+'_keyndx ON '+self.table+'(key)' - self.conn = sqlite3.connect(filename) - self.conn.text_factory = str - self.conn.execute(MAKE_SHELF) - self.conn.execute(MAKE_INDEX) - self.conn.commit() - - def __getitem__(self, key): - GET_ITEM = 'SELECT value FROM '+self.table+' WHERE key = ?' - item = self.conn.execute(GET_ITEM, (key,)).fetchone() - if item is None: - raise KeyError(key) - return self.valtype(item[0]) - - def __setitem__(self, key, item): - ADD_ITEM = 'REPLACE INTO '+self.table+' (key, value) VALUES (?,?)' - self.conn.execute(ADD_ITEM, (key, sqlite3.Binary(item))) - self.conn.commit() - - def __delitem__(self, key): - if key not in self: - raise KeyError(key) - DEL_ITEM = 'DELETE FROM '+self.table+' WHERE key = ?' 
- self.conn.execute(DEL_ITEM, (key,)) - self.conn.commit() - - def __iter__(self): - c = self.conn.cursor() - try: - c.execute('SELECT key FROM '+self.table+' ORDER BY key') - for row in c: - yield row[0] - finally: - c.close() - - def keys(self): - c = self.conn.cursor() - try: - c.execute('SELECT key FROM '+self.table+' ORDER BY key') - return [row[0] for row in c] - finally: - c.close() - - ################################################################### - # optional bits - - def __len__(self): - GET_LEN = 'SELECT COUNT(*) FROM '+self.table - return self.conn.execute(GET_LEN).fetchone()[0] - - def close(self): - if self.conn is not None: - self.conn.commit() - self.conn.close() - self.conn = None - - def __del__(self): - self.close() - - def __repr__(self): - return repr(dict(self)) - -class SQLiteShelf(SQLiteDict): - def __getitem__(self, key): - return pickle.loads(SQLiteDict.__getitem__(self, key)) - - def __setitem__(self, key, item): - SQLiteDict.__setitem__(self, key, pickle.dumps(item)) - -if __name__ == "__main__": - import doctest - doctest.testmod() diff --git a/lib/tornado/__init__.py b/lib/tornado/__init__.py index 3b10da51fad08ba03a22670ed142ea21266a0ed1..f054e4026a5d5edc77a81efeb924b9d69571f681 100644 --- a/lib/tornado/__init__.py +++ b/lib/tornado/__init__.py @@ -16,7 +16,7 @@ """The Tornado web server and tools.""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function # version is a human-readable version number. 
@@ -25,5 +25,5 @@ from __future__ import absolute_import, division, print_function, with_statement # is zero for an official release, positive for a development branch, # or negative for a release candidate or beta (after the base version # number has been incremented) -version = "4.4.2" -version_info = (4, 4, 2, 0) +version = "4.5.1" +version_info = (4, 5, 1, 0) diff --git a/lib/tornado/_locale_data.py b/lib/tornado/_locale_data.py index 8854ffd72d69622b29af51abe82af14f72f02be9..6fa2c29742af692196e9edd5ad953ef95acc462b 100644 --- a/lib/tornado/_locale_data.py +++ b/lib/tornado/_locale_data.py @@ -17,13 +17,12 @@ """Data used by the tornado.locale module.""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function LOCALE_NAMES = { "af_ZA": {"name_en": u"Afrikaans", "name": u"Afrikaans"}, "am_ET": {"name_en": u"Amharic", "name": u"አማርኛ"}, "ar_AR": {"name_en": u"Arabic", "name": u"العربية"}, - "ar_SA": {"name_en": u"Arabic (Saudi Arabia)", "name": u"(العربية (المملكة العربية السعودية"}, "bg_BG": {"name_en": u"Bulgarian", "name": u"Български"}, "bn_IN": {"name_en": u"Bengali", "name": u"বাংলা"}, "bs_BA": {"name_en": u"Bosnian", "name": u"Bosanski"}, @@ -59,7 +58,6 @@ LOCALE_NAMES = { "mk_MK": {"name_en": u"Macedonian", "name": u"Македонски"}, "ml_IN": {"name_en": u"Malayalam", "name": u"മലയാളം"}, "ms_MY": {"name_en": u"Malay", "name": u"Bahasa Melayu"}, - "no_NO": {"name_en": u"Norwegian", "name": u"Norsk"}, "nb_NO": {"name_en": u"Norwegian (bokmal)", "name": u"Norsk (bokmål)"}, "nl_NL": {"name_en": u"Dutch", "name": u"Nederlands"}, "nn_NO": {"name_en": u"Norwegian (nynorsk)", "name": u"Norsk (nynorsk)"}, diff --git a/lib/tornado/auth.py b/lib/tornado/auth.py index 44144061e644a69c403712c191dad0e81d420a11..f02d2898085f93f4087c2ecefc288cd6bbf8900f 100644 --- a/lib/tornado/auth.py +++ b/lib/tornado/auth.py @@ -65,7 +65,7 @@ Example usage for Google OAuth: errors are more 
consistently reported through the ``Future`` interfaces. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import base64 import binascii @@ -954,6 +954,20 @@ class FacebookGraphMixin(OAuth2Mixin): .. testoutput:: :hide: + This method returns a dictionary which may contain the following fields: + + * ``access_token``, a string which may be passed to `facebook_request` + * ``session_expires``, an integer encoded as a string representing + the time until the access token expires in seconds. This field should + be used like ``int(user['session_expires'])``; in a future version of + Tornado it will change from a string to an integer. + * ``id``, ``name``, ``first_name``, ``last_name``, ``locale``, ``picture``, + ``link``, plus any fields named in the ``extra_fields`` argument. These + fields are copied from the Facebook graph API `user object <https://developers.facebook.com/docs/graph-api/reference/user>`_ + + .. versionchanged:: 4.5 + The ``session_expires`` field was updated to support changes made to the + Facebook API in March 2017. 
""" http = self.get_auth_http_client() args = { @@ -978,10 +992,10 @@ class FacebookGraphMixin(OAuth2Mixin): future.set_exception(AuthError('Facebook auth error: %s' % str(response))) return - args = urlparse.parse_qs(escape.native_str(response.body)) + args = escape.json_decode(response.body) session = { - "access_token": args["access_token"][-1], - "expires": args.get("expires") + "access_token": args.get("access_token"), + "expires_in": args.get("expires_in") } self.facebook_request( @@ -1004,7 +1018,12 @@ class FacebookGraphMixin(OAuth2Mixin): for field in fields: fieldmap[field] = user.get(field) - fieldmap.update({"access_token": session["access_token"], "session_expires": session.get("expires")}) + # session_expires is converted to str for compatibility with + # older versions in which the server used url-encoding and + # this code simply returned the string verbatim. + # This should change in Tornado 5.0. + fieldmap.update({"access_token": session["access_token"], + "session_expires": str(session.get("expires_in"))}) future.set_result(fieldmap) @_auth_return_future diff --git a/lib/tornado/autoreload.py b/lib/tornado/autoreload.py index 5e0d00d1ffaa2ca797b2e0b4e9ecef1024d71c09..60571efe71fe83504ab1b84bbf16f5315b155071 100644 --- a/lib/tornado/autoreload.py +++ b/lib/tornado/autoreload.py @@ -45,7 +45,7 @@ incorrectly. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import os import sys @@ -103,10 +103,6 @@ except ImportError: # os.execv is broken on Windows and can't properly parse command line # arguments and executable name if they contain whitespaces. subprocess # fixes that behavior. -# This distinction is also important because when we use execv, we want to -# close the IOLoop and all its file descriptors, to guard against any -# file descriptors that were not set CLOEXEC. 
When execv is not available, -# we must not close the IOLoop because we want the process to exit cleanly. _has_execv = sys.platform != 'win32' _watched_files = set() @@ -127,8 +123,6 @@ def start(io_loop=None, check_time=500): _io_loops[io_loop] = True if len(_io_loops) > 1: gen_log.warning("tornado.autoreload started more than once in the same process") - if _has_execv: - add_reload_hook(functools.partial(io_loop.close, all_fds=True)) modify_times = {} callback = functools.partial(_reload_on_update, modify_times) scheduler = ioloop.PeriodicCallback(callback, check_time, io_loop=io_loop) @@ -249,6 +243,7 @@ def _reload(): # unwind, so just exit uncleanly. os._exit(0) + _USAGE = """\ Usage: python -m tornado.autoreload -m module.to.run [args...] diff --git a/lib/tornado/concurrent.py b/lib/tornado/concurrent.py index 05205f7374c7eb826b42245a3f03679e288c51d9..667e6b1788ecfe426e1f02b084d01840fd1929a3 100644 --- a/lib/tornado/concurrent.py +++ b/lib/tornado/concurrent.py @@ -21,7 +21,7 @@ a mostly-compatible `Future` class designed for use from coroutines, as well as some utility functions for interacting with the `concurrent.futures` package. 
""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import functools import platform @@ -31,7 +31,7 @@ import sys from tornado.log import app_log from tornado.stack_context import ExceptionStackContext, wrap -from tornado.util import raise_exc_info, ArgReplacer +from tornado.util import raise_exc_info, ArgReplacer, is_finalizing try: from concurrent import futures @@ -123,8 +123,8 @@ class _TracebackLogger(object): self.exc_info = None self.formatted_tb = None - def __del__(self): - if self.formatted_tb: + def __del__(self, is_finalizing=is_finalizing): + if not is_finalizing() and self.formatted_tb: app_log.error('Future exception was never retrieved: %s', ''.join(self.formatted_tb).rstrip()) @@ -234,7 +234,10 @@ class Future(object): if self._result is not None: return self._result if self._exc_info is not None: - raise_exc_info(self._exc_info) + try: + raise_exc_info(self._exc_info) + finally: + self = None self._check_done() return self._result @@ -329,8 +332,8 @@ class Future(object): # cycle are never destroyed. It's no longer the case on Python 3.4 thanks to # the PEP 442. 
if _GC_CYCLE_FINALIZERS: - def __del__(self): - if not self._log_traceback: + def __del__(self, is_finalizing=is_finalizing): + if is_finalizing() or not self._log_traceback: # set_exception() was not called, or result() or exception() # has consumed the exception return @@ -340,6 +343,7 @@ class Future(object): app_log.error('Future %r exception was never retrieved: %s', self, ''.join(tb).rstrip()) + TracebackFuture = Future if futures is None: @@ -364,6 +368,7 @@ class DummyExecutor(object): def shutdown(self, wait=True): pass + dummy_executor = DummyExecutor() diff --git a/lib/tornado/curl_httpclient.py b/lib/tornado/curl_httpclient.py index fa317b2da9491bb1dad3cbeb67340895d4e18bef..eef4a17a6b4a7e6e30c2db0ab5b1f44c2791ebea 100644 --- a/lib/tornado/curl_httpclient.py +++ b/lib/tornado/curl_httpclient.py @@ -16,7 +16,7 @@ """Non-blocking HTTP client implementation using pycurl.""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import collections import functools @@ -278,6 +278,9 @@ class CurlAsyncHTTPClient(AsyncHTTPClient): if curl_log.isEnabledFor(logging.DEBUG): curl.setopt(pycurl.VERBOSE, 1) curl.setopt(pycurl.DEBUGFUNCTION, self._curl_debug) + if hasattr(pycurl, 'PROTOCOLS'): # PROTOCOLS first appeared in pycurl 7.19.5 (2014-07-12) + curl.setopt(pycurl.PROTOCOLS, pycurl.PROTO_HTTP | pycurl.PROTO_HTTPS) + curl.setopt(pycurl.REDIR_PROTOCOLS, pycurl.PROTO_HTTP | pycurl.PROTO_HTTPS) return curl def _curl_setup_request(self, curl, request, buffer, headers): @@ -342,6 +345,15 @@ class CurlAsyncHTTPClient(AsyncHTTPClient): credentials = '%s:%s' % (request.proxy_username, request.proxy_password) curl.setopt(pycurl.PROXYUSERPWD, credentials) + + if (request.proxy_auth_mode is None or + request.proxy_auth_mode == "basic"): + curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_BASIC) + elif request.proxy_auth_mode == "digest": + curl.setopt(pycurl.PROXYAUTH, 
pycurl.HTTPAUTH_DIGEST) + else: + raise ValueError( + "Unsupported proxy_auth_mode %s" % request.proxy_auth_mode) else: curl.setopt(pycurl.PROXY, '') curl.unsetopt(pycurl.PROXYUSERPWD) diff --git a/lib/tornado/escape.py b/lib/tornado/escape.py index 7a3b0e03495b927bc667b44f2df3c393fa5b987f..2ca3fe3fe883249d6cacc457d5d0565b04804731 100644 --- a/lib/tornado/escape.py +++ b/lib/tornado/escape.py @@ -20,7 +20,7 @@ Also includes a few other miscellaneous string manipulation functions that have crept in over time. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import json import re @@ -199,6 +199,7 @@ def utf8(value): ) return value.encode("utf-8") + _TO_UNICODE_TYPES = (unicode_type, type(None)) @@ -216,6 +217,7 @@ def to_unicode(value): ) return value.decode("utf-8") + # to_unicode was previously named _unicode not because it was private, # but to avoid conflicts with the built-in unicode() function/type _unicode = to_unicode @@ -264,6 +266,7 @@ def recursive_unicode(obj): else: return obj + # I originally used the regex from # http://daringfireball.net/2010/07/improved_regex_for_matching_urls # but it gets all exponential on certain patterns (such as too many trailing @@ -391,4 +394,5 @@ def _build_unicode_map(): unicode_map[name] = unichr(value) return unicode_map + _HTML_UNICODE_MAP = _build_unicode_map() diff --git a/lib/tornado/gen.py b/lib/tornado/gen.py index b308ca7d0b55b9ba6f060293ee7a4eca45e705ce..99f9106666b59b6d5282c47cc63b8f43f405c3b0 100644 --- a/lib/tornado/gen.py +++ b/lib/tornado/gen.py @@ -74,7 +74,7 @@ See the `convert_yielded` function to extend this mechanism. via ``singledispatch``. 
""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import collections import functools @@ -83,6 +83,7 @@ import os import sys import textwrap import types +import weakref from tornado.concurrent import Future, TracebackFuture, is_future, chain_future from tornado.ioloop import IOLoop @@ -245,6 +246,26 @@ def coroutine(func, replace_callback=True): return _make_coroutine_wrapper(func, replace_callback=True) +# Ties lifetime of runners to their result futures. Github Issue #1769 +# Generators, like any object in Python, must be strong referenced +# in order to not be cleaned up by the garbage collector. When using +# coroutines, the Runner object is what strong-refs the inner +# generator. However, the only item that strong-reffed the Runner +# was the last Future that the inner generator yielded (via the +# Future's internal done_callback list). Usually this is enough, but +# it is also possible for this Future to not have any strong references +# other than other objects referenced by the Runner object (usually +# when using other callback patterns and/or weakrefs). In this +# situation, if a garbage collection ran, a cycle would be detected and +# Runner objects could be destroyed along with their inner generators +# and everything in their local scope. +# This map provides strong references to Runner objects as long as +# their result future objects also have strong references (typically +# from the parent coroutine's Runner). This keeps the coroutine's +# Runner alive. +_futures_to_runners = weakref.WeakKeyDictionary() + + def _make_coroutine_wrapper(func, replace_callback): """The inner workings of ``@gen.coroutine`` and ``@gen.engine``. @@ -254,10 +275,11 @@ def _make_coroutine_wrapper(func, replace_callback): """ # On Python 3.5, set the coroutine flag on our generator, to allow it # to be used with 'await'. 
+ wrapped = func if hasattr(types, 'coroutine'): func = types.coroutine(func) - @functools.wraps(func) + @functools.wraps(wrapped) def wrapper(*args, **kwargs): future = TracebackFuture() @@ -294,7 +316,8 @@ def _make_coroutine_wrapper(func, replace_callback): except Exception: future.set_exc_info(sys.exc_info()) else: - Runner(result, future, yielded) + _futures_to_runners[future] = Runner(result, future, yielded) + yielded = None try: return future finally: @@ -309,9 +332,21 @@ def _make_coroutine_wrapper(func, replace_callback): future = None future.set_result(result) return future + + wrapper.__wrapped__ = wrapped + wrapper.__tornado_coroutine__ = True return wrapper +def is_coroutine_function(func): + """Return whether *func* is a coroutine function, i.e. a function + wrapped with `~.gen.coroutine`. + + .. versionadded:: 4.5 + """ + return getattr(func, '__tornado_coroutine__', False) + + class Return(Exception): """Special exception to return a value from a `coroutine`. @@ -685,6 +720,7 @@ def multi(children, quiet_exceptions=()): else: return multi_future(children, quiet_exceptions=quiet_exceptions) + Multi = multi @@ -930,6 +966,9 @@ coroutines that are likely to yield Futures that are ready instantly. Usage: ``yield gen.moment`` .. versionadded:: 4.0 + +.. deprecated:: 4.5 + ``yield None`` is now equivalent to ``yield gen.moment``. """ moment.set_result(None) @@ -960,6 +999,7 @@ class Runner(object): # of the coroutine. self.stack_context_deactivate = None if self.handle_yield(first_yielded): + gen = result_future = first_yielded = None self.run() def register_callback(self, key): @@ -1016,10 +1056,15 @@ class Runner(object): except Exception: self.had_exception = True exc_info = sys.exc_info() + future = None if exc_info is not None: - yielded = self.gen.throw(*exc_info) - exc_info = None + try: + yielded = self.gen.throw(*exc_info) + finally: + # Break up a reference to itself + # for faster GC on CPython. 
+ exc_info = None else: yielded = self.gen.send(value) @@ -1052,6 +1097,7 @@ class Runner(object): return if not self.handle_yield(yielded): return + yielded = None finally: self.running = False @@ -1100,8 +1146,12 @@ class Runner(object): self.future.set_exc_info(sys.exc_info()) if not self.future.done() or self.future is moment: + def inner(f): + # Break a reference cycle to speed GC. + f = None # noqa + self.run() self.io_loop.add_future( - self.future, lambda f: self.run()) + self.future, inner) return False return True @@ -1123,6 +1173,7 @@ class Runner(object): self.stack_context_deactivate() self.stack_context_deactivate = None + Arguments = collections.namedtuple('Arguments', ['args', 'kwargs']) @@ -1142,6 +1193,7 @@ def _argument_adapter(callback): callback(None) return wrapper + # Convert Awaitables into Futures. It is unfortunately possible # to have infinite recursion here if those Awaitables assume that # we're using a different coroutine runner and yield objects @@ -1219,7 +1271,9 @@ def convert_yielded(yielded): .. versionadded:: 4.1 """ # Lists and dicts containing YieldPoints were handled earlier. - if isinstance(yielded, (list, dict)): + if yielded is None: + return moment + elif isinstance(yielded, (list, dict)): return multi(yielded) elif is_future(yielded): return yielded @@ -1228,6 +1282,7 @@ def convert_yielded(yielded): else: raise BadYieldError("yielded unknown object %r" % (yielded,)) + if singledispatch is not None: convert_yielded = singledispatch(convert_yielded) diff --git a/lib/tornado/http1connection.py b/lib/tornado/http1connection.py index d0e91b82766259b0c5c003dff523f8ffb3e90636..53744ece39481d153e06411f3aa7edf380e3cd5c 100644 --- a/lib/tornado/http1connection.py +++ b/lib/tornado/http1connection.py @@ -19,7 +19,7 @@ .. 
versionadded:: 4.0 """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import re @@ -257,6 +257,7 @@ class HTTP1Connection(httputil.HTTPConnection): if need_delegate_close: with _ExceptionLoggingContext(app_log): delegate.on_connection_close() + header_future = None self._clear_callbacks() raise gen.Return(True) @@ -489,7 +490,7 @@ class HTTP1Connection(httputil.HTTPConnection): elif ("Content-Length" in headers or headers.get("Transfer-Encoding", "").lower() == "chunked" or getattr(start_line, 'method', None) in ("HEAD", "GET")): - # start_line may be a request or reponse start line; only + # start_line may be a request or response start line; only # the former has a method attribute. return connection_header == "keep-alive" return False @@ -565,7 +566,7 @@ class HTTP1Connection(httputil.HTTPConnection): if content_length is not None: return self._read_fixed_body(content_length, delegate) - if headers.get("Transfer-Encoding") == "chunked": + if headers.get("Transfer-Encoding", "").lower() == "chunked": return self._read_chunked_body(delegate) if self.is_client: return self._read_body_until_close(delegate) diff --git a/lib/tornado/httpclient.py b/lib/tornado/httpclient.py index c62b6207cf6379c9423d83b705b92a73d675e504..8436ece46993755a167dcc0ac217940261543c1d 100644 --- a/lib/tornado/httpclient.py +++ b/lib/tornado/httpclient.py @@ -25,7 +25,7 @@ to switch to ``curl_httpclient`` for reasons such as the following: Note that if you are using ``curl_httpclient``, it is highly recommended that you use a recent version of ``libcurl`` and ``pycurl``. Currently the minimum supported version of libcurl is -7.21.1, and the minimum version of pycurl is 7.18.2. It is highly +7.22.0, and the minimum version of pycurl is 7.18.2. 
It is highly recommended that your ``libcurl`` installation is built with asynchronous DNS resolver (threaded or c-ares), otherwise you may encounter various problems with request timeouts (for more @@ -38,7 +38,7 @@ To select ``curl_httpclient``, call `AsyncHTTPClient.configure` at startup:: AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import functools import time @@ -61,7 +61,7 @@ class HTTPClient(object): http_client = httpclient.HTTPClient() try: response = http_client.fetch("http://www.google.com/") - print response.body + print(response.body) except httpclient.HTTPError as e: # HTTPError is raised for non-200 responses; the response # can be found in e.response. @@ -110,9 +110,9 @@ class AsyncHTTPClient(Configurable): def handle_response(response): if response.error: - print "Error:", response.error + print("Error: %s" % response.error) else: - print response.body + print(response.body) http_client = AsyncHTTPClient() http_client.fetch("http://www.google.com/", handle_response) @@ -310,10 +310,10 @@ class HTTPRequest(object): network_interface=None, streaming_callback=None, header_callback=None, prepare_curl_callback=None, proxy_host=None, proxy_port=None, proxy_username=None, - proxy_password=None, allow_nonstandard_methods=None, - validate_cert=None, ca_certs=None, - allow_ipv6=None, - client_key=None, client_cert=None, body_producer=None, + proxy_password=None, proxy_auth_mode=None, + allow_nonstandard_methods=None, validate_cert=None, + ca_certs=None, allow_ipv6=None, client_key=None, + client_cert=None, body_producer=None, expect_100_continue=False, decompress_response=None, ssl_options=None): r"""All parameters except ``url`` are optional. 
@@ -341,13 +341,15 @@ class HTTPRequest(object): Allowed values are implementation-defined; ``curl_httpclient`` supports "basic" and "digest"; ``simple_httpclient`` only supports "basic" - :arg float connect_timeout: Timeout for initial connection in seconds - :arg float request_timeout: Timeout for entire request in seconds + :arg float connect_timeout: Timeout for initial connection in seconds, + default 20 seconds + :arg float request_timeout: Timeout for entire request in seconds, + default 20 seconds :arg if_modified_since: Timestamp for ``If-Modified-Since`` header :type if_modified_since: `datetime` or `float` :arg bool follow_redirects: Should redirects be followed automatically - or return the 3xx response? - :arg int max_redirects: Limit for ``follow_redirects`` + or return the 3xx response? Default True. + :arg int max_redirects: Limit for ``follow_redirects``, default 5. :arg string user_agent: String to send as ``User-Agent`` header :arg bool decompress_response: Request a compressed response from the server and decompress it after downloading. Default is True. @@ -372,16 +374,18 @@ class HTTPRequest(object): a ``pycurl.Curl`` object to allow the application to make additional ``setopt`` calls. :arg string proxy_host: HTTP proxy hostname. To use proxies, - ``proxy_host`` and ``proxy_port`` must be set; ``proxy_username`` and - ``proxy_pass`` are optional. Proxies are currently only supported - with ``curl_httpclient``. + ``proxy_host`` and ``proxy_port`` must be set; ``proxy_username``, + ``proxy_pass`` and ``proxy_auth_mode`` are optional. Proxies are + currently only supported with ``curl_httpclient``. :arg int proxy_port: HTTP proxy port :arg string proxy_username: HTTP proxy username :arg string proxy_password: HTTP proxy password + :arg string proxy_auth_mode: HTTP proxy Authentication mode; + default is "basic". supports "basic" and "digest" :arg bool allow_nonstandard_methods: Allow unknown values for ``method`` - argument? + argument? 
Default is False. :arg bool validate_cert: For HTTPS requests, validate the server's - certificate? + certificate? Default is True. :arg string ca_certs: filename of CA certificates in PEM format, or None to use defaults. See note below when used with ``curl_httpclient``. @@ -419,6 +423,9 @@ class HTTPRequest(object): .. versionadded:: 4.2 The ``ssl_options`` argument. + + .. versionadded:: 4.5 + The ``proxy_auth_mode`` argument. """ # Note that some of these attributes go through property setters # defined below. @@ -430,6 +437,7 @@ class HTTPRequest(object): self.proxy_port = proxy_port self.proxy_username = proxy_username self.proxy_password = proxy_password + self.proxy_auth_mode = proxy_auth_mode self.url = url self.method = method self.body = body @@ -530,7 +538,7 @@ class HTTPResponse(object): * buffer: ``cStringIO`` object for response body - * body: response body as string (created on demand from ``self.buffer``) + * body: response body as bytes (created on demand from ``self.buffer``) * error: Exception object, if any @@ -665,5 +673,6 @@ def main(): print(native_str(response.body)) client.close() + if __name__ == "__main__": main() diff --git a/lib/tornado/httpserver.py b/lib/tornado/httpserver.py index ff235fe46b83d08d486d2d61b3c5eec62d5c6919..d757be188df1b10b3ef36e70c589ce110051adfb 100644 --- a/lib/tornado/httpserver.py +++ b/lib/tornado/httpserver.py @@ -26,7 +26,7 @@ class except to start a server at the beginning of the process to `tornado.httputil.HTTPServerRequest`. The old name remains as an alias. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import socket @@ -62,6 +62,13 @@ class HTTPServer(TCPServer, Configurable, if Tornado is run behind an SSL-decoding proxy that does not set one of the supported ``xheaders``. 
+ By default, when parsing the ``X-Forwarded-For`` header, Tornado will + select the last (i.e., the closest) address on the list of hosts as the + remote host IP address. To select the next server in the chain, a list of + trusted downstream hosts may be passed as the ``trusted_downstream`` + argument. These hosts will be skipped when parsing the ``X-Forwarded-For`` + header. + To make this server serve SSL traffic, send the ``ssl_options`` keyword argument with an `ssl.SSLContext` object. For compatibility with older versions of Python ``ssl_options`` may also be a dictionary of keyword @@ -124,6 +131,9 @@ class HTTPServer(TCPServer, Configurable, .. versionchanged:: 4.2 `HTTPServer` is now a subclass of `tornado.util.Configurable`. + + .. versionchanged:: 4.5 + Added the ``trusted_downstream`` argument. """ def __init__(self, *args, **kwargs): # Ignore args to __init__; real initialization belongs in @@ -138,7 +148,8 @@ class HTTPServer(TCPServer, Configurable, decompress_request=False, chunk_size=None, max_header_size=None, idle_connection_timeout=None, body_timeout=None, - max_body_size=None, max_buffer_size=None): + max_body_size=None, max_buffer_size=None, + trusted_downstream=None): self.request_callback = request_callback self.no_keep_alive = no_keep_alive self.xheaders = xheaders @@ -149,11 +160,13 @@ class HTTPServer(TCPServer, Configurable, max_header_size=max_header_size, header_timeout=idle_connection_timeout or 3600, max_body_size=max_body_size, - body_timeout=body_timeout) + body_timeout=body_timeout, + no_keep_alive=no_keep_alive) TCPServer.__init__(self, io_loop=io_loop, ssl_options=ssl_options, max_buffer_size=max_buffer_size, read_chunk_size=chunk_size) self._connections = set() + self.trusted_downstream = trusted_downstream @classmethod def configurable_base(cls): @@ -172,21 +185,55 @@ class HTTPServer(TCPServer, Configurable, def handle_stream(self, stream, address): context = _HTTPRequestContext(stream, address, - self.protocol) + 
self.protocol, + self.trusted_downstream) conn = HTTP1ServerConnection( stream, self.conn_params, context) self._connections.add(conn) conn.start_serving(self) def start_request(self, server_conn, request_conn): - return _ServerRequestAdapter(self, server_conn, request_conn) + if isinstance(self.request_callback, httputil.HTTPServerConnectionDelegate): + delegate = self.request_callback.start_request(server_conn, request_conn) + else: + delegate = _CallableAdapter(self.request_callback, request_conn) + + if self.xheaders: + delegate = _ProxyAdapter(delegate, request_conn) + + return delegate def on_close(self, server_conn): self._connections.remove(server_conn) +class _CallableAdapter(httputil.HTTPMessageDelegate): + def __init__(self, request_callback, request_conn): + self.connection = request_conn + self.request_callback = request_callback + self.request = None + self.delegate = None + self._chunks = [] + + def headers_received(self, start_line, headers): + self.request = httputil.HTTPServerRequest( + connection=self.connection, start_line=start_line, + headers=headers) + + def data_received(self, chunk): + self._chunks.append(chunk) + + def finish(self): + self.request.body = b''.join(self._chunks) + self.request._parse_body() + self.request_callback(self.request) + + def on_connection_close(self): + self._chunks = None + + class _HTTPRequestContext(object): - def __init__(self, stream, address, protocol): + def __init__(self, stream, address, protocol, trusted_downstream=None): self.address = address # Save the socket's address family now so we know how to # interpret self.address even after the stream is closed @@ -210,6 +257,7 @@ class _HTTPRequestContext(object): self.protocol = "http" self._orig_remote_ip = self.remote_ip self._orig_protocol = self.protocol + self.trusted_downstream = set(trusted_downstream or []) def __str__(self): if self.address_family in (socket.AF_INET, socket.AF_INET6): @@ -226,7 +274,10 @@ class _HTTPRequestContext(object): 
"""Rewrite the ``remote_ip`` and ``protocol`` fields.""" # Squid uses X-Forwarded-For, others use X-Real-Ip ip = headers.get("X-Forwarded-For", self.remote_ip) - ip = ip.split(',')[-1].strip() + # Skip trusted downstream hosts in X-Forwarded-For list + for ip in (cand.strip() for cand in reversed(ip.split(','))): + if ip not in self.trusted_downstream: + break ip = headers.get("X-Real-Ip", ip) if netutil.is_valid_ip(ip): self.remote_ip = ip @@ -247,58 +298,28 @@ class _HTTPRequestContext(object): self.protocol = self._orig_protocol -class _ServerRequestAdapter(httputil.HTTPMessageDelegate): - """Adapts the `HTTPMessageDelegate` interface to the interface expected - by our clients. - """ - def __init__(self, server, server_conn, request_conn): - self.server = server +class _ProxyAdapter(httputil.HTTPMessageDelegate): + def __init__(self, delegate, request_conn): self.connection = request_conn - self.request = None - if isinstance(server.request_callback, - httputil.HTTPServerConnectionDelegate): - self.delegate = server.request_callback.start_request( - server_conn, request_conn) - self._chunks = None - else: - self.delegate = None - self._chunks = [] + self.delegate = delegate def headers_received(self, start_line, headers): - if self.server.xheaders: - self.connection.context._apply_xheaders(headers) - if self.delegate is None: - self.request = httputil.HTTPServerRequest( - connection=self.connection, start_line=start_line, - headers=headers) - else: - return self.delegate.headers_received(start_line, headers) + self.connection.context._apply_xheaders(headers) + return self.delegate.headers_received(start_line, headers) def data_received(self, chunk): - if self.delegate is None: - self._chunks.append(chunk) - else: - return self.delegate.data_received(chunk) + return self.delegate.data_received(chunk) def finish(self): - if self.delegate is None: - self.request.body = b''.join(self._chunks) - self.request._parse_body() - self.server.request_callback(self.request) 
- else: - self.delegate.finish() + self.delegate.finish() self._cleanup() def on_connection_close(self): - if self.delegate is None: - self._chunks = None - else: - self.delegate.on_connection_close() + self.delegate.on_connection_close() self._cleanup() def _cleanup(self): - if self.server.xheaders: - self.connection.context._unapply_xheaders() + self.connection.context._unapply_xheaders() HTTPRequest = httputil.HTTPServerRequest diff --git a/lib/tornado/httputil.py b/lib/tornado/httputil.py index 21842caa22b59ac811012d67906f0a2377210060..818ea914cb4cd73f4ae89afc97bd09af89de924f 100644 --- a/lib/tornado/httputil.py +++ b/lib/tornado/httputil.py @@ -20,7 +20,7 @@ This module also defines the `HTTPServerRequest` class which is exposed via `tornado.web.RequestHandler.request`. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import calendar import collections @@ -38,11 +38,12 @@ from tornado.util import ObjectDict, PY3 if PY3: import http.cookies as Cookie from http.client import responses - from urllib.parse import urlencode + from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl else: import Cookie from httplib import responses from urllib import urlencode + from urlparse import urlparse, urlunparse, parse_qsl # responses is unused in this file, but we re-export it to other files. 
@@ -98,6 +99,7 @@ class _NormalizedHeaderCache(dict): del self[old_key] return normalized + _normalized_headers = _NormalizedHeaderCache(1000) @@ -337,7 +339,7 @@ class HTTPServerRequest(object): """ def __init__(self, method=None, uri=None, version="HTTP/1.0", headers=None, body=None, host=None, files=None, connection=None, - start_line=None): + start_line=None, server_connection=None): if start_line is not None: method, uri, version = start_line self.method = method @@ -352,8 +354,10 @@ class HTTPServerRequest(object): self.protocol = getattr(context, 'protocol', "http") self.host = host or self.headers.get("Host") or "127.0.0.1" + self.host_name = split_host_and_port(self.host.lower())[0] self.files = files or {} self.connection = connection + self.server_connection = server_connection self._start_time = time.time() self._finish_time = None @@ -599,11 +603,28 @@ def url_concat(url, args): >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")]) 'http://example.com/foo?a=b&c=d&c=d2' """ - if not args: + if args is None: return url - if url[-1] not in ('?', '&'): - url += '&' if ('?' in url) else '?' - return url + urlencode(args) + parsed_url = urlparse(url) + if isinstance(args, dict): + parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True) + parsed_query.extend(args.items()) + elif isinstance(args, list) or isinstance(args, tuple): + parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True) + parsed_query.extend(args) + else: + err = "'args' parameter should be dict, list or tuple. 
Not {0}".format( + type(args)) + raise TypeError(err) + final_query = urlencode(parsed_query) + url = urlunparse(( + parsed_url[0], + parsed_url[1], + parsed_url[2], + parsed_url[3], + final_query, + parsed_url[5])) + return url class HTTPFile(ObjectDict): @@ -918,10 +939,12 @@ def split_host_and_port(netloc): port = None return (host, port) + _OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") _QuotePatt = re.compile(r"[\\].") _nulljoin = ''.join + def _unquote_cookie(str): """Handle double quotes and escaping in cookie values. @@ -963,11 +986,11 @@ def _unquote_cookie(str): k = q_match.start(0) if q_match and (not o_match or k < j): # QuotePatt matched res.append(str[i:k]) - res.append(str[k+1]) + res.append(str[k + 1]) i = k + 2 else: # OctalPatt matched res.append(str[i:j]) - res.append(chr(int(str[j+1:j+4], 8))) + res.append(chr(int(str[j + 1:j + 4], 8))) i = j + 4 return _nulljoin(res) diff --git a/lib/tornado/ioloop.py b/lib/tornado/ioloop.py index cadb41161a7767156994821352741e9f0390be27..ad35787fcaa205ce396d06981e16db4ad8cc9d07 100644 --- a/lib/tornado/ioloop.py +++ b/lib/tornado/ioloop.py @@ -26,8 +26,9 @@ In addition to I/O events, the `IOLoop` can also schedule time-based events. `IOLoop.add_timeout` is a non-blocking alternative to `time.sleep`. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function +import collections import datetime import errno import functools @@ -616,10 +617,14 @@ class IOLoop(Configurable): # result, which should just be ignored. 
pass else: - self.add_future(ret, lambda f: f.result()) + self.add_future(ret, self._discard_future_result) except Exception: self.handle_callback_exception(callback) + def _discard_future_result(self, future): + """Avoid unhandled-exception warnings from spawned coroutines.""" + future.result() + def handle_callback_exception(self, callback): """This method is called whenever a callback run by the `IOLoop` throws an exception. @@ -689,8 +694,7 @@ class PollIOLoop(IOLoop): self.time_func = time_func or time.time self._handlers = {} self._events = {} - self._callbacks = [] - self._callback_lock = threading.Lock() + self._callbacks = collections.deque() self._timeouts = [] self._cancellations = 0 self._running = False @@ -708,11 +712,10 @@ class PollIOLoop(IOLoop): self.READ) def close(self, all_fds=False): - with self._callback_lock: - self._closing = True + self._closing = True self.remove_handler(self._waker.fileno()) if all_fds: - for fd, handler in self._handlers.values(): + for fd, handler in list(self._handlers.values()): self.close_fd(fd) self._waker.close() self._impl.close() @@ -796,9 +799,7 @@ class PollIOLoop(IOLoop): while True: # Prevent IO event starvation by delaying new callbacks # to the next iteration of the event loop. - with self._callback_lock: - callbacks = self._callbacks - self._callbacks = [] + ncallbacks = len(self._callbacks) # Add any timeouts that have come due to the callback list. # Do not run anything until we have determined which ones @@ -827,14 +828,14 @@ class PollIOLoop(IOLoop): if x.callback is not None] heapq.heapify(self._timeouts) - for callback in callbacks: - self._run_callback(callback) + for i in range(ncallbacks): + self._run_callback(self._callbacks.popleft()) for timeout in due_timeouts: if timeout.callback is not None: self._run_callback(timeout.callback) # Closures may be holding on to a lot of memory, so allow # them to be freed before we go into our poll wait. 
- callbacks = callback = due_timeouts = timeout = None + due_timeouts = timeout = None if self._callbacks: # If any callbacks or timeouts called add_callback, @@ -930,36 +931,20 @@ class PollIOLoop(IOLoop): self._cancellations += 1 def add_callback(self, callback, *args, **kwargs): + if self._closing: + return + # Blindly insert into self._callbacks. This is safe even + # from signal handlers because deque.append is atomic. + self._callbacks.append(functools.partial( + stack_context.wrap(callback), *args, **kwargs)) if thread.get_ident() != self._thread_ident: - # If we're not on the IOLoop's thread, we need to synchronize - # with other threads, or waking logic will induce a race. - with self._callback_lock: - if self._closing: - return - list_empty = not self._callbacks - self._callbacks.append(functools.partial( - stack_context.wrap(callback), *args, **kwargs)) - if list_empty: - # If we're not in the IOLoop's thread, and we added the - # first callback to an empty list, we may need to wake it - # up (it may wake up on its own, but an occasional extra - # wake is harmless). Waking up a polling IOLoop is - # relatively expensive, so we try to avoid it when we can. - self._waker.wake() + # This will write one byte but Waker.consume() reads many + # at once, so it's ok to write even when not strictly + # necessary. + self._waker.wake() else: - if self._closing: - return - # If we're on the IOLoop's thread, we don't need the lock, - # since we don't need to wake anyone, just add the - # callback. Blindly insert into self._callbacks. This is - # safe even from signal handlers because the GIL makes - # list.append atomic. One subtlety is that if the signal - # is interrupting another thread holding the - # _callback_lock block in IOLoop.start, we may modify - # either the old or new version of self._callbacks, but - # either way will work. 
- self._callbacks.append(functools.partial( - stack_context.wrap(callback), *args, **kwargs)) + # If we're on the IOLoop's thread, we don't need to wake anyone. + pass def add_callback_from_signal(self, callback, *args, **kwargs): with stack_context.NullContext(): diff --git a/lib/tornado/iostream.py b/lib/tornado/iostream.py index bcf444148c0ff2b639571ccb8796d569d8c7bf53..a1619c497415aa863246fd6dee364ee52f4fc973 100644 --- a/lib/tornado/iostream.py +++ b/lib/tornado/iostream.py @@ -24,7 +24,7 @@ Contents: * `PipeIOStream`: Pipe-based IOStream implementation. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import collections import errno @@ -82,6 +82,8 @@ _ERRNO_INPROGRESS = (errno.EINPROGRESS,) if hasattr(errno, "WSAEINPROGRESS"): _ERRNO_INPROGRESS += (errno.WSAEINPROGRESS,) # type: ignore +_WINDOWS = sys.platform.startswith('win') + class StreamClosedError(IOError): """Exception raised by `IOStream` methods when the stream is closed. 
@@ -158,11 +160,16 @@ class BaseIOStream(object): self.max_buffer_size // 2) self.max_write_buffer_size = max_write_buffer_size self.error = None - self._read_buffer = collections.deque() - self._write_buffer = collections.deque() + self._read_buffer = bytearray() + self._read_buffer_pos = 0 self._read_buffer_size = 0 + self._write_buffer = bytearray() + self._write_buffer_pos = 0 self._write_buffer_size = 0 self._write_buffer_frozen = False + self._total_write_index = 0 + self._total_write_done_index = 0 + self._pending_writes_while_frozen = [] self._read_delimiter = None self._read_regex = None self._read_max_bytes = None @@ -173,7 +180,7 @@ class BaseIOStream(object): self._read_future = None self._streaming_callback = None self._write_callback = None - self._write_future = None + self._write_futures = collections.deque() self._close_callback = None self._connect_callback = None self._connect_future = None @@ -367,36 +374,37 @@ class BaseIOStream(object): If no ``callback`` is given, this method returns a `.Future` that resolves (with a result of ``None``) when the write has been - completed. If `write` is called again before that `.Future` has - resolved, the previous future will be orphaned and will never resolve. + completed. + + The ``data`` argument may be of type `bytes` or `memoryview`. .. versionchanged:: 4.0 Now returns a `.Future` if no callback is given. + + .. versionchanged:: 4.5 + Added support for `memoryview` arguments. """ - assert isinstance(data, bytes) self._check_closed() - # We use bool(_write_buffer) as a proxy for write_buffer_size>0, - # so never put empty strings in the buffer. 
if data: if (self.max_write_buffer_size is not None and self._write_buffer_size + len(data) > self.max_write_buffer_size): raise StreamBufferFullError("Reached maximum write buffer size") - # Break up large contiguous strings before inserting them in the - # write buffer, so we don't have to recopy the entire thing - # as we slice off pieces to send to the socket. - WRITE_BUFFER_CHUNK_SIZE = 128 * 1024 - for i in range(0, len(data), WRITE_BUFFER_CHUNK_SIZE): - self._write_buffer.append(data[i:i + WRITE_BUFFER_CHUNK_SIZE]) - self._write_buffer_size += len(data) + if self._write_buffer_frozen: + self._pending_writes_while_frozen.append(data) + else: + self._write_buffer += data + self._write_buffer_size += len(data) + self._total_write_index += len(data) if callback is not None: self._write_callback = stack_context.wrap(callback) future = None else: - future = self._write_future = TracebackFuture() + future = TracebackFuture() future.add_done_callback(lambda f: f.exception()) + self._write_futures.append((self._total_write_index, future)) if not self._connecting: self._handle_write() - if self._write_buffer: + if self._write_buffer_size: self._add_io_state(self.io_loop.WRITE) self._maybe_add_error_listener() return future @@ -445,9 +453,8 @@ class BaseIOStream(object): if self._read_future is not None: futures.append(self._read_future) self._read_future = None - if self._write_future is not None: - futures.append(self._write_future) - self._write_future = None + futures += [future for _, future in self._write_futures] + self._write_futures.clear() if self._connect_future is not None: futures.append(self._connect_future) self._connect_future = None @@ -466,6 +473,7 @@ class BaseIOStream(object): # if the IOStream object is kept alive by a reference cycle. # TODO: Clear the read buffer too; it currently breaks some tests. 
self._write_buffer = None + self._write_buffer_size = 0 def reading(self): """Returns true if we are currently reading from the stream.""" @@ -473,7 +481,7 @@ class BaseIOStream(object): def writing(self): """Returns true if we are currently writing to the stream.""" - return bool(self._write_buffer) + return self._write_buffer_size > 0 def closed(self): """Returns true if the stream has been closed.""" @@ -743,7 +751,7 @@ class BaseIOStream(object): break if chunk is None: return 0 - self._read_buffer.append(chunk) + self._read_buffer += chunk self._read_buffer_size += len(chunk) if self._read_buffer_size > self.max_buffer_size: gen_log.error("Reached maximum read buffer size") @@ -791,30 +799,25 @@ class BaseIOStream(object): # since large merges are relatively expensive and get undone in # _consume(). if self._read_buffer: - while True: - loc = self._read_buffer[0].find(self._read_delimiter) - if loc != -1: - delimiter_len = len(self._read_delimiter) - self._check_max_bytes(self._read_delimiter, - loc + delimiter_len) - return loc + delimiter_len - if len(self._read_buffer) == 1: - break - _double_prefix(self._read_buffer) + loc = self._read_buffer.find(self._read_delimiter, + self._read_buffer_pos) + if loc != -1: + loc -= self._read_buffer_pos + delimiter_len = len(self._read_delimiter) + self._check_max_bytes(self._read_delimiter, + loc + delimiter_len) + return loc + delimiter_len self._check_max_bytes(self._read_delimiter, - len(self._read_buffer[0])) + self._read_buffer_size) elif self._read_regex is not None: if self._read_buffer: - while True: - m = self._read_regex.search(self._read_buffer[0]) - if m is not None: - self._check_max_bytes(self._read_regex, m.end()) - return m.end() - if len(self._read_buffer) == 1: - break - _double_prefix(self._read_buffer) - self._check_max_bytes(self._read_regex, - len(self._read_buffer[0])) + m = self._read_regex.search(self._read_buffer, + self._read_buffer_pos) + if m is not None: + loc = m.end() - 
self._read_buffer_pos + self._check_max_bytes(self._read_regex, loc) + return loc + self._check_max_bytes(self._read_regex, self._read_buffer_size) return None def _check_max_bytes(self, delimiter, size): @@ -824,35 +827,56 @@ class BaseIOStream(object): "delimiter %r not found within %d bytes" % ( delimiter, self._read_max_bytes)) + def _freeze_write_buffer(self, size): + self._write_buffer_frozen = size + + def _unfreeze_write_buffer(self): + self._write_buffer_frozen = False + self._write_buffer += b''.join(self._pending_writes_while_frozen) + self._write_buffer_size += sum(map(len, self._pending_writes_while_frozen)) + self._pending_writes_while_frozen[:] = [] + + def _got_empty_write(self, size): + """ + Called when a non-blocking write() failed writing anything. + Can be overridden in subclasses. + """ + def _handle_write(self): - while self._write_buffer: + while self._write_buffer_size: + assert self._write_buffer_size >= 0 try: - if not self._write_buffer_frozen: + start = self._write_buffer_pos + if self._write_buffer_frozen: + size = self._write_buffer_frozen + elif _WINDOWS: # On windows, socket.send blows up if given a # write buffer that's too large, instead of just # returning the number of bytes it was able to # process. Therefore we must not call socket.send # with more than 128KB at a time. - _merge_prefix(self._write_buffer, 128 * 1024) - num_bytes = self.write_to_fd(self._write_buffer[0]) + size = 128 * 1024 + else: + size = self._write_buffer_size + num_bytes = self.write_to_fd( + memoryview(self._write_buffer)[start:start + size]) if num_bytes == 0: - # With OpenSSL, if we couldn't write the entire buffer, - # the very same string object must be used on the - # next call to send. Therefore we suppress - # merging the write buffer after an incomplete send. 
- # A cleaner solution would be to set - # SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER, but this is - # not yet accessible from python - # (http://bugs.python.org/issue8240) - self._write_buffer_frozen = True + self._got_empty_write(size) break - self._write_buffer_frozen = False - _merge_prefix(self._write_buffer, num_bytes) - self._write_buffer.popleft() + self._write_buffer_pos += num_bytes self._write_buffer_size -= num_bytes + # Amortized O(1) shrink + # (this heuristic is implemented natively in Python 3.4+ + # but is replicated here for Python 2) + if self._write_buffer_pos > self._write_buffer_size: + del self._write_buffer[:self._write_buffer_pos] + self._write_buffer_pos = 0 + if self._write_buffer_frozen: + self._unfreeze_write_buffer() + self._total_write_done_index += num_bytes except (socket.error, IOError, OSError) as e: if e.args[0] in _ERRNO_WOULDBLOCK: - self._write_buffer_frozen = True + self._got_empty_write(size) break else: if not self._is_connreset(e): @@ -863,22 +887,38 @@ class BaseIOStream(object): self.fileno(), e) self.close(exc_info=True) return - if not self._write_buffer: + + while self._write_futures: + index, future = self._write_futures[0] + if index > self._total_write_done_index: + break + self._write_futures.popleft() + future.set_result(None) + + if not self._write_buffer_size: if self._write_callback: callback = self._write_callback self._write_callback = None self._run_callback(callback) - if self._write_future: - future = self._write_future - self._write_future = None - future.set_result(None) def _consume(self, loc): + # Consume loc bytes from the read buffer and return them if loc == 0: return b"" - _merge_prefix(self._read_buffer, loc) + assert loc <= self._read_buffer_size + # Slice the bytearray buffer into bytes, without intermediate copying + b = (memoryview(self._read_buffer) + [self._read_buffer_pos:self._read_buffer_pos + loc] + ).tobytes() + self._read_buffer_pos += loc self._read_buffer_size -= loc - return 
self._read_buffer.popleft() + # Amortized O(1) shrink + # (this heuristic is implemented natively in Python 3.4+ + # but is replicated here for Python 2) + if self._read_buffer_pos > self._read_buffer_size: + del self._read_buffer[:self._read_buffer_pos] + self._read_buffer_pos = 0 + return b def _check_closed(self): if self.closed(): @@ -1124,7 +1164,7 @@ class IOStream(BaseIOStream): suitably-configured `ssl.SSLContext` to disable. """ if (self._read_callback or self._read_future or - self._write_callback or self._write_future or + self._write_callback or self._write_futures or self._connect_callback or self._connect_future or self._pending_callbacks or self._closed or self._read_buffer or self._write_buffer): @@ -1251,6 +1291,17 @@ class SSLIOStream(IOStream): def writing(self): return self._handshake_writing or super(SSLIOStream, self).writing() + def _got_empty_write(self, size): + # With OpenSSL, if we couldn't write the entire buffer, + # the very same string object must be used on the + # next call to send. Therefore we suppress + # merging the write buffer after an incomplete send. + # A cleaner solution would be to set + # SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER, but this is + # not yet accessible from python + # (http://bugs.python.org/issue8240) + self._freeze_write_buffer(size) + def _do_ssl_handshake(self): # Based on code from test_ssl.py in the python stdlib try: @@ -1498,53 +1549,6 @@ class PipeIOStream(BaseIOStream): return chunk -def _double_prefix(deque): - """Grow by doubling, but don't split the second chunk just because the - first one is small. - """ - new_len = max(len(deque[0]) * 2, - (len(deque[0]) + len(deque[1]))) - _merge_prefix(deque, new_len) - - -def _merge_prefix(deque, size): - """Replace the first entries in a deque of strings with a single - string of up to size bytes. 
- - >>> d = collections.deque(['abc', 'de', 'fghi', 'j']) - >>> _merge_prefix(d, 5); print(d) - deque(['abcde', 'fghi', 'j']) - - Strings will be split as necessary to reach the desired size. - >>> _merge_prefix(d, 7); print(d) - deque(['abcdefg', 'hi', 'j']) - - >>> _merge_prefix(d, 3); print(d) - deque(['abc', 'defg', 'hi', 'j']) - - >>> _merge_prefix(d, 100); print(d) - deque(['abcdefghij']) - """ - if len(deque) == 1 and len(deque[0]) <= size: - return - prefix = [] - remaining = size - while deque and remaining > 0: - chunk = deque.popleft() - if len(chunk) > remaining: - deque.appendleft(chunk[remaining:]) - chunk = chunk[:remaining] - prefix.append(chunk) - remaining -= len(chunk) - # This data structure normally just contains byte strings, but - # the unittest gets messy if it doesn't use the default str() type, - # so do the merge based on the type of data that's actually present. - if prefix: - deque.appendleft(type(prefix[0])().join(prefix)) - if not deque: - deque.appendleft(b"") - - def doctests(): import doctest return doctest.DocTestSuite() diff --git a/lib/tornado/locale.py b/lib/tornado/locale.py index 4f80fd366382923c410cb71d946e645d25bebf33..7dba10d616c3a3440b1b14b77290536b99a41fa7 100644 --- a/lib/tornado/locale.py +++ b/lib/tornado/locale.py @@ -19,7 +19,7 @@ To load a locale and generate a translated string:: user_locale = tornado.locale.get("es_LA") - print user_locale.translate("Sign out") + print(user_locale.translate("Sign out")) `tornado.locale.get()` returns the closest matching locale, not necessarily the specific locale you requested. You can support pluralization with @@ -28,7 +28,7 @@ additional arguments to `~Locale.translate()`, e.g.:: people = [...] 
message = user_locale.translate( "%(list)s is online", "%(list)s are online", len(people)) - print message % {"list": user_locale.list(people)} + print(message % {"list": user_locale.list(people)}) The first string is chosen if ``len(people) == 1``, otherwise the second string is chosen. @@ -39,7 +39,7 @@ supported by `gettext` and related tools). If neither method is called, the `Locale.translate` method will simply return the original string. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import codecs import csv @@ -187,7 +187,7 @@ def load_gettext_translations(directory, domain): {directory}/{lang}/LC_MESSAGES/{domain}.mo - Three steps are required to have you app translated: + Three steps are required to have your app translated: 1. Generate POT translation file:: diff --git a/lib/tornado/locks.py b/lib/tornado/locks.py index d84a9a870d7485a2b0c6c56a48fa9f27ec90130b..4f9ecf6dfd626fa41e5d12b938344906ab9e43df 100644 --- a/lib/tornado/locks.py +++ b/lib/tornado/locks.py @@ -12,7 +12,7 @@ # License for the specific language governing permissions and limitations # under the License. -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import collections diff --git a/lib/tornado/log.py b/lib/tornado/log.py index ac1bb95ea781e421f4f1c94246686c2a059f06bb..654afc021e6e818b4d6900dfc2b23ba7c43f4de3 100644 --- a/lib/tornado/log.py +++ b/lib/tornado/log.py @@ -28,7 +28,7 @@ These streams may be configured independently using the standard library's `logging` module. For example, you may wish to send ``tornado.access`` logs to a separate file for analysis. 
""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import logging import logging.handlers @@ -37,6 +37,11 @@ import sys from tornado.escape import _unicode from tornado.util import unicode_type, basestring_type +try: + import colorama +except ImportError: + colorama = None + try: import curses # type: ignore except ImportError: @@ -49,15 +54,21 @@ gen_log = logging.getLogger("tornado.general") def _stderr_supports_color(): - color = False - if curses and hasattr(sys.stderr, 'isatty') and sys.stderr.isatty(): - try: - curses.setupterm() - if curses.tigetnum("colors") > 0: - color = True - except Exception: - pass - return color + try: + if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty(): + if curses: + curses.setupterm() + if curses.tigetnum("colors") > 0: + return True + elif colorama: + if sys.stderr is getattr(colorama.initialise, 'wrapped_stderr', + object()): + return True + except Exception: + # Very broad exception handling because it's always better to + # fall back to non-colored logs than to break at startup. + pass + return False def _safe_unicode(s): @@ -79,6 +90,17 @@ class LogFormatter(logging.Formatter): This formatter is enabled automatically by `tornado.options.parse_command_line` or `tornado.options.parse_config_file` (unless ``--logging=none`` is used). + + Color support on Windows versions that do not support ANSI color codes is + enabled by use of the colorama__ library. Applications that wish to use + this must first initialize colorama with a call to ``colorama.init``. + See the colorama documentation for details. + + __ https://pypi.python.org/pypi/colorama + + .. versionchanged:: 4.5 + Added support for ``colorama``. Changed the constructor + signature to be compatible with `logging.config.dictConfig`. 
""" DEFAULT_FORMAT = '%(color)s[%(levelname)1.1s %(asctime)s %(module)s:%(lineno)d]%(end_color)s %(message)s' DEFAULT_DATE_FORMAT = '%y%m%d %H:%M:%S' @@ -89,8 +111,8 @@ class LogFormatter(logging.Formatter): logging.ERROR: 1, # Red } - def __init__(self, color=True, fmt=DEFAULT_FORMAT, - datefmt=DEFAULT_DATE_FORMAT, colors=DEFAULT_COLORS): + def __init__(self, fmt=DEFAULT_FORMAT, datefmt=DEFAULT_DATE_FORMAT, + style='%', color=True, colors=DEFAULT_COLORS): r""" :arg bool color: Enables color support. :arg string fmt: Log message format. @@ -111,21 +133,28 @@ class LogFormatter(logging.Formatter): self._colors = {} if color and _stderr_supports_color(): - # The curses module has some str/bytes confusion in - # python3. Until version 3.2.3, most methods return - # bytes, but only accept strings. In addition, we want to - # output these strings with the logging module, which - # works with unicode strings. The explicit calls to - # unicode() below are harmless in python2 but will do the - # right conversion in python 3. - fg_color = (curses.tigetstr("setaf") or - curses.tigetstr("setf") or "") - if (3, 0) < sys.version_info < (3, 2, 3): - fg_color = unicode_type(fg_color, "ascii") - - for levelno, code in colors.items(): - self._colors[levelno] = unicode_type(curses.tparm(fg_color, code), "ascii") - self._normal = unicode_type(curses.tigetstr("sgr0"), "ascii") + if curses is not None: + # The curses module has some str/bytes confusion in + # python3. Until version 3.2.3, most methods return + # bytes, but only accept strings. In addition, we want to + # output these strings with the logging module, which + # works with unicode strings. The explicit calls to + # unicode() below are harmless in python2 but will do the + # right conversion in python 3. 
+ fg_color = (curses.tigetstr("setaf") or + curses.tigetstr("setf") or "") + if (3, 0) < sys.version_info < (3, 2, 3): + fg_color = unicode_type(fg_color, "ascii") + + for levelno, code in colors.items(): + self._colors[levelno] = unicode_type(curses.tparm(fg_color, code), "ascii") + self._normal = unicode_type(curses.tigetstr("sgr0"), "ascii") + else: + # If curses is not present (currently we'll only get here for + # colorama on windows), assume hard-coded ANSI color codes. + for levelno, code in colors.items(): + self._colors[levelno] = '\033[2;3%dm' % code + self._normal = '\033[0m' else: self._normal = '' diff --git a/lib/tornado/netutil.py b/lib/tornado/netutil.py index 7bf9321393019a77e0c992ee504633e0c66eba0e..c34c8c8bb5bdba6c6348be5c7418584b554fdf6d 100644 --- a/lib/tornado/netutil.py +++ b/lib/tornado/netutil.py @@ -16,7 +16,7 @@ """Miscellaneous network utility code.""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import errno import os @@ -96,6 +96,9 @@ else: # thread now. u'foo'.encode('idna') +# For undiagnosed reasons, 'latin1' codec may also need to be preloaded. +u'foo'.encode('latin1') + # These errnos indicate that a non-blocking operation must be retried # at a later time. On most platforms they're the same value, but on # some they differ. @@ -197,6 +200,7 @@ def bind_sockets(port, address=None, family=socket.AF_UNSPEC, sockets.append(sock) return sockets + if hasattr(socket, 'AF_UNIX'): def bind_unix_socket(file, mode=0o600, backlog=_DEFAULT_BACKLOG): """Creates a listening unix socket. diff --git a/lib/tornado/options.py b/lib/tornado/options.py index 2fbb32ad02692458b6d267ea5266eed5ad7d31f0..0a72cc65edbd10b3c339c0c5a2844f35ab3cbe26 100644 --- a/lib/tornado/options.py +++ b/lib/tornado/options.py @@ -82,7 +82,7 @@ instances to define isolated sets of options, such as for subcommands. underscores. 
""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import datetime import numbers diff --git a/lib/tornado/platform/asyncio.py b/lib/tornado/platform/asyncio.py index 9556da618d84c0990984700884af7e03058d0711..830ee1f3b1b68d97fc1871798f59740155e18403 100644 --- a/lib/tornado/platform/asyncio.py +++ b/lib/tornado/platform/asyncio.py @@ -19,7 +19,7 @@ loops. Windows. Use the `~asyncio.SelectorEventLoop` instead. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import functools import tornado.concurrent @@ -30,7 +30,7 @@ from tornado import stack_context try: # Import the real asyncio module for py33+ first. Older versions of the # trollius backport also use this name. - import asyncio # type: ignore + import asyncio # type: ignore except ImportError as e: # Asyncio itself isn't available; see if trollius is (backport to py26+). 
try: @@ -217,5 +217,6 @@ def to_asyncio_future(tornado_future): tornado.concurrent.chain_future(tornado_future, af) return af + if hasattr(convert_yielded, 'register'): convert_yielded.register(asyncio.Future, to_tornado_future) # type: ignore diff --git a/lib/tornado/platform/auto.py b/lib/tornado/platform/auto.py index 449b634b9bd53e030da07c97c1407c82dfcb3d91..1f4d7001939cc8b227a5c8c5cbf877eaac622d24 100644 --- a/lib/tornado/platform/auto.py +++ b/lib/tornado/platform/auto.py @@ -23,7 +23,7 @@ Most code that needs access to this functionality should do e.g.:: from tornado.platform.auto import set_close_exec """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import os diff --git a/lib/tornado/platform/caresresolver.py b/lib/tornado/platform/caresresolver.py index 4205de30c22afad08018caf785df3e25e7f227a0..fd6e9d2748c314dbe1494b44d6615f5262c0948f 100644 --- a/lib/tornado/platform/caresresolver.py +++ b/lib/tornado/platform/caresresolver.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import pycares # type: ignore import socket diff --git a/lib/tornado/platform/common.py b/lib/tornado/platform/common.py index b409a903f39c3dbaa77a6c6717e9185ce35bc8e8..a73f8db7fbac40834662126b874aa1fa79093d2e 100644 --- a/lib/tornado/platform/common.py +++ b/lib/tornado/platform/common.py @@ -1,10 +1,27 @@ """Lowest-common-denominator implementations of platform functionality.""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import errno import socket +import time from tornado.platform import interface +from tornado.util import errno_from_exception + + +def try_close(f): + # Avoid issue #875 (race condition when using the file in another + # thread). 
+ for i in range(10): + try: + f.close() + except IOError: + # Yield to another thread + time.sleep(1e-3) + else: + break + # Try a last time and let raise + f.close() class Waker(interface.Waker): @@ -45,7 +62,7 @@ class Waker(interface.Waker): break # success except socket.error as detail: if (not hasattr(errno, 'WSAEADDRINUSE') or - detail[0] != errno.WSAEADDRINUSE): + errno_from_exception(detail) != errno.WSAEADDRINUSE): # "Address already in use" is the only error # I've seen on two WinXP Pro SP2 boxes, under # Pythons 2.3.5 and 2.4.1. @@ -75,7 +92,7 @@ class Waker(interface.Waker): def wake(self): try: self.writer.send(b"x") - except (IOError, socket.error): + except (IOError, socket.error, ValueError): pass def consume(self): @@ -89,4 +106,4 @@ class Waker(interface.Waker): def close(self): self.reader.close() - self.writer.close() + try_close(self.writer) diff --git a/lib/tornado/platform/epoll.py b/lib/tornado/platform/epoll.py index b08cc62810f0b993583734d392f8296200f49200..80bfd8af4c69a51425f8c34d1c24f0893c30796a 100644 --- a/lib/tornado/platform/epoll.py +++ b/lib/tornado/platform/epoll.py @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. """EPoll-based IOLoop implementation for Linux systems.""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import select diff --git a/lib/tornado/platform/interface.py b/lib/tornado/platform/interface.py index cc062391175fee2e0346a868eecf3b8b0c248066..c0ef2905c3d30ab604e8e3f62153d1f3d035edb2 100644 --- a/lib/tornado/platform/interface.py +++ b/lib/tornado/platform/interface.py @@ -21,7 +21,7 @@ for other tornado.platform modules. Most code should import the appropriate implementation from `tornado.platform.auto`. 
""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function def set_close_exec(fd): @@ -62,5 +62,6 @@ class Waker(object): """Closes the waker's file descriptor(s).""" raise NotImplementedError() + def monotonic_time(): raise NotImplementedError() diff --git a/lib/tornado/platform/kqueue.py b/lib/tornado/platform/kqueue.py index f8f3e4a6113ee0901df45b15ae67a16b3cd2a7e2..3a5d417429d3b958860ae0c1d6616804b7b0efe0 100644 --- a/lib/tornado/platform/kqueue.py +++ b/lib/tornado/platform/kqueue.py @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. """KQueue-based IOLoop implementation for BSD/Mac systems.""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import select diff --git a/lib/tornado/platform/posix.py b/lib/tornado/platform/posix.py index 41a5794c63af29e397abbab4e5a2588007b2c3d5..9bf1f18868ca70f8016cc4f6667d71aa961d5bc2 100644 --- a/lib/tornado/platform/posix.py +++ b/lib/tornado/platform/posix.py @@ -16,12 +16,12 @@ """Posix implementations of platform-specific functionality.""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import fcntl import os -from tornado.platform import interface +from tornado.platform import common, interface def set_close_exec(fd): @@ -53,7 +53,7 @@ class Waker(interface.Waker): def wake(self): try: self.writer.write(b"x") - except IOError: + except (IOError, ValueError): pass def consume(self): @@ -67,4 +67,4 @@ class Waker(interface.Waker): def close(self): self.reader.close() - self.writer.close() + common.try_close(self.writer) diff --git a/lib/tornado/platform/select.py b/lib/tornado/platform/select.py index 
db52ef91063bf813f8ed39f5fe2ef4d803429e72..a18049f7cdab0a56bbc327ce67c52bf13bd637fe 100644 --- a/lib/tornado/platform/select.py +++ b/lib/tornado/platform/select.py @@ -17,7 +17,7 @@ Used as a fallback for systems that don't support epoll or kqueue. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import select diff --git a/lib/tornado/platform/twisted.py b/lib/tornado/platform/twisted.py index 92157c7c045cfd2127fb761eb43ee973c7992373..0f9787e84d7033a07eb138ed82adf9e1de86ad84 100644 --- a/lib/tornado/platform/twisted.py +++ b/lib/tornado/platform/twisted.py @@ -21,7 +21,7 @@ depending on which library's underlying event loop you want to use. This module has been tested with Twisted versions 11.0.0 and newer. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import datetime import functools @@ -574,6 +574,7 @@ class TwistedResolver(Resolver): ] raise gen.Return(result) + if hasattr(gen.convert_yielded, 'register'): @gen.convert_yielded.register(Deferred) # type: ignore def _(d): diff --git a/lib/tornado/platform/windows.py b/lib/tornado/platform/windows.py index 9a319f27715e2fea22d756e302f5f55f5490e23f..e94a0cf13dab716e1a9168e7a0c5e7f4dac4e9ec 100644 --- a/lib/tornado/platform/windows.py +++ b/lib/tornado/platform/windows.py @@ -2,7 +2,7 @@ # for production use. -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import ctypes # type: ignore import ctypes.wintypes # type: ignore diff --git a/lib/tornado/process.py b/lib/tornado/process.py index ffa26559504afcb99ff350046520e6bc61d1eae9..fae94f3c13cf89951b61622c3b12fd976c5e05cf 100644 --- a/lib/tornado/process.py +++ b/lib/tornado/process.py @@ -18,7 +18,7 @@ the server into multiple processes and managing subprocesses. 
""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import errno import os @@ -67,7 +67,7 @@ def cpu_count(): pass try: return os.sysconf("SC_NPROCESSORS_CONF") - except (ValueError, AttributeError): + except (AttributeError, ValueError): pass gen_log.error("Could not detect number of processors; assuming 1") return 1 @@ -355,6 +355,10 @@ class Subprocess(object): else: assert os.WIFEXITED(status) self.returncode = os.WEXITSTATUS(status) + # We've taken over wait() duty from the subprocess.Popen + # object. If we don't inform it of the process's return code, + # it will log a warning at destruction in python 3.6+. + self.proc.returncode = self.returncode if self._exit_callback: callback = self._exit_callback self._exit_callback = None diff --git a/lib/tornado/queues.py b/lib/tornado/queues.py index b8e9b56939c82be6e66970bad5334e46dda4f741..0041a8008646d5d4fb22decd59011a1ed2f354de 100644 --- a/lib/tornado/queues.py +++ b/lib/tornado/queues.py @@ -12,7 +12,17 @@ # License for the specific language governing permissions and limitations # under the License. -from __future__ import absolute_import, division, print_function, with_statement +"""Asynchronous queues for coroutines. + +.. warning:: + + Unlike the standard library's `queue` module, the classes defined here + are *not* thread-safe. To use these queues from another thread, + use `.IOLoop.add_callback` to transfer control to the `.IOLoop` thread + before calling any queue methods. 
+""" + +from __future__ import absolute_import, division, print_function import collections import heapq diff --git a/lib/tornado/routes.py b/lib/tornado/routes.py index a7042aa0e596f3ce9f22934881013da6e89d2250..aa799d49c52b968451cbed00a56679ceaae04759 100644 --- a/lib/tornado/routes.py +++ b/lib/tornado/routes.py @@ -1,9 +1,9 @@ -import inspect -import os +""" Custom fast routes """ import tornado.web route_list = [] + class route(object): _routes = [] @@ -12,16 +12,17 @@ class route(object): self.name = name def __call__(self, _handler): - """gets called when we class decorate""" + """ Gets called when we decorate a class """ name = self.name and self.name or _handler.__name__ self._routes.append((self._uri, _handler, name)) return _handler @classmethod - def get_routes(self, webroot=''): - self._routes.reverse() - routes = [tornado.web.url(webroot + _uri, _handler, name=name) for _uri, _handler, name, in self._routes] + def get_routes(cls, webroot=''): + cls._routes.reverse() + routes = [tornado.web.url(webroot + _uri, _handler, name=name) for _uri, _handler, name, in cls._routes] return routes -def route_redirect(from_, to, name=None): - route._routes.append(tornado.web.url(from_, tornado.web.RedirectHandler, dict(url=to), name=name)) \ No newline at end of file + +def route_redirect(src, dst, name=None): + route._routes.append(tornado.web.url(src, tornado.web.RedirectHandler, dict(url=dst), name=name)) diff --git a/lib/tornado/routing.py b/lib/tornado/routing.py new file mode 100644 index 0000000000000000000000000000000000000000..6762dc05bcca4a867bea3eb8c03a9def0a6164a4 --- /dev/null +++ b/lib/tornado/routing.py @@ -0,0 +1,625 @@ +# Copyright 2015 The Tornado Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Flexible routing implementation. + +Tornado routes HTTP requests to appropriate handlers using `Router` +class implementations. The `tornado.web.Application` class is a +`Router` implementation and may be used directly, or the classes in +this module may be used for additional flexibility. The `RuleRouter` +class can match on more criteria than `.Application`, or the `Router` +interface can be subclassed for maximum customization. + +`Router` interface extends `~.httputil.HTTPServerConnectionDelegate` +to provide additional routing capabilities. This also means that any +`Router` implementation can be used directly as a ``request_callback`` +for `~.httpserver.HTTPServer` constructor. + +`Router` subclass must implement a ``find_handler`` method to provide +a suitable `~.httputil.HTTPMessageDelegate` instance to handle the +request: + +.. 
code-block:: python + + class CustomRouter(Router): + def find_handler(self, request, **kwargs): + # some routing logic providing a suitable HTTPMessageDelegate instance + return MessageDelegate(request.connection) + + class MessageDelegate(HTTPMessageDelegate): + def __init__(self, connection): + self.connection = connection + + def finish(self): + self.connection.write_headers( + ResponseStartLine("HTTP/1.1", 200, "OK"), + HTTPHeaders({"Content-Length": "2"}), + b"OK") + self.connection.finish() + + router = CustomRouter() + server = HTTPServer(router) + +The main responsibility of `Router` implementation is to provide a +mapping from a request to `~.httputil.HTTPMessageDelegate` instance +that will handle this request. In the example above we can see that +routing is possible even without instantiating an `~.web.Application`. + +For routing to `~.web.RequestHandler` implementations we need an +`~.web.Application` instance. `~.web.Application.get_handler_delegate` +provides a convenient way to create `~.httputil.HTTPMessageDelegate` +for a given request and `~.web.RequestHandler`. + +Here is a simple example of how we can we route to +`~.web.RequestHandler` subclasses by HTTP method: + +.. 
code-block:: python + + resources = {} + + class GetResource(RequestHandler): + def get(self, path): + if path not in resources: + raise HTTPError(404) + + self.finish(resources[path]) + + class PostResource(RequestHandler): + def post(self, path): + resources[path] = self.request.body + + class HTTPMethodRouter(Router): + def __init__(self, app): + self.app = app + + def find_handler(self, request, **kwargs): + handler = GetResource if request.method == "GET" else PostResource + return self.app.get_handler_delegate(request, handler, path_args=[request.path]) + + router = HTTPMethodRouter(Application()) + server = HTTPServer(router) + +`ReversibleRouter` interface adds the ability to distinguish between +the routes and reverse them to the original urls using route's name +and additional arguments. `~.web.Application` is itself an +implementation of `ReversibleRouter` class. + +`RuleRouter` and `ReversibleRuleRouter` are implementations of +`Router` and `ReversibleRouter` interfaces and can be used for +creating rule-based routing configurations. + +Rules are instances of `Rule` class. They contain a `Matcher`, which +provides the logic for determining whether the rule is a match for a +particular request and a target, which can be one of the following. + +1) An instance of `~.httputil.HTTPServerConnectionDelegate`: + +.. code-block:: python + + router = RuleRouter([ + Rule(PathMatches("/handler"), ConnectionDelegate()), + # ... more rules + ]) + + class ConnectionDelegate(HTTPServerConnectionDelegate): + def start_request(self, server_conn, request_conn): + return MessageDelegate(request_conn) + +2) A callable accepting a single argument of `~.httputil.HTTPServerRequest` type: + +.. code-block:: python + + router = RuleRouter([ + Rule(PathMatches("/callable"), request_callable) + ]) + + def request_callable(request): + request.write(b"HTTP/1.1 200 OK\\r\\nContent-Length: 2\\r\\n\\r\\nOK") + request.finish() + +3) Another `Router` instance: + +.. 
code-block:: python + + router = RuleRouter([ + Rule(PathMatches("/router.*"), CustomRouter()) + ]) + +Of course a nested `RuleRouter` or a `~.web.Application` is allowed: + +.. code-block:: python + + router = RuleRouter([ + Rule(HostMatches("example.com"), RuleRouter([ + Rule(PathMatches("/app1/.*"), Application([(r"/app1/handler", Handler)]))), + ])) + ]) + + server = HTTPServer(router) + +In the example below `RuleRouter` is used to route between applications: + +.. code-block:: python + + app1 = Application([ + (r"/app1/handler", Handler1), + # other handlers ... + ]) + + app2 = Application([ + (r"/app2/handler", Handler2), + # other handlers ... + ]) + + router = RuleRouter([ + Rule(PathMatches("/app1.*"), app1), + Rule(PathMatches("/app2.*"), app2) + ]) + + server = HTTPServer(router) + +For more information on application-level routing see docs for `~.web.Application`. + +.. versionadded:: 4.5 + +""" + +from __future__ import absolute_import, division, print_function + +import re +from functools import partial + +from tornado import httputil +from tornado.httpserver import _CallableAdapter +from tornado.escape import url_escape, url_unescape, utf8 +from tornado.log import app_log +from tornado.util import basestring_type, import_object, re_unescape, unicode_type + +try: + import typing # noqa +except ImportError: + pass + + +class Router(httputil.HTTPServerConnectionDelegate): + """Abstract router interface.""" + + def find_handler(self, request, **kwargs): + # type: (httputil.HTTPServerRequest, typing.Any)->httputil.HTTPMessageDelegate + """Must be implemented to return an appropriate instance of `~.httputil.HTTPMessageDelegate` + that can serve the request. + Routing implementations may pass additional kwargs to extend the routing logic. + + :arg httputil.HTTPServerRequest request: current HTTP request. + :arg kwargs: additional keyword arguments passed by routing implementation. 
+ :returns: an instance of `~.httputil.HTTPMessageDelegate` that will be used to + process the request. + """ + raise NotImplementedError() + + def start_request(self, server_conn, request_conn): + return _RoutingDelegate(self, server_conn, request_conn) + + +class ReversibleRouter(Router): + """Abstract router interface for routers that can handle named routes + and support reversing them to original urls. + """ + + def reverse_url(self, name, *args): + """Returns url string for a given route name and arguments + or ``None`` if no match is found. + + :arg str name: route name. + :arg args: url parameters. + :returns: parametrized url string for a given route name (or ``None``). + """ + raise NotImplementedError() + + +class _RoutingDelegate(httputil.HTTPMessageDelegate): + def __init__(self, router, server_conn, request_conn): + self.server_conn = server_conn + self.request_conn = request_conn + self.delegate = None + self.router = router # type: Router + + def headers_received(self, start_line, headers): + request = httputil.HTTPServerRequest( + connection=self.request_conn, + server_connection=self.server_conn, + start_line=start_line, headers=headers) + + self.delegate = self.router.find_handler(request) + return self.delegate.headers_received(start_line, headers) + + def data_received(self, chunk): + return self.delegate.data_received(chunk) + + def finish(self): + self.delegate.finish() + + def on_connection_close(self): + self.delegate.on_connection_close() + + +class RuleRouter(Router): + """Rule-based router implementation.""" + + def __init__(self, rules=None): + """Constructs a router from an ordered list of rules:: + + RuleRouter([ + Rule(PathMatches("/handler"), Target), + # ... 
more rules + ]) + + You can also omit explicit `Rule` constructor and use tuples of arguments:: + + RuleRouter([ + (PathMatches("/handler"), Target), + ]) + + `PathMatches` is a default matcher, so the example above can be simplified:: + + RuleRouter([ + ("/handler", Target), + ]) + + In the examples above, ``Target`` can be a nested `Router` instance, an instance of + `~.httputil.HTTPServerConnectionDelegate` or an old-style callable, accepting a request argument. + + :arg rules: a list of `Rule` instances or tuples of `Rule` + constructor arguments. + """ + self.rules = [] # type: typing.List[Rule] + if rules: + self.add_rules(rules) + + def add_rules(self, rules): + """Appends new rules to the router. + + :arg rules: a list of Rule instances (or tuples of arguments, which are + passed to Rule constructor). + """ + for rule in rules: + if isinstance(rule, (tuple, list)): + assert len(rule) in (2, 3, 4) + if isinstance(rule[0], basestring_type): + rule = Rule(PathMatches(rule[0]), *rule[1:]) + else: + rule = Rule(*rule) + + self.rules.append(self.process_rule(rule)) + + def process_rule(self, rule): + """Override this method for additional preprocessing of each rule. + + :arg Rule rule: a rule to be processed. + :returns: the same or modified Rule instance. + """ + return rule + + def find_handler(self, request, **kwargs): + for rule in self.rules: + target_params = rule.matcher.match(request) + if target_params is not None: + if rule.target_kwargs: + target_params['target_kwargs'] = rule.target_kwargs + + delegate = self.get_target_delegate( + rule.target, request, **target_params) + + if delegate is not None: + return delegate + + return None + + def get_target_delegate(self, target, request, **target_params): + """Returns an instance of `~.httputil.HTTPMessageDelegate` for a + Rule's target. This method is called by `~.find_handler` and can be + extended to provide additional target types. + + :arg target: a Rule's target. 
+ :arg httputil.HTTPServerRequest request: current request. + :arg target_params: additional parameters that can be useful + for `~.httputil.HTTPMessageDelegate` creation. + """ + if isinstance(target, Router): + return target.find_handler(request, **target_params) + + elif isinstance(target, httputil.HTTPServerConnectionDelegate): + return target.start_request(request.server_connection, request.connection) + + elif callable(target): + return _CallableAdapter( + partial(target, **target_params), request.connection + ) + + return None + + +class ReversibleRuleRouter(ReversibleRouter, RuleRouter): + """A rule-based router that implements ``reverse_url`` method. + + Each rule added to this router may have a ``name`` attribute that can be + used to reconstruct an original uri. The actual reconstruction takes place + in a rule's matcher (see `Matcher.reverse`). + """ + + def __init__(self, rules=None): + self.named_rules = {} # type: typing.Dict[str] + super(ReversibleRuleRouter, self).__init__(rules) + + def process_rule(self, rule): + rule = super(ReversibleRuleRouter, self).process_rule(rule) + + if rule.name: + if rule.name in self.named_rules: + app_log.warning( + "Multiple handlers named %s; replacing previous value", + rule.name) + self.named_rules[rule.name] = rule + + return rule + + def reverse_url(self, name, *args): + if name in self.named_rules: + return self.named_rules[name].matcher.reverse(*args) + + for rule in self.rules: + if isinstance(rule.target, ReversibleRouter): + reversed_url = rule.target.reverse_url(name, *args) + if reversed_url is not None: + return reversed_url + + return None + + +class Rule(object): + """A routing rule.""" + + def __init__(self, matcher, target, target_kwargs=None, name=None): + """Constructs a Rule instance. + + :arg Matcher matcher: a `Matcher` instance used for determining + whether the rule should be considered a match for a specific + request. 
+ :arg target: a Rule's target (typically a ``RequestHandler`` or + `~.httputil.HTTPServerConnectionDelegate` subclass or even a nested `Router`, + depending on routing implementation). + :arg dict target_kwargs: a dict of parameters that can be useful + at the moment of target instantiation (for example, ``status_code`` + for a ``RequestHandler`` subclass). They end up in + ``target_params['target_kwargs']`` of `RuleRouter.get_target_delegate` + method. + :arg str name: the name of the rule that can be used to find it + in `ReversibleRouter.reverse_url` implementation. + """ + if isinstance(target, str): + # import the Module and instantiate the class + # Must be a fully qualified name (module.ClassName) + target = import_object(target) + + self.matcher = matcher # type: Matcher + self.target = target + self.target_kwargs = target_kwargs if target_kwargs else {} + self.name = name + + def reverse(self, *args): + return self.matcher.reverse(*args) + + def __repr__(self): + return '%s(%r, %s, kwargs=%r, name=%r)' % \ + (self.__class__.__name__, self.matcher, + self.target, self.target_kwargs, self.name) + + +class Matcher(object): + """Represents a matcher for request features.""" + + def match(self, request): + """Matches current instance against the request. + + :arg httputil.HTTPServerRequest request: current HTTP request + :returns: a dict of parameters to be passed to the target handler + (for example, ``handler_kwargs``, ``path_args``, ``path_kwargs`` + can be passed for proper `~.web.RequestHandler` instantiation). + An empty dict is a valid (and common) return value to indicate a match + when the argument-passing features are not used. 
+ ``None`` must be returned to indicate that there is no match.""" + raise NotImplementedError() + + def reverse(self, *args): + """Reconstructs full url from matcher instance and additional arguments.""" + return None + + +class AnyMatches(Matcher): + """Matches any request.""" + + def match(self, request): + return {} + + +class HostMatches(Matcher): + """Matches requests from hosts specified by ``host_pattern`` regex.""" + + def __init__(self, host_pattern): + if isinstance(host_pattern, basestring_type): + if not host_pattern.endswith("$"): + host_pattern += "$" + self.host_pattern = re.compile(host_pattern) + else: + self.host_pattern = host_pattern + + def match(self, request): + if self.host_pattern.match(request.host_name): + return {} + + return None + + +class DefaultHostMatches(Matcher): + """Matches requests from host that is equal to application's default_host. + Always returns no match if ``X-Real-Ip`` header is present. + """ + + def __init__(self, application, host_pattern): + self.application = application + self.host_pattern = host_pattern + + def match(self, request): + # Look for default host if not behind load balancer (for debugging) + if "X-Real-Ip" not in request.headers: + if self.host_pattern.match(self.application.default_host): + return {} + return None + + +class PathMatches(Matcher): + """Matches requests with paths specified by ``path_pattern`` regex.""" + + def __init__(self, path_pattern): + if isinstance(path_pattern, basestring_type): + if not path_pattern.endswith('$'): + path_pattern += '$' + self.regex = re.compile(path_pattern) + else: + self.regex = path_pattern + + assert len(self.regex.groupindex) in (0, self.regex.groups), \ + ("groups in url regexes must either be all named or all " + "positional: %r" % self.regex.pattern) + + self._path, self._group_count = self._find_groups() + + def match(self, request): + match = self.regex.match(request.path) + if match is None: + return None + if not self.regex.groups: + return {} + 
+ path_args, path_kwargs = [], {} + + # Pass matched groups to the handler. Since + # match.groups() includes both named and + # unnamed groups, we want to use either groups + # or groupdict but not both. + if self.regex.groupindex: + path_kwargs = dict( + (str(k), _unquote_or_none(v)) + for (k, v) in match.groupdict().items()) + else: + path_args = [_unquote_or_none(s) for s in match.groups()] + + return dict(path_args=path_args, path_kwargs=path_kwargs) + + def reverse(self, *args): + if self._path is None: + raise ValueError("Cannot reverse url regex " + self.regex.pattern) + assert len(args) == self._group_count, "required number of arguments " \ + "not found" + if not len(args): + return self._path + converted_args = [] + for a in args: + if not isinstance(a, (unicode_type, bytes)): + a = str(a) + converted_args.append(url_escape(utf8(a), plus=False)) + return self._path % tuple(converted_args) + + def _find_groups(self): + """Returns a tuple (reverse string, group count) for a url. + + For example: Given the url pattern /([0-9]{4})/([a-z-]+)/, this method + would return ('/%s/%s/', 2). + """ + pattern = self.regex.pattern + if pattern.startswith('^'): + pattern = pattern[1:] + if pattern.endswith('$'): + pattern = pattern[:-1] + + if self.regex.groups != pattern.count('('): + # The pattern is too complicated for our simplistic matching, + # so we can't support reversing it. + return None, None + + pieces = [] + for fragment in pattern.split('('): + if ')' in fragment: + paren_loc = fragment.index(')') + if paren_loc >= 0: + pieces.append('%s' + fragment[paren_loc + 1:]) + else: + try: + unescaped_fragment = re_unescape(fragment) + except ValueError as exc: + # If we can't unescape part of it, we can't + # reverse this url. + return (None, None) + pieces.append(unescaped_fragment) + + return ''.join(pieces), self.regex.groups + + +class URLSpec(Rule): + """Specifies mappings between URLs and handlers. + + .. 
versionchanged: 4.5 + `URLSpec` is now a subclass of a `Rule` with `PathMatches` matcher and is preserved for + backwards compatibility. + """ + def __init__(self, pattern, handler, kwargs=None, name=None): + """Parameters: + + * ``pattern``: Regular expression to be matched. Any capturing + groups in the regex will be passed in to the handler's + get/post/etc methods as arguments (by keyword if named, by + position if unnamed. Named and unnamed capturing groups may + may not be mixed in the same rule). + + * ``handler``: `~.web.RequestHandler` subclass to be invoked. + + * ``kwargs`` (optional): A dictionary of additional arguments + to be passed to the handler's constructor. + + * ``name`` (optional): A name for this handler. Used by + `~.web.Application.reverse_url`. + + """ + super(URLSpec, self).__init__(PathMatches(pattern), handler, kwargs, name) + + self.regex = self.matcher.regex + self.handler_class = self.target + self.kwargs = kwargs + + def __repr__(self): + return '%s(%r, %s, kwargs=%r, name=%r)' % \ + (self.__class__.__name__, self.regex.pattern, + self.handler_class, self.kwargs, self.name) + + +def _unquote_or_none(s): + """None-safe wrapper around url_unescape to handle unmatched optional + groups correctly. + + Note that args are passed as bytes so the handler can decide what + encoding to use. 
+ """ + if s is None: + return s + return url_unescape(s, encoding=None, plus=False) diff --git a/lib/tornado/simple_httpclient.py b/lib/tornado/simple_httpclient.py index 82f8686446614e2f0e062617829ea323b0b89a3d..8fb70707f97be0b95880df21ec2052a13584ffe5 100644 --- a/lib/tornado/simple_httpclient.py +++ b/lib/tornado/simple_httpclient.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado.escape import utf8, _unicode from tornado import gen @@ -330,7 +330,8 @@ class _HTTPConnection(httputil.HTTPMessageDelegate): raise KeyError("unknown method %s" % self.request.method) for key in ('network_interface', 'proxy_host', 'proxy_port', - 'proxy_username', 'proxy_password'): + 'proxy_username', 'proxy_password', + 'proxy_auth_mode'): if getattr(self.request, key, None): raise NotImplementedError('%s not supported' % key) if "Connection" not in self.request.headers: @@ -498,7 +499,7 @@ class _HTTPConnection(httputil.HTTPMessageDelegate): def _should_follow_redirect(self): return (self.request.follow_redirects and self.request.max_redirects > 0 and - self.code in (301, 302, 303, 307)) + self.code in (301, 302, 303, 307, 308)) def finish(self): data = b''.join(self.chunks) diff --git a/lib/tornado/speedups.c b/lib/tornado/speedups.c deleted file mode 100644 index c59bda00922ee9b59396b5a43185fea989726842..0000000000000000000000000000000000000000 --- a/lib/tornado/speedups.c +++ /dev/null @@ -1,52 +0,0 @@ -#define PY_SSIZE_T_CLEAN -#include <Python.h> - -static PyObject* websocket_mask(PyObject* self, PyObject* args) { - const char* mask; - Py_ssize_t mask_len; - const char* data; - Py_ssize_t data_len; - Py_ssize_t i; - PyObject* result; - char* buf; - - if (!PyArg_ParseTuple(args, "s#s#", &mask, &mask_len, &data, &data_len)) { - return NULL; - } - - result = PyBytes_FromStringAndSize(NULL, data_len); - if (!result) { - return NULL; 
- } - buf = PyBytes_AsString(result); - for (i = 0; i < data_len; i++) { - buf[i] = data[i] ^ mask[i % 4]; - } - - return result; -} - -static PyMethodDef methods[] = { - {"websocket_mask", websocket_mask, METH_VARARGS, ""}, - {NULL, NULL, 0, NULL} -}; - -#if PY_MAJOR_VERSION >= 3 -static struct PyModuleDef speedupsmodule = { - PyModuleDef_HEAD_INIT, - "speedups", - NULL, - -1, - methods -}; - -PyMODINIT_FUNC -PyInit_speedups(void) { - return PyModule_Create(&speedupsmodule); -} -#else // Python 2.x -PyMODINIT_FUNC -initspeedups(void) { - Py_InitModule("tornado.speedups", methods); -} -#endif diff --git a/lib/tornado/stack_context.py b/lib/tornado/stack_context.py index 2c0d9ee7d6450de14ca29ec4f8cb29807fcd6e99..61ae51f4eb10641d2bcb43afeacc11cd316e3321 100644 --- a/lib/tornado/stack_context.py +++ b/lib/tornado/stack_context.py @@ -67,7 +67,7 @@ Here are a few rules of thumb for when it's necessary: block that references your `StackContext`. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import sys import threading @@ -82,6 +82,8 @@ class StackContextInconsistentError(Exception): class _State(threading.local): def __init__(self): self.contexts = (tuple(), None) + + _state = _State() diff --git a/lib/tornado/tcpclient.py b/lib/tornado/tcpclient.py index f594d91b8857e69ddb772ec7e65ae5f18470ae36..33074bd58171eacc3c2b213b29cef42ab4dc2493 100644 --- a/lib/tornado/tcpclient.py +++ b/lib/tornado/tcpclient.py @@ -16,7 +16,7 @@ """A non-blocking TCP connection factory. 
""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import functools import socket @@ -155,16 +155,30 @@ class TCPClient(object): @gen.coroutine def connect(self, host, port, af=socket.AF_UNSPEC, ssl_options=None, - max_buffer_size=None): + max_buffer_size=None, source_ip=None, source_port=None): """Connect to the given host and port. Asynchronously returns an `.IOStream` (or `.SSLIOStream` if ``ssl_options`` is not None). + + Using the ``source_ip`` kwarg, one can specify the source + IP address to use when establishing the connection. + In case the user needs to resolve and + use a specific interface, it has to be handled outside + of Tornado as this depends very much on the platform. + + Similarly, when the user requires a certain source port, it can + be specified using the ``source_port`` arg. + + .. versionchanged:: 4.5 + Added the ``source_ip`` and ``source_port`` arguments. """ addrinfo = yield self.resolver.resolve(host, port, af) connector = _Connector( addrinfo, self.io_loop, - functools.partial(self._create_stream, max_buffer_size)) + functools.partial(self._create_stream, max_buffer_size, + source_ip=source_ip, source_port=source_port) + ) af, addr, stream = yield connector.start() # TODO: For better performance we could cache the (af, addr) # information here and re-use it on subsequent connections to @@ -174,10 +188,35 @@ class TCPClient(object): server_hostname=host) raise gen.Return(stream) - def _create_stream(self, max_buffer_size, af, addr): + def _create_stream(self, max_buffer_size, af, addr, source_ip=None, + source_port=None): # Always connect in plaintext; we'll convert to ssl if necessary # after one connection has completed. 
- stream = IOStream(socket.socket(af), - io_loop=self.io_loop, - max_buffer_size=max_buffer_size) - return stream.connect(addr) + source_port_bind = source_port if isinstance(source_port, int) else 0 + source_ip_bind = source_ip + if source_port_bind and not source_ip: + # User required a specific port, but did not specify + # a certain source IP, will bind to the default loopback. + source_ip_bind = '::1' if af == socket.AF_INET6 else '127.0.0.1' + # Trying to use the same address family as the requested af socket: + # - 127.0.0.1 for IPv4 + # - ::1 for IPv6 + socket_obj = socket.socket(af) + if source_port_bind or source_ip_bind: + # If the user requires binding also to a specific IP/port. + try: + socket_obj.bind((source_ip_bind, source_port_bind)) + except socket.error: + socket_obj.close() + # Fail loudly if unable to use the IP/port. + raise + try: + stream = IOStream(socket_obj, + io_loop=self.io_loop, + max_buffer_size=max_buffer_size) + except socket.error as e: + fu = Future() + fu.set_exception(e) + return fu + else: + return stream.connect(addr) diff --git a/lib/tornado/tcpserver.py b/lib/tornado/tcpserver.py index 0839d392374b1e0f0d8ae46d55f42c0c8ea368df..f47ec89a4289e81cebf4439342b3b711cad3ca25 100644 --- a/lib/tornado/tcpserver.py +++ b/lib/tornado/tcpserver.py @@ -15,12 +15,13 @@ # under the License. """A non-blocking, single-threaded TCP server.""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import errno import os import socket +from tornado import gen from tornado.log import app_log from tornado.ioloop import IOLoop from tornado.iostream import IOStream, SSLIOStream @@ -39,7 +40,21 @@ class TCPServer(object): r"""A non-blocking, single-threaded TCP server. To use `TCPServer`, define a subclass which overrides the `handle_stream` - method. + method. 
For example, a simple echo server could be defined like this:: + + from tornado.tcpserver import TCPServer + from tornado.iostream import StreamClosedError + from tornado import gen + + class EchoServer(TCPServer): + @gen.coroutine + def handle_stream(self, stream, address): + while True: + try: + data = yield stream.read_until(b"\n") + yield stream.write(data) + except StreamClosedError: + break To make this server serve SSL traffic, send the ``ssl_options`` keyword argument with an `ssl.SSLContext` object. For compatibility with older @@ -95,6 +110,7 @@ class TCPServer(object): self._sockets = {} # fd -> socket object self._pending_sockets = [] self._started = False + self._stopped = False self.max_buffer_size = max_buffer_size self.read_chunk_size = read_chunk_size @@ -213,7 +229,11 @@ class TCPServer(object): Requests currently in progress may still continue after the server is stopped. """ + if self._stopped: + return + self._stopped = True for fd, sock in self._sockets.items(): + assert sock.fileno() == fd self.io_loop.remove_handler(fd) sock.close() @@ -271,8 +291,10 @@ class TCPServer(object): stream = IOStream(connection, io_loop=self.io_loop, max_buffer_size=self.max_buffer_size, read_chunk_size=self.read_chunk_size) + future = self.handle_stream(stream, address) if future is not None: - self.io_loop.add_future(future, lambda f: f.result()) + self.io_loop.add_future(gen.convert_yielded(future), + lambda f: f.result()) except Exception: app_log.error("Error in connection callback", exc_info=True) diff --git a/lib/tornado/template.py b/lib/tornado/template.py index cbb296a3a8b388895687fd2d9ff6e5a8e2f11f0b..3b2fa3feef2d95f13055e48c193b81ffdf136abe 100644 --- a/lib/tornado/template.py +++ b/lib/tornado/template.py @@ -19,13 +19,13 @@ Basic usage looks like:: t = template.Template("<html>{{ myvalue }}</html>") - print t.generate(myvalue="XXX") + print(t.generate(myvalue="XXX")) `Loader` is a class that loads templates from a root directory and caches the 
compiled templates:: loader = template.Loader("/home/btaylor") - print loader.load("test.html").generate(myvalue="XXX") + print(loader.load("test.html").generate(myvalue="XXX")) We compile all templates to raw Python. Error-reporting is currently... uh, interesting. Syntax for the templates:: @@ -196,7 +196,7 @@ if you need to include a literal ``{{``, ``{%``, or ``{#`` in the output. `filter_whitespace` for available options. New in Tornado 4.3. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import datetime import linecache diff --git a/lib/tornado/test/__main__.py b/lib/tornado/test/__main__.py index 5953443b19b20719fc2b74d3798911795c3081c6..c78478cbd3f201a50c3d1cd28781f8044b47de82 100644 --- a/lib/tornado/test/__main__.py +++ b/lib/tornado/test/__main__.py @@ -2,7 +2,7 @@ This only works in python 2.7+. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado.test.runtests import all, main diff --git a/lib/tornado/test/asyncio_test.py b/lib/tornado/test/asyncio_test.py index b50b2048ee8e39bb7ae8eebb8f0751336b046d24..d0e3f2b020dfdea6bc9d5b5b1c36a7cf9ec8cc96 100644 --- a/lib/tornado/test/asyncio_test.py +++ b/lib/tornado/test/asyncio_test.py @@ -10,7 +10,7 @@ # License for the specific language governing permissions and limitations # under the License. -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado import gen from tornado.testing import AsyncTestCase, gen_test @@ -41,8 +41,14 @@ class AsyncIOLoopTest(AsyncTestCase): @gen_test def test_asyncio_future(self): # Test that we can yield an asyncio future from a tornado coroutine. - # Without 'yield from', we must wrap coroutines in asyncio.async. 
- x = yield asyncio.async( + # Without 'yield from', we must wrap coroutines in ensure_future, + # which was introduced during Python 3.4, deprecating the prior "async". + if hasattr(asyncio, 'ensure_future'): + ensure_future = asyncio.ensure_future + else: + ensure_future = asyncio.async + + x = yield ensure_future( asyncio.get_event_loop().run_in_executor(None, lambda: 42)) self.assertEqual(x, 42) diff --git a/lib/tornado/test/auth_test.py b/lib/tornado/test/auth_test.py index 92616fa30632e005a4f35d80dd3a61cd915530bd..400fc4f4582454d0b168422a27071ccbfe53a721 100644 --- a/lib/tornado/test/auth_test.py +++ b/lib/tornado/test/auth_test.py @@ -4,7 +4,7 @@ # python 3) -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado.auth import OpenIdMixin, OAuthMixin, OAuth2Mixin, TwitterMixin, AuthError, GoogleOAuth2Mixin, FacebookGraphMixin from tornado.concurrent import Future from tornado.escape import json_decode @@ -149,7 +149,7 @@ class FacebookClientLoginHandler(RequestHandler, FacebookGraphMixin): class FacebookServerAccessTokenHandler(RequestHandler): def get(self): - self.write('access_token=asdf') + self.write(dict(access_token="asdf", expires_in=3600)) class FacebookServerMeHandler(RequestHandler): @@ -401,6 +401,9 @@ class AuthTest(AsyncHTTPTestCase): self.assertTrue('/facebook/server/authorize?' 
in response.headers['Location']) response = self.fetch('/facebook/client/login?code=1234', follow_redirects=False) self.assertEqual(response.code, 200) + user = json_decode(response.body) + self.assertEqual(user['access_token'], 'asdf') + self.assertEqual(user['session_expires'], '3600') def base_twitter_redirect(self, url): # Same as test_oauth10a_redirect diff --git a/lib/tornado/test/concurrent_test.py b/lib/tornado/test/concurrent_test.py index 8ce095ec1b3118e2d49a99b168c844d9399d139e..4d89f572375e61f9806dafb94417a1d2384775d3 100644 --- a/lib/tornado/test/concurrent_test.py +++ b/lib/tornado/test/concurrent_test.py @@ -13,8 +13,9 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function +import gc import logging import re import socket @@ -25,9 +26,10 @@ from tornado.concurrent import Future, return_future, ReturnValueIgnoredError, r from tornado.escape import utf8, to_unicode from tornado import gen from tornado.iostream import IOStream +from tornado.log import app_log from tornado import stack_context from tornado.tcpserver import TCPServer -from tornado.testing import AsyncTestCase, LogTrapTestCase, bind_unused_port, gen_test +from tornado.testing import AsyncTestCase, ExpectLog, LogTrapTestCase, bind_unused_port, gen_test from tornado.test.util import unittest @@ -171,6 +173,24 @@ class ReturnFutureTest(AsyncTestCase): tb = traceback.extract_tb(sys.exc_info()[2]) self.assertIn(self.expected_frame, tb) + @gen_test + def test_uncaught_exception_log(self): + @gen.coroutine + def f(): + yield gen.moment + 1 / 0 + + g = f() + + with ExpectLog(app_log, + "(?s)Future.* exception was never retrieved:" + ".*ZeroDivisionError"): + yield gen.moment + yield gen.moment + del g + gc.collect() # for PyPy + + # The 
following series of classes demonstrate and test various styles # of use, with and without generators and futures. diff --git a/lib/tornado/test/curl_httpclient_test.py b/lib/tornado/test/curl_httpclient_test.py index b115454276234485b05025135c4e8ba043620bde..eb6f89d6673bc56659aa477499f28b539234f528 100644 --- a/lib/tornado/test/curl_httpclient_test.py +++ b/lib/tornado/test/curl_httpclient_test.py @@ -1,5 +1,5 @@ # coding: utf-8 -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from hashlib import md5 diff --git a/lib/tornado/test/escape_test.py b/lib/tornado/test/escape_test.py index b3562cd92872ed8281b956e0ac169083ddabf18a..5ae75d002ab142efdb07bfc07ccad7ea4d3368d5 100644 --- a/lib/tornado/test/escape_test.py +++ b/lib/tornado/test/escape_test.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import tornado.escape from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode, json_decode, json_encode, squeeze, recursive_unicode diff --git a/lib/tornado/test/gen_test.py b/lib/tornado/test/gen_test.py index 4c873f4b5a580b0d68bfdce4ec7e48148caf56e4..fea4c644978ad5f4fe841ff52c208f42e5b4f4e1 100644 --- a/lib/tornado/test/gen_test.py +++ b/lib/tornado/test/gen_test.py @@ -1,5 +1,6 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function +import gc import contextlib import datetime import functools @@ -276,6 +277,13 @@ class GenEngineTest(AsyncTestCase): pass self.orphaned_callback() + def test_none(self): + @gen.engine + def f(): + yield None + self.stop() + self.run_gen(f) + def test_multi(self): @gen.engine def f(): @@ -657,6 +665,28 @@ class GenCoroutineTest(AsyncTestCase): super(GenCoroutineTest, 
self).tearDown() assert self.finished + def test_attributes(self): + self.finished = True + + def f(): + yield gen.moment + + coro = gen.coroutine(f) + self.assertEqual(coro.__name__, f.__name__) + self.assertEqual(coro.__module__, f.__module__) + self.assertIs(coro.__wrapped__, f) + + def test_is_coroutine_function(self): + self.finished = True + + def f(): + yield gen.moment + + coro = gen.coroutine(f) + self.assertFalse(gen.is_coroutine_function(f)) + self.assertTrue(gen.is_coroutine_function(coro)) + self.assertFalse(gen.is_coroutine_function(coro())) + @gen_test def test_sync_gen_return(self): @gen.coroutine @@ -730,6 +760,21 @@ class GenCoroutineTest(AsyncTestCase): self.assertEqual(result, 42) self.finished = True + @skipBefore35 + @gen_test + def test_asyncio_sleep_zero(self): + # asyncio.sleep(0) turns into a special case (equivalent to + # `yield None`) + namespace = exec_test(globals(), locals(), """ + async def f(): + import asyncio + await asyncio.sleep(0) + return 42 + """) + result = yield namespace['f']() + self.assertEqual(result, 42) + self.finished = True + @skipBefore35 @gen_test def test_async_await_mixed_multi_native_future(self): @@ -970,6 +1015,31 @@ class GenCoroutineTest(AsyncTestCase): self.finished = True + @skipNotCPython + def test_coroutine_refcounting(self): + # On CPython, tasks and their arguments should be released immediately + # without waiting for garbage collection. 
+ @gen.coroutine + def inner(): + class Foo(object): + pass + local_var = Foo() + self.local_ref = weakref.ref(local_var) + yield gen.coroutine(lambda: None)() + raise ValueError('Some error') + + @gen.coroutine + def inner2(): + try: + yield inner() + except ValueError: + pass + + self.io_loop.run_sync(inner2, timeout=3) + + self.assertIs(self.local_ref(), None) + self.finished = True + class GenSequenceHandler(RequestHandler): @asynchronous @@ -1368,5 +1438,30 @@ class WaitIteratorTest(AsyncTestCase): gen.WaitIterator(gen.sleep(0)).next()) +class RunnerGCTest(AsyncTestCase): + """Github issue 1769: Runner objects can get GCed unexpectedly""" + @gen_test + def test_gc(self): + """Runners shouldn't GC if future is alive""" + # Create the weakref + weakref_scope = [None] + + def callback(): + gc.collect(2) + weakref_scope[0]().set_result(123) + + @gen.coroutine + def tester(): + fut = Future() + weakref_scope[0] = weakref.ref(fut) + self.io_loop.add_callback(callback) + yield fut + + yield gen.with_timeout( + datetime.timedelta(seconds=0.2), + tester() + ) + + if __name__ == '__main__': unittest.main() diff --git a/lib/tornado/test/http1connection_test.py b/lib/tornado/test/http1connection_test.py index 815051b91d3660b7d5ba9a62a4ce89e597fa8b92..8aaaaf35b7374e39cdb4c49c402f67d0d44d50d3 100644 --- a/lib/tornado/test/http1connection_test.py +++ b/lib/tornado/test/http1connection_test.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import socket diff --git a/lib/tornado/test/httpclient_test.py b/lib/tornado/test/httpclient_test.py index 8c9a99d9f5a3ea852ee8dafd27c45b153c291028..320454e41afb6991e5a569bde784649e5c3982c4 100644 --- a/lib/tornado/test/httpclient_test.py +++ b/lib/tornado/test/httpclient_test.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import 
absolute_import, division, print_function import base64 import binascii diff --git a/lib/tornado/test/httpserver_test.py b/lib/tornado/test/httpserver_test.py index 27052e696ba6f0fdac8a5c4b182a3738175fa049..11cb72313765905e20dc59e9e2bf80e4ce2a43e8 100644 --- a/lib/tornado/test/httpserver_test.py +++ b/lib/tornado/test/httpserver_test.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado import netutil from tornado.escape import json_decode, json_encode, utf8, _unicode, recursive_unicode, native_str from tornado import gen @@ -411,14 +411,14 @@ class HTTPServerRawTest(AsyncHTTPTestCase): self.stream.write(b'asdf\r\n\r\n') # TODO: need an async version of ExpectLog so we don't need # hard-coded timeouts here. - self.io_loop.add_timeout(datetime.timedelta(seconds=0.01), + self.io_loop.add_timeout(datetime.timedelta(seconds=0.05), self.stop) self.wait() def test_malformed_headers(self): with ExpectLog(gen_log, '.*Malformed HTTP headers'): self.stream.write(b'GET / HTTP/1.0\r\nasdf\r\n\r\n') - self.io_loop.add_timeout(datetime.timedelta(seconds=0.01), + self.io_loop.add_timeout(datetime.timedelta(seconds=0.05), self.stop) self.wait() @@ -436,6 +436,25 @@ foo= bar 0 +""".replace(b"\n", b"\r\n")) + read_stream_body(self.stream, self.stop) + headers, response = self.wait() + self.assertEqual(json_decode(response), {u'foo': [u'bar']}) + + def test_chunked_request_uppercase(self): + # As per RFC 2616 section 3.6, "Transfer-Encoding" header's value is + # case-insensitive. 
+ self.stream.write(b"""\ +POST /echo HTTP/1.1 +Transfer-Encoding: Chunked +Content-Type: application/x-www-form-urlencoded + +4 +foo= +3 +bar +0 + """.replace(b"\n", b"\r\n")) read_stream_body(self.stream, self.stop) headers, response = self.wait() @@ -461,7 +480,7 @@ class XHeaderTest(HandlerBaseTestCase): remote_protocol=self.request.protocol)) def get_httpserver_options(self): - return dict(xheaders=True) + return dict(xheaders=True, trusted_downstream=['5.5.5.5']) def test_ip_headers(self): self.assertEqual(self.fetch_json("/")["remote_ip"], "127.0.0.1") @@ -501,6 +520,13 @@ class XHeaderTest(HandlerBaseTestCase): self.fetch_json("/", headers=invalid_host)["remote_ip"], "127.0.0.1") + def test_trusted_downstream(self): + + valid_ipv4_list = {"X-Forwarded-For": "127.0.0.1, 4.4.4.4, 5.5.5.5"} + self.assertEqual( + self.fetch_json("/", headers=valid_ipv4_list)["remote_ip"], + "4.4.4.4") + def test_scheme_headers(self): self.assertEqual(self.fetch_json("/")["remote_protocol"], "http") diff --git a/lib/tornado/test/httputil_test.py b/lib/tornado/test/httputil_test.py index 3eb104d1ce9296e617c12c33afa06cf1e9d688b1..d1278567bbcd4c3c770620d8b6dfd20b4069b20f 100644 --- a/lib/tornado/test/httputil_test.py +++ b/lib/tornado/test/httputil_test.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado.httputil import url_concat, parse_multipart_form_data, HTTPHeaders, format_timestamp, HTTPServerRequest, parse_request_start_line, parse_cookie from tornado.escape import utf8, native_str from tornado.log import gen_log @@ -43,14 +43,14 @@ class TestUrlConcat(unittest.TestCase): "https://localhost/path?x", [('y', 'y'), ('z', 'z')], ) - self.assertEqual(url, "https://localhost/path?x&y=y&z=z") + self.assertEqual(url, "https://localhost/path?x=&y=y&z=z") def test_url_concat_trailing_amp(self): url = url_concat( 
"https://localhost/path?x&", [('y', 'y'), ('z', 'z')], ) - self.assertEqual(url, "https://localhost/path?x&y=y&z=z") + self.assertEqual(url, "https://localhost/path?x=&y=y&z=z") def test_url_concat_mult_params(self): url = url_concat( @@ -66,6 +66,41 @@ class TestUrlConcat(unittest.TestCase): ) self.assertEqual(url, "https://localhost/path?r=1&t=2") + def test_url_concat_none_params(self): + url = url_concat( + "https://localhost/path?r=1&t=2", + None, + ) + self.assertEqual(url, "https://localhost/path?r=1&t=2") + + def test_url_concat_with_frag(self): + url = url_concat( + "https://localhost/path#tab", + [('y', 'y')], + ) + self.assertEqual(url, "https://localhost/path?y=y#tab") + + def test_url_concat_multi_same_params(self): + url = url_concat( + "https://localhost/path", + [('y', 'y1'), ('y', 'y2')], + ) + self.assertEqual(url, "https://localhost/path?y=y1&y=y2") + + def test_url_concat_multi_same_query_params(self): + url = url_concat( + "https://localhost/path?r=1&r=2", + [('y', 'y')], + ) + self.assertEqual(url, "https://localhost/path?r=1&r=2&y=y") + + def test_url_concat_dict_params(self): + url = url_concat( + "https://localhost/path", + dict(y='y'), + ) + self.assertEqual(url, "https://localhost/path?y=y") + class MultipartFormDataTest(unittest.TestCase): def test_file_upload(self): diff --git a/lib/tornado/test/import_test.py b/lib/tornado/test/import_test.py index a50566d0d2dfee86c761b715d57e2ace15d49c7f..88d02e027039dfcbe32813dbf7e9e2fcb3614a11 100644 --- a/lib/tornado/test/import_test.py +++ b/lib/tornado/test/import_test.py @@ -1,5 +1,5 @@ # flake8: noqa -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado.test.util import unittest @@ -11,7 +11,6 @@ class ImportTest(unittest.TestCase): import tornado.auth import tornado.autoreload import tornado.concurrent - # import tornado.curl_httpclient # depends on pycurl import tornado.escape import 
tornado.gen import tornado.http1connection @@ -28,6 +27,7 @@ class ImportTest(unittest.TestCase): import tornado.simple_httpclient import tornado.stack_context import tornado.tcpserver + import tornado.tcpclient import tornado.template import tornado.testing import tornado.util diff --git a/lib/tornado/test/ioloop_test.py b/lib/tornado/test/ioloop_test.py index 8570e73f011eb98f9fd6394216ed674a3b274a48..1601813f44c5285fc66af16aeb05b440cbc6d935 100644 --- a/lib/tornado/test/ioloop_test.py +++ b/lib/tornado/test/ioloop_test.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import contextlib import datetime import functools @@ -9,6 +9,7 @@ import socket import sys import threading import time +import types from tornado import gen from tornado.ioloop import IOLoop, TimeoutError, PollIOLoop, PeriodicCallback @@ -61,6 +62,25 @@ class FakeTimeIOLoop(PollIOLoop): class TestIOLoop(AsyncTestCase): + def test_add_callback_return_sequence(self): + # A callback returning {} or [] shouldn't spin the CPU, see Issue #1803. 
+ self.calls = 0 + + loop = self.io_loop + test = self + old_add_callback = loop.add_callback + + def add_callback(self, callback, *args, **kwargs): + test.calls += 1 + old_add_callback(callback, *args, **kwargs) + + loop.add_callback = types.MethodType(add_callback, loop) + loop.add_callback(lambda: {}) + loop.add_callback(lambda: []) + loop.add_timeout(datetime.timedelta(milliseconds=50), loop.stop) + loop.start() + self.assertLess(self.calls, 10) + @skipOnTravis def test_add_callback_wakeup(self): # Make sure that add_callback from inside a running IOLoop diff --git a/lib/tornado/test/iostream_test.py b/lib/tornado/test/iostream_test.py index 6e15136c3b751da4948811d95eb0cd9004bdb9ee..91bc7bf6add6bcd948834a86053ca225ed918e97 100644 --- a/lib/tornado/test/iostream_test.py +++ b/lib/tornado/test/iostream_test.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado.concurrent import Future from tornado import gen from tornado import netutil @@ -602,6 +602,17 @@ class TestIOStreamMixin(object): server.close() client.close() + def test_write_memoryview(self): + server, client = self.make_iostream_pair() + try: + client.read_bytes(4, self.stop) + server.write(memoryview(b"hello")) + data = self.wait() + self.assertEqual(data, b"hell") + finally: + server.close() + client.close() + def test_read_bytes_partial(self): server, client = self.make_iostream_pair() try: @@ -797,6 +808,40 @@ class TestIOStreamMixin(object): server.close() client.close() + def test_future_write(self): + """ + Test that write() Futures are never orphaned. + """ + # Run concurrent writers that will write enough bytes so as to + # clog the socket buffer and accumulate bytes in our write buffer. 
+ m, n = 10000, 1000 + nproducers = 10 + total_bytes = m * n * nproducers + server, client = self.make_iostream_pair(max_buffer_size=total_bytes) + + @gen.coroutine + def produce(): + data = b'x' * m + for i in range(n): + yield server.write(data) + + @gen.coroutine + def consume(): + nread = 0 + while nread < total_bytes: + res = yield client.read_bytes(m) + nread += len(res) + + @gen.coroutine + def main(): + yield [produce() for i in range(nproducers)] + [consume()] + + try: + self.io_loop.run_sync(main) + finally: + server.close() + client.close() + class TestIOStreamWebHTTP(TestIOStreamWebMixin, AsyncHTTPTestCase): def _make_client_iostream(self): diff --git a/lib/tornado/test/locale_test.py b/lib/tornado/test/locale_test.py index e57a66e686f217de94788b3009d46370a3873f77..d548ffb861b2978319b5b476b1a701da5a35fc37 100644 --- a/lib/tornado/test/locale_test.py +++ b/lib/tornado/test/locale_test.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import datetime import os diff --git a/lib/tornado/test/locks_test.py b/lib/tornado/test/locks_test.py index 020ec105e0dd835ff6a9385f4c4b86a25fd3a2c7..844d4fb0ff42921cd2b41d737c502bd373a1ab14 100644 --- a/lib/tornado/test/locks_test.py +++ b/lib/tornado/test/locks_test.py @@ -11,7 +11,7 @@ # under the License. -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from datetime import timedelta from tornado import gen, locks diff --git a/lib/tornado/test/log_test.py b/lib/tornado/test/log_test.py index da78fc027d2fdc3ba1d56a5135d1eeab09f861fe..888964e7b018c1e617a787a75b73d49b432518f5 100644 --- a/lib/tornado/test/log_test.py +++ b/lib/tornado/test/log_test.py @@ -13,7 +13,7 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the # License for the specific language governing permissions and limitations # under the License. -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import contextlib import glob diff --git a/lib/tornado/test/netutil_test.py b/lib/tornado/test/netutil_test.py index 549c4fe1c69edbbc642e9369db3a382629337833..9564290abd2300ae4a85c97380d944add70b0d5f 100644 --- a/lib/tornado/test/netutil_test.py +++ b/lib/tornado/test/netutil_test.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import errno import os diff --git a/lib/tornado/test/options_test.py b/lib/tornado/test/options_test.py index f7b215c5a5629d0c35e541f1c99e930f464e6abc..bafeea6fd35f4897a094dc6df0e7a1b58a8f519a 100644 --- a/lib/tornado/test/options_test.py +++ b/lib/tornado/test/options_test.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import datetime import os @@ -36,7 +36,7 @@ class OptionsTest(unittest.TestCase): options.define("port", default=80) options.define("username", default='foo') options.define("my_path") - config_path = os.path.join(os.path.dirname(__file__), + config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "options_test.cfg") options.parse_config_file(config_path) self.assertEqual(options.port, 443) diff --git a/lib/tornado/test/process_test.py b/lib/tornado/test/process_test.py index d5fff1706a7143bee26ad6781ffc046f65d919ae..74c10abf19dfb002df301711167faa6cc36b7a1a 100644 --- a/lib/tornado/test/process_test.py +++ b/lib/tornado/test/process_test.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, 
division, print_function import logging import os import signal @@ -149,7 +149,7 @@ class SubprocessTest(AsyncTestCase): stdin=Subprocess.STREAM, stdout=Subprocess.STREAM, stderr=subprocess.STDOUT, io_loop=self.io_loop) - self.addCleanup(lambda: os.kill(subproc.pid, signal.SIGTERM)) + self.addCleanup(lambda: (subproc.proc.terminate(), subproc.proc.wait())) subproc.stdout.read_until(b'>>> ', self.stop) self.wait() subproc.stdin.write(b"print('hello')\n") @@ -170,7 +170,7 @@ class SubprocessTest(AsyncTestCase): stdin=Subprocess.STREAM, stdout=Subprocess.STREAM, stderr=subprocess.STDOUT, io_loop=self.io_loop) - self.addCleanup(lambda: os.kill(subproc.pid, signal.SIGTERM)) + self.addCleanup(lambda: (subproc.proc.terminate(), subproc.proc.wait())) subproc.stdout.read_until(b'>>> ', self.stop) self.wait() subproc.stdin.close() @@ -186,7 +186,7 @@ class SubprocessTest(AsyncTestCase): r"import sys; sys.stderr.write('hello\n')"], stderr=Subprocess.STREAM, io_loop=self.io_loop) - self.addCleanup(lambda: os.kill(subproc.pid, signal.SIGTERM)) + self.addCleanup(lambda: (subproc.proc.terminate(), subproc.proc.wait())) subproc.stderr.read_until(b'\n', self.stop) data = self.wait() self.assertEqual(data, b'hello\n') @@ -219,10 +219,27 @@ class SubprocessTest(AsyncTestCase): self.addCleanup(Subprocess.uninitialize) subproc = Subprocess([sys.executable, '-c', 'import time; time.sleep(30)'], + stdout=Subprocess.STREAM, io_loop=self.io_loop) subproc.set_exit_callback(self.stop) os.kill(subproc.pid, signal.SIGTERM) - ret = self.wait() + try: + ret = self.wait(timeout=1.0) + except AssertionError: + # We failed to get the termination signal. This test is + # occasionally flaky on pypy, so try to get a little more + # information: did the process close its stdout + # (indicating that the problem is in the parent process's + # signal handling) or did the child process somehow fail + # to terminate? 
+ subproc.stdout.read_until_close(callback=self.stop) + try: + self.wait(timeout=1.0) + except AssertionError: + raise AssertionError("subprocess failed to terminate") + else: + raise AssertionError("subprocess closed stdout but failed to " + "get termination signal") self.assertEqual(subproc.returncode, ret) self.assertEqual(ret, -signal.SIGTERM) diff --git a/lib/tornado/test/queues_test.py b/lib/tornado/test/queues_test.py index e72b6ed5f8dce12f198f999ccde82347b0e02977..48ed5e20617462ee76e0ed5c940757cf5a34faaa 100644 --- a/lib/tornado/test/queues_test.py +++ b/lib/tornado/test/queues_test.py @@ -11,7 +11,7 @@ # under the License. -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from datetime import timedelta from random import random diff --git a/lib/tornado/test/resolve_test_helper.py b/lib/tornado/test/resolve_test_helper.py index 070222f0dfc58cc5271e86f946f26541dcefa1ac..429671962f27c07721be163d614161340a773b79 100644 --- a/lib/tornado/test/resolve_test_helper.py +++ b/lib/tornado/test/resolve_test_helper.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado.ioloop import IOLoop from tornado.netutil import ThreadedResolver diff --git a/lib/tornado/test/routing_test.py b/lib/tornado/test/routing_test.py new file mode 100644 index 0000000000000000000000000000000000000000..a1040df32b1572a9a8e9ca013ab16279941e160f --- /dev/null +++ b/lib/tornado/test/routing_test.py @@ -0,0 +1,224 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +from __future__ import absolute_import, division, print_function + +from tornado.httputil import HTTPHeaders, HTTPMessageDelegate, HTTPServerConnectionDelegate, ResponseStartLine +from tornado.routing import HostMatches, PathMatches, ReversibleRouter, Router, Rule, RuleRouter +from tornado.testing import AsyncHTTPTestCase +from tornado.web import Application, HTTPError, RequestHandler +from tornado.wsgi import WSGIContainer + + +class BasicRouter(Router): + def find_handler(self, request, **kwargs): + + class MessageDelegate(HTTPMessageDelegate): + def __init__(self, connection): + self.connection = connection + + def finish(self): + self.connection.write_headers( + ResponseStartLine("HTTP/1.1", 200, "OK"), HTTPHeaders({"Content-Length": "2"}), b"OK" + ) + self.connection.finish() + + return MessageDelegate(request.connection) + + +class BasicRouterTestCase(AsyncHTTPTestCase): + def get_app(self): + return BasicRouter() + + def test_basic_router(self): + response = self.fetch("/any_request") + self.assertEqual(response.body, b"OK") + + +resources = {} + + +class GetResource(RequestHandler): + def get(self, path): + if path not in resources: + raise HTTPError(404) + + self.finish(resources[path]) + + +class PostResource(RequestHandler): + def post(self, path): + resources[path] = self.request.body + + +class HTTPMethodRouter(Router): + def __init__(self, app): + self.app = app + + def find_handler(self, request, **kwargs): + handler = GetResource if request.method == "GET" else PostResource + return self.app.get_handler_delegate(request, handler, 
path_args=[request.path]) + + +class HTTPMethodRouterTestCase(AsyncHTTPTestCase): + def get_app(self): + return HTTPMethodRouter(Application()) + + def test_http_method_router(self): + response = self.fetch("/post_resource", method="POST", body="data") + self.assertEqual(response.code, 200) + + response = self.fetch("/get_resource") + self.assertEqual(response.code, 404) + + response = self.fetch("/post_resource") + self.assertEqual(response.code, 200) + self.assertEqual(response.body, b"data") + + +def _get_named_handler(handler_name): + class Handler(RequestHandler): + def get(self, *args, **kwargs): + if self.application.settings.get("app_name") is not None: + self.write(self.application.settings["app_name"] + ": ") + + self.finish(handler_name + ": " + self.reverse_url(handler_name)) + + return Handler + + +FirstHandler = _get_named_handler("first_handler") +SecondHandler = _get_named_handler("second_handler") + + +class CustomRouter(ReversibleRouter): + def __init__(self): + super(CustomRouter, self).__init__() + self.routes = {} + + def add_routes(self, routes): + self.routes.update(routes) + + def find_handler(self, request, **kwargs): + if request.path in self.routes: + app, handler = self.routes[request.path] + return app.get_handler_delegate(request, handler) + + def reverse_url(self, name, *args): + handler_path = '/' + name + return handler_path if handler_path in self.routes else None + + +class CustomRouterTestCase(AsyncHTTPTestCase): + def get_app(self): + class CustomApplication(Application): + def reverse_url(self, name, *args): + return router.reverse_url(name, *args) + + router = CustomRouter() + app1 = CustomApplication(app_name="app1") + app2 = CustomApplication(app_name="app2") + + router.add_routes({ + "/first_handler": (app1, FirstHandler), + "/second_handler": (app2, SecondHandler), + "/first_handler_second_app": (app2, FirstHandler), + }) + + return router + + def test_custom_router(self): + response = self.fetch("/first_handler") + 
self.assertEqual(response.body, b"app1: first_handler: /first_handler") + response = self.fetch("/second_handler") + self.assertEqual(response.body, b"app2: second_handler: /second_handler") + response = self.fetch("/first_handler_second_app") + self.assertEqual(response.body, b"app2: first_handler: /first_handler") + + +class ConnectionDelegate(HTTPServerConnectionDelegate): + def start_request(self, server_conn, request_conn): + + class MessageDelegate(HTTPMessageDelegate): + def __init__(self, connection): + self.connection = connection + + def finish(self): + response_body = b"OK" + self.connection.write_headers( + ResponseStartLine("HTTP/1.1", 200, "OK"), + HTTPHeaders({"Content-Length": str(len(response_body))})) + self.connection.write(response_body) + self.connection.finish() + + return MessageDelegate(request_conn) + + +class RuleRouterTest(AsyncHTTPTestCase): + def get_app(self): + app = Application() + + def request_callable(request): + request.write(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nOK") + request.finish() + + app.add_handlers(".*", [ + (HostMatches("www.example.com"), [ + (PathMatches("/first_handler"), "tornado.test.routing_test.SecondHandler", {}, "second_handler") + ]), + Rule(PathMatches("/first_handler"), FirstHandler, name="first_handler"), + Rule(PathMatches("/request_callable"), request_callable), + ("/connection_delegate", ConnectionDelegate()) + ]) + + return app + + def test_rule_based_router(self): + response = self.fetch("/first_handler") + self.assertEqual(response.body, b"first_handler: /first_handler") + response = self.fetch("/first_handler", headers={'Host': 'www.example.com'}) + self.assertEqual(response.body, b"second_handler: /first_handler") + + response = self.fetch("/connection_delegate") + self.assertEqual(response.body, b"OK") + + response = self.fetch("/request_callable") + self.assertEqual(response.body, b"OK") + + response = self.fetch("/404") + self.assertEqual(response.code, 404) + + +class 
WSGIContainerTestCase(AsyncHTTPTestCase): + def get_app(self): + wsgi_app = WSGIContainer(self.wsgi_app) + + class Handler(RequestHandler): + def get(self, *args, **kwargs): + self.finish(self.reverse_url("tornado")) + + return RuleRouter([ + (PathMatches("/tornado.*"), Application([(r"/tornado/test", Handler, {}, "tornado")])), + (PathMatches("/wsgi"), wsgi_app), + ]) + + def wsgi_app(self, environ, start_response): + start_response("200 OK", []) + return [b"WSGI"] + + def test_wsgi_container(self): + response = self.fetch("/tornado/test") + self.assertEqual(response.body, b"/tornado/test") + + response = self.fetch("/wsgi") + self.assertEqual(response.body, b"WSGI") diff --git a/lib/tornado/test/runtests.py b/lib/tornado/test/runtests.py index f4dd46de36484de525e8493247fe2dc01986e18b..b81c5f225ebdbcbbe6aa888c75611c663880c533 100644 --- a/lib/tornado/test/runtests.py +++ b/lib/tornado/test/runtests.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import gc import locale # system locale module, not tornado.locale import logging @@ -43,6 +43,7 @@ TEST_MODULES = [ 'tornado.test.options_test', 'tornado.test.process_test', 'tornado.test.queues_test', + 'tornado.test.routing_test', 'tornado.test.simple_httpclient_test', 'tornado.test.stack_context_test', 'tornado.test.tcpclient_test', @@ -125,6 +126,9 @@ def main(): # Silence the warning until we can drop 3.5.[01]. warnings.filterwarnings("ignore", category=PendingDeprecationWarning, message=".*legacy __aiter__ protocol") + # 3.5.2's PendingDeprecationWarning became a DeprecationWarning in 3.6. 
+ warnings.filterwarnings("ignore", category=DeprecationWarning, + message=".*legacy __aiter__ protocol") logging.getLogger("tornado.access").setLevel(logging.CRITICAL) @@ -181,5 +185,6 @@ def main(): log_counter.warning_count, log_counter.error_count) sys.exit(1) + if __name__ == '__main__': main() diff --git a/lib/tornado/test/simple_httpclient_test.py b/lib/tornado/test/simple_httpclient_test.py index 861602b86712b7ca0d6d8ceceb87a24c9807b1eb..02d57c5fb0d944409149ca7088bcc0061cd27a3b 100644 --- a/lib/tornado/test/simple_httpclient_test.py +++ b/lib/tornado/test/simple_httpclient_test.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import collections from contextlib import closing diff --git a/lib/tornado/test/stack_context_test.py b/lib/tornado/test/stack_context_test.py index 853260e30ebcd7c92664e172b4716a64ec4f1120..59d25474c3acdbaf699e83a70601df5b24d2b92e 100644 --- a/lib/tornado/test/stack_context_test.py +++ b/lib/tornado/test/stack_context_test.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado import gen from tornado.log import app_log @@ -284,5 +284,6 @@ class StackContextTest(AsyncTestCase): f1) self.assertEqual(self.active_contexts, []) + if __name__ == '__main__': unittest.main() diff --git a/lib/tornado/test/tcpclient_test.py b/lib/tornado/test/tcpclient_test.py index 1a4201e6b7712afdf5a00574868114d1dc7380bf..76206e85ea3735ad94103e209c6fabe7819ae86e 100644 --- a/lib/tornado/test/tcpclient_test.py +++ b/lib/tornado/test/tcpclient_test.py @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. 
-from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from contextlib import closing import os @@ -22,10 +22,11 @@ import socket from tornado.concurrent import Future from tornado.netutil import bind_sockets, Resolver +from tornado.queues import Queue from tornado.tcpclient import TCPClient, _Connector from tornado.tcpserver import TCPServer from tornado.testing import AsyncTestCase, gen_test -from tornado.test.util import skipIfNoIPv6, unittest, refusing_port +from tornado.test.util import skipIfNoIPv6, unittest, refusing_port, skipIfNonUnix # Fake address families for testing. Used in place of AF_INET # and AF_INET6 because some installations do not have AF_INET6. @@ -36,12 +37,14 @@ class TestTCPServer(TCPServer): def __init__(self, family): super(TestTCPServer, self).__init__() self.streams = [] + self.queue = Queue() sockets = bind_sockets(None, 'localhost', family) self.add_sockets(sockets) self.port = sockets[0].getsockname()[1] def handle_stream(self, stream, address): self.streams.append(stream) + self.queue.put(stream) def stop(self): super(TestTCPServer, self).stop() @@ -81,12 +84,15 @@ class TCPClientTest(AsyncTestCase): self.skipTest("localhost does not resolve to ipv6") @gen_test - def do_test_connect(self, family, host): + def do_test_connect(self, family, host, source_ip=None, source_port=None): port = self.start_server(family) - stream = yield self.client.connect(host, port) + stream = yield self.client.connect(host, port, + source_ip=source_ip, + source_port=source_port) + server_stream = yield self.server.queue.get() with closing(stream): stream.write(b"hello") - data = yield self.server.streams[0].read_bytes(5) + data = yield server_stream.read_bytes(5) self.assertEqual(data, b"hello") def test_connect_ipv4_ipv4(self): @@ -125,6 +131,33 @@ class TCPClientTest(AsyncTestCase): with self.assertRaises(IOError): yield self.client.connect('127.0.0.1', port) + 
def test_source_ip_fail(self): + ''' + Fail when trying to use the source IP Address '8.8.8.8'. + ''' + self.assertRaises(socket.error, + self.do_test_connect, + socket.AF_INET, + '127.0.0.1', + source_ip='8.8.8.8') + + def test_source_ip_success(self): + ''' + Success when trying to use the source IP Address '127.0.0.1' + ''' + self.do_test_connect(socket.AF_INET, '127.0.0.1', source_ip='127.0.0.1') + + @skipIfNonUnix + def test_source_port_fail(self): + ''' + Fail when trying to use source port 1. + ''' + self.assertRaises(socket.error, + self.do_test_connect, + socket.AF_INET, + '127.0.0.1', + source_port=1) + class TestConnectorSplit(unittest.TestCase): def test_one_family(self): diff --git a/lib/tornado/test/tcpserver_test.py b/lib/tornado/test/tcpserver_test.py index c01c04ddfb2baf903a76fbf5dfa182c3c2d21172..9afb54202a572718cfee924a01111e3f6608bcc1 100644 --- a/lib/tornado/test/tcpserver_test.py +++ b/lib/tornado/test/tcpserver_test.py @@ -1,4 +1,5 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function + import socket from tornado import gen @@ -6,6 +7,7 @@ from tornado.iostream import IOStream from tornado.log import app_log from tornado.stack_context import NullContext from tornado.tcpserver import TCPServer +from tornado.test.util import skipBefore35, exec_test from tornado.testing import AsyncTestCase, ExpectLog, bind_unused_port, gen_test @@ -37,3 +39,32 @@ class TCPServerTest(AsyncTestCase): server.stop() if client is not None: client.close() + + @skipBefore35 + @gen_test + def test_handle_stream_native_coroutine(self): + # handle_stream may be a native coroutine. 
+ + namespace = exec_test(globals(), locals(), """ + class TestServer(TCPServer): + async def handle_stream(self, stream, address): + stream.write(b'data') + stream.close() + """) + + sock, port = bind_unused_port() + server = namespace['TestServer']() + server.add_socket(sock) + client = IOStream(socket.socket()) + yield client.connect(('localhost', port)) + result = yield client.read_until_close() + self.assertEqual(result, b'data') + server.stop() + client.close() + + def test_stop_twice(self): + sock, port = bind_unused_port() + server = TCPServer() + server.add_socket(sock) + server.stop() + server.stop() diff --git a/lib/tornado/test/template_test.py b/lib/tornado/test/template_test.py index dfcf380564be310ea30acb2175ad8bb497c88036..2f1e88c1d15f8dbc2fe88bfc56ceaf481e444cee 100644 --- a/lib/tornado/test/template_test.py +++ b/lib/tornado/test/template_test.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import os import sys @@ -6,8 +6,8 @@ import traceback from tornado.escape import utf8, native_str, to_unicode from tornado.template import Template, DictLoader, ParseError, Loader -from tornado.test.util import unittest -from tornado.util import ObjectDict, unicode_type +from tornado.test.util import unittest, is_coverage_running +from tornado.util import ObjectDict, unicode_type, PY3 class TemplateTest(unittest.TestCase): @@ -175,6 +175,11 @@ try{% set y = 1/x %} self.assertEqual(template.generate(), '0') def test_non_ascii_name(self): + if PY3 and is_coverage_running(): + try: + os.fsencode(u"t\u00e9st.html") + except UnicodeEncodeError: + self.skipTest("coverage tries to access unencodable filename") loader = DictLoader({u"t\u00e9st.html": "hello"}) self.assertEqual(loader.load(u"t\u00e9st.html").generate(), b"hello") diff --git a/lib/tornado/test/testing_test.py b/lib/tornado/test/testing_test.py index 
e00058ac343128afc1fe5e26d9c5e45d841c97df..b3d6d8c5bb7db45930ef88fdefe67f1765c1bac8 100644 --- a/lib/tornado/test/testing_test.py +++ b/lib/tornado/test/testing_test.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado import gen, ioloop from tornado.log import app_log diff --git a/lib/tornado/test/twisted_test.py b/lib/tornado/test/twisted_test.py index 298da6c9cf7a5e879761d7d4e5b2b6b8a7952466..1604ce52f46b0b8defffe77d470db275bd52a870 100644 --- a/lib/tornado/test/twisted_test.py +++ b/lib/tornado/test/twisted_test.py @@ -17,7 +17,7 @@ Unittest for the twisted-style reactor. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import logging import os diff --git a/lib/tornado/test/util.py b/lib/tornado/test/util.py index 2e3d779fd1f6dfc8ede702c04cc8f44b8a300fe2..6c032da63f85b28a7298ddb8015b8c2e851a1d8b 100644 --- a/lib/tornado/test/util.py +++ b/lib/tornado/test/util.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import os import platform @@ -76,3 +76,21 @@ def exec_test(caller_globals, caller_locals, s): local_namespace = {} exec(textwrap.dedent(s), global_namespace, local_namespace) return local_namespace + + +def is_coverage_running(): + """Return whether coverage is currently running. 
+ """ + if 'coverage' not in sys.modules: + return False + tracer = sys.gettrace() + if tracer is None: + return False + try: + mod = tracer.__module__ + except AttributeError: + try: + mod = tracer.__class__.__module__ + except AttributeError: + return False + return mod.startswith('coverage') diff --git a/lib/tornado/test/util_test.py b/lib/tornado/test/util_test.py index 48b16f89e697276e3d445984050821e7a98b389f..459cb9c327164f20ad7e8b81436fc7b984aefe32 100644 --- a/lib/tornado/test/util_test.py +++ b/lib/tornado/test/util_test.py @@ -1,12 +1,12 @@ # coding: utf-8 -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import re import sys import datetime import tornado.escape from tornado.escape import utf8 -from tornado.util import raise_exc_info, Configurable, exec_in, ArgReplacer, timedelta_to_seconds, import_object, re_unescape, PY3 +from tornado.util import raise_exc_info, Configurable, exec_in, ArgReplacer, timedelta_to_seconds, import_object, re_unescape, is_finalizing, PY3 from tornado.test.util import unittest if PY3: @@ -220,3 +220,8 @@ class ReUnescapeTest(unittest.TestCase): re_unescape('\\b') with self.assertRaises(ValueError): re_unescape('\\Z') + + +class IsFinalizingTest(unittest.TestCase): + def test_basic(self): + self.assertFalse(is_finalizing()) diff --git a/lib/tornado/test/web_test.py b/lib/tornado/test/web_test.py index 14f6904aa3538b01785be52ef65ccce0aa82374e..d79ea52c1be27111e387f1a23bbd8c5954ae2258 100644 --- a/lib/tornado/test/web_test.py +++ b/lib/tornado/test/web_test.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from tornado.concurrent import Future from tornado import gen from tornado.escape import json_decode, utf8, to_unicode, recursive_unicode, native_str, to_basestring @@ -1348,6 +1348,8 @@ class 
HostMatchingTest(WebTestCase): [("/bar", HostMatchingTest.Handler, {"reply": "[1]"})]) self.app.add_handlers("www.example.com", [("/baz", HostMatchingTest.Handler, {"reply": "[2]"})]) + self.app.add_handlers("www.e.*e.com", + [("/baz", HostMatchingTest.Handler, {"reply": "[3]"})]) response = self.fetch("/foo") self.assertEqual(response.body, b"wildcard") @@ -1362,6 +1364,40 @@ class HostMatchingTest(WebTestCase): self.assertEqual(response.body, b"[1]") response = self.fetch("/baz", headers={'Host': 'www.example.com'}) self.assertEqual(response.body, b"[2]") + response = self.fetch("/baz", headers={'Host': 'www.exe.com'}) + self.assertEqual(response.body, b"[3]") + + +@wsgi_safe +class DefaultHostMatchingTest(WebTestCase): + def get_handlers(self): + return [] + + def get_app_kwargs(self): + return {'default_host': "www.example.com"} + + def test_default_host_matching(self): + self.app.add_handlers("www.example.com", + [("/foo", HostMatchingTest.Handler, {"reply": "[0]"})]) + self.app.add_handlers(r"www\.example\.com", + [("/bar", HostMatchingTest.Handler, {"reply": "[1]"})]) + self.app.add_handlers("www.test.com", + [("/baz", HostMatchingTest.Handler, {"reply": "[2]"})]) + + response = self.fetch("/foo") + self.assertEqual(response.body, b"[0]") + response = self.fetch("/bar") + self.assertEqual(response.body, b"[1]") + response = self.fetch("/baz") + self.assertEqual(response.code, 404) + + response = self.fetch("/foo", headers={"X-Real-Ip": "127.0.0.1"}) + self.assertEqual(response.code, 404) + + self.app.default_host = "www.test.com" + + response = self.fetch("/baz") + self.assertEqual(response.body, b"[2]") @wsgi_safe @@ -1538,7 +1574,6 @@ class GzipTestCase(SimpleHandlerTestCase): response.headers.get('X-Consumed-Content-Encoding')), 'gzip') - def test_gzip(self): response = self.fetch('/') self.assert_compressed(response) @@ -1569,6 +1604,7 @@ class GzipTestCase(SimpleHandlerTestCase): self.assertEqual([s.strip() for s in response.headers['Vary'].split(',')], 
['Accept-Language', 'Cookie', 'Accept-Encoding']) + @wsgi_safe class PathArgsInPrepareTest(WebTestCase): class Handler(RequestHandler): @@ -2834,3 +2870,20 @@ class URLSpecReverseTest(unittest.TestCase): def test_reverse_arguments(self): self.assertEqual('/api/v1/foo/bar', url(r'^/api/v1/foo/(\w+)$', None).reverse('bar')) + + +class RedirectHandlerTest(WebTestCase): + def get_handlers(self): + return [ + ('/src', WebRedirectHandler, {'url': '/dst'}), + (r'/(.*?)/(.*?)/(.*)', WebRedirectHandler, {'url': '/{1}/{0}/{2}'})] + + def test_basic_redirect(self): + response = self.fetch('/src', follow_redirects=False) + self.assertEqual(response.code, 301) + self.assertEqual(response.headers['Location'], '/dst') + + def test_redirect_pattern(self): + response = self.fetch('/a/b/c', follow_redirects=False) + self.assertEqual(response.code, 301) + self.assertEqual(response.headers['Location'], '/b/a/c') diff --git a/lib/tornado/test/websocket_test.py b/lib/tornado/test/websocket_test.py index ed5c7070fc43fcf8fd9fb2b0fc8b575d35c5fd7b..d47a74e651e86ab8ecd8ab76d6936c23edd06e05 100644 --- a/lib/tornado/test/websocket_test.py +++ b/lib/tornado/test/websocket_test.py @@ -1,13 +1,16 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function +import functools +import sys import traceback from tornado.concurrent import Future from tornado import gen from tornado.httpclient import HTTPError, HTTPRequest from tornado.log import gen_log, app_log +from tornado.template import DictLoader from tornado.testing import AsyncHTTPTestCase, gen_test, bind_unused_port, ExpectLog -from tornado.test.util import unittest +from tornado.test.util import unittest, skipBefore35, exec_test from tornado.web import Application, RequestHandler try: @@ -57,16 +60,36 @@ class ErrorInOnMessageHandler(TestWebSocketHandler): class HeaderHandler(TestWebSocketHandler): def open(self): - try: - # In a websocket context, many 
RequestHandler methods - # raise RuntimeErrors. - self.set_status(503) - raise Exception("did not get expected exception") - except RuntimeError: - pass + methods_to_test = [ + functools.partial(self.write, 'This should not work'), + functools.partial(self.redirect, 'http://localhost/elsewhere'), + functools.partial(self.set_header, 'X-Test', ''), + functools.partial(self.set_cookie, 'Chocolate', 'Chip'), + functools.partial(self.set_status, 503), + self.flush, + self.finish, + ] + for method in methods_to_test: + try: + # In a websocket context, many RequestHandler methods + # raise RuntimeErrors. + method() + raise Exception("did not get expected exception") + except RuntimeError: + pass self.write_message(self.request.headers.get('X-Test', '')) +class HeaderEchoHandler(TestWebSocketHandler): + def set_default_headers(self): + self.set_header("X-Extra-Response-Header", "Extra-Response-Value") + + def prepare(self): + for k, v in self.request.headers.get_all(): + if k.lower().startswith('x-test'): + self.set_header(k, v) + + class NonWebSocketHandler(RequestHandler): def get(self): self.write('ok') @@ -92,12 +115,33 @@ class PathArgsHandler(TestWebSocketHandler): self.write_message(arg) +class CoroutineOnMessageHandler(TestWebSocketHandler): + def initialize(self, close_future, compression_options=None): + super(CoroutineOnMessageHandler, self).initialize(close_future, + compression_options) + self.sleeping = 0 + + @gen.coroutine + def on_message(self, message): + if self.sleeping > 0: + self.write_message('another coroutine is already sleeping') + self.sleeping += 1 + yield gen.sleep(0.01) + self.sleeping -= 1 + self.write_message(message) + + +class RenderMessageHandler(TestWebSocketHandler): + def on_message(self, message): + self.write_message(self.render_string('message.html', message=message)) + + class WebSocketBaseTestCase(AsyncHTTPTestCase): @gen.coroutine - def ws_connect(self, path, compression_options=None): + def ws_connect(self, path, **kwargs): ws = 
yield websocket_connect( 'ws://127.0.0.1:%d%s' % (self.get_http_port(), path), - compression_options=compression_options) + **kwargs) raise gen.Return(ws) @gen.coroutine @@ -118,6 +162,8 @@ class WebSocketTest(WebSocketBaseTestCase): ('/echo', EchoHandler, dict(close_future=self.close_future)), ('/non_ws', NonWebSocketHandler), ('/header', HeaderHandler, dict(close_future=self.close_future)), + ('/header_echo', HeaderEchoHandler, + dict(close_future=self.close_future)), ('/close_reason', CloseReasonHandler, dict(close_future=self.close_future)), ('/error_in_on_message', ErrorInOnMessageHandler, @@ -126,13 +172,30 @@ class WebSocketTest(WebSocketBaseTestCase): dict(close_future=self.close_future)), ('/path_args/(.*)', PathArgsHandler, dict(close_future=self.close_future)), - ]) + ('/coroutine', CoroutineOnMessageHandler, + dict(close_future=self.close_future)), + ('/render', RenderMessageHandler, + dict(close_future=self.close_future)), + ], template_loader=DictLoader({ + 'message.html': '<b>{{ message }}</b>', + })) + + def tearDown(self): + super(WebSocketTest, self).tearDown() + RequestHandler._template_loaders.clear() def test_http_request(self): # WS server, HTTP client. 
response = self.fetch('/echo') self.assertEqual(response.code, 400) + def test_bad_websocket_version(self): + response = self.fetch('/echo', + headers={'Connection': 'Upgrade', + 'Upgrade': 'WebSocket', + 'Sec-WebSocket-Version': '12'}) + self.assertEqual(response.code, 426) + @gen_test def test_websocket_gen(self): ws = yield self.ws_connect('/echo') @@ -170,6 +233,14 @@ class WebSocketTest(WebSocketBaseTestCase): self.assertEqual(response, u'hello \u00e9') yield self.close(ws) + @gen_test + def test_render_message(self): + ws = yield self.ws_connect('/render') + ws.write_message('hello') + response = yield ws.read_message() + self.assertEqual(response, '<b>hello</b>') + yield self.close(ws) + @gen_test def test_error_in_on_message(self): ws = yield self.ws_connect('/error_in_on_message') @@ -221,6 +292,18 @@ class WebSocketTest(WebSocketBaseTestCase): self.assertEqual(response, 'hello') yield self.close(ws) + @gen_test + def test_websocket_header_echo(self): + # Ensure that headers can be returned in the response. + # Specifically, that arbitrary headers passed through websocket_connect + # can be returned. + ws = yield websocket_connect( + HTTPRequest('ws://127.0.0.1:%d/header_echo' % self.get_http_port(), + headers={'X-Test-Hello': 'hello'})) + self.assertEqual(ws.headers.get('X-Test-Hello'), 'hello') + self.assertEqual(ws.headers.get('X-Extra-Response-Header'), 'Extra-Response-Value') + yield self.close(ws) + @gen_test def test_server_close_reason(self): ws = yield self.ws_connect('/close_reason') @@ -259,6 +342,17 @@ class WebSocketTest(WebSocketBaseTestCase): res = yield ws.read_message() self.assertEqual(res, 'hello') + @gen_test + def test_coroutine(self): + ws = yield self.ws_connect('/coroutine') + # Send both messages immediately, coroutine must process one at a time. 
+ yield ws.write_message('hello1') + yield ws.write_message('hello2') + res = yield ws.read_message() + self.assertEqual(res, 'hello1') + res = yield ws.read_message() + self.assertEqual(res, 'hello2') + @gen_test def test_check_origin_valid_no_path(self): port = self.get_http_port() @@ -330,6 +424,42 @@ class WebSocketTest(WebSocketBaseTestCase): self.assertEqual(cm.exception.code, 403) +if sys.version_info >= (3, 5): + NativeCoroutineOnMessageHandler = exec_test(globals(), locals(), """ +class NativeCoroutineOnMessageHandler(TestWebSocketHandler): + def initialize(self, close_future, compression_options=None): + super().initialize(close_future, compression_options) + self.sleeping = 0 + + async def on_message(self, message): + if self.sleeping > 0: + self.write_message('another coroutine is already sleeping') + self.sleeping += 1 + await gen.sleep(0.01) + self.sleeping -= 1 + self.write_message(message)""")['NativeCoroutineOnMessageHandler'] + + +class WebSocketNativeCoroutineTest(WebSocketBaseTestCase): + def get_app(self): + self.close_future = Future() + return Application([ + ('/native', NativeCoroutineOnMessageHandler, + dict(close_future=self.close_future))]) + + @skipBefore35 + @gen_test + def test_native_coroutine(self): + ws = yield self.ws_connect('/native') + # Send both messages immediately, coroutine must process one at a time. + yield ws.write_message('hello1') + yield ws.write_message('hello2') + res = yield ws.read_message() + self.assertEqual(res, 'hello1') + res = yield ws.read_message() + self.assertEqual(res, 'hello2') + + class CompressionTestMixin(object): MESSAGE = 'Hello world. 
Testing 123 123' @@ -429,3 +559,73 @@ class PythonMaskFunctionTest(MaskFunctionMixin, unittest.TestCase): class CythonMaskFunctionTest(MaskFunctionMixin, unittest.TestCase): def mask(self, mask, data): return speedups.websocket_mask(mask, data) + + +class ServerPeriodicPingTest(WebSocketBaseTestCase): + def get_app(self): + class PingHandler(TestWebSocketHandler): + def on_pong(self, data): + self.write_message("got pong") + + self.close_future = Future() + return Application([ + ('/', PingHandler, dict(close_future=self.close_future)), + ], websocket_ping_interval=0.01) + + @gen_test + def test_server_ping(self): + ws = yield self.ws_connect('/') + for i in range(3): + response = yield ws.read_message() + self.assertEqual(response, "got pong") + yield self.close(ws) + # TODO: test that the connection gets closed if ping responses stop. + + +class ClientPeriodicPingTest(WebSocketBaseTestCase): + def get_app(self): + class PingHandler(TestWebSocketHandler): + def on_ping(self, data): + self.write_message("got ping") + + self.close_future = Future() + return Application([ + ('/', PingHandler, dict(close_future=self.close_future)), + ]) + + @gen_test + def test_client_ping(self): + ws = yield self.ws_connect('/', ping_interval=0.01) + for i in range(3): + response = yield ws.read_message() + self.assertEqual(response, "got ping") + yield self.close(ws) + # TODO: test that the connection gets closed if ping responses stop. + + +class MaxMessageSizeTest(WebSocketBaseTestCase): + def get_app(self): + self.close_future = Future() + return Application([ + ('/', EchoHandler, dict(close_future=self.close_future)), + ], websocket_max_message_size=1024) + + @gen_test + def test_large_message(self): + ws = yield self.ws_connect('/') + + # Write a message that is allowed. + msg = 'a' * 1024 + ws.write_message(msg) + resp = yield ws.read_message() + self.assertEqual(resp, msg) + + # Write a message that is too large. 
+ ws.write_message(msg + 'b') + resp = yield ws.read_message() + # A message of None means the other side closed the connection. + self.assertIs(resp, None) + self.assertEqual(ws.close_code, 1009) + self.assertEqual(ws.close_reason, "message too big") + # TODO: Needs tests of messages split over multiple + # continuation frames. diff --git a/lib/tornado/test/windows_test.py b/lib/tornado/test/windows_test.py index 26e01614dc3611522514ea7c99513df75758b8e9..e5cb33813909def00d43621ffed025a8022be884 100644 --- a/lib/tornado/test/windows_test.py +++ b/lib/tornado/test/windows_test.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import, division, print_function import functools import os import socket @@ -7,6 +8,7 @@ from tornado.platform.auto import set_close_exec skipIfNonWindows = unittest.skipIf(os.name != 'nt', 'non-windows platform') + @skipIfNonWindows class WindowsTest(unittest.TestCase): def test_set_close_exec(self): diff --git a/lib/tornado/test/wsgi_test.py b/lib/tornado/test/wsgi_test.py index 5b19aad7edcbeeac12f1a3735b35b8d0d0956940..e6ccc82ae0c8c63d4c04d8966f7375f87e5d362c 100644 --- a/lib/tornado/test/wsgi_test.py +++ b/lib/tornado/test/wsgi_test.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function from wsgiref.validate import validator from tornado.escape import json_decode @@ -83,6 +83,8 @@ def wrap_web_tests_application(): return WSGIContainer(validator(self.app)) result["WSGIApplication_" + cls.__name__] = WSGIApplicationWrappedTest return result + + globals().update(wrap_web_tests_application()) @@ -96,4 +98,6 @@ def wrap_web_tests_adapter(): return WSGIContainer(validator(WSGIAdapter(self.app))) result["WSGIAdapter_" + cls.__name__] = WSGIAdapterWrappedTest return result + + globals().update(wrap_web_tests_adapter()) diff --git a/lib/tornado/testing.py b/lib/tornado/testing.py index 
35cc6eac2c2ca1fb720e3e85136b71d07f955cf9..74d04b6000b8f2ac61a3bf2793e1cfe27cc3098a 100644 --- a/lib/tornado/testing.py +++ b/lib/tornado/testing.py @@ -2,7 +2,7 @@ """Support classes for automated testing. * `AsyncTestCase` and `AsyncHTTPTestCase`: Subclasses of unittest.TestCase - with additional support for testing asynchronous (`.IOLoop` based) code. + with additional support for testing asynchronous (`.IOLoop`-based) code. * `ExpectLog` and `LogTrapTestCase`: Make test logs less spammy. @@ -10,7 +10,7 @@ for the tornado.autoreload module to rerun the tests when code changes. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function try: from tornado import gen @@ -127,7 +127,7 @@ class _TestMethodWrapper(object): method yields it must use a decorator to consume the generator), but will also detect other kinds of return values (these are not necessarily errors, but we alert anyway since there is no good - reason to return a value from a test. + reason to return a value from a test). """ def __init__(self, orig_method): self.orig_method = orig_method @@ -621,7 +621,7 @@ class ExpectLog(logging.Filter): an empty string to watch the root logger. :param regex: Regular expression to match. Any log entries on the specified logger that match this regex will be suppressed. - :param required: If true, an exeption will be raised if the end of + :param required: If true, an exception will be raised if the end of the ``with`` statement is reached without matching any log entries. """ if isinstance(logger, basestring_type): @@ -656,7 +656,9 @@ def main(**kwargs): This test runner is essentially equivalent to `unittest.main` from the standard library, but adds support for tornado-style option - parsing and log formatting. + parsing and log formatting. 
It is *not* necessary to use this + `main` function to run tests using `AsyncTestCase`; these tests + are self-contained and can run with any test runner. The easiest way to run a test is via the command line:: @@ -735,5 +737,6 @@ def main(**kwargs): gen_log.error('FAIL') raise + if __name__ == '__main__': main() diff --git a/lib/tornado/util.py b/lib/tornado/util.py index 53584f98b4282900ab8363d7cb033f5f84bed3f3..981b94c8eaeba783f832e6c0480ec1f0485415d3 100644 --- a/lib/tornado/util.py +++ b/lib/tornado/util.py @@ -10,9 +10,10 @@ interface of its subclasses, including `.AsyncHTTPClient`, `.IOLoop`, and `.Resolver`. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import array +import atexit import os import re import sys @@ -66,6 +67,23 @@ else: _BaseString = Union[bytes, unicode_type] +try: + from sys import is_finalizing +except ImportError: + # Emulate it + def _get_emulated_is_finalizing(): + L = [] + atexit.register(lambda: L.append(None)) + + def is_finalizing(): + # Not referencing any globals here + return L != [] + + return is_finalizing + + is_finalizing = _get_emulated_is_finalizing() + + class ObjectDict(_ObjectDictBase): """Makes a dictionary behave like an object, with attribute-style access. """ @@ -165,30 +183,26 @@ def raise_exc_info(exc_info): def exec_in(code, glob, loc=None): # type: (Any, Dict[str, Any], Optional[Mapping[str, Any]]) -> Any - pass + if isinstance(code, basestring_type): + # exec(string) inherits the caller's future imports; compile + # the string first to prevent that. 
+ code = compile(code, '<string>', 'exec', dont_inherit=True) + exec(code, glob, loc) if PY3: exec(""" def raise_exc_info(exc_info): - raise exc_info[1].with_traceback(exc_info[2]) + try: + raise exc_info[1].with_traceback(exc_info[2]) + finally: + exc_info = None -def exec_in(code, glob, loc=None): - if isinstance(code, str): - code = compile(code, '<string>', 'exec', dont_inherit=True) - exec(code, glob, loc) """) else: exec(""" def raise_exc_info(exc_info): raise exc_info[0], exc_info[1], exc_info[2] - -def exec_in(code, glob, loc=None): - if isinstance(code, basestring): - # exec(string) inherits the caller's future imports; compile - # the string first to prevent that. - code = compile(code, '<string>', 'exec', dont_inherit=True) - exec code in glob, loc """) @@ -222,6 +236,7 @@ def _re_unescape_replacement(match): raise ValueError("cannot unescape '\\\\%s'" % group[0]) return group + _re_unescape_pattern = re.compile(r'\\(.)', re.DOTALL) @@ -440,6 +455,7 @@ def _websocket_mask_python(mask, data): else: return unmasked_arr.tostring() + if (os.environ.get('TORNADO_NO_EXTENSION') or os.environ.get('TORNADO_EXTENSION') == '0'): # These environment variables exist to make it easier to do performance diff --git a/lib/tornado/web.py b/lib/tornado/web.py index f54c4d039d98560cbdae1d3cc7654534126b17b8..d79889fa3768df9daaa3eb18ad8de068735068a3 100644 --- a/lib/tornado/web.py +++ b/lib/tornado/web.py @@ -56,7 +56,7 @@ request. 
""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import base64 import binascii @@ -77,6 +77,7 @@ import time import tornado import traceback import types +from inspect import isclass from io import BytesIO from tornado.concurrent import Future @@ -89,9 +90,13 @@ from tornado.log import access_log, app_log, gen_log from tornado import stack_context from tornado import template from tornado.escape import utf8, _unicode -from tornado.util import (import_object, ObjectDict, raise_exc_info, - unicode_type, _websocket_mask, re_unescape, PY3) -from tornado.httputil import split_host_and_port +from tornado.routing import (AnyMatches, DefaultHostMatches, HostMatches, + ReversibleRouter, Rule, ReversibleRuleRouter, + URLSpec) +from tornado.util import (ObjectDict, raise_exc_info, + unicode_type, _websocket_mask, PY3) + +url = URLSpec if PY3: import http.cookies as Cookie @@ -527,7 +532,7 @@ class RequestHandler(object): Additional keyword arguments are set on the Cookie.Morsel directly. - See http://docs.python.org/library/cookie.html#morsel-objects + See https://docs.python.org/2/library/cookie.html#Cookie.Morsel for available attributes. 
""" # The cookie library only accepts type str, in both python 2 and 3 @@ -751,45 +756,21 @@ class RequestHandler(object): if body_part: html_bodies.append(utf8(body_part)) - def is_absolute(path): - return any(path.startswith(x) for x in ["/", "http:", "https:"]) if js_files: # Maintain order of JavaScript files given by modules - paths = [] - unique_paths = set() - for path in js_files: - if not is_absolute(path): - path = self.static_url(path) - if path not in unique_paths: - paths.append(path) - unique_paths.add(path) - js = ''.join('<script src="' + escape.xhtml_escape(p) + - '" type="text/javascript"></script>' - for p in paths) + js = self.render_linked_js(js_files) sloc = html.rindex(b'</body>') html = html[:sloc] + utf8(js) + b'\n' + html[sloc:] if js_embed: - js = b'<script type="text/javascript">\n//<![CDATA[\n' + \ - b'\n'.join(js_embed) + b'\n//]]>\n</script>' + js = self.render_embed_js(js_embed) sloc = html.rindex(b'</body>') html = html[:sloc] + js + b'\n' + html[sloc:] if css_files: - paths = [] - unique_paths = set() - for path in css_files: - if not is_absolute(path): - path = self.static_url(path) - if path not in unique_paths: - paths.append(path) - unique_paths.add(path) - css = ''.join('<link href="' + escape.xhtml_escape(p) + '" ' - 'type="text/css" rel="stylesheet"/>' - for p in paths) + css = self.render_linked_css(css_files) hloc = html.index(b'</head>') html = html[:hloc] + utf8(css) + b'\n' + html[hloc:] if css_embed: - css = b'<style type="text/css">\n' + b'\n'.join(css_embed) + \ - b'\n</style>' + css = self.render_embed_css(css_embed) hloc = html.index(b'</head>') html = html[:hloc] + css + b'\n' + html[hloc:] if html_heads: @@ -800,6 +781,64 @@ class RequestHandler(object): html = html[:hloc] + b''.join(html_bodies) + b'\n' + html[hloc:] self.finish(html) + def render_linked_js(self, js_files): + """Default method used to render the final js links for the + rendered webpage. 
+ + Override this method in a sub-classed controller to change the output. + """ + paths = [] + unique_paths = set() + + for path in js_files: + if not is_absolute(path): + path = self.static_url(path) + if path not in unique_paths: + paths.append(path) + unique_paths.add(path) + + return ''.join('<script src="' + escape.xhtml_escape(p) + + '" type="text/javascript"></script>' + for p in paths) + + def render_embed_js(self, js_embed): + """Default method used to render the final embedded js for the + rendered webpage. + + Override this method in a sub-classed controller to change the output. + """ + return b'<script type="text/javascript">\n//<![CDATA[\n' + \ + b'\n'.join(js_embed) + b'\n//]]>\n</script>' + + def render_linked_css(self, css_files): + """Default method used to render the final css links for the + rendered webpage. + + Override this method in a sub-classed controller to change the output. + """ + paths = [] + unique_paths = set() + + for path in css_files: + if not is_absolute(path): + path = self.static_url(path) + if path not in unique_paths: + paths.append(path) + unique_paths.add(path) + + return ''.join('<link href="' + escape.xhtml_escape(p) + '" ' + 'type="text/css" rel="stylesheet"/>' + for p in paths) + + def render_embed_css(self, css_embed): + """Default method used to render the final embedded css for the + rendered webpage. + + Override this method in a sub-classed controller to change the output. + """ + return b'<style type="text/css">\n' + b'\n'.join(css_embed) + \ + b'\n</style>' + def render_string(self, template_name, **kwargs): """Generate the given template with the given arguments. @@ -954,6 +993,9 @@ class RequestHandler(object): self._log() self._finished = True self.on_finish() + self._break_cycles() + + def _break_cycles(self): # Break up a reference cycle between this handler and the # _ui_module closures to allow for faster GC on CPython. 
self.ui = None @@ -1109,7 +1151,7 @@ class RequestHandler(object): may not, so the latter form is necessary if loading the user requires asynchronous operations. - The user object may any type of the application's choosing. + The user object may be any type of the application's choosing. """ if not hasattr(self, "_current_user"): self._current_user = self.get_current_user() @@ -1667,9 +1709,8 @@ def stream_request_body(cls): * The regular HTTP method (``post``, ``put``, etc) will be called after the entire body has been read. - There is a subtle interaction between ``data_received`` and asynchronous - ``prepare``: The first call to ``data_received`` may occur at any point - after the call to ``prepare`` has returned *or yielded*. + See the `file receiver demo <https://github.com/tornadoweb/tornado/tree/master/demos/file_upload/>`_ + for example usage. """ if not issubclass(cls, RequestHandler): raise TypeError("expected subclass of RequestHandler, got %r", cls) @@ -1727,7 +1768,38 @@ def addslash(method): return wrapper -class Application(httputil.HTTPServerConnectionDelegate): +class _ApplicationRouter(ReversibleRuleRouter): + """Routing implementation used internally by `Application`. + + Provides a binding between `Application` and `RequestHandler`. + This implementation extends `~.routing.ReversibleRuleRouter` in a couple of ways: + * it allows to use `RequestHandler` subclasses as `~.routing.Rule` target and + * it allows to use a list/tuple of rules as `~.routing.Rule` target. + ``process_rule`` implementation will substitute this list with an appropriate + `_ApplicationRouter` instance. 
+ """ + + def __init__(self, application, rules=None): + assert isinstance(application, Application) + self.application = application + super(_ApplicationRouter, self).__init__(rules) + + def process_rule(self, rule): + rule = super(_ApplicationRouter, self).process_rule(rule) + + if isinstance(rule.target, (list, tuple)): + rule.target = _ApplicationRouter(self.application, rule.target) + + return rule + + def get_target_delegate(self, target, request, **target_params): + if isclass(target) and issubclass(target, RequestHandler): + return self.application.get_handler_delegate(request, target, **target_params) + + return super(_ApplicationRouter, self).get_target_delegate(target, request, **target_params) + + +class Application(ReversibleRouter): """A collection of request handlers that make up a web application. Instances of this class are callable and can be passed directly to @@ -1740,20 +1812,35 @@ class Application(httputil.HTTPServerConnectionDelegate): http_server.listen(8080) ioloop.IOLoop.current().start() - The constructor for this class takes in a list of `URLSpec` objects - or (regexp, request_class) tuples. When we receive requests, we - iterate over the list in order and instantiate an instance of the - first request class whose regexp matches the request path. - The request class can be specified as either a class object or a - (fully-qualified) name. + The constructor for this class takes in a list of `~.routing.Rule` + objects or tuples of values corresponding to the arguments of + `~.routing.Rule` constructor: ``(matcher, target, [target_kwargs], [name])``, + the values in square brackets being optional. The default matcher is + `~.routing.PathMatches`, so ``(regexp, target)`` tuples can also be used + instead of ``(PathMatches(regexp), target)``. 
+ + A common routing target is a `RequestHandler` subclass, but you can also + use lists of rules as a target, which create a nested routing configuration:: + + application = web.Application([ + (HostMatches("example.com"), [ + (r"/", MainPageHandler), + (r"/feed", FeedHandler), + ]), + ]) + + In addition to this you can use nested `~.routing.Router` instances, + `~.httputil.HTTPMessageDelegate` subclasses and callables as routing targets + (see `~.routing` module docs for more information). - Each tuple can contain additional elements, which correspond to the - arguments to the `URLSpec` constructor. (Prior to Tornado 3.2, - only tuples of two or three elements were allowed). + When we receive requests, we iterate over the list in order and + instantiate an instance of the first request class whose regexp + matches the request path. The request class can be specified as + either a class object or a (fully-qualified) name. - A dictionary may be passed as the third element of the tuple, - which will be used as keyword arguments to the handler's - constructor and `~RequestHandler.initialize` method. This pattern + A dictionary may be passed as the third element (``target_kwargs``) + of the tuple, which will be used as keyword arguments to the handler's + constructor and `~RequestHandler.initialize` method. This pattern is used for the `StaticFileHandler` in this example (note that a `StaticFileHandler` can be installed automatically with the static_path setting described below):: @@ -1769,6 +1856,9 @@ class Application(httputil.HTTPServerConnectionDelegate): (r"/article/([0-9]+)", ArticleHandler), ]) + If there's no match for the current request's host, then ``default_host`` + parameter value is matched against host regular expressions. + You can serve static files by sending the ``static_path`` setting as a keyword argument. 
We will serve those files from the ``/static/`` URI (this is configurable with the @@ -1777,8 +1867,10 @@ class Application(httputil.HTTPServerConnectionDelegate): `StaticFileHandler` can be specified with the ``static_handler_class`` setting. + .. versionchanged:: 4.5 + Integration with the new `tornado.routing` module. """ - def __init__(self, handlers=None, default_host="", transforms=None, + def __init__(self, handlers=None, default_host=None, transforms=None, **settings): if transforms is None: self.transforms = [] @@ -1786,8 +1878,6 @@ class Application(httputil.HTTPServerConnectionDelegate): self.transforms.append(GZipContentEncoding) else: self.transforms = transforms - self.handlers = [] - self.named_handlers = {} self.default_host = default_host self.settings = settings self.ui_modules = {'linkify': _linkify, @@ -1810,8 +1900,6 @@ class Application(httputil.HTTPServerConnectionDelegate): r"/(favicon\.ico)", r"/(robots\.txt)"]: handlers.insert(0, (pattern, static_handler_class, static_handler_args)) - if handlers: - self.add_handlers(".*$", handlers) if self.settings.get('debug'): self.settings.setdefault('autoreload', True) @@ -1819,6 +1907,11 @@ class Application(httputil.HTTPServerConnectionDelegate): self.settings.setdefault('static_hash_cache', False) self.settings.setdefault('serve_traceback', True) + self.wildcard_router = _ApplicationRouter(self, handlers) + self.default_router = _ApplicationRouter(self, [ + Rule(AnyMatches(), self.wildcard_router) + ]) + # Automatically reload modified modules if self.settings.get('autoreload'): from tornado import autoreload @@ -1856,47 +1949,20 @@ class Application(httputil.HTTPServerConnectionDelegate): Host patterns are processed sequentially in the order they were added. All matching patterns will be considered. 
""" - if not host_pattern.endswith("$"): - host_pattern += "$" - handlers = [] - # The handlers with the wildcard host_pattern are a special - # case - they're added in the constructor but should have lower - # precedence than the more-precise handlers added later. - # If a wildcard handler group exists, it should always be last - # in the list, so insert new groups just before it. - if self.handlers and self.handlers[-1][0].pattern == '.*$': - self.handlers.insert(-1, (re.compile(host_pattern), handlers)) - else: - self.handlers.append((re.compile(host_pattern), handlers)) - - for spec in host_handlers: - if isinstance(spec, (tuple, list)): - assert len(spec) in (2, 3, 4) - spec = URLSpec(*spec) - handlers.append(spec) - if spec.name: - if spec.name in self.named_handlers: - app_log.warning( - "Multiple handlers named %s; replacing previous value", - spec.name) - self.named_handlers[spec.name] = spec + host_matcher = HostMatches(host_pattern) + rule = Rule(host_matcher, _ApplicationRouter(self, host_handlers)) + + self.default_router.rules.insert(-1, rule) + + if self.default_host is not None: + self.wildcard_router.add_rules([( + DefaultHostMatches(self, host_matcher.host_pattern), + host_handlers + )]) def add_transform(self, transform_class): self.transforms.append(transform_class) - def _get_host_handlers(self, request): - host = split_host_and_port(request.host.lower())[0] - matches = [] - for pattern, handlers in self.handlers: - if pattern.match(host): - matches.extend(handlers) - # Look for default host if not behind load balancer (for debugging) - if not matches and "X-Real-Ip" not in request.headers: - for pattern, handlers in self.handlers: - if pattern.match(self.default_host): - matches.extend(handlers) - return matches or None - def _load_ui_methods(self, methods): if isinstance(methods, types.ModuleType): self._load_ui_methods(dict((n, getattr(methods, n)) @@ -1926,16 +1992,40 @@ class Application(httputil.HTTPServerConnectionDelegate): except 
TypeError: pass - def start_request(self, server_conn, request_conn): - # Modern HTTPServer interface - return _RequestDispatcher(self, request_conn) - def __call__(self, request): # Legacy HTTPServer interface - dispatcher = _RequestDispatcher(self, None) - dispatcher.set_request(request) + dispatcher = self.find_handler(request) return dispatcher.execute() + def find_handler(self, request, **kwargs): + route = self.default_router.find_handler(request) + if route is not None: + return route + + if self.settings.get('default_handler_class'): + return self.get_handler_delegate( + request, + self.settings['default_handler_class'], + self.settings.get('default_handler_args', {})) + + return self.get_handler_delegate( + request, ErrorHandler, {'status_code': 404}) + + def get_handler_delegate(self, request, target_class, target_kwargs=None, + path_args=None, path_kwargs=None): + """Returns `~.httputil.HTTPMessageDelegate` that can serve a request + for application and `RequestHandler` subclass. + + :arg httputil.HTTPServerRequest request: current HTTP request. + :arg RequestHandler target_class: a `RequestHandler` class. + :arg dict target_kwargs: keyword arguments for ``target_class`` constructor. + :arg list path_args: positional arguments for ``target_class`` HTTP method that + will be executed while handling a request (``get``, ``post`` or any other). + :arg dict path_kwargs: keyword arguments for ``target_class`` HTTP method. + """ + return _HandlerDelegate( + self, request, target_class, target_kwargs, path_args, path_kwargs) + def reverse_url(self, name, *args): """Returns a URL path for handler named ``name`` @@ -1945,8 +2035,10 @@ class Application(httputil.HTTPServerConnectionDelegate): They will be converted to strings if necessary, encoded as utf8, and url-escaped. 
""" - if name in self.named_handlers: - return self.named_handlers[name].reverse(*args) + reversed_url = self.default_router.reverse_url(name, *args) + if reversed_url is not None: + return reversed_url + raise KeyError("%s not found in named urls" % name) def log_request(self, handler): @@ -1971,67 +2063,24 @@ class Application(httputil.HTTPServerConnectionDelegate): handler._request_summary(), request_time) -class _RequestDispatcher(httputil.HTTPMessageDelegate): - def __init__(self, application, connection): +class _HandlerDelegate(httputil.HTTPMessageDelegate): + def __init__(self, application, request, handler_class, handler_kwargs, + path_args, path_kwargs): self.application = application - self.connection = connection - self.request = None + self.connection = request.connection + self.request = request + self.handler_class = handler_class + self.handler_kwargs = handler_kwargs or {} + self.path_args = path_args or [] + self.path_kwargs = path_kwargs or {} self.chunks = [] - self.handler_class = None - self.handler_kwargs = None - self.path_args = [] - self.path_kwargs = {} + self.stream_request_body = _has_stream_request_body(self.handler_class) def headers_received(self, start_line, headers): - self.set_request(httputil.HTTPServerRequest( - connection=self.connection, start_line=start_line, - headers=headers)) if self.stream_request_body: self.request.body = Future() return self.execute() - def set_request(self, request): - self.request = request - self._find_handler() - self.stream_request_body = _has_stream_request_body(self.handler_class) - - def _find_handler(self): - # Identify the handler to use as soon as we have the request. - # Save url path arguments for later. 
- app = self.application - handlers = app._get_host_handlers(self.request) - if not handlers: - self.handler_class = RedirectHandler - self.handler_kwargs = dict(url="%s://%s/" - % (self.request.protocol, - app.default_host)) - return - for spec in handlers: - match = spec.regex.match(self.request.path) - if match: - self.handler_class = spec.handler_class - self.handler_kwargs = spec.kwargs - if spec.regex.groups: - # Pass matched groups to the handler. Since - # match.groups() includes both named and - # unnamed groups, we want to use either groups - # or groupdict but not both. - if spec.regex.groupindex: - self.path_kwargs = dict( - (str(k), _unquote_or_none(v)) - for (k, v) in match.groupdict().items()) - else: - self.path_args = [_unquote_or_none(s) - for s in match.groups()] - return - if app.settings.get('default_handler_class'): - self.handler_class = app.settings['default_handler_class'] - self.handler_kwargs = app.settings.get( - 'default_handler_args', {}) - else: - self.handler_class = ErrorHandler - self.handler_kwargs = dict(status_code=404) - def data_received(self, data): if self.stream_request_body: return self.handler.data_received(data) @@ -2188,13 +2237,32 @@ class RedirectHandler(RequestHandler): application = web.Application([ (r"/oldpath", web.RedirectHandler, {"url": "/newpath"}), ]) + + `RedirectHandler` supports regular expression substitutions. E.g., to + swap the first and second parts of a path while preserving the remainder:: + + application = web.Application([ + (r"/(.*?)/(.*?)/(.*)", web.RedirectHandler, {"url": "/{1}/{0}/{2}"}), + ]) + + The final URL is formatted with `str.format` and the substrings that match + the capturing groups. In the above example, a request to "/a/b/c" would be + formatted like:: + + str.format("/{1}/{0}/{2}", "a", "b", "c") # -> "/b/a/c" + + Use Python's :ref:`format string syntax <formatstrings>` to customize how + values are substituted. + + .. 
versionchanged:: 4.5 + Added support for substitutions into the destination URL. """ def initialize(self, url, permanent=True): self._url = url self._permanent = permanent - def get(self): - self.redirect(self._url, permanent=self._permanent) + def get(self, *args): + self.redirect(self._url.format(*args), permanent=self._permanent) class StaticFileHandler(RequestHandler): @@ -2990,99 +3058,6 @@ class _UIModuleNamespace(object): raise AttributeError(str(e)) -class URLSpec(object): - """Specifies mappings between URLs and handlers.""" - def __init__(self, pattern, handler, kwargs=None, name=None): - """Parameters: - - * ``pattern``: Regular expression to be matched. Any capturing - groups in the regex will be passed in to the handler's - get/post/etc methods as arguments (by keyword if named, by - position if unnamed. Named and unnamed capturing groups may - may not be mixed in the same rule). - - * ``handler``: `RequestHandler` subclass to be invoked. - - * ``kwargs`` (optional): A dictionary of additional arguments - to be passed to the handler's constructor. - - * ``name`` (optional): A name for this handler. Used by - `Application.reverse_url`. - - """ - if not pattern.endswith('$'): - pattern += '$' - self.regex = re.compile(pattern) - assert len(self.regex.groupindex) in (0, self.regex.groups), \ - ("groups in url regexes must either be all named or all " - "positional: %r" % self.regex.pattern) - - if isinstance(handler, str): - # import the Module and instantiate the class - # Must be a fully qualified name (module.ClassName) - handler = import_object(handler) - - self.handler_class = handler - self.kwargs = kwargs or {} - self.name = name - self._path, self._group_count = self._find_groups() - - def __repr__(self): - return '%s(%r, %s, kwargs=%r, name=%r)' % \ - (self.__class__.__name__, self.regex.pattern, - self.handler_class, self.kwargs, self.name) - - def _find_groups(self): - """Returns a tuple (reverse string, group count) for a url. 
- - For example: Given the url pattern /([0-9]{4})/([a-z-]+)/, this method - would return ('/%s/%s/', 2). - """ - pattern = self.regex.pattern - if pattern.startswith('^'): - pattern = pattern[1:] - if pattern.endswith('$'): - pattern = pattern[:-1] - - if self.regex.groups != pattern.count('('): - # The pattern is too complicated for our simplistic matching, - # so we can't support reversing it. - return (None, None) - - pieces = [] - for fragment in pattern.split('('): - if ')' in fragment: - paren_loc = fragment.index(')') - if paren_loc >= 0: - pieces.append('%s' + fragment[paren_loc + 1:]) - else: - try: - unescaped_fragment = re_unescape(fragment) - except ValueError as exc: - # If we can't unescape part of it, we can't - # reverse this url. - return (None, None) - pieces.append(unescaped_fragment) - - return (''.join(pieces), self.regex.groups) - - def reverse(self, *args): - if self._path is None: - raise ValueError("Cannot reverse url regex " + self.regex.pattern) - assert len(args) == self._group_count, "required number of arguments "\ - "not found" - if not len(args): - return self._path - converted_args = [] - for a in args: - if not isinstance(a, (unicode_type, bytes)): - a = str(a) - converted_args.append(escape.url_escape(utf8(a), plus=False)) - return self._path % tuple(converted_args) - -url = URLSpec - - if hasattr(hmac, 'compare_digest'): # python 3.3 _time_independent_equals = hmac.compare_digest else: @@ -3147,6 +3122,7 @@ def create_signed_value(secret, name, value, version=None, clock=None, else: raise ValueError("Unsupported version %d" % version) + # A leading version number in decimal # with no leading zeros, followed by a pipe. _signed_value_version_re = re.compile(br"^([1-9][0-9]*)\|(.*)$") @@ -3305,13 +3281,5 @@ def _create_signature_v2(secret, s): return utf8(hash.hexdigest()) -def _unquote_or_none(s): - """None-safe wrapper around url_unescape to handle unamteched optional - groups correctly. 
- - Note that args are passed as bytes so the handler can decide what - encoding to use. - """ - if s is None: - return s - return escape.url_unescape(s, encoding=None, plus=False) +def is_absolute(path): + return any(path.startswith(x) for x in ["/", "http:", "https:"]) diff --git a/lib/tornado/websocket.py b/lib/tornado/websocket.py index 3bbd08ab40dafbf59edf8127cac86e459d72a446..69437ee4e3d9cc335b0eb1b785a3565fff14ab70 100644 --- a/lib/tornado/websocket.py +++ b/lib/tornado/websocket.py @@ -16,7 +16,7 @@ the protocol (known as "draft 76") and are not compatible with this module. Removed support for the draft 76 protocol version. """ -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function # Author: Jacob Kristhammar, 2010 import base64 @@ -30,8 +30,8 @@ import zlib from tornado.concurrent import TracebackFuture from tornado.escape import utf8, native_str, to_unicode -from tornado import httpclient, httputil -from tornado.ioloop import IOLoop +from tornado import gen, httpclient, httputil +from tornado.ioloop import IOLoop, PeriodicCallback from tornado.iostream import StreamClosedError from tornado.log import gen_log, app_log from tornado import simple_httpclient @@ -65,6 +65,10 @@ class WebSocketHandler(tornado.web.RequestHandler): override `open` and `on_close` to handle opened and closed connections. + Custom upgrade response headers can be sent by overriding + `~tornado.web.RequestHandler.set_default_headers` or + `~tornado.web.RequestHandler.prepare`. + See http://dev.w3.org/html5/websockets/ for details on the JavaScript interface. The protocol is specified at http://tools.ietf.org/html/rfc6455. @@ -122,6 +126,17 @@ class WebSocketHandler(tornado.web.RequestHandler): to show the "accept this certificate" dialog but has nowhere to show it. You must first visit a regular HTML page using the same certificate to accept it before the websocket connection will succeed. 
+ + If the application setting ``websocket_ping_interval`` has a non-zero + value, a ping will be sent periodically, and the connection will be + closed if a response is not received before the ``websocket_ping_timeout``. + + Messages larger than the ``websocket_max_message_size`` application setting + (default 10MiB) will not be accepted. + + .. versionchanged:: 4.5 + Added ``websocket_ping_interval``, ``websocket_ping_timeout``, and + ``websocket_max_message_size``. """ def __init__(self, application, request, **kwargs): super(WebSocketHandler, self).__init__(application, request, **kwargs) @@ -176,18 +191,42 @@ class WebSocketHandler(tornado.web.RequestHandler): gen_log.debug(log_msg) return - self.stream = self.request.connection.detach() - self.stream.set_close_callback(self.on_connection_close) - self.ws_connection = self.get_websocket_protocol() if self.ws_connection: self.ws_connection.accept_connection() else: - if not self.stream.closed(): - self.stream.write(tornado.escape.utf8( - "HTTP/1.1 426 Upgrade Required\r\n" - "Sec-WebSocket-Version: 7, 8, 13\r\n\r\n")) - self.stream.close() + self.set_status(426, "Upgrade Required") + self.set_header("Sec-WebSocket-Version", "7, 8, 13") + self.finish() + + stream = None + + @property + def ping_interval(self): + """The interval for websocket keep-alive pings. + + Set websocket_ping_interval = 0 to disable pings. + """ + return self.settings.get('websocket_ping_interval', None) + + @property + def ping_timeout(self): + """If no ping is received in this many seconds, + close the websocket connection (VPNs, etc. can fail to cleanly close ws connections). + Default is max of 3 pings or 30 seconds. + """ + return self.settings.get('websocket_ping_timeout', None) + + @property + def max_message_size(self): + """Maximum allowed message size. + + If the remote peer sends a message larger than this, the connection + will be closed. + + Default is 10MiB. 
+ """ + return self.settings.get('websocket_max_message_size', None) def write_message(self, message, binary=False): """Sends the given message to the client of this Web Socket. @@ -231,11 +270,22 @@ class WebSocketHandler(tornado.web.RequestHandler): If this method returns None (the default), compression will be disabled. If it returns a dict (even an empty one), it will be enabled. The contents of the dict may be used to - control the memory and CPU usage of the compression, - but no such options are currently implemented. + control the following compression options: + + ``compression_level`` specifies the compression level. + + ``mem_level`` specifies the amount of memory used for the internal compression state. + + These parameters are documented in details here: + https://docs.python.org/3.6/library/zlib.html#zlib.compressobj .. versionadded:: 4.1 + + .. versionchanged:: 4.5 + + Added ``compression_level`` and ``mem_level``. """ + # TODO: Add wbits option. return None def open(self, *args, **kwargs): @@ -251,6 +301,10 @@ class WebSocketHandler(tornado.web.RequestHandler): """Handle incoming messages on the WebSocket This method must be overridden. + + .. versionchanged:: 4.5 + + ``on_message`` can be a coroutine. """ raise NotImplementedError @@ -264,6 +318,10 @@ class WebSocketHandler(tornado.web.RequestHandler): """Invoked when the response to a ping frame is received.""" pass + def on_ping(self, data): + """Invoked when the a ping frame is received.""" + pass + def on_close(self): """Invoked when the WebSocket is closed. @@ -315,6 +373,19 @@ class WebSocketHandler(tornado.web.RequestHandler): browsers, since WebSockets are allowed to bypass the usual same-origin policies and don't use CORS headers. + .. warning:: + + This is an important security measure; don't disable it + without understanding the security implications. 
In + particular, if your authentication is cookie-based, you + must either restrict the origins allowed by + ``check_origin()`` or implement your own XSRF-like + protection for websocket connections. See `these + <https://www.christian-schneider.net/CrossSiteWebSocketHijacking.html>`_ + `articles + <https://devcenter.heroku.com/articles/websocket-security>`_ + for more. + To accept all cross-origin traffic (which was the default prior to Tornado 4.0), simply override this method to always return true:: @@ -329,6 +400,7 @@ class WebSocketHandler(tornado.web.RequestHandler): return parsed_origin.netloc.endswith(".mydomain.com") .. versionadded:: 4.0 + """ parsed_origin = urlparse(origin) origin = parsed_origin.netloc @@ -362,6 +434,16 @@ class WebSocketHandler(tornado.web.RequestHandler): if not self._on_close_called: self._on_close_called = True self.on_close() + self._break_cycles() + + def _break_cycles(self): + # WebSocketHandlers call finish() early, but we don't want to + # break up reference cycles (which makes it impossible to call + # self.render_string) until after we've really closed the + # connection (if it was established in the first place, + # indicated by status code 101). 
+ if self.get_status() != 101 or self._on_close_called: + super(WebSocketHandler, self)._break_cycles() def send_error(self, *args, **kwargs): if self.stream is None: @@ -379,18 +461,17 @@ class WebSocketHandler(tornado.web.RequestHandler): return WebSocketProtocol13( self, compression_options=self.get_compression_options()) + def _attach_stream(self): + self.stream = self.request.connection.detach() + self.stream.set_close_callback(self.on_connection_close) + # disable non-WS methods + for method in ["write", "redirect", "set_header", "set_cookie", + "set_status", "flush", "finish"]: + setattr(self, method, _raise_not_supported_for_websockets) -def _wrap_method(method): - def _disallow_for_websocket(self, *args, **kwargs): - if self.stream is None: - method(self, *args, **kwargs) - else: - raise RuntimeError("Method not supported for Web Sockets") - return _disallow_for_websocket -for method in ["write", "redirect", "set_header", "set_cookie", - "set_status", "flush", "finish"]: - setattr(WebSocketHandler, method, - _wrap_method(getattr(WebSocketHandler, method))) + +def _raise_not_supported_for_websockets(*args, **kwargs): + raise RuntimeError("Method not supported for Web Sockets") class WebSocketProtocol(object): @@ -406,14 +487,20 @@ class WebSocketProtocol(object): def _run_callback(self, callback, *args, **kwargs): """Runs the given callback with exception handling. - On error, aborts the websocket connection and returns False. + If the callback is a coroutine, returns its Future. On error, aborts the + websocket connection and returns None. 
""" try: - callback(*args, **kwargs) + result = callback(*args, **kwargs) except Exception: app_log.error("Uncaught exception in %s", - self.request.path, exc_info=True) + getattr(self.request, 'path', None), exc_info=True) self._abort() + else: + if result is not None: + result = gen.convert_yielded(result) + self.stream.io_loop.add_future(result, lambda f: f.result()) + return result def on_connection_close(self): self._abort() @@ -427,7 +514,7 @@ class WebSocketProtocol(object): class _PerMessageDeflateCompressor(object): - def __init__(self, persistent, max_wbits): + def __init__(self, persistent, max_wbits, compression_options=None): if max_wbits is None: max_wbits = zlib.MAX_WBITS # There is no symbolic constant for the minimum wbits value. @@ -435,14 +522,24 @@ class _PerMessageDeflateCompressor(object): raise ValueError("Invalid max_wbits value %r; allowed range 8-%d", max_wbits, zlib.MAX_WBITS) self._max_wbits = max_wbits + + if compression_options is None or 'compression_level' not in compression_options: + self._compression_level = tornado.web.GZipContentEncoding.GZIP_LEVEL + else: + self._compression_level = compression_options['compression_level'] + + if compression_options is None or 'mem_level' not in compression_options: + self._mem_level = 8 + else: + self._mem_level = compression_options['mem_level'] + if persistent: self._compressor = self._create_compressor() else: self._compressor = None def _create_compressor(self): - return zlib.compressobj(tornado.web.GZipContentEncoding.GZIP_LEVEL, - zlib.DEFLATED, -self._max_wbits) + return zlib.compressobj(self._compression_level, zlib.DEFLATED, -self._max_wbits, self._mem_level) def compress(self, data): compressor = self._compressor or self._create_compressor() @@ -453,7 +550,7 @@ class _PerMessageDeflateCompressor(object): class _PerMessageDeflateDecompressor(object): - def __init__(self, persistent, max_wbits): + def __init__(self, persistent, max_wbits, compression_options=None): if max_wbits is 
None: max_wbits = zlib.MAX_WBITS if not (8 <= max_wbits <= zlib.MAX_WBITS): @@ -512,6 +609,9 @@ class WebSocketProtocol13(WebSocketProtocol): # the effect of compression, frame overhead, and control frames. self._wire_bytes_in = 0 self._wire_bytes_out = 0 + self.ping_callback = None + self.last_ping = 0 + self.last_pong = 0 def accept_connection(self): try: @@ -548,46 +648,42 @@ class WebSocketProtocol13(WebSocketProtocol): self.request.headers.get("Sec-Websocket-Key")) def _accept_connection(self): - subprotocol_header = '' subprotocols = self.request.headers.get("Sec-WebSocket-Protocol", '') subprotocols = [s.strip() for s in subprotocols.split(',')] if subprotocols: selected = self.handler.select_subprotocol(subprotocols) if selected: assert selected in subprotocols - subprotocol_header = ("Sec-WebSocket-Protocol: %s\r\n" - % selected) + self.handler.set_header("Sec-WebSocket-Protocol", selected) - extension_header = '' extensions = self._parse_extensions_header(self.request.headers) for ext in extensions: if (ext[0] == 'permessage-deflate' and self._compression_options is not None): # TODO: negotiate parameters if compression_options # specifies limits. - self._create_compressors('server', ext[1]) + self._create_compressors('server', ext[1], self._compression_options) if ('client_max_window_bits' in ext[1] and ext[1]['client_max_window_bits'] is None): # Don't echo an offered client_max_window_bits # parameter with no value. 
del ext[1]['client_max_window_bits'] - extension_header = ('Sec-WebSocket-Extensions: %s\r\n' % - httputil._encode_header( - 'permessage-deflate', ext[1])) + self.handler.set_header("Sec-WebSocket-Extensions", + httputil._encode_header( + 'permessage-deflate', ext[1])) break - if self.stream.closed(): - self._abort() - return - self.stream.write(tornado.escape.utf8( - "HTTP/1.1 101 Switching Protocols\r\n" - "Upgrade: websocket\r\n" - "Connection: Upgrade\r\n" - "Sec-WebSocket-Accept: %s\r\n" - "%s%s" - "\r\n" % (self._challenge_response(), - subprotocol_header, extension_header))) + self.handler.clear_header("Content-Type") + self.handler.set_status(101) + self.handler.set_header("Upgrade", "websocket") + self.handler.set_header("Connection", "Upgrade") + self.handler.set_header("Sec-WebSocket-Accept", self._challenge_response()) + self.handler.finish() + + self.handler._attach_stream() + self.stream = self.handler.stream + self.start_pinging() self._run_callback(self.handler.open, *self.handler.open_args, **self.handler.open_kwargs) self._receive_frame() @@ -617,7 +713,7 @@ class WebSocketProtocol13(WebSocketProtocol): else: raise ValueError("unsupported extension %r", ext) - def _get_compressor_options(self, side, agreed_parameters): + def _get_compressor_options(self, side, agreed_parameters, compression_options=None): """Converts a websocket agreed_parameters set to keyword arguments for our compressor objects. 
""" @@ -628,9 +724,10 @@ class WebSocketProtocol13(WebSocketProtocol): options['max_wbits'] = zlib.MAX_WBITS else: options['max_wbits'] = int(wbits_header) + options['compression_options'] = compression_options return options - def _create_compressors(self, side, agreed_parameters): + def _create_compressors(self, side, agreed_parameters, compression_options=None): # TODO: handle invalid parameters gracefully allowed_keys = set(['server_no_context_takeover', 'client_no_context_takeover', @@ -641,9 +738,9 @@ class WebSocketProtocol13(WebSocketProtocol): raise ValueError("unsupported compression parameter %r" % key) other_side = 'client' if (side == 'server') else 'server' self._compressor = _PerMessageDeflateCompressor( - **self._get_compressor_options(side, agreed_parameters)) + **self._get_compressor_options(side, agreed_parameters, compression_options)) self._decompressor = _PerMessageDeflateDecompressor( - **self._get_compressor_options(other_side, agreed_parameters)) + **self._get_compressor_options(other_side, agreed_parameters, compression_options)) def _write_frame(self, fin, opcode, data, flags=0): if fin: @@ -724,8 +821,7 @@ class WebSocketProtocol13(WebSocketProtocol): if self._masked_frame: self.stream.read_bytes(4, self._on_masking_key) else: - self.stream.read_bytes(self._frame_length, - self._on_frame_data) + self._read_frame_data(False) elif payloadlen == 126: self.stream.read_bytes(2, self._on_frame_length_16) elif payloadlen == 127: @@ -733,6 +829,17 @@ class WebSocketProtocol13(WebSocketProtocol): except StreamClosedError: self._abort() + def _read_frame_data(self, masked): + new_len = self._frame_length + if self._fragmented_message_buffer is not None: + new_len += len(self._fragmented_message_buffer) + if new_len > (self.handler.max_message_size or 10 * 1024 * 1024): + self.close(1009, "message too big") + return + self.stream.read_bytes( + self._frame_length, + self._on_masked_frame_data if masked else self._on_frame_data) + def 
_on_frame_length_16(self, data): self._wire_bytes_in += len(data) self._frame_length = struct.unpack("!H", data)[0] @@ -740,7 +847,7 @@ class WebSocketProtocol13(WebSocketProtocol): if self._masked_frame: self.stream.read_bytes(4, self._on_masking_key) else: - self.stream.read_bytes(self._frame_length, self._on_frame_data) + self._read_frame_data(False) except StreamClosedError: self._abort() @@ -751,7 +858,7 @@ class WebSocketProtocol13(WebSocketProtocol): if self._masked_frame: self.stream.read_bytes(4, self._on_masking_key) else: - self.stream.read_bytes(self._frame_length, self._on_frame_data) + self._read_frame_data(False) except StreamClosedError: self._abort() @@ -759,8 +866,7 @@ class WebSocketProtocol13(WebSocketProtocol): self._wire_bytes_in += len(data) self._frame_mask = data try: - self.stream.read_bytes(self._frame_length, - self._on_masked_frame_data) + self._read_frame_data(True) except StreamClosedError: self._abort() @@ -769,6 +875,8 @@ class WebSocketProtocol13(WebSocketProtocol): self._on_frame_data(_websocket_mask(self._frame_mask, data)) def _on_frame_data(self, data): + handled_future = None + self._wire_bytes_in += len(data) if self._frame_opcode_is_control: # control frames may be interleaved with a series of fragmented @@ -801,12 +909,18 @@ class WebSocketProtocol13(WebSocketProtocol): self._fragmented_message_buffer = data if self._final_frame: - self._handle_message(opcode, data) + handled_future = self._handle_message(opcode, data) if not self.client_terminated: - self._receive_frame() + if handled_future: + # on_message is a coroutine, process more frames once it's done. 
+ handled_future.add_done_callback( + lambda future: self._receive_frame()) + else: + self._receive_frame() def _handle_message(self, opcode, data): + """Execute on_message, returning its Future if it is a coroutine.""" if self.client_terminated: return @@ -821,11 +935,11 @@ class WebSocketProtocol13(WebSocketProtocol): except UnicodeDecodeError: self._abort() return - self._run_callback(self.handler.on_message, decoded) + return self._run_callback(self.handler.on_message, decoded) elif opcode == 0x2: # Binary data self._message_bytes_in += len(data) - self._run_callback(self.handler.on_message, data) + return self._run_callback(self.handler.on_message, data) elif opcode == 0x8: # Close self.client_terminated = True @@ -838,9 +952,11 @@ class WebSocketProtocol13(WebSocketProtocol): elif opcode == 0x9: # Ping self._write_frame(True, 0xA, data) + self._run_callback(self.handler.on_ping, data) elif opcode == 0xA: # Pong - self._run_callback(self.handler.on_pong, data) + self.last_pong = IOLoop.current().time() + return self._run_callback(self.handler.on_pong, data) else: self._abort() @@ -869,6 +985,51 @@ class WebSocketProtocol13(WebSocketProtocol): self._waiting = self.stream.io_loop.add_timeout( self.stream.io_loop.time() + 5, self._abort) + @property + def ping_interval(self): + interval = self.handler.ping_interval + if interval is not None: + return interval + return 0 + + @property + def ping_timeout(self): + timeout = self.handler.ping_timeout + if timeout is not None: + return timeout + return max(3 * self.ping_interval, 30) + + def start_pinging(self): + """Start sending periodic pings to keep the connection alive""" + if self.ping_interval > 0: + self.last_ping = self.last_pong = IOLoop.current().time() + self.ping_callback = PeriodicCallback( + self.periodic_ping, self.ping_interval * 1000) + self.ping_callback.start() + + def periodic_ping(self): + """Send a ping to keep the websocket alive + + Called periodically if the websocket_ping_interval is set and 
non-zero. + """ + if self.stream.closed() and self.ping_callback is not None: + self.ping_callback.stop() + return + + # Check for timeout on pong. Make sure that we really have + # sent a recent ping in case the machine with both server and + # client has been suspended since the last ping. + now = IOLoop.current().time() + since_last_pong = now - self.last_pong + since_last_ping = now - self.last_ping + if (since_last_ping < 2 * self.ping_interval and + since_last_pong > self.ping_timeout): + self.close() + return + + self.write_ping(b'') + self.last_ping = now + class WebSocketClientConnection(simple_httpclient._HTTPConnection): """WebSocket client connection. @@ -877,7 +1038,8 @@ class WebSocketClientConnection(simple_httpclient._HTTPConnection): `websocket_connect` function instead. """ def __init__(self, io_loop, request, on_message_callback=None, - compression_options=None): + compression_options=None, ping_interval=None, ping_timeout=None, + max_message_size=None): self.compression_options = compression_options self.connect_future = TracebackFuture() self.protocol = None @@ -886,6 +1048,9 @@ class WebSocketClientConnection(simple_httpclient._HTTPConnection): self.key = base64.b64encode(os.urandom(16)) self._on_message_callback = on_message_callback self.close_code = self.close_reason = None + self.ping_interval = ping_interval + self.ping_timeout = ping_timeout + self.max_message_size = max_message_size scheme, sep, rest = request.url.partition(':') scheme = {'ws': 'http', 'wss': 'https'}[scheme] @@ -949,6 +1114,7 @@ class WebSocketClientConnection(simple_httpclient._HTTPConnection): self.headers = headers self.protocol = self.get_websocket_protocol() self.protocol._process_server_headers(self.key, self.headers) + self.protocol.start_pinging() self.protocol._receive_frame() if self._timeout is not None: @@ -1002,13 +1168,18 @@ class WebSocketClientConnection(simple_httpclient._HTTPConnection): def on_pong(self, data): pass + def on_ping(self, data): + pass 
+ def get_websocket_protocol(self): return WebSocketProtocol13(self, mask_outgoing=True, compression_options=self.compression_options) def websocket_connect(url, io_loop=None, callback=None, connect_timeout=None, - on_message_callback=None, compression_options=None): + on_message_callback=None, compression_options=None, + ping_interval=None, ping_timeout=None, + max_message_size=None): """Client-side websocket support. Takes a url and returns a Future whose result is a @@ -1037,6 +1208,10 @@ def websocket_connect(url, io_loop=None, callback=None, connect_timeout=None, .. versionchanged:: 4.1 Added ``compression_options`` and ``on_message_callback``. The ``io_loop`` argument is deprecated. + + .. versionchanged:: 4.5 + Added the ``ping_interval``, ``ping_timeout``, and ``max_message_size`` + arguments, which have the same meaning as in `WebSocketHandler`. """ if io_loop is None: io_loop = IOLoop.current() @@ -1052,7 +1227,10 @@ def websocket_connect(url, io_loop=None, callback=None, connect_timeout=None, request, httpclient.HTTPRequest._DEFAULTS) conn = WebSocketClientConnection(io_loop, request, on_message_callback=on_message_callback, - compression_options=compression_options) + compression_options=compression_options, + ping_interval=ping_interval, + ping_timeout=ping_timeout, + max_message_size=max_message_size) if callback is not None: io_loop.add_future(conn.connect_future, callback) return conn.connect_future diff --git a/lib/tornado/wsgi.py b/lib/tornado/wsgi.py index e9ead300da036fbb3156feb7df69e26b2cd41e86..68a7615a0ef930a83d47f6a0bc024cd4ee36ed91 100644 --- a/lib/tornado/wsgi.py +++ b/lib/tornado/wsgi.py @@ -29,7 +29,7 @@ provides WSGI support in two ways: and Tornado handlers in a single server. 
""" -from __future__ import absolute_import, division, print_function, with_statement +from __future__ import absolute_import, division, print_function import sys from io import BytesIO diff --git a/lib/webencodings/__init__.py b/lib/webencodings/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d21d697c887bed1f8ab7f36d10185e986d9f1e54 --- /dev/null +++ b/lib/webencodings/__init__.py @@ -0,0 +1,342 @@ +# coding: utf-8 +""" + + webencodings + ~~~~~~~~~~~~ + + This is a Python implementation of the `WHATWG Encoding standard + <http://encoding.spec.whatwg.org/>`. See README for details. + + :copyright: Copyright 2012 by Simon Sapin + :license: BSD, see LICENSE for details. + +""" + +from __future__ import unicode_literals + +import codecs + +from .labels import LABELS + + +VERSION = '0.5.1' + + +# Some names in Encoding are not valid Python aliases. Remap these. +PYTHON_NAMES = { + 'iso-8859-8-i': 'iso-8859-8', + 'x-mac-cyrillic': 'mac-cyrillic', + 'macintosh': 'mac-roman', + 'windows-874': 'cp874'} + +CACHE = {} + + +def ascii_lower(string): + r"""Transform (only) ASCII letters to lower case: A-Z is mapped to a-z. + + :param string: An Unicode string. + :returns: A new Unicode string. + + This is used for `ASCII case-insensitive + <http://encoding.spec.whatwg.org/#ascii-case-insensitive>`_ + matching of encoding labels. + The same matching is also used, among other things, + for `CSS keywords <http://dev.w3.org/csswg/css-values/#keywords>`_. 
+ + This is different from the :meth:`~py:str.lower` method of Unicode strings + which also affect non-ASCII characters, + sometimes mapping them into the ASCII range: + + >>> keyword = u'Bac\N{KELVIN SIGN}ground' + >>> assert keyword.lower() == u'background' + >>> assert ascii_lower(keyword) != keyword.lower() + >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground' + + """ + # This turns out to be faster than unicode.translate() + return string.encode('utf8').lower().decode('utf8') + + +def lookup(label): + """ + Look for an encoding by its label. + This is the spec’s `get an encoding + <http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm. + Supported labels are listed there. + + :param label: A string. + :returns: + An :class:`Encoding` object, or :obj:`None` for an unknown label. + + """ + # Only strip ASCII whitespace: U+0009, U+000A, U+000C, U+000D, and U+0020. + label = ascii_lower(label.strip('\t\n\f\r ')) + name = LABELS.get(label) + if name is None: + return None + encoding = CACHE.get(name) + if encoding is None: + if name == 'x-user-defined': + from .x_user_defined import codec_info + else: + python_name = PYTHON_NAMES.get(name, name) + # Any python_name value that gets to here should be valid. + codec_info = codecs.lookup(python_name) + encoding = Encoding(name, codec_info) + CACHE[name] = encoding + return encoding + + +def _get_encoding(encoding_or_label): + """ + Accept either an encoding object or label. + + :param encoding: An :class:`Encoding` object or a label string. + :returns: An :class:`Encoding` object. + :raises: :exc:`~exceptions.LookupError` for an unknown label. + + """ + if hasattr(encoding_or_label, 'codec_info'): + return encoding_or_label + + encoding = lookup(encoding_or_label) + if encoding is None: + raise LookupError('Unknown encoding label: %r' % encoding_or_label) + return encoding + + +class Encoding(object): + """Reresents a character encoding such as UTF-8, + that can be used for decoding or encoding. 
+ + .. attribute:: name + + Canonical name of the encoding + + .. attribute:: codec_info + + The actual implementation of the encoding, + a stdlib :class:`~codecs.CodecInfo` object. + See :func:`codecs.register`. + + """ + def __init__(self, name, codec_info): + self.name = name + self.codec_info = codec_info + + def __repr__(self): + return '<Encoding %s>' % self.name + + +#: The UTF-8 encoding. Should be used for new content and formats. +UTF8 = lookup('utf-8') + +_UTF16LE = lookup('utf-16le') +_UTF16BE = lookup('utf-16be') + + +def decode(input, fallback_encoding, errors='replace'): + """ + Decode a single string. + + :param input: A byte string + :param fallback_encoding: + An :class:`Encoding` object or a label string. + The encoding to use if :obj:`input` does note have a BOM. + :param errors: Type of error handling. See :func:`codecs.register`. + :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. + :return: + A ``(output, encoding)`` tuple of an Unicode string + and an :obj:`Encoding`. + + """ + # Fail early if `encoding` is an invalid label. + fallback_encoding = _get_encoding(fallback_encoding) + bom_encoding, input = _detect_bom(input) + encoding = bom_encoding or fallback_encoding + return encoding.codec_info.decode(input, errors)[0], encoding + + +def _detect_bom(input): + """Return (bom_encoding, input), with any BOM removed from the input.""" + if input.startswith(b'\xFF\xFE'): + return _UTF16LE, input[2:] + if input.startswith(b'\xFE\xFF'): + return _UTF16BE, input[2:] + if input.startswith(b'\xEF\xBB\xBF'): + return UTF8, input[3:] + return None, input + + +def encode(input, encoding=UTF8, errors='strict'): + """ + Encode a single string. + + :param input: An Unicode string. + :param encoding: An :class:`Encoding` object or a label string. + :param errors: Type of error handling. See :func:`codecs.register`. + :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. + :return: A byte string. 
+ + """ + return _get_encoding(encoding).codec_info.encode(input, errors)[0] + + +def iter_decode(input, fallback_encoding, errors='replace'): + """ + "Pull"-based decoder. + + :param input: + An iterable of byte strings. + + The input is first consumed just enough to determine the encoding + based on the precense of a BOM, + then consumed on demand when the return value is. + :param fallback_encoding: + An :class:`Encoding` object or a label string. + The encoding to use if :obj:`input` does note have a BOM. + :param errors: Type of error handling. See :func:`codecs.register`. + :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. + :returns: + An ``(output, encoding)`` tuple. + :obj:`output` is an iterable of Unicode strings, + :obj:`encoding` is the :obj:`Encoding` that is being used. + + """ + + decoder = IncrementalDecoder(fallback_encoding, errors) + generator = _iter_decode_generator(input, decoder) + encoding = next(generator) + return generator, encoding + + +def _iter_decode_generator(input, decoder): + """Return a generator that first yields the :obj:`Encoding`, + then yields output chukns as Unicode strings. + + """ + decode = decoder.decode + input = iter(input) + for chunck in input: + output = decode(chunck) + if output: + assert decoder.encoding is not None + yield decoder.encoding + yield output + break + else: + # Input exhausted without determining the encoding + output = decode(b'', final=True) + assert decoder.encoding is not None + yield decoder.encoding + if output: + yield output + return + + for chunck in input: + output = decode(chunck) + if output: + yield output + output = decode(b'', final=True) + if output: + yield output + + +def iter_encode(input, encoding=UTF8, errors='strict'): + """ + “Pull”-based encoder. + + :param input: An iterable of Unicode strings. + :param encoding: An :class:`Encoding` object or a label string. + :param errors: Type of error handling. See :func:`codecs.register`. 
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. + :returns: An iterable of byte strings. + + """ + # Fail early if `encoding` is an invalid label. + encode = IncrementalEncoder(encoding, errors).encode + return _iter_encode_generator(input, encode) + + +def _iter_encode_generator(input, encode): + for chunck in input: + output = encode(chunck) + if output: + yield output + output = encode('', final=True) + if output: + yield output + + +class IncrementalDecoder(object): + """ + “Push”-based decoder. + + :param fallback_encoding: + An :class:`Encoding` object or a label string. + The encoding to use if :obj:`input` does note have a BOM. + :param errors: Type of error handling. See :func:`codecs.register`. + :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. + + """ + def __init__(self, fallback_encoding, errors='replace'): + # Fail early if `encoding` is an invalid label. + self._fallback_encoding = _get_encoding(fallback_encoding) + self._errors = errors + self._buffer = b'' + self._decoder = None + #: The actual :class:`Encoding` that is being used, + #: or :obj:`None` if that is not determined yet. + #: (Ie. if there is not enough input yet to determine + #: if there is a BOM.) + self.encoding = None # Not known yet. + + def decode(self, input, final=False): + """Decode one chunk of the input. + + :param input: A byte string. + :param final: + Indicate that no more input is available. + Must be :obj:`True` if this is the last call. + :returns: An Unicode string. + + """ + decoder = self._decoder + if decoder is not None: + return decoder(input, final) + + input = self._buffer + input + encoding, input = _detect_bom(input) + if encoding is None: + if len(input) < 3 and not final: # Not enough data yet. 
+ self._buffer = input + return '' + else: # No BOM + encoding = self._fallback_encoding + decoder = encoding.codec_info.incrementaldecoder(self._errors).decode + self._decoder = decoder + self.encoding = encoding + return decoder(input, final) + + +class IncrementalEncoder(object): + """ + “Push”-based encoder. + + :param encoding: An :class:`Encoding` object or a label string. + :param errors: Type of error handling. See :func:`codecs.register`. + :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. + + .. method:: encode(input, final=False) + + :param input: An Unicode string. + :param final: + Indicate that no more input is available. + Must be :obj:`True` if this is the last call. + :returns: A byte string. + + """ + def __init__(self, encoding=UTF8, errors='strict'): + encoding = _get_encoding(encoding) + self.encode = encoding.codec_info.incrementalencoder(errors).encode diff --git a/lib/webencodings/labels.py b/lib/webencodings/labels.py new file mode 100644 index 0000000000000000000000000000000000000000..29cbf91ef79b89971e51db9ddfc3720d8b4db82a --- /dev/null +++ b/lib/webencodings/labels.py @@ -0,0 +1,231 @@ +""" + + webencodings.labels + ~~~~~~~~~~~~~~~~~~~ + + Map encoding labels to their name. + + :copyright: Copyright 2012 by Simon Sapin + :license: BSD, see LICENSE for details. + +""" + +# XXX Do not edit! 
+# This file is automatically generated by mklabels.py + +LABELS = { + 'unicode-1-1-utf-8': 'utf-8', + 'utf-8': 'utf-8', + 'utf8': 'utf-8', + '866': 'ibm866', + 'cp866': 'ibm866', + 'csibm866': 'ibm866', + 'ibm866': 'ibm866', + 'csisolatin2': 'iso-8859-2', + 'iso-8859-2': 'iso-8859-2', + 'iso-ir-101': 'iso-8859-2', + 'iso8859-2': 'iso-8859-2', + 'iso88592': 'iso-8859-2', + 'iso_8859-2': 'iso-8859-2', + 'iso_8859-2:1987': 'iso-8859-2', + 'l2': 'iso-8859-2', + 'latin2': 'iso-8859-2', + 'csisolatin3': 'iso-8859-3', + 'iso-8859-3': 'iso-8859-3', + 'iso-ir-109': 'iso-8859-3', + 'iso8859-3': 'iso-8859-3', + 'iso88593': 'iso-8859-3', + 'iso_8859-3': 'iso-8859-3', + 'iso_8859-3:1988': 'iso-8859-3', + 'l3': 'iso-8859-3', + 'latin3': 'iso-8859-3', + 'csisolatin4': 'iso-8859-4', + 'iso-8859-4': 'iso-8859-4', + 'iso-ir-110': 'iso-8859-4', + 'iso8859-4': 'iso-8859-4', + 'iso88594': 'iso-8859-4', + 'iso_8859-4': 'iso-8859-4', + 'iso_8859-4:1988': 'iso-8859-4', + 'l4': 'iso-8859-4', + 'latin4': 'iso-8859-4', + 'csisolatincyrillic': 'iso-8859-5', + 'cyrillic': 'iso-8859-5', + 'iso-8859-5': 'iso-8859-5', + 'iso-ir-144': 'iso-8859-5', + 'iso8859-5': 'iso-8859-5', + 'iso88595': 'iso-8859-5', + 'iso_8859-5': 'iso-8859-5', + 'iso_8859-5:1988': 'iso-8859-5', + 'arabic': 'iso-8859-6', + 'asmo-708': 'iso-8859-6', + 'csiso88596e': 'iso-8859-6', + 'csiso88596i': 'iso-8859-6', + 'csisolatinarabic': 'iso-8859-6', + 'ecma-114': 'iso-8859-6', + 'iso-8859-6': 'iso-8859-6', + 'iso-8859-6-e': 'iso-8859-6', + 'iso-8859-6-i': 'iso-8859-6', + 'iso-ir-127': 'iso-8859-6', + 'iso8859-6': 'iso-8859-6', + 'iso88596': 'iso-8859-6', + 'iso_8859-6': 'iso-8859-6', + 'iso_8859-6:1987': 'iso-8859-6', + 'csisolatingreek': 'iso-8859-7', + 'ecma-118': 'iso-8859-7', + 'elot_928': 'iso-8859-7', + 'greek': 'iso-8859-7', + 'greek8': 'iso-8859-7', + 'iso-8859-7': 'iso-8859-7', + 'iso-ir-126': 'iso-8859-7', + 'iso8859-7': 'iso-8859-7', + 'iso88597': 'iso-8859-7', + 'iso_8859-7': 'iso-8859-7', + 'iso_8859-7:1987': 
'iso-8859-7', + 'sun_eu_greek': 'iso-8859-7', + 'csiso88598e': 'iso-8859-8', + 'csisolatinhebrew': 'iso-8859-8', + 'hebrew': 'iso-8859-8', + 'iso-8859-8': 'iso-8859-8', + 'iso-8859-8-e': 'iso-8859-8', + 'iso-ir-138': 'iso-8859-8', + 'iso8859-8': 'iso-8859-8', + 'iso88598': 'iso-8859-8', + 'iso_8859-8': 'iso-8859-8', + 'iso_8859-8:1988': 'iso-8859-8', + 'visual': 'iso-8859-8', + 'csiso88598i': 'iso-8859-8-i', + 'iso-8859-8-i': 'iso-8859-8-i', + 'logical': 'iso-8859-8-i', + 'csisolatin6': 'iso-8859-10', + 'iso-8859-10': 'iso-8859-10', + 'iso-ir-157': 'iso-8859-10', + 'iso8859-10': 'iso-8859-10', + 'iso885910': 'iso-8859-10', + 'l6': 'iso-8859-10', + 'latin6': 'iso-8859-10', + 'iso-8859-13': 'iso-8859-13', + 'iso8859-13': 'iso-8859-13', + 'iso885913': 'iso-8859-13', + 'iso-8859-14': 'iso-8859-14', + 'iso8859-14': 'iso-8859-14', + 'iso885914': 'iso-8859-14', + 'csisolatin9': 'iso-8859-15', + 'iso-8859-15': 'iso-8859-15', + 'iso8859-15': 'iso-8859-15', + 'iso885915': 'iso-8859-15', + 'iso_8859-15': 'iso-8859-15', + 'l9': 'iso-8859-15', + 'iso-8859-16': 'iso-8859-16', + 'cskoi8r': 'koi8-r', + 'koi': 'koi8-r', + 'koi8': 'koi8-r', + 'koi8-r': 'koi8-r', + 'koi8_r': 'koi8-r', + 'koi8-u': 'koi8-u', + 'csmacintosh': 'macintosh', + 'mac': 'macintosh', + 'macintosh': 'macintosh', + 'x-mac-roman': 'macintosh', + 'dos-874': 'windows-874', + 'iso-8859-11': 'windows-874', + 'iso8859-11': 'windows-874', + 'iso885911': 'windows-874', + 'tis-620': 'windows-874', + 'windows-874': 'windows-874', + 'cp1250': 'windows-1250', + 'windows-1250': 'windows-1250', + 'x-cp1250': 'windows-1250', + 'cp1251': 'windows-1251', + 'windows-1251': 'windows-1251', + 'x-cp1251': 'windows-1251', + 'ansi_x3.4-1968': 'windows-1252', + 'ascii': 'windows-1252', + 'cp1252': 'windows-1252', + 'cp819': 'windows-1252', + 'csisolatin1': 'windows-1252', + 'ibm819': 'windows-1252', + 'iso-8859-1': 'windows-1252', + 'iso-ir-100': 'windows-1252', + 'iso8859-1': 'windows-1252', + 'iso88591': 'windows-1252', + 
'iso_8859-1': 'windows-1252', + 'iso_8859-1:1987': 'windows-1252', + 'l1': 'windows-1252', + 'latin1': 'windows-1252', + 'us-ascii': 'windows-1252', + 'windows-1252': 'windows-1252', + 'x-cp1252': 'windows-1252', + 'cp1253': 'windows-1253', + 'windows-1253': 'windows-1253', + 'x-cp1253': 'windows-1253', + 'cp1254': 'windows-1254', + 'csisolatin5': 'windows-1254', + 'iso-8859-9': 'windows-1254', + 'iso-ir-148': 'windows-1254', + 'iso8859-9': 'windows-1254', + 'iso88599': 'windows-1254', + 'iso_8859-9': 'windows-1254', + 'iso_8859-9:1989': 'windows-1254', + 'l5': 'windows-1254', + 'latin5': 'windows-1254', + 'windows-1254': 'windows-1254', + 'x-cp1254': 'windows-1254', + 'cp1255': 'windows-1255', + 'windows-1255': 'windows-1255', + 'x-cp1255': 'windows-1255', + 'cp1256': 'windows-1256', + 'windows-1256': 'windows-1256', + 'x-cp1256': 'windows-1256', + 'cp1257': 'windows-1257', + 'windows-1257': 'windows-1257', + 'x-cp1257': 'windows-1257', + 'cp1258': 'windows-1258', + 'windows-1258': 'windows-1258', + 'x-cp1258': 'windows-1258', + 'x-mac-cyrillic': 'x-mac-cyrillic', + 'x-mac-ukrainian': 'x-mac-cyrillic', + 'chinese': 'gbk', + 'csgb2312': 'gbk', + 'csiso58gb231280': 'gbk', + 'gb2312': 'gbk', + 'gb_2312': 'gbk', + 'gb_2312-80': 'gbk', + 'gbk': 'gbk', + 'iso-ir-58': 'gbk', + 'x-gbk': 'gbk', + 'gb18030': 'gb18030', + 'hz-gb-2312': 'hz-gb-2312', + 'big5': 'big5', + 'big5-hkscs': 'big5', + 'cn-big5': 'big5', + 'csbig5': 'big5', + 'x-x-big5': 'big5', + 'cseucpkdfmtjapanese': 'euc-jp', + 'euc-jp': 'euc-jp', + 'x-euc-jp': 'euc-jp', + 'csiso2022jp': 'iso-2022-jp', + 'iso-2022-jp': 'iso-2022-jp', + 'csshiftjis': 'shift_jis', + 'ms_kanji': 'shift_jis', + 'shift-jis': 'shift_jis', + 'shift_jis': 'shift_jis', + 'sjis': 'shift_jis', + 'windows-31j': 'shift_jis', + 'x-sjis': 'shift_jis', + 'cseuckr': 'euc-kr', + 'csksc56011987': 'euc-kr', + 'euc-kr': 'euc-kr', + 'iso-ir-149': 'euc-kr', + 'korean': 'euc-kr', + 'ks_c_5601-1987': 'euc-kr', + 'ks_c_5601-1989': 'euc-kr', + 'ksc5601': 
'euc-kr', + 'ksc_5601': 'euc-kr', + 'windows-949': 'euc-kr', + 'csiso2022kr': 'iso-2022-kr', + 'iso-2022-kr': 'iso-2022-kr', + 'utf-16be': 'utf-16be', + 'utf-16': 'utf-16le', + 'utf-16le': 'utf-16le', + 'x-user-defined': 'x-user-defined', +} diff --git a/lib/webencodings/mklabels.py b/lib/webencodings/mklabels.py new file mode 100644 index 0000000000000000000000000000000000000000..295dc928ba71fc00caa52708ac70097abe6dc3e4 --- /dev/null +++ b/lib/webencodings/mklabels.py @@ -0,0 +1,59 @@ +""" + + webencodings.mklabels + ~~~~~~~~~~~~~~~~~~~~~ + + Regenerate the webencodings.labels module. + + :copyright: Copyright 2012 by Simon Sapin + :license: BSD, see LICENSE for details. + +""" + +import json +try: + from urllib import urlopen +except ImportError: + from urllib.request import urlopen + + +def assert_lower(string): + assert string == string.lower() + return string + + +def generate(url): + parts = ['''\ +""" + + webencodings.labels + ~~~~~~~~~~~~~~~~~~~ + + Map encoding labels to their name. + + :copyright: Copyright 2012 by Simon Sapin + :license: BSD, see LICENSE for details. + +""" + +# XXX Do not edit! 
+# This file is automatically generated by mklabels.py + +LABELS = { +'''] + labels = [ + (repr(assert_lower(label)).lstrip('u'), + repr(encoding['name']).lstrip('u')) + for category in json.loads(urlopen(url).read().decode('ascii')) + for encoding in category['encodings'] + for label in encoding['labels']] + max_len = max(len(label) for label, name in labels) + parts.extend( + ' %s:%s %s,\n' % (label, ' ' * (max_len - len(label)), name) + for label, name in labels) + parts.append('}') + return ''.join(parts) + + +if __name__ == '__main__': + print(generate('http://encoding.spec.whatwg.org/encodings.json')) diff --git a/lib/webencodings/tests.py b/lib/webencodings/tests.py new file mode 100644 index 0000000000000000000000000000000000000000..e12c10d033026f09cf97b81d29555e12aae8c762 --- /dev/null +++ b/lib/webencodings/tests.py @@ -0,0 +1,153 @@ +# coding: utf-8 +""" + + webencodings.tests + ~~~~~~~~~~~~~~~~~~ + + A basic test suite for Encoding. + + :copyright: Copyright 2012 by Simon Sapin + :license: BSD, see LICENSE for details. + +""" + +from __future__ import unicode_literals + +from . import (lookup, LABELS, decode, encode, iter_decode, iter_encode, + IncrementalDecoder, IncrementalEncoder, UTF8) + + +def assert_raises(exception, function, *args, **kwargs): + try: + function(*args, **kwargs) + except exception: + return + else: # pragma: no cover + raise AssertionError('Did not raise %s.' % exception) + + +def test_labels(): + assert lookup('utf-8').name == 'utf-8' + assert lookup('Utf-8').name == 'utf-8' + assert lookup('UTF-8').name == 'utf-8' + assert lookup('utf8').name == 'utf-8' + assert lookup('utf8').name == 'utf-8' + assert lookup('utf8 ').name == 'utf-8' + assert lookup(' \r\nutf8\t').name == 'utf-8' + assert lookup('u8') is None # Python label. + assert lookup('utf-8 ') is None # Non-ASCII white space. 
+ + assert lookup('US-ASCII').name == 'windows-1252' + assert lookup('iso-8859-1').name == 'windows-1252' + assert lookup('latin1').name == 'windows-1252' + assert lookup('LATIN1').name == 'windows-1252' + assert lookup('latin-1') is None + assert lookup('LATİN1') is None # ASCII-only case insensitivity. + + +def test_all_labels(): + for label in LABELS: + assert decode(b'', label) == ('', lookup(label)) + assert encode('', label) == b'' + for repeat in [0, 1, 12]: + output, _ = iter_decode([b''] * repeat, label) + assert list(output) == [] + assert list(iter_encode([''] * repeat, label)) == [] + decoder = IncrementalDecoder(label) + assert decoder.decode(b'') == '' + assert decoder.decode(b'', final=True) == '' + encoder = IncrementalEncoder(label) + assert encoder.encode('') == b'' + assert encoder.encode('', final=True) == b'' + # All encoding names are valid labels too: + for name in set(LABELS.values()): + assert lookup(name).name == name + + +def test_invalid_label(): + assert_raises(LookupError, decode, b'\xEF\xBB\xBF\xc3\xa9', 'invalid') + assert_raises(LookupError, encode, 'é', 'invalid') + assert_raises(LookupError, iter_decode, [], 'invalid') + assert_raises(LookupError, iter_encode, [], 'invalid') + assert_raises(LookupError, IncrementalDecoder, 'invalid') + assert_raises(LookupError, IncrementalEncoder, 'invalid') + + +def test_decode(): + assert decode(b'\x80', 'latin1') == ('€', lookup('latin1')) + assert decode(b'\x80', lookup('latin1')) == ('€', lookup('latin1')) + assert decode(b'\xc3\xa9', 'utf8') == ('é', lookup('utf8')) + assert decode(b'\xc3\xa9', UTF8) == ('é', lookup('utf8')) + assert decode(b'\xc3\xa9', 'ascii') == ('é', lookup('ascii')) + assert decode(b'\xEF\xBB\xBF\xc3\xa9', 'ascii') == ('é', lookup('utf8')) # UTF-8 with BOM + + assert decode(b'\xFE\xFF\x00\xe9', 'ascii') == ('é', lookup('utf-16be')) # UTF-16-BE with BOM + assert decode(b'\xFF\xFE\xe9\x00', 'ascii') == ('é', lookup('utf-16le')) # UTF-16-LE with BOM + assert 
decode(b'\xFE\xFF\xe9\x00', 'ascii') == ('\ue900', lookup('utf-16be')) + assert decode(b'\xFF\xFE\x00\xe9', 'ascii') == ('\ue900', lookup('utf-16le')) + + assert decode(b'\x00\xe9', 'UTF-16BE') == ('é', lookup('utf-16be')) + assert decode(b'\xe9\x00', 'UTF-16LE') == ('é', lookup('utf-16le')) + assert decode(b'\xe9\x00', 'UTF-16') == ('é', lookup('utf-16le')) + + assert decode(b'\xe9\x00', 'UTF-16BE') == ('\ue900', lookup('utf-16be')) + assert decode(b'\x00\xe9', 'UTF-16LE') == ('\ue900', lookup('utf-16le')) + assert decode(b'\x00\xe9', 'UTF-16') == ('\ue900', lookup('utf-16le')) + + +def test_encode(): + assert encode('é', 'latin1') == b'\xe9' + assert encode('é', 'utf8') == b'\xc3\xa9' + assert encode('é', 'utf8') == b'\xc3\xa9' + assert encode('é', 'utf-16') == b'\xe9\x00' + assert encode('é', 'utf-16le') == b'\xe9\x00' + assert encode('é', 'utf-16be') == b'\x00\xe9' + + +def test_iter_decode(): + def iter_decode_to_string(input, fallback_encoding): + output, _encoding = iter_decode(input, fallback_encoding) + return ''.join(output) + assert iter_decode_to_string([], 'latin1') == '' + assert iter_decode_to_string([b''], 'latin1') == '' + assert iter_decode_to_string([b'\xe9'], 'latin1') == 'é' + assert iter_decode_to_string([b'hello'], 'latin1') == 'hello' + assert iter_decode_to_string([b'he', b'llo'], 'latin1') == 'hello' + assert iter_decode_to_string([b'hell', b'o'], 'latin1') == 'hello' + assert iter_decode_to_string([b'\xc3\xa9'], 'latin1') == 'é' + assert iter_decode_to_string([b'\xEF\xBB\xBF\xc3\xa9'], 'latin1') == 'é' + assert iter_decode_to_string([ + b'\xEF\xBB\xBF', b'\xc3', b'\xa9'], 'latin1') == 'é' + assert iter_decode_to_string([ + b'\xEF\xBB\xBF', b'a', b'\xc3'], 'latin1') == 'a\uFFFD' + assert iter_decode_to_string([ + b'', b'\xEF', b'', b'', b'\xBB\xBF\xc3', b'\xa9'], 'latin1') == 'é' + assert iter_decode_to_string([b'\xEF\xBB\xBF'], 'latin1') == '' + assert iter_decode_to_string([b'\xEF\xBB'], 'latin1') == 'ï»' + assert 
iter_decode_to_string([b'\xFE\xFF\x00\xe9'], 'latin1') == 'é' + assert iter_decode_to_string([b'\xFF\xFE\xe9\x00'], 'latin1') == 'é' + assert iter_decode_to_string([ + b'', b'\xFF', b'', b'', b'\xFE\xe9', b'\x00'], 'latin1') == 'é' + assert iter_decode_to_string([ + b'', b'h\xe9', b'llo'], 'x-user-defined') == 'h\uF7E9llo' + + +def test_iter_encode(): + assert b''.join(iter_encode([], 'latin1')) == b'' + assert b''.join(iter_encode([''], 'latin1')) == b'' + assert b''.join(iter_encode(['é'], 'latin1')) == b'\xe9' + assert b''.join(iter_encode(['', 'é', '', ''], 'latin1')) == b'\xe9' + assert b''.join(iter_encode(['', 'é', '', ''], 'utf-16')) == b'\xe9\x00' + assert b''.join(iter_encode(['', 'é', '', ''], 'utf-16le')) == b'\xe9\x00' + assert b''.join(iter_encode(['', 'é', '', ''], 'utf-16be')) == b'\x00\xe9' + assert b''.join(iter_encode([ + '', 'h\uF7E9', '', 'llo'], 'x-user-defined')) == b'h\xe9llo' + + +def test_x_user_defined(): + encoded = b'2,\x0c\x0b\x1aO\xd9#\xcb\x0f\xc9\xbbt\xcf\xa8\xca' + decoded = '2,\x0c\x0b\x1aO\uf7d9#\uf7cb\x0f\uf7c9\uf7bbt\uf7cf\uf7a8\uf7ca' + encoded = b'aa' + decoded = 'aa' + assert decode(encoded, 'x-user-defined') == (decoded, lookup('x-user-defined')) + assert encode(decoded, 'x-user-defined') == encoded diff --git a/lib/webencodings/x_user_defined.py b/lib/webencodings/x_user_defined.py new file mode 100644 index 0000000000000000000000000000000000000000..d16e326024c05a59548619e13258acad781e0a6d --- /dev/null +++ b/lib/webencodings/x_user_defined.py @@ -0,0 +1,325 @@ +# coding: utf-8 +""" + + webencodings.x_user_defined + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + An implementation of the x-user-defined encoding. + + :copyright: Copyright 2012 by Simon Sapin + :license: BSD, see LICENSE for details. 
+ +""" + +from __future__ import unicode_literals + +import codecs + + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self, input, errors='strict'): + return codecs.charmap_encode(input, errors, encoding_table) + + def decode(self, input, errors='strict'): + return codecs.charmap_decode(input, errors, decoding_table) + + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input, self.errors, encoding_table)[0] + + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input, self.errors, decoding_table)[0] + + +class StreamWriter(Codec, codecs.StreamWriter): + pass + + +class StreamReader(Codec, codecs.StreamReader): + pass + + +### encodings module API + +codec_info = codecs.CodecInfo( + name='x-user-defined', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, +) + + +### Decoding Table + +# Python 3: +# for c in range(256): print(' %r' % chr(c if c < 128 else c + 0xF700)) +decoding_table = ( + '\x00' + '\x01' + '\x02' + '\x03' + '\x04' + '\x05' + '\x06' + '\x07' + '\x08' + '\t' + '\n' + '\x0b' + '\x0c' + '\r' + '\x0e' + '\x0f' + '\x10' + '\x11' + '\x12' + '\x13' + '\x14' + '\x15' + '\x16' + '\x17' + '\x18' + '\x19' + '\x1a' + '\x1b' + '\x1c' + '\x1d' + '\x1e' + '\x1f' + ' ' + '!' + '"' + '#' + '$' + '%' + '&' + "'" + '(' + ')' + '*' + '+' + ',' + '-' + '.' + '/' + '0' + '1' + '2' + '3' + '4' + '5' + '6' + '7' + '8' + '9' + ':' + ';' + '<' + '=' + '>' + '?' 
+ '@' + 'A' + 'B' + 'C' + 'D' + 'E' + 'F' + 'G' + 'H' + 'I' + 'J' + 'K' + 'L' + 'M' + 'N' + 'O' + 'P' + 'Q' + 'R' + 'S' + 'T' + 'U' + 'V' + 'W' + 'X' + 'Y' + 'Z' + '[' + '\\' + ']' + '^' + '_' + '`' + 'a' + 'b' + 'c' + 'd' + 'e' + 'f' + 'g' + 'h' + 'i' + 'j' + 'k' + 'l' + 'm' + 'n' + 'o' + 'p' + 'q' + 'r' + 's' + 't' + 'u' + 'v' + 'w' + 'x' + 'y' + 'z' + '{' + '|' + '}' + '~' + '\x7f' + '\uf780' + '\uf781' + '\uf782' + '\uf783' + '\uf784' + '\uf785' + '\uf786' + '\uf787' + '\uf788' + '\uf789' + '\uf78a' + '\uf78b' + '\uf78c' + '\uf78d' + '\uf78e' + '\uf78f' + '\uf790' + '\uf791' + '\uf792' + '\uf793' + '\uf794' + '\uf795' + '\uf796' + '\uf797' + '\uf798' + '\uf799' + '\uf79a' + '\uf79b' + '\uf79c' + '\uf79d' + '\uf79e' + '\uf79f' + '\uf7a0' + '\uf7a1' + '\uf7a2' + '\uf7a3' + '\uf7a4' + '\uf7a5' + '\uf7a6' + '\uf7a7' + '\uf7a8' + '\uf7a9' + '\uf7aa' + '\uf7ab' + '\uf7ac' + '\uf7ad' + '\uf7ae' + '\uf7af' + '\uf7b0' + '\uf7b1' + '\uf7b2' + '\uf7b3' + '\uf7b4' + '\uf7b5' + '\uf7b6' + '\uf7b7' + '\uf7b8' + '\uf7b9' + '\uf7ba' + '\uf7bb' + '\uf7bc' + '\uf7bd' + '\uf7be' + '\uf7bf' + '\uf7c0' + '\uf7c1' + '\uf7c2' + '\uf7c3' + '\uf7c4' + '\uf7c5' + '\uf7c6' + '\uf7c7' + '\uf7c8' + '\uf7c9' + '\uf7ca' + '\uf7cb' + '\uf7cc' + '\uf7cd' + '\uf7ce' + '\uf7cf' + '\uf7d0' + '\uf7d1' + '\uf7d2' + '\uf7d3' + '\uf7d4' + '\uf7d5' + '\uf7d6' + '\uf7d7' + '\uf7d8' + '\uf7d9' + '\uf7da' + '\uf7db' + '\uf7dc' + '\uf7dd' + '\uf7de' + '\uf7df' + '\uf7e0' + '\uf7e1' + '\uf7e2' + '\uf7e3' + '\uf7e4' + '\uf7e5' + '\uf7e6' + '\uf7e7' + '\uf7e8' + '\uf7e9' + '\uf7ea' + '\uf7eb' + '\uf7ec' + '\uf7ed' + '\uf7ee' + '\uf7ef' + '\uf7f0' + '\uf7f1' + '\uf7f2' + '\uf7f3' + '\uf7f4' + '\uf7f5' + '\uf7f6' + '\uf7f7' + '\uf7f8' + '\uf7f9' + '\uf7fa' + '\uf7fb' + '\uf7fc' + '\uf7fd' + '\uf7fe' + '\uf7ff' +) + +### Encoding table +encoding_table = codecs.charmap_build(decoding_table) diff --git a/sickbeard/imdbPopular.py b/sickbeard/imdbPopular.py index 
649095e8ab94054e08a0ecd9e464b851175cb39f..684a01b7c9e11a87f3b8e7bcba9c62ecf563819c 100644 --- a/sickbeard/imdbPopular.py +++ b/sickbeard/imdbPopular.py @@ -39,7 +39,7 @@ class imdbPopular(object): if not data: return None - soup = BeautifulSoup(data, 'html.parser') + soup = BeautifulSoup(data, 'html5lib') results = soup.find_all("div", {"class": "lister-item"}) for row in results: diff --git a/sickbeard/webserveInit.py b/sickbeard/webserveInit.py index 0b7a507331e7a5bdabca601620f49a9f2930d4bd..c604d4abac30633e0052e074612dc2793f02aeeb 100644 --- a/sickbeard/webserveInit.py +++ b/sickbeard/webserveInit.py @@ -83,27 +83,6 @@ class SRWebServer(threading.Thread): # pylint: disable=too-many-instance-attrib login_url='{0}/login/'.format(self.options['web_root']), ) - # Main Handlers - self.app.add_handlers('.*$', [ - # webapi handler - (r'{0}(/?.*)'.format(self.options['api_root']), ApiHandler), - - # webapi key retrieval - (r'{0}/getkey(/?.*)'.format(self.options['web_root']), KeyHandler), - - # webapi builder redirect - (r'{0}/api/builder'.format(self.options['web_root']), RedirectHandler, {"url": self.options['web_root'] + '/apibuilder/'}), - - # webui login/logout handlers - (r'{0}/login(/?)'.format(self.options['web_root']), LoginHandler), - (r'{0}/logout(/?)'.format(self.options['web_root']), LogoutHandler), - - # Web calendar handler (Needed because option Unprotected calendar) - (r'{0}/calendar(/?)'.format(self.options['web_root']), CalendarHandler), - - # webui handlers - ] + route.get_routes(self.options['web_root'])) - # Static File Handlers self.app.add_handlers(".*$", [ # favicon @@ -135,6 +114,27 @@ class SRWebServer(threading.Thread): # pylint: disable=too-many-instance-attrib {"path": self.video_root}) ]) + # Main Handlers + self.app.add_handlers('.*$', [ + # webapi handler + (r'{0}(/?.*)'.format(self.options['api_root']), ApiHandler), + + # webapi key retrieval + (r'{0}/getkey(/?.*)'.format(self.options['web_root']), KeyHandler), + + # webapi builder 
redirect + (r'{0}/api/builder'.format(self.options['web_root']), RedirectHandler, {"url": self.options['web_root'] + '/apibuilder/'}), + + # webui login/logout handlers + (r'{0}/login(/?)'.format(self.options['web_root']), LoginHandler), + (r'{0}/logout(/?)'.format(self.options['web_root']), LogoutHandler), + + # Web calendar handler (Needed because option Unprotected calendar) + (r'{0}/calendar'.format(self.options['web_root']), CalendarHandler), + + # webui handlers + ] + route.get_routes(self.options['web_root'])) + def run(self): if self.enable_https: protocol = "https" @@ -170,6 +170,6 @@ class SRWebServer(threading.Thread): # pylint: disable=too-many-instance-attrib # Ignore errors like "ValueError: I/O operation on closed kqueue fd". These might be thrown during a reload. pass - def shutDown(self): + def shutdown(self): self.alive = False self.io_loop.stop()