Hello community,
here is the log from the commit of package python-html5-parser for openSUSE:Factory checked in at 2018-06-28 15:14:24
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-html5-parser (Old)
and /work/SRC/openSUSE:Factory/.python-html5-parser.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-html5-parser"
Thu Jun 28 15:14:24 2018 rev:4 rq:619429 version:0.4.5
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-html5-parser/python-html5-parser.changes 2017-10-02 16:54:21.853039967 +0200
+++ /work/SRC/openSUSE:Factory/.python-html5-parser.new/python-html5-parser.changes 2018-06-28 15:14:26.967479594 +0200
@@ -1,0 +2,8 @@
+Wed Jun 27 17:15:29 UTC 2018 - ecsos@opensuse.org
+
+- update to 0.4.5
+ No changelog from upstream.
+ See instead here:
+ https://github.com/kovidgoyal/html5-parser/compare/v0.4.4...v0.4.5?diff=unified&name=v0.4.5
+
+-------------------------------------------------------------------
Old:
----
v0.4.4.tar.gz
New:
----
v0.4.5.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-html5-parser.spec ++++++
--- /var/tmp/diff_new_pack.FVDYL3/_old 2018-06-28 15:14:27.623478393 +0200
+++ /var/tmp/diff_new_pack.FVDYL3/_new 2018-06-28 15:14:27.627478386 +0200
@@ -1,7 +1,7 @@
#
# spec file for package python-html5-parser
#
-# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -19,7 +19,7 @@
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
Name: python-html5-parser
-Version: 0.4.4
+Version: 0.4.5
Release: 0
Summary: C based HTML 5 parsing for Python
License: Apache-2.0
++++++ v0.4.4.tar.gz -> v0.4.5.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/.travis.yml new/html5-parser-0.4.5/.travis.yml
--- old/html5-parser-0.4.4/.travis.yml 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/.travis.yml 2018-04-22 17:07:13.000000000 +0200
@@ -1,11 +1,3 @@
-cache: pip
-dist: trusty
-sudo: false
-addons:
- apt:
- packages:
- - libxml2-dev
-
env:
global:
- PYTHONHASHSEED=random
@@ -15,31 +7,69 @@
- os: linux
language: python
python: 2.7
- env: BUILDER=build.py CC=gcc
+ env: BUILDER=build.py CC=gcc PYTHON=python
+ group: beta
+ dist: trusty
+ sudo: false
+ addons:
+ apt:
+ packages:
+ - libxml2-dev
- os: linux
language: python
python: 2.7
- env: BUILDER=build.py CC=clang
+ env: BUILDER=build.py CC=clang PYTHON=python LSAN_OPTIONS=verbosity=1:log_threads=1
+ group: beta
+ dist: trusty
+ # See https://github.com/travis-ci/travis-ci/issues/9033
+ sudo: required
+ addons:
+ apt:
+ packages:
+ - libxml2-dev
- os: linux
language: python
python: 2.7
- env: BUILDER=setup.py
+ env: BUILDER=setup.py PYTHON=python
+ group: beta
+ dist: trusty
+ sudo: false
+ addons:
+ apt:
+ packages:
+ - libxml2-dev
- os: linux
language: python
- python: 3.4
- env: BUILDER=setup.py
-
+ python: 3.6
+ env: BUILDER=setup.py PYTHON=python
+ group: beta
+ dist: trusty
+ sudo: false
+ addons:
+ apt:
+ packages:
+ - libxml2-dev
- os: osx
- python:
language: generic
- env: BUILDER=setup.py
+ env: BUILDER=setup.py PYTHON=python3
-install:
- - pip install --no-binary lxml chardet lxml beautifulsoup4
- - if [[ $TRAVIS_PYTHON_VERSION == 2.* ]]; then pip install BeautifulSoup; fi
- - python -c "from lxml import etree; print(etree)"
- - git clone --depth 1 "https://github.com/html5lib/html5lib-tests.git" test/html5lib-tests
+install: |
+ set -e
+ if [[ "$TRAVIS_OS_NAME" == 'osx' ]]; then
+ brew update;
+ brew upgrade python;
+ python3 --version
+ pip3 install --no-binary lxml chardet lxml beautifulsoup4
+ else
+ PLIB=$(ldd `which python` | grep libpython | cut -d ' ' -f 3)
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`dirname $PLIB`
+ pip install --no-binary lxml chardet lxml beautifulsoup4
+ if [[ $TRAVIS_PYTHON_VERSION == 2.* ]]; then pip install BeautifulSoup; fi
+ fi
+ $PYTHON -c "from lxml import etree; print(etree)"
+ git clone --depth 1 "https://github.com/html5lib/html5lib-tests.git" test/html5lib-tests
+ set +e
script:
- - python $BUILDER test
- - if [[ $BUILDER == "build.py" ]]; then python $BUILDER leak; fi
+ - $PYTHON $BUILDER test
+ - if [[ $BUILDER == "build.py" ]]; then $PYTHON $BUILDER leak; fi
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/build.py new/html5-parser-0.4.5/build.py
--- old/html5-parser-0.4.4/build.py 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/build.py 2018-04-22 17:07:13.000000000 +0200
@@ -180,7 +180,7 @@
def find_c_files(src_dir):
ans, headers = [], []
- for x in os.listdir(src_dir):
+ for x in sorted(os.listdir(src_dir)):
ext = os.path.splitext(x)[1]
if ext == '.c' and not x.endswith('-check.c'):
ans.append(os.path.join(src_dir, x))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/gumbo.h new/html5-parser-0.4.5/gumbo/gumbo.h
--- old/html5-parser-0.4.4/gumbo/gumbo.h 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/gumbo/gumbo.h 2018-04-22 17:07:13.000000000 +0200
@@ -189,7 +189,7 @@
/**
* Fixes the case of SVG elements that are not all lowercase.
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-constructio...
+ * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inforeign
* This is not done at parse time because there's no place to store a mutated
* tag name. tag_name is an enum (which will be TAG_UNKNOWN for most SVG tags
* without special handling), while original_tag_name is a pointer into the
@@ -199,7 +199,7 @@
* no normalization is called for. The return value is static data and owned by
* the library.
*/
-const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname, uint8_t *sz);
+const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
/**
* Converts a tag name string (which may be in upper or mixed case) to a tag
@@ -230,7 +230,7 @@
* The namespace for the attribute. This will usually be
* GUMBO_ATTR_NAMESPACE_NONE, but some XLink/XMLNS/XML attributes take special
* values, per:
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-constructio...
+ * https://html.spec.whatwg.org/multipage/parsing.html#adjust-foreign-attributes
*/
GumboAttributeNamespaceEnum attr_namespace;
@@ -319,7 +319,7 @@
*/
typedef struct GumboInternalNode GumboNode;
-/** http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-m... */
+/** https://dom.spec.whatwg.org/#concept-document-quirks */
typedef enum {
GUMBO_DOCTYPE_NO_QUIRKS,
GUMBO_DOCTYPE_QUIRKS,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/parser.c new/html5-parser-0.4.5/gumbo/parser.c
--- old/html5-parser-0.4.4/gumbo/parser.c 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/gumbo/parser.c 2018-04-22 17:07:13.000000000 +0200
@@ -30,6 +30,7 @@
#include "utf8.h"
#include "util.h"
#include "vector.h"
+#include "replacement.h"
#define AVOID_UNUSED_VARIABLE_WARNING(i) (void) (i)
@@ -44,7 +45,7 @@
#define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
#define TAGSET_INCLUDES(tagset, namespace, tag) \
- (tag < GUMBO_TAG_LAST && tagset[(int) tag] == (1 << (int) namespace))
+ (tag < GUMBO_TAG_LAST && tagset[(int) tag] & (1 << (int) namespace))
// selected forward declarations as it is getting hard to find
// an appropriate order
@@ -165,121 +166,11 @@
static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
"http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
-typedef struct _ReplacementEntry {
+typedef struct {
const GumboStringPiece from;
const GumboStringPiece to;
} ReplacementEntry;
-#define REPLACEMENT_ENTRY(from, to) \
- { GUMBO_STRING(from), GUMBO_STRING(to) }
-
-// Static data for SVG attribute replacements.
-// https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-no...
-static const ReplacementEntry kSvgAttributeReplacements[] = {
- REPLACEMENT_ENTRY("attributename", "attributeName"),
- REPLACEMENT_ENTRY("attributetype", "attributeType"),
- REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
- REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
- REPLACEMENT_ENTRY("calcmode", "calcMode"),
- REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
- // REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
- // REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
- REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
- REPLACEMENT_ENTRY("edgemode", "edgeMode"),
- // REPLACEMENT_ENTRY("externalresourcesrequired",
- // "externalResourcesRequired"),
- // REPLACEMENT_ENTRY("filterres", "filterRes"),
- REPLACEMENT_ENTRY("filterunits", "filterUnits"),
- REPLACEMENT_ENTRY("glyphref", "glyphRef"),
- REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
- REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
- REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
- REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
- REPLACEMENT_ENTRY("keypoints", "keyPoints"),
- REPLACEMENT_ENTRY("keysplines", "keySplines"),
- REPLACEMENT_ENTRY("keytimes", "keyTimes"),
- REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
- REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
- REPLACEMENT_ENTRY("markerheight", "markerHeight"),
- REPLACEMENT_ENTRY("markerunits", "markerUnits"),
- REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
- REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
- REPLACEMENT_ENTRY("maskunits", "maskUnits"),
- REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
- REPLACEMENT_ENTRY("pathlength", "pathLength"),
- REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
- REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
- REPLACEMENT_ENTRY("patternunits", "patternUnits"),
- REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
- REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
- REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
- REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
- REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
- REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
- REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
- REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
- REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
- REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
- REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
- REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
- REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
- REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
- REPLACEMENT_ENTRY("startoffset", "startOffset"),
- REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
- REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
- REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
- REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
- REPLACEMENT_ENTRY("tablevalues", "tableValues"),
- REPLACEMENT_ENTRY("targetx", "targetX"),
- REPLACEMENT_ENTRY("targety", "targetY"),
- REPLACEMENT_ENTRY("textlength", "textLength"),
- REPLACEMENT_ENTRY("viewbox", "viewBox"),
- REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
- REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
- REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
- REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
-};
-
-static const ReplacementEntry kSvgTagReplacements[] = {
- REPLACEMENT_ENTRY("altglyph", "altGlyph"),
- REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
- REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
- REPLACEMENT_ENTRY("animatecolor", "animateColor"),
- REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
- REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
- REPLACEMENT_ENTRY("clippath", "clipPath"),
- REPLACEMENT_ENTRY("feblend", "feBlend"),
- REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
- REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
- REPLACEMENT_ENTRY("fecomposite", "feComposite"),
- REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
- REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
- REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
- REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
- REPLACEMENT_ENTRY("feflood", "feFlood"),
- REPLACEMENT_ENTRY("fefunca", "feFuncA"),
- REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
- REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
- REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
- REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
- REPLACEMENT_ENTRY("feimage", "feImage"),
- REPLACEMENT_ENTRY("femerge", "feMerge"),
- REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
- REPLACEMENT_ENTRY("femorphology", "feMorphology"),
- REPLACEMENT_ENTRY("feoffset", "feOffset"),
- REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
- REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
- REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
- REPLACEMENT_ENTRY("fetile", "feTile"),
- REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
- REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
- REPLACEMENT_ENTRY("glyphref", "glyphRef"),
- REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
- REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
- REPLACEMENT_ENTRY("solidcolor", "solidcolor"),
- REPLACEMENT_ENTRY("textpath", "textPath"),
-};
-
typedef struct _NamespacedAttributeReplacement {
const char* from;
const char* local_name;
@@ -1577,12 +1468,20 @@
TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
- TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
+ TAG(THEAD), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
+
+ // This TagSet needs to include the "title" element in both the HTML and
+ // SVG namespaces. Using both TAG(TITLE) and TAG_SVG(TITLE) won't work, due
+ // to the simplistic way in which the TAG macros are implemented, so we do
+ // it like this instead:
+ [GUMBO_TAG_TITLE] = (1 << GUMBO_NAMESPACE_HTML) | (1 << GUMBO_NAMESPACE_SVG)
+ }
+ );
}
// Implicitly closes currently open elements until it reaches an element with
@@ -1674,16 +1573,9 @@
}
const char* gumbo_normalize_svg_tagname(
- const GumboStringPiece* tag, uint8_t* sz) {
- for (unsigned int i = 0;
- i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry); ++i) {
- const ReplacementEntry* entry = &kSvgTagReplacements[i];
- if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
- *sz = entry->to.length;
- return entry->to.data;
- }
- }
- return NULL;
+ const GumboStringPiece* tag) {
+ const StringReplacement *replacement = gumbo_get_svg_tag_replacement(tag->data, tag->length);
+ return replacement ? replacement->to : NULL;
}
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-constructio...
@@ -1713,16 +1605,15 @@
static void adjust_svg_attributes(GumboToken* token) {
assert(token->type == GUMBO_TOKEN_START_TAG);
const GumboVector* attributes = &token->v.start_tag.attributes;
- for (unsigned int i = 0;
- i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
- const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
- GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
- if (!attr) {
+ for (unsigned int i = 0, n = attributes->length; i < n; i++) {
+ GumboAttribute* attr = (GumboAttribute*) attributes->data[i];
+ const StringReplacement* replacement = gumbo_get_svg_attr_replacement(attr->name, attr->original_name.length);
+ if (!replacement) {
continue;
}
/* TODO:vmg refactor to use attribute helpers */
gumbo_free((void*) attr->name);
- attr->name = gumbo_strdup(entry->to.data);
+ attr->name = gumbo_strdup(replacement->to);
}
}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/replacement.h new/html5-parser-0.4.5/gumbo/replacement.h
--- old/html5-parser-0.4.4/gumbo/replacement.h 1970-01-01 01:00:00.000000000 +0100
+++ new/html5-parser-0.4.5/gumbo/replacement.h 2018-04-22 17:07:13.000000000 +0200
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <stddef.h>
+
+typedef struct {
+ const char *const from;
+ const char *const to;
+} StringReplacement;
+
+const StringReplacement *gumbo_get_svg_tag_replacement(
+ const char* str,
+ size_t len
+);
+
+const StringReplacement *gumbo_get_svg_attr_replacement(
+ const char* str,
+ size_t len
+);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/svg_attrs.c new/html5-parser-0.4.5/gumbo/svg_attrs.c
--- old/html5-parser-0.4.4/gumbo/svg_attrs.c 1970-01-01 01:00:00.000000000 +0100
+++ new/html5-parser-0.4.5/gumbo/svg_attrs.c 2018-04-22 17:07:13.000000000 +0200
@@ -0,0 +1,306 @@
+/* ANSI-C code produced by gperf version 3.1 */
+/* Command-line: gperf -m100 svg_attrs.gperf */
+/* Computed positions: -k'1,10,$' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646. */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
+#endif
+
+#line 2 "svg_attrs.gperf"
+
+#include "replacement.h"
+#include <string.h>
+
+#define TOTAL_KEYWORDS 58
+#define MIN_WORD_LENGTH 4
+#define MAX_WORD_LENGTH 19
+#define MIN_HASH_VALUE 5
+#define MAX_HASH_VALUE 77
+/* maximum key range = 73, duplicates = 0 */
+
+#ifndef GPERF_DOWNCASE
+#define GPERF_DOWNCASE 1
+static unsigned char gperf_downcase[256] =
+ {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
+ 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
+ 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+ 255
+ };
+#endif
+
+#ifndef GPERF_CASE_MEMCMP
+#define GPERF_CASE_MEMCMP 1
+static int
+gperf_case_memcmp (register const char *s1, register const char *s2, register size_t n)
+{
+ for (; n > 0;)
+ {
+ unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
+ unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
+ if (c1 == c2)
+ {
+ n--;
+ continue;
+ }
+ return (int)c1 - (int)c2;
+ }
+ return 0;
+}
+#endif
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash (register const char *str, register size_t len)
+{
+ static const unsigned char asso_values[] =
+ {
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 5, 78, 39, 14, 1,
+ 31, 31, 13, 13, 78, 78, 22, 25, 10, 2,
+ 7, 78, 22, 0, 1, 3, 1, 78, 0, 36,
+ 14, 17, 20, 78, 78, 78, 78, 5, 78, 39,
+ 14, 1, 31, 31, 13, 13, 78, 78, 22, 25,
+ 10, 2, 7, 78, 22, 0, 1, 3, 1, 78,
+ 0, 36, 14, 17, 20, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78
+ };
+ register unsigned int hval = len;
+
+ switch (hval)
+ {
+ default:
+ hval += asso_values[(unsigned char)str[9]];
+ /*FALLTHROUGH*/
+ case 9:
+ case 8:
+ case 7:
+ case 6:
+ case 5:
+ case 4:
+ case 3:
+ case 2:
+ case 1:
+ hval += asso_values[(unsigned char)str[0]+2];
+ break;
+ }
+ return hval + asso_values[(unsigned char)str[len - 1]];
+}
+
+static const unsigned char lengthtable[] =
+ {
+ 0, 0, 0, 0, 0, 4, 0, 7, 7, 0, 8, 9, 10, 11,
+ 11, 11, 11, 10, 16, 18, 16, 12, 16, 11, 13, 11, 12, 11,
+ 16, 0, 17, 9, 9, 8, 9, 10, 13, 10, 12, 14, 8, 4,
+ 12, 19, 7, 9, 12, 12, 11, 14, 10, 19, 8, 16, 13, 16,
+ 16, 15, 10, 12, 0, 0, 13, 13, 13, 0, 0, 9, 16, 0,
+ 0, 0, 0, 0, 0, 0, 0, 17
+ };
+
+static const StringReplacement wordlist[] =
+ {
+ {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
+ {(char*)0},
+#line 58 "svg_attrs.gperf"
+ {"refx", "refX"},
+ {(char*)0},
+#line 76 "svg_attrs.gperf"
+ {"viewbox", "viewBox"},
+#line 73 "svg_attrs.gperf"
+ {"targetx", "targetX"},
+ {(char*)0},
+#line 27 "svg_attrs.gperf"
+ {"calcmode", "calcMode"},
+#line 46 "svg_attrs.gperf"
+ {"maskunits", "maskUnits"},
+#line 77 "svg_attrs.gperf"
+ {"viewtarget", "viewTarget"},
+#line 72 "svg_attrs.gperf"
+ {"tablevalues", "tableValues"},
+#line 43 "svg_attrs.gperf"
+ {"markerunits", "markerUnits"},
+#line 69 "svg_attrs.gperf"
+ {"stitchtiles", "stitchTiles"},
+#line 67 "svg_attrs.gperf"
+ {"startoffset", "startOffset"},
+#line 47 "svg_attrs.gperf"
+ {"numoctaves", "numOctaves"},
+#line 63 "svg_attrs.gperf"
+ {"requiredfeatures", "requiredFeatures"},
+#line 62 "svg_attrs.gperf"
+ {"requiredextensions", "requiredExtensions"},
+#line 65 "svg_attrs.gperf"
+ {"specularexponent", "specularExponent"},
+#line 70 "svg_attrs.gperf"
+ {"surfacescale", "surfaceScale"},
+#line 64 "svg_attrs.gperf"
+ {"specularconstant", "specularConstant"},
+#line 60 "svg_attrs.gperf"
+ {"repeatcount", "repeatCount"},
+#line 28 "svg_attrs.gperf"
+ {"clippathunits", "clipPathUnits"},
+#line 31 "svg_attrs.gperf"
+ {"filterunits", "filterUnits"},
+#line 40 "svg_attrs.gperf"
+ {"lengthadjust", "lengthAdjust"},
+#line 44 "svg_attrs.gperf"
+ {"markerwidth", "markerWidth"},
+#line 45 "svg_attrs.gperf"
+ {"maskcontentunits", "maskContentUnits"},
+ {(char*)0},
+#line 41 "svg_attrs.gperf"
+ {"limitingconeangle", "limitingConeAngle"},
+#line 52 "svg_attrs.gperf"
+ {"pointsatx", "pointsAtX"},
+#line 61 "svg_attrs.gperf"
+ {"repeatdur", "repeatDur"},
+#line 39 "svg_attrs.gperf"
+ {"keytimes", "keyTimes"},
+#line 37 "svg_attrs.gperf"
+ {"keypoints", "keyPoints"},
+#line 38 "svg_attrs.gperf"
+ {"keysplines", "keySplines"},
+#line 34 "svg_attrs.gperf"
+ {"gradientunits", "gradientUnits"},
+#line 75 "svg_attrs.gperf"
+ {"textlength", "textLength"},
+#line 68 "svg_attrs.gperf"
+ {"stddeviation", "stdDeviation"},
+#line 57 "svg_attrs.gperf"
+ {"primitiveunits", "primitiveUnits"},
+#line 30 "svg_attrs.gperf"
+ {"edgemode", "edgeMode"},
+#line 59 "svg_attrs.gperf"
+ {"refy", "refY"},
+#line 66 "svg_attrs.gperf"
+ {"spreadmethod", "spreadMethod"},
+#line 56 "svg_attrs.gperf"
+ {"preserveaspectratio", "preserveAspectRatio"},
+#line 74 "svg_attrs.gperf"
+ {"targety", "targetY"},
+#line 54 "svg_attrs.gperf"
+ {"pointsatz", "pointsAtZ"},
+#line 42 "svg_attrs.gperf"
+ {"markerheight", "markerHeight"},
+#line 51 "svg_attrs.gperf"
+ {"patternunits", "patternUnits"},
+#line 26 "svg_attrs.gperf"
+ {"baseprofile", "baseProfile"},
+#line 71 "svg_attrs.gperf"
+ {"systemlanguage", "systemLanguage"},
+#line 80 "svg_attrs.gperf"
+ {"zoomandpan", "zoomAndPan"},
+#line 49 "svg_attrs.gperf"
+ {"patterncontentunits", "patternContentUnits"},
+#line 32 "svg_attrs.gperf"
+ {"glyphref", "glyphRef"},
+#line 78 "svg_attrs.gperf"
+ {"xchannelselector", "xChannelSelector"},
+#line 24 "svg_attrs.gperf"
+ {"attributetype", "attributeType"},
+#line 36 "svg_attrs.gperf"
+ {"kernelunitlength", "kernelUnitLength"},
+#line 79 "svg_attrs.gperf"
+ {"ychannelselector", "yChannelSelector"},
+#line 29 "svg_attrs.gperf"
+ {"diffuseconstant", "diffuseConstant"},
+#line 48 "svg_attrs.gperf"
+ {"pathlength", "pathLength"},
+#line 35 "svg_attrs.gperf"
+ {"kernelmatrix", "kernelMatrix"},
+ {(char*)0}, {(char*)0},
+#line 55 "svg_attrs.gperf"
+ {"preservealpha", "preserveAlpha"},
+#line 23 "svg_attrs.gperf"
+ {"attributename", "attributeName"},
+#line 25 "svg_attrs.gperf"
+ {"basefrequency", "baseFrequency"},
+ {(char*)0}, {(char*)0},
+#line 53 "svg_attrs.gperf"
+ {"pointsaty", "pointsAtY"},
+#line 50 "svg_attrs.gperf"
+ {"patterntransform", "patternTransform"},
+ {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
+ {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
+#line 33 "svg_attrs.gperf"
+ {"gradienttransform", "gradientTransform"}
+ };
+
+const StringReplacement *
+gumbo_get_svg_attr_replacement (register const char *str, register size_t len)
+{
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register unsigned int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE)
+ if (len == lengthtable[key])
+ {
+ register const char *s = wordlist[key].from;
+
+ if (s && (((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_memcmp (str, s, len))
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/svg_attrs.gperf new/html5-parser-0.4.5/gumbo/svg_attrs.gperf
--- old/html5-parser-0.4.4/gumbo/svg_attrs.gperf 1970-01-01 01:00:00.000000000 +0100
+++ new/html5-parser-0.4.5/gumbo/svg_attrs.gperf 2018-04-22 17:07:13.000000000 +0200
@@ -0,0 +1,80 @@
+// See https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
+%{
+#include "replacement.h"
+%}
+
+%ignore-case
+%struct-type
+%omit-struct-type
+%compare-lengths
+%global-table
+%readonly-tables
+%null-strings
+%includes
+%define lookup-function-name gumbo_get_svg_attr_replacement
+%define slot-name from
+StringReplacement;
+
+// "contentscripttype", "contentScriptType"
+// "contentstyletype", "contentStyleType"
+// "externalresourcesrequired", "externalResourcesRequired"
+// "filterres", "filterRes"
+%%
+"attributename", "attributeName"
+"attributetype", "attributeType"
+"basefrequency", "baseFrequency"
+"baseprofile", "baseProfile"
+"calcmode", "calcMode"
+"clippathunits", "clipPathUnits"
+"diffuseconstant", "diffuseConstant"
+"edgemode", "edgeMode"
+"filterunits", "filterUnits"
+"glyphref", "glyphRef"
+"gradienttransform", "gradientTransform"
+"gradientunits", "gradientUnits"
+"kernelmatrix", "kernelMatrix"
+"kernelunitlength", "kernelUnitLength"
+"keypoints", "keyPoints"
+"keysplines", "keySplines"
+"keytimes", "keyTimes"
+"lengthadjust", "lengthAdjust"
+"limitingconeangle", "limitingConeAngle"
+"markerheight", "markerHeight"
+"markerunits", "markerUnits"
+"markerwidth", "markerWidth"
+"maskcontentunits", "maskContentUnits"
+"maskunits", "maskUnits"
+"numoctaves", "numOctaves"
+"pathlength", "pathLength"
+"patterncontentunits", "patternContentUnits"
+"patterntransform", "patternTransform"
+"patternunits", "patternUnits"
+"pointsatx", "pointsAtX"
+"pointsaty", "pointsAtY"
+"pointsatz", "pointsAtZ"
+"preservealpha", "preserveAlpha"
+"preserveaspectratio", "preserveAspectRatio"
+"primitiveunits", "primitiveUnits"
+"refx", "refX"
+"refy", "refY"
+"repeatcount", "repeatCount"
+"repeatdur", "repeatDur"
+"requiredextensions", "requiredExtensions"
+"requiredfeatures", "requiredFeatures"
+"specularconstant", "specularConstant"
+"specularexponent", "specularExponent"
+"spreadmethod", "spreadMethod"
+"startoffset", "startOffset"
+"stddeviation", "stdDeviation"
+"stitchtiles", "stitchTiles"
+"surfacescale", "surfaceScale"
+"systemlanguage", "systemLanguage"
+"tablevalues", "tableValues"
+"targetx", "targetX"
+"targety", "targetY"
+"textlength", "textLength"
+"viewbox", "viewBox"
+"viewtarget", "viewTarget"
+"xchannelselector", "xChannelSelector"
+"ychannelselector", "yChannelSelector"
+"zoomandpan", "zoomAndPan"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/svg_tags.c new/html5-parser-0.4.5/gumbo/svg_tags.c
--- old/html5-parser-0.4.4/gumbo/svg_tags.c 1970-01-01 01:00:00.000000000 +0100
+++ new/html5-parser-0.4.5/gumbo/svg_tags.c 2018-04-22 17:07:13.000000000 +0200
@@ -0,0 +1,248 @@
+/* ANSI-C code produced by gperf version 3.1 */
+/* Command-line: gperf -m100 svg_tags.gperf */
+/* Computed positions: -k'3,7' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646. */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
+#endif
+
+#line 1 "svg_tags.gperf"
+
+#include "replacement.h"
+#include <string.h>
+
+#define TOTAL_KEYWORDS 36
+#define MIN_WORD_LENGTH 6
+#define MAX_WORD_LENGTH 19
+#define MIN_HASH_VALUE 6
+#define MAX_HASH_VALUE 42
+/* maximum key range = 37, duplicates = 0 */
+
+#ifndef GPERF_DOWNCASE
+#define GPERF_DOWNCASE 1
+static unsigned char gperf_downcase[256] =
+ {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
+ 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
+ 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+ 255
+ };
+#endif
+
+#ifndef GPERF_CASE_MEMCMP
+#define GPERF_CASE_MEMCMP 1
+static int
+gperf_case_memcmp (register const char *s1, register const char *s2, register size_t n)
+{
+ for (; n > 0;)
+ {
+ unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
+ unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
+ if (c1 == c2)
+ {
+ n--;
+ continue;
+ }
+ return (int)c1 - (int)c2;
+ }
+ return 0;
+}
+#endif
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash (register const char *str, register size_t len)
+{
+ static const unsigned char asso_values[] =
+ {
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 12, 2, 10, 22,
+ 1, 28, 15, 1, 43, 43, 43, 0, 9, 26,
+ 3, 17, 1, 11, 0, 22, 5, 43, 3, 2,
+ 43, 43, 43, 43, 43, 43, 43, 43, 12, 2,
+ 10, 22, 1, 28, 15, 1, 43, 43, 43, 0,
+ 9, 26, 3, 17, 1, 11, 0, 22, 5, 43,
+ 3, 2, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43
+ };
+ register unsigned int hval = len;
+
+ switch (hval)
+ {
+ default:
+ hval += asso_values[(unsigned char)str[6]+1];
+ /*FALLTHROUGH*/
+ case 6:
+ case 5:
+ case 4:
+ case 3:
+ hval += asso_values[(unsigned char)str[2]];
+ break;
+ }
+ return hval;
+}
+
+static const unsigned char lengthtable[] =
+ {
+ 0, 0, 0, 0, 0, 0, 6, 0, 7, 7, 7, 8, 11, 12,
+ 12, 13, 11, 12, 16, 7, 7, 16, 11, 7, 19, 8, 13, 17,
+ 11, 12, 7, 8, 17, 8, 18, 8, 14, 12, 14, 14, 13, 7,
+ 14
+ };
+
+static const StringReplacement wordlist[] =
+ {
+ {""}, {""}, {""}, {""}, {""}, {""},
+#line 46 "svg_tags.gperf"
+ {"fetile", "feTile"},
+ {""},
+#line 39 "svg_tags.gperf"
+ {"femerge", "feMerge"},
+#line 38 "svg_tags.gperf"
+ {"feimage", "feImage"},
+#line 34 "svg_tags.gperf"
+ {"fefuncb", "feFuncB"},
+#line 49 "svg_tags.gperf"
+ {"glyphref", "glyphRef"},
+#line 40 "svg_tags.gperf"
+ {"femergenode", "feMergeNode"},
+#line 41 "svg_tags.gperf"
+ {"femorphology", "feMorphology"},
+#line 20 "svg_tags.gperf"
+ {"animatecolor", "animateColor"},
+#line 21 "svg_tags.gperf"
+ {"animatemotion", "animateMotion"},
+#line 27 "svg_tags.gperf"
+ {"fecomposite", "feComposite"},
+#line 47 "svg_tags.gperf"
+ {"feturbulence", "feTurbulence"},
+#line 22 "svg_tags.gperf"
+ {"animatetransform", "animateTransform"},
+#line 36 "svg_tags.gperf"
+ {"fefuncr", "feFuncR"},
+#line 33 "svg_tags.gperf"
+ {"fefunca", "feFuncA"},
+#line 28 "svg_tags.gperf"
+ {"feconvolvematrix", "feConvolveMatrix"},
+#line 45 "svg_tags.gperf"
+ {"fespotlight", "feSpotLight"},
+#line 35 "svg_tags.gperf"
+ {"fefuncg", "feFuncG"},
+#line 26 "svg_tags.gperf"
+ {"fecomponenttransfer", "feComponentTransfer"},
+#line 17 "svg_tags.gperf"
+ {"altglyph", "altGlyph"},
+#line 25 "svg_tags.gperf"
+ {"fecolormatrix", "feColorMatrix"},
+#line 30 "svg_tags.gperf"
+ {"fedisplacementmap", "feDisplacementMap"},
+#line 18 "svg_tags.gperf"
+ {"altglyphdef", "altGlyphDef"},
+#line 19 "svg_tags.gperf"
+ {"altglyphitem", "altGlyphItem"},
+#line 32 "svg_tags.gperf"
+ {"feflood", "feFlood"},
+#line 23 "svg_tags.gperf"
+ {"clippath", "clipPath"},
+#line 29 "svg_tags.gperf"
+ {"fediffuselighting", "feDiffuseLighting"},
+#line 52 "svg_tags.gperf"
+ {"textpath", "textPath"},
+#line 44 "svg_tags.gperf"
+ {"fespecularlighting", "feSpecularLighting"},
+#line 42 "svg_tags.gperf"
+ {"feoffset", "feOffset"},
+#line 31 "svg_tags.gperf"
+ {"fedistantlight", "feDistantLight"},
+#line 43 "svg_tags.gperf"
+ {"fepointlight", "fePointLight"},
+#line 50 "svg_tags.gperf"
+ {"lineargradient", "linearGradient"},
+#line 51 "svg_tags.gperf"
+ {"radialgradient", "radialGradient"},
+#line 48 "svg_tags.gperf"
+ {"foreignobject", "foreignObject"},
+#line 24 "svg_tags.gperf"
+ {"feblend", "feBlend"},
+#line 37 "svg_tags.gperf"
+ {"fegaussianblur", "feGaussianBlur"}
+ };
+
+const StringReplacement *
+gumbo_get_svg_tag_replacement (register const char *str, register size_t len)
+{
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register unsigned int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE)
+ if (len == lengthtable[key])
+ {
+ register const char *s = wordlist[key].from;
+
+ if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_memcmp (str, s, len))
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/svg_tags.gperf new/html5-parser-0.4.5/gumbo/svg_tags.gperf
--- old/html5-parser-0.4.4/gumbo/svg_tags.gperf 1970-01-01 01:00:00.000000000 +0100
+++ new/html5-parser-0.4.5/gumbo/svg_tags.gperf 2018-04-22 17:07:13.000000000 +0200
@@ -0,0 +1,53 @@
+%{
+#include "replacement.h"
+%}
+
+%ignore-case
+%struct-type
+%omit-struct-type
+%compare-lengths
+%readonly-tables
+%null-strings
+%includes
+%global-table
+%define lookup-function-name gumbo_get_svg_tag_replacement
+%define slot-name from
+StringReplacement;
+
+%%
+"altglyph", "altGlyph"
+"altglyphdef", "altGlyphDef"
+"altglyphitem", "altGlyphItem"
+"animatecolor", "animateColor"
+"animatemotion", "animateMotion"
+"animatetransform", "animateTransform"
+"clippath", "clipPath"
+"feblend", "feBlend"
+"fecolormatrix", "feColorMatrix"
+"fecomponenttransfer", "feComponentTransfer"
+"fecomposite", "feComposite"
+"feconvolvematrix", "feConvolveMatrix"
+"fediffuselighting", "feDiffuseLighting"
+"fedisplacementmap", "feDisplacementMap"
+"fedistantlight", "feDistantLight"
+"feflood", "feFlood"
+"fefunca", "feFuncA"
+"fefuncb", "feFuncB"
+"fefuncg", "feFuncG"
+"fefuncr", "feFuncR"
+"fegaussianblur", "feGaussianBlur"
+"feimage", "feImage"
+"femerge", "feMerge"
+"femergenode", "feMergeNode"
+"femorphology", "feMorphology"
+"feoffset", "feOffset"
+"fepointlight", "fePointLight"
+"fespecularlighting", "feSpecularLighting"
+"fespotlight", "feSpotLight"
+"fetile", "feTile"
+"feturbulence", "feTurbulence"
+"foreignobject", "foreignObject"
+"glyphref", "glyphRef"
+"lineargradient", "linearGradient"
+"radialgradient", "radialGradient"
+"textpath", "textPath"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/utf8.c new/html5-parser-0.4.5/gumbo/utf8.c
--- old/html5-parser-0.4.4/gumbo/utf8.c 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/gumbo/utf8.c 2018-04-22 17:07:13.000000000 +0200
@@ -32,7 +32,7 @@
// Wikipedia: http://en.wikipedia.org/wiki/UTF-8#Description
// RFC 3629: http://tools.ietf.org/html/rfc3629
// HTML5 Unicode handling:
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#preprocessing-the-input-stream
+// https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
//
// This implementation is based on a DFA-based decoder by Bjoern Hoehrmann
// <bjoern@hoehrmann.de>. We wrap the inner table-based decoder routine in our
@@ -141,7 +141,7 @@
// the HTML5 spec. Since we're looking for particular 7-bit literal
// characters, we operate in terms of chars and only need a check for iter
// overrun, instead of having to read in a full next code point.
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream
+ // https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
if (code_point == '\r') {
assert(iter->_width == 1);
const char* next = c + 1;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/utf8.h new/html5-parser-0.4.5/gumbo/utf8.h
--- old/html5-parser-0.4.4/gumbo/utf8.h 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/gumbo/utf8.h 2018-04-22 17:07:13.000000000 +0200
@@ -20,7 +20,7 @@
// 1. Decoding errors are parse errors.
// 2. Certain other codepoints (eg. control characters) are parse errors.
// 3. Carriage returns and CR/LF groups are converted to line feeds.
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.h...
+// https://encoding.spec.whatwg.org/#utf-8-decode
//
// Also, we want to keep track of source positions for error handling. As a
// result, we fold all that functionality into this decoder, and can't use an
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/src/as-libxml.c new/html5-parser-0.4.5/src/as-libxml.c
--- old/html5-parser-0.4.4/src/as-libxml.c 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/src/as-libxml.c 2018-04-22 17:07:13.000000000 +0200
@@ -205,7 +205,6 @@
bool ok = true;
const xmlChar *tag_name = NULL;
const char *tag;
- uint8_t tag_sz;
char buf[MAX_TAG_NAME_SZ] = {0};
char *nsprefix = NULL;
xmlNsPtr namespace = NULL;
@@ -214,7 +213,7 @@
if (UNLIKELY(elem->tag >= GUMBO_TAG_UNKNOWN)) {
gumbo_tag_from_original_text(&(elem->original_tag));
- tag_sz = MIN(sizeof(buf) - 1, elem->original_tag.length);
+ uint8_t tag_sz = MIN(sizeof(buf) - 1, elem->original_tag.length);
memcpy(buf, elem->original_tag.data, tag_sz);
tag = buf;
if (pd->maybe_xhtml) {
@@ -226,9 +225,9 @@
tag_name = xmlDictLookup(doc->dict, BAD_CAST tag, tag_sz);
} else if (UNLIKELY(elem->tag_namespace == GUMBO_NAMESPACE_SVG)) {
gumbo_tag_from_original_text(&(elem->original_tag));
- tag = gumbo_normalize_svg_tagname(&(elem->original_tag), &tag_sz);
+ tag = gumbo_normalize_svg_tagname(&(elem->original_tag));
if (tag == NULL) tag_name = lookup_standard_tag(doc, pd, elem->tag);
- else tag_name = xmlDictLookup(doc->dict, BAD_CAST tag, tag_sz);
+ else tag_name = xmlDictLookup(doc->dict, BAD_CAST tag, elem->original_tag.length);
} else tag_name = lookup_standard_tag(doc, pd, elem->tag);
if (UNLIKELY(!tag_name)) ABORT;
@@ -321,6 +320,7 @@
}
opts->line_number_attr = xmlDictLookup(doc->dict, BAD_CAST opts->line_number_attr, -1);
}
+ doc->encoding = xmlStrdup(BAD_CAST "UTF-8");
}
return doc;
}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/src/as-python-tree.c new/html5-parser-0.4.5/src/as-python-tree.c
--- old/html5-parser-0.4.4/src/as-python-tree.c 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/src/as-python-tree.c 2018-04-22 17:07:13.000000000 +0200
@@ -126,7 +126,6 @@
static inline PyObject*
create_element(GumboElement *elem, PyObject *new_tag) {
PyObject *tag_name = NULL, *tag_obj = NULL, *attributes = NULL;
- uint8_t tag_sz;
const char *tag;
if (UNLIKELY(elem->tag >= GUMBO_TAG_UNKNOWN)) {
@@ -134,9 +133,9 @@
tag_name = PyUnicode_FromStringAndSize(elem->original_tag.data, elem->original_tag.length);
} else if (UNLIKELY(elem->tag_namespace == GUMBO_NAMESPACE_SVG)) {
gumbo_tag_from_original_text(&(elem->original_tag));
- tag = gumbo_normalize_svg_tagname(&(elem->original_tag), &tag_sz);
+ tag = gumbo_normalize_svg_tagname(&(elem->original_tag));
if (tag) {
- tag_name = PyUnicode_FromStringAndSize(tag, tag_sz);
+ tag_name = PyUnicode_FromStringAndSize(tag, elem->original_tag.length);
} else {
tag_name = PyTuple_GET_ITEM(KNOWN_TAG_NAMES, elem->tag);
Py_INCREF(tag_name);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/src/html5_parser/soup.py new/html5-parser-0.4.5/src/html5_parser/soup.py
--- old/html5-parser-0.4.4/src/html5_parser/soup.py 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/src/html5_parser/soup.py 2018-04-22 17:07:13.000000000 +0200
@@ -6,6 +6,27 @@
unicode = type('')
+cdata_list_attributes = None
+universal_cdata_list_attributes = None
+empty = ()
+
+
+def init_bs4_cdata_list_attributes():
+ global cdata_list_attributes, universal_cdata_list_attributes
+ from bs4.builder import HTMLTreeBuilder
+ cdata_list_attributes = {
+ k: frozenset(v) for k, v in HTMLTreeBuilder.cdata_list_attributes.items()
+ }
+ universal_cdata_list_attributes = cdata_list_attributes['*']
+
+
+def map_list_attributes(tag_name, name, val):
+ if name in universal_cdata_list_attributes:
+ return val.split()
+ if name in cdata_list_attributes.get(tag_name, empty):
+ return val.split()
+ return val
+
def soup_module():
if soup_module.ans is None:
@@ -43,6 +64,7 @@
def bs4_new_tag(Tag, soup):
def new_tag(name, attrs):
+ attrs = {k: map_list_attributes(name, k, v) for k, v in attrs.items()}
return Tag(soup, name=name, attrs=attrs)
return new_tag
@@ -79,9 +101,13 @@
'area base br col embed hr img input keygen link menuitem meta param source track wbr'.split())
+def is_bs3():
+ return soup_module().__version__.startswith('3.')
+
+
def init_soup():
bs = soup_module()
- if bs.__version__.startswith('3.'):
+ if is_bs3():
soup = bs.BeautifulSoup()
new_tag = bs3_new_tag(bs.Tag, soup)
append = bs3_fast_append
@@ -90,6 +116,8 @@
soup = bs.BeautifulSoup('', 'lxml')
new_tag = bs4_new_tag(bs.Tag, soup)
append = bs4_fast_append
+ if universal_cdata_list_attributes is None:
+ init_bs4_cdata_list_attributes()
return bs, soup, new_tag, bs.Comment, append, bs.NavigableString
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/src/python-wrapper.c new/html5-parser-0.4.5/src/python-wrapper.c
--- old/html5-parser-0.4.4/src/python-wrapper.c 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/src/python-wrapper.c 2018-04-22 17:07:13.000000000 +0200
@@ -15,7 +15,7 @@
#define MAJOR 0
#define MINOR 4
-#define PATCH 4
+#define PATCH 5
static char *NAME = "libxml2:xmlDoc";
static char *DESTRUCTOR = "destructor:xmlFreeDoc";
@@ -35,14 +35,14 @@
return doc;
}
-static inline libxml_doc*
+static inline libxml_doc*
parse_with_options(const char* buffer, size_t buffer_length, Options *opts) {
GumboOutput *output = NULL;
libxml_doc* doc = NULL;
Py_BEGIN_ALLOW_THREADS;
output = gumbo_parse_with_options(&(opts->gumbo_opts), buffer, buffer_length);
Py_END_ALLOW_THREADS;
- if (output == NULL) PyErr_NoMemory();
+ if (output == NULL) PyErr_NoMemory();
else {
doc = convert_tree(output, opts);
gumbo_destroy_output(output);
@@ -50,7 +50,7 @@
return doc;
}
-static void
+static void
free_encapsulated_doc(PyObject *capsule) {
libxml_doc *doc = (libxml_doc*)PyCapsule_GetPointer(capsule, NAME);
if (doc != NULL) {
@@ -108,7 +108,7 @@
Py_BEGIN_ALLOW_THREADS;
output = gumbo_parse_with_options(&(opts.gumbo_opts), buffer, (size_t)sz);
Py_END_ALLOW_THREADS;
- if (output == NULL) PyErr_NoMemory();
+ if (output == NULL) PyErr_NoMemory();
GumboDocument* document = &(output->document->v.document);
if (new_doctype != Py_None && document->has_doctype) {
@@ -132,7 +132,7 @@
return encapsulate(doc);
}
-static PyMethodDef
+static PyMethodDef
methods[] = {
{"parse", (PyCFunction)parse, METH_VARARGS | METH_KEYWORDS,
"parse()\n\nParse specified bytestring which must be in the UTF-8 encoding."
@@ -154,7 +154,7 @@
#if PY_MAJOR_VERSION >= 3
-static struct PyModuleDef
+static struct PyModuleDef
moduledef = {
PyModuleDef_HEAD_INIT,
MODULE_NAME,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/test/adapt.py new/html5-parser-0.4.5/test/adapt.py
--- old/html5-parser-0.4.4/test/adapt.py 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/test/adapt.py 2018-04-22 17:07:13.000000000 +0200
@@ -86,6 +86,7 @@
for soup_name, soup in soups:
set_soup_module(soup)
self.do_soup_test(soup_name)
+ set_soup_module(None)
def do_soup_test(self, soup_name):
root = parse(HTML, treebuilder='soup')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/test/soup.py new/html5-parser-0.4.5/test/soup.py
--- old/html5-parser-0.4.4/test/soup.py 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/test/soup.py 2018-04-22 17:07:13.000000000 +0200
@@ -6,7 +6,7 @@
import gc
-from html5_parser.soup import parse
+from html5_parser.soup import parse, is_bs3
from . import TestCase
@@ -46,6 +46,12 @@
root = parse('<p><x xmlns:a="b">')
self.ae(type('')(root), '<html><head></head><body><p><x xmlns:a="b"></x></p></body></html>')
+ def test_soup_list_attrs(self):
+ if is_bs3():
+ self.skipTest('No bs4 module found')
+ root = parse('<a class="a b" rel="x y">')
+ self.ae(root.body.a.attrs, {'class': 'a b'.split(), 'rel': 'x y'.split()})
+
def test_soup_leak(self):
HTML = '<p a=1>\n<a b=2 id=3>y</a>z1'
parse(HTML) # So that BS and html_parser set up any internal objects