Hello community, here is the log from the commit of package python-w3lib for openSUSE:Factory checked in at 2019-03-29 20:43:29 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-w3lib (Old) and /work/SRC/openSUSE:Factory/.python-w3lib.new.25356 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "python-w3lib" Fri Mar 29 20:43:29 2019 rev:4 rq:689787 version:1.20.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-w3lib/python-w3lib.changes 2018-12-13 19:45:26.596934568 +0100 +++ /work/SRC/openSUSE:Factory/.python-w3lib.new.25356/python-w3lib.changes 2019-03-29 20:43:30.906679078 +0100 @@ -1,0 +2,10 @@ +Fri Mar 29 09:53:27 UTC 2019 - pgajdos@suse.com + +- version update to 1.20.0 + * Fix url_query_cleaner to do not append "?" to urls without a + query string (issue #109) + * Add support for Python 3.7 and drop Python 3.3 (issue #113) + * Add `w3lib.url.add_or_replace_parameters` helper (issue #117) + * Documentation fixes (issue #115) + +------------------------------------------------------------------- Old: ---- w3lib-1.19.0.tar.gz New: ---- w3lib-1.20.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-w3lib.spec ++++++ --- /var/tmp/diff_new_pack.iUwS4f/_old 2019-03-29 20:43:31.466679192 +0100 +++ /var/tmp/diff_new_pack.iUwS4f/_new 2019-03-29 20:43:31.466679192 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-w3lib # -# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. 
# # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-w3lib -Version: 1.19.0 +Version: 1.20.0 Release: 0 Summary: Library of Web-Related Functions License: BSD-3-Clause ++++++ w3lib-1.19.0.tar.gz -> w3lib-1.20.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/PKG-INFO new/w3lib-1.20.0/PKG-INFO --- old/w3lib-1.19.0/PKG-INFO 2018-01-25 01:58:11.000000000 +0100 +++ new/w3lib-1.20.0/PKG-INFO 2019-01-11 15:01:52.000000000 +0100 @@ -1,12 +1,11 @@ Metadata-Version: 1.1 Name: w3lib -Version: 1.19.0 +Version: 1.20.0 Summary: Library of web-related functions Home-page: https://github.com/scrapy/w3lib Author: Scrapy project Author-email: info@scrapy.org License: BSD -Description-Content-Type: UNKNOWN Description: UNKNOWN Platform: Any Classifier: Development Status :: 5 - Production/Stable @@ -16,10 +15,10 @@ Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Internet :: WWW/HTTP diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/README.rst new/w3lib-1.20.0/README.rst --- old/w3lib-1.19.0/README.rst 2018-01-25 01:57:32.000000000 +0100 +++ new/w3lib-1.20.0/README.rst 2019-01-11 15:01:17.000000000 +0100 @@ -27,7 +27,7 @@ Requirements ============ -Python 2.7 or 
Python 3.3+ +Python 2.7 or Python 3.4+ Install ======= diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/docs/conf.py new/w3lib-1.20.0/docs/conf.py --- old/w3lib-1.19.0/docs/conf.py 2018-01-25 01:57:32.000000000 +0100 +++ new/w3lib-1.20.0/docs/conf.py 2019-01-11 15:01:17.000000000 +0100 @@ -53,7 +53,7 @@ # built documents. # # The full version, including alpha/beta/rc tags. -release = '1.19.0' +release = '1.20.0' # The short X.Y version. version = '.'.join(release.split('.')[:2]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/docs/index.rst new/w3lib-1.20.0/docs/index.rst --- old/w3lib-1.19.0/docs/index.rst 2018-01-25 01:57:32.000000000 +0100 +++ new/w3lib-1.20.0/docs/index.rst 2019-01-11 15:01:17.000000000 +0100 @@ -8,7 +8,7 @@ * remove comments, or tags from HTML snippets * extract base url from HTML snippets -* translate entites on HTML strings +* translate entities on HTML strings * convert raw HTTP headers to dicts and vice-versa * construct HTTP auth header * converting HTML pages to unicode diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/setup.py new/w3lib-1.20.0/setup.py --- old/w3lib-1.19.0/setup.py 2018-01-25 01:57:32.000000000 +0100 +++ new/w3lib-1.20.0/setup.py 2019-01-11 15:01:17.000000000 +0100 @@ -3,7 +3,7 @@ setup( name='w3lib', - version='1.19.0', + version='1.20.0', license='BSD', description='Library of web-related functions', author='Scrapy project', @@ -21,10 +21,10 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language 
:: Python :: Implementation :: PyPy', 'Topic :: Internet :: WWW/HTTP', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/tests/test_url.py new/w3lib-1.20.0/tests/test_url.py --- old/w3lib-1.19.0/tests/test_url.py 2018-01-25 01:57:32.000000000 +0100 +++ new/w3lib-1.20.0/tests/test_url.py 2019-01-11 15:01:17.000000000 +0100 @@ -5,7 +5,7 @@ from w3lib.url import (is_url, safe_url_string, safe_download_url, url_query_parameter, add_or_replace_parameter, url_query_cleaner, file_uri_to_path, parse_data_uri, path_to_file_uri, any_to_uri, - urljoin_rfc, canonicalize_url, parse_url) + urljoin_rfc, canonicalize_url, parse_url, add_or_replace_parameters) from six.moves.urllib.parse import urlparse @@ -283,7 +283,21 @@ self.assertEqual(add_or_replace_parameter(url, 'pageurl', 'test'), 'http://example.com/?version=1&pageurl=test&param2=value2') + def test_add_or_replace_parameters(self): + url = 'http://domain/test' + self.assertEqual(add_or_replace_parameters(url, {'arg': 'v'}), + 'http://domain/test?arg=v') + url = 'http://domain/test?arg1=v1&arg2=v2&arg3=v3' + self.assertEqual(add_or_replace_parameters(url, {'arg4': 'v4'}), + 'http://domain/test?arg1=v1&arg2=v2&arg3=v3&arg4=v4') + self.assertEqual(add_or_replace_parameters(url, {'arg4': 'v4', 'arg3': 'v3new'}), + 'http://domain/test?arg1=v1&arg2=v2&arg3=v3new&arg4=v4') + def test_url_query_cleaner(self): + self.assertEqual('product.html', + url_query_cleaner("product.html?")) + self.assertEqual('product.html', + url_query_cleaner("product.html?&")) self.assertEqual('product.html?id=200', url_query_cleaner("product.html?id=200&foo=bar&name=wired", ['id'])) self.assertEqual('product.html?id=200', @@ -308,6 +322,10 @@ url_query_cleaner("product.html?id=2&foo=bar&name=wired", ['id', 'foo'], remove=True)) self.assertEqual('product.html?foo=bar&name=wired', url_query_cleaner("product.html?id=2&foo=bar&name=wired", ['id', 'footo'], remove=True)) + self.assertEqual('product.html', 
+ url_query_cleaner("product.html", ['id'], remove=True)) + self.assertEqual('product.html', + url_query_cleaner("product.html?&", ['id'], remove=True)) self.assertEqual('product.html?foo=bar', url_query_cleaner("product.html?foo=bar&name=wired", 'foo')) self.assertEqual('product.html?foobar=wired', @@ -321,7 +339,7 @@ def test_path_to_file_uri(self): if os.name == 'nt': - self.assertEqual(path_to_file_uri("C:\\windows\clock.avi"), + self.assertEqual(path_to_file_uri(r"C:\\windows\clock.avi"), "file:///C:/windows/clock.avi") else: self.assertEqual(path_to_file_uri("/some/path.txt"), @@ -329,13 +347,13 @@ fn = "test.txt" x = path_to_file_uri(fn) - self.assert_(x.startswith('file:///')) + self.assertTrue(x.startswith('file:///')) self.assertEqual(file_uri_to_path(x).lower(), os.path.abspath(fn).lower()) def test_file_uri_to_path(self): if os.name == 'nt': self.assertEqual(file_uri_to_path("file:///C:/windows/clock.avi"), - "C:\\windows\clock.avi") + r"C:\\windows\clock.avi") uri = "file:///C:/windows/clock.avi" uri2 = path_to_file_uri(file_uri_to_path(uri)) self.assertEqual(uri, uri2) @@ -353,7 +371,7 @@ def test_any_to_uri(self): if os.name == 'nt': - self.assertEqual(any_to_uri("C:\\windows\clock.avi"), + self.assertEqual(any_to_uri(r"C:\\windows\clock.avi"), "file:///C:/windows/clock.avi") else: self.assertEqual(any_to_uri("/some/path.txt"), diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/tox.ini new/w3lib-1.20.0/tox.ini --- old/w3lib-1.19.0/tox.ini 2018-01-25 01:57:32.000000000 +0100 +++ new/w3lib-1.20.0/tox.ini 2019-01-11 15:01:17.000000000 +0100 @@ -4,7 +4,7 @@ # and then run "tox" from this directory. 
[tox] -envlist = py27, pypy, py33, py34, py35, py36, pypy3 +envlist = py27, pypy, py34, py35, py36, py37, pypy3 [testenv] deps = diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/w3lib/__init__.py new/w3lib-1.20.0/w3lib/__init__.py --- old/w3lib-1.19.0/w3lib/__init__.py 2018-01-25 01:57:32.000000000 +0100 +++ new/w3lib-1.20.0/w3lib/__init__.py 2019-01-11 15:01:17.000000000 +0100 @@ -1,3 +1,3 @@ -__version__ = "1.19.0" +__version__ = "1.20.0" version_info = tuple(int(v) if v.isdigit() else v for v in __version__.split('.')) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/w3lib/encoding.py new/w3lib-1.20.0/w3lib/encoding.py --- old/w3lib-1.19.0/w3lib/encoding.py 2018-01-25 01:57:32.000000000 +0100 +++ new/w3lib-1.20.0/w3lib/encoding.py 2019-01-11 15:01:17.000000000 +0100 @@ -23,7 +23,7 @@ # regexp for parsing HTTP meta tags _TEMPLATE = r'''%s\s*=\s*["']?\s*%s\s*["']?''' -_SKIP_ATTRS = '''(?x)(?:\\s+ +_SKIP_ATTRS = '''(?:\\s+ [^=<>/\\s"'\x00-\x1f\x7f]+ # Attribute name (?:\\s*=\\s* (?: # ' and " are entity encoded (', "), so no need for \', \" @@ -33,7 +33,7 @@ | [^'"\\s]+ # attr having no ' nor " ))? -)*?''' +)*?''' # must be used with re.VERBOSE flag _HTTPEQUIV_RE = _TEMPLATE % ('http-equiv', 'Content-Type') _CONTENT_RE = _TEMPLATE % ('content', r'(?P<mime>[^;]+);\s*charset=(?P<charset>[\w-]+)') _CONTENT2_RE = _TEMPLATE % ('charset', r'(?P<charset2>[\w-]+)') @@ -42,8 +42,9 @@ # check for meta tags, or xml decl. 
and stop search if a body tag is encountered _BODY_ENCODING_PATTERN = r'<\s*(?:meta%s(?:(?:\s+%s|\s+%s){2}|\s+%s)|\?xml\s[^>]+%s|body)' % ( _SKIP_ATTRS, _HTTPEQUIV_RE, _CONTENT_RE, _CONTENT2_RE, _XML_ENCODING_RE) -_BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I) -_BODY_ENCODING_BYTES_RE = re.compile(_BODY_ENCODING_PATTERN.encode('ascii'), re.I) +_BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I | re.VERBOSE) +_BODY_ENCODING_BYTES_RE = re.compile(_BODY_ENCODING_PATTERN.encode('ascii'), + re.I | re.VERBOSE) def html_body_declared_encoding(html_body_str): '''Return the encoding specified in meta tags in the html body, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/w3lib/url.py new/w3lib-1.20.0/w3lib/url.py --- old/w3lib-1.19.0/w3lib/url.py 2018-01-25 01:57:32.000000000 +0100 +++ new/w3lib-1.20.0/w3lib/url.py 2019-01-11 15:01:17.000000000 +0100 @@ -9,7 +9,7 @@ import posixpath import warnings import string -from collections import namedtuple +from collections import namedtuple, OrderedDict import six from six.moves.urllib.parse import (urljoin, urlsplit, urlunsplit, urldefrag, urlencode, urlparse, @@ -182,6 +182,8 @@ seen = set() querylist = [] for ksv in query.split(sep): + if not ksv: + continue k, _, _ = ksv.partition(kvsep) if unique and k in seen: continue @@ -198,6 +200,17 @@ return url +def _add_or_replace_parameters(url, params): + parsed = urlsplit(url) + args = parse_qsl(parsed.query, keep_blank_values=True) + + new_args = OrderedDict(args) + new_args.update(params) + + query = urlencode(new_args) + return urlunsplit(parsed._replace(query=query)) + + def add_or_replace_parameter(url, name, new_value): """Add or remove a parameter to a given url @@ -211,23 +224,22 @@ >>> """ - parsed = urlsplit(url) - args = parse_qsl(parsed.query, keep_blank_values=True) + return _add_or_replace_parameters(url, {name: new_value}) - new_args = [] - found = False - for name_, value_ in 
args: - if name_ == name: - new_args.append((name_, new_value)) - found = True - else: - new_args.append((name_, value_)) - if not found: - new_args.append((name, new_value)) +def add_or_replace_parameters(url, new_parameters): + """Add or remove a parameters to a given url - query = urlencode(new_args) - return urlunsplit(parsed._replace(query=query)) + >>> import w3lib.url + >>> w3lib.url.add_or_replace_parameters('http://www.example.com/index.php', {'arg': 'v'}) + 'http://www.example.com/index.php?arg=v' + >>> args = {'arg4': 'v4', 'arg3': 'v3new'} + >>> w3lib.url.add_or_replace_parameters('http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3', args) + 'http://www.example.com/index.php?arg1=v1&arg2=v2&arg3=v3new&arg4=v4' + >>> + + """ + return _add_or_replace_parameters(url, new_parameters) def path_to_file_uri(path): @@ -291,6 +303,7 @@ _ParseDataURIResult = namedtuple("ParseDataURIResult", "media_type media_type_parameters data") + def parse_data_uri(uri): """ @@ -355,6 +368,7 @@ __all__ = ["add_or_replace_parameter", + "add_or_replace_parameters", "any_to_uri", "canonicalize_url", "file_uri_to_path", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.19.0/w3lib.egg-info/PKG-INFO new/w3lib-1.20.0/w3lib.egg-info/PKG-INFO --- old/w3lib-1.19.0/w3lib.egg-info/PKG-INFO 2018-01-25 01:58:11.000000000 +0100 +++ new/w3lib-1.20.0/w3lib.egg-info/PKG-INFO 2019-01-11 15:01:52.000000000 +0100 @@ -1,12 +1,11 @@ Metadata-Version: 1.1 Name: w3lib -Version: 1.19.0 +Version: 1.20.0 Summary: Library of web-related functions Home-page: https://github.com/scrapy/w3lib Author: Scrapy project Author-email: info@scrapy.org License: BSD -Description-Content-Type: UNKNOWN Description: UNKNOWN Platform: Any Classifier: Development Status :: 5 - Production/Stable @@ -16,10 +15,10 @@ Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 
-Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Internet :: WWW/HTTP