![](https://seccdn.libravatar.org/avatar/e2145bc5cf53dda95c308a3c75e8fef3.jpg?s=120&d=mm&r=g)
Hello community, here is the log from the commit of package python-parsel for openSUSE:Factory checked in at 2018-04-19 15:28:31 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-parsel (Old) and /work/SRC/openSUSE:Factory/.python-parsel.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "python-parsel" Thu Apr 19 15:28:31 2018 rev:2 rq:593919 version:1.4.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-parsel/python-parsel.changes 2018-01-10 23:32:54.366391697 +0100 +++ /work/SRC/openSUSE:Factory/.python-parsel.new/python-parsel.changes 2018-04-19 15:28:31.805407866 +0200 @@ -1,0 +2,11 @@ +Fri Apr 6 08:38:41 UTC 2018 - mpluskal@suse.com + +- Update to version 1.4.0: + * has-class XPath extension function; + * parsel.xpathfuncs.set_xpathfunc is a simplified way to register + XPath extensions; + * Selector.remove_namespaces now removes namespace declarations; + * Python 3.3 support is dropped; + * make htmlview command for easier Parsel docs development. + +------------------------------------------------------------------- Old: ---- parsel-1.2.0.tar.gz New: ---- parsel-1.4.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-parsel.spec ++++++ --- /var/tmp/diff_new_pack.YkfUCE/_old 2018-04-19 15:28:32.353385471 +0200 +++ /var/tmp/diff_new_pack.YkfUCE/_new 2018-04-19 15:28:32.357385307 +0200 @@ -1,7 +1,7 @@ # # spec file for package python-parsel # -# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. 
# # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,12 +18,12 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-parsel -Version: 1.2.0 +Version: 1.4.0 Release: 0 Summary: Library to extract data from HTML and XML using XPath and CSS selectors License: BSD-3-Clause Group: Development/Languages/Python -Url: http://github.com/scrapy/parsel +URL: http://github.com/scrapy/parsel Source: https://files.pythonhosted.org/packages/source/p/parsel/parsel-%{version}.tar.gz BuildRequires: %{python_module cssselect} >= 0.9 BuildRequires: %{python_module devel} @@ -59,7 +59,8 @@ %python_exec setup.py test %files %{python_files} -%doc README.rst LICENSE +%license LICENSE +%doc README.rst %{python_sitelib}/* %changelog ++++++ parsel-1.2.0.tar.gz -> parsel-1.4.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/NEWS new/parsel-1.4.0/NEWS --- old/parsel-1.2.0/NEWS 2017-05-17 22:25:05.000000000 +0200 +++ new/parsel-1.4.0/NEWS 2018-02-08 16:04:00.000000000 +0100 @@ -3,14 +3,42 @@ History ------- -1.2.0 (2017-05-XX) +1.4.0 (2018-02-08) ~~~~~~~~~~~~~~~~~~ -* Add :meth:`~parsel.selector.SelectorList.get` and :meth:`~parsel.selector.SelectorList.getall` - methods as aliases for :meth:`~parsel.selector.SelectorList.extract_first` - and :meth:`~parsel.selector.SelectorList.extract` respectively -* Add default value parameter to :meth:`~parsel.selector.SelectorList.re_first` method -* Add :meth:`~parsel.selector.Selector.re_first` method to :class:`parsel.selector.Selector` class +* ``Selector`` and ``SelectorList`` can't be pickled because + pickling/unpickling doesn't work for ``lxml.html.HtmlElement``; + parsel now raises TypeError explicitly instead of allowing pickle to + silently produce wrong output. This is technically backwards-incompatible + if you're using Python < 3.6. 
+ +1.3.1 (2017-12-28) +~~~~~~~~~~~~~~~~~~ + +* Fix artifact uploads to pypi. + +1.3.0 (2017-12-28) +~~~~~~~~~~~~~~~~~~ + +* ``has-class`` XPath extension function; +* ``parsel.xpathfuncs.set_xpathfunc`` is a simplified way to register + XPath extensions; +* ``Selector.remove_namespaces`` now removes namespace declarations; +* Python 3.3 support is dropped; +* ``make htmlview`` command for easier Parsel docs development. +* CI: PyPy installation is fixed; parsel now runs tests for PyPy3 as well. + + +1.2.0 (2017-05-17) +~~~~~~~~~~~~~~~~~~ + +* Add ``SelectorList.get`` and ``SelectorList.getall`` + methods as aliases for ``SelectorList.extract_first`` + and ``SelectorList.extract`` respectively +* Add default value parameter to ``SelectorList.re_first`` method +* Add ``Selector.re_first`` method +* Add ``replace_entities`` argument on ``.re()`` and ``.re_first()`` + to turn off replacing of character entity references * Bug fix: detect ``None`` result from lxml parsing and fallback with an empty document * Rearrange XML/HTML examples in the selectors usage docs * Travis CI: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/PKG-INFO new/parsel-1.4.0/PKG-INFO --- old/parsel-1.2.0/PKG-INFO 2017-05-17 22:25:33.000000000 +0200 +++ new/parsel-1.4.0/PKG-INFO 2018-02-08 16:04:49.000000000 +0100 @@ -1,20 +1,23 @@ Metadata-Version: 1.1 Name: parsel -Version: 1.2.0 +Version: 1.4.0 Summary: Parsel is a library to extract data from HTML and XML using XPath and CSS selectors Home-page: https://github.com/scrapy/parsel Author: Scrapy project Author-email: info@scrapy.org License: BSD +Description-Content-Type: UNKNOWN Description: =============================== Parsel =============================== - .. image:: https://img.shields.io/travis/scrapy/parsel.svg + .. image:: https://img.shields.io/travis/scrapy/parsel/master.svg :target: https://travis-ci.org/scrapy/parsel + :alt: Build Status .. 
image:: https://img.shields.io/pypi/v/parsel.svg :target: https://pypi.python.org/pypi/parsel + :alt: PyPI Version .. image:: https://img.shields.io/codecov/c/github/scrapy/parsel/master.svg :target: http://codecov.io/github/scrapy/parsel?branch=master @@ -62,14 +65,42 @@ History ------- - 1.2.0 (2017-05-XX) + 1.4.0 (2018-02-08) ~~~~~~~~~~~~~~~~~~ - * Add :meth:`~parsel.selector.SelectorList.get` and :meth:`~parsel.selector.SelectorList.getall` - methods as aliases for :meth:`~parsel.selector.SelectorList.extract_first` - and :meth:`~parsel.selector.SelectorList.extract` respectively - * Add default value parameter to :meth:`~parsel.selector.SelectorList.re_first` method - * Add :meth:`~parsel.selector.Selector.re_first` method to :class:`parsel.selector.Selector` class + * ``Selector`` and ``SelectorList`` can't be pickled because + pickling/unpickling doesn't work for ``lxml.html.HtmlElement``; + parsel now raises TypeError explicitly instead of allowing pickle to + silently produce wrong output. This is technically backwards-incompatible + if you're using Python < 3.6. + + 1.3.1 (2017-12-28) + ~~~~~~~~~~~~~~~~~~ + + * Fix artifact uploads to pypi. + + 1.3.0 (2017-12-28) + ~~~~~~~~~~~~~~~~~~ + + * ``has-class`` XPath extension function; + * ``parsel.xpathfuncs.set_xpathfunc`` is a simplified way to register + XPath extensions; + * ``Selector.remove_namespaces`` now removes namespace declarations; + * Python 3.3 support is dropped; + * ``make htmlview`` command for easier Parsel docs development. + * CI: PyPy installation is fixed; parsel now runs tests for PyPy3 as well. 
+ + + 1.2.0 (2017-05-17) + ~~~~~~~~~~~~~~~~~~ + + * Add ``SelectorList.get`` and ``SelectorList.getall`` + methods as aliases for ``SelectorList.extract_first`` + and ``SelectorList.extract`` respectively + * Add default value parameter to ``SelectorList.re_first`` method + * Add ``Selector.re_first`` method + * Add ``replace_entities`` argument on ``.re()`` and ``.re_first()`` + to turn off replacing of character entity references * Bug fix: detect ``None`` result from lxml parsing and fallback with an empty document * Rearrange XML/HTML examples in the selectors usage docs * Travis CI: @@ -175,7 +206,6 @@ Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/README.rst new/parsel-1.4.0/README.rst --- old/parsel-1.2.0/README.rst 2017-05-17 22:25:05.000000000 +0200 +++ new/parsel-1.4.0/README.rst 2018-02-08 16:04:00.000000000 +0100 @@ -2,11 +2,13 @@ Parsel =============================== -.. image:: https://img.shields.io/travis/scrapy/parsel.svg +.. image:: https://img.shields.io/travis/scrapy/parsel/master.svg :target: https://travis-ci.org/scrapy/parsel + :alt: Build Status .. image:: https://img.shields.io/pypi/v/parsel.svg :target: https://pypi.python.org/pypi/parsel + :alt: PyPI Version .. 
image:: https://img.shields.io/codecov/c/github/scrapy/parsel/master.svg :target: http://codecov.io/github/scrapy/parsel?branch=master diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/docs/Makefile new/parsel-1.4.0/docs/Makefile --- old/parsel-1.2.0/docs/Makefile 2017-05-17 22:25:05.000000000 +0200 +++ new/parsel-1.4.0/docs/Makefile 2018-02-08 16:04:00.000000000 +0100 @@ -2,6 +2,7 @@ # # You can set these variables from the command line. +PYTHON = python SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = @@ -45,6 +46,7 @@ @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " htmlview to view the compiled HTML files in browser" clean: rm -rf $(BUILDDIR)/* @@ -175,3 +177,7 @@ $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." + +htmlview: html + $(PYTHON) -c "import webbrowser, os; webbrowser.open('file://' + \ + os.path.realpath('_build/html/index.html'))" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/docs/usage.rst new/parsel-1.4.0/docs/usage.rst --- old/parsel-1.2.0/docs/usage.rst 2017-05-17 22:25:05.000000000 +0200 +++ new/parsel-1.4.0/docs/usage.rst 2018-02-08 16:04:00.000000000 +0100 @@ -450,6 +450,47 @@ .. _regular expressions: http://exslt.org/regexp/index.html .. _set manipulation: http://exslt.org/set/index.html +Other XPath extensions +---------------------- + +Parsel also defines a sorely missed XPath extension function ``has-class`` that +returns ``True`` for nodes that have all of the specified HTML classes:: + + >>> from parsel import Selector + >>> sel = Selector(""" + ... <p class="foo bar-baz">First</p> + ... <p class="foo">Second</p> + ... <p class="bar">Third</p> + ... 
<p>Fourth</p> + ... """) + ... + >>> sel = Selector(u""" + ... <p class="foo bar-baz">First</p> + ... <p class="foo">Second</p> + ... <p class="bar">Third</p> + ... <p>Fourth</p> + ... """) + ... + >>> sel.xpath('//p[has-class("foo")]') + [<Selector xpath='//p[has-class("foo")]' data=u'<p class="foo bar-baz">First</p>'>, + <Selector xpath='//p[has-class("foo")]' data=u'<p class="foo">Second</p>'>] + >>> sel.xpath('//p[has-class("foo", "bar-baz")]') + [<Selector xpath='//p[has-class("foo", "bar-baz")]' data=u'<p class="foo bar-baz">First</p>'>] + >>> sel.xpath('//p[has-class("foo", "bar")]') + [] + +So XPath ``//p[has-class("foo", "bar-baz")]`` is roughly equivalent to CSS +``p.foo.bar-baz``. Please note that it is slower in most cases, +because it's a pure-Python function that's invoked for every node in question +whereas the CSS lookup is translated into XPath and thus runs more efficiently, +so performance-wise its uses are limited to situations that are not easily +described with CSS selectors. + +Parsel also simplifies adding your own XPath extensions. + +.. 
autofunction:: parsel.xpathfuncs.set_xpathfunc + + Some XPath tips --------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/parsel/__init__.py new/parsel-1.4.0/parsel/__init__.py --- old/parsel-1.2.0/parsel/__init__.py 2017-05-17 22:25:05.000000000 +0200 +++ new/parsel-1.4.0/parsel/__init__.py 2018-02-08 16:04:00.000000000 +0100 @@ -5,7 +5,10 @@ __author__ = 'Scrapy project' __email__ = 'info@scrapy.org' -__version__ = '1.2.0' +__version__ = '1.4.0' from parsel.selector import Selector, SelectorList # NOQA from parsel.csstranslator import css2xpath # NOQA +from parsel import xpathfuncs # NOQA + +xpathfuncs.setup() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/parsel/selector.py new/parsel-1.4.0/parsel/selector.py --- old/parsel-1.2.0/parsel/selector.py 2017-05-17 22:25:05.000000000 +0200 +++ new/parsel-1.4.0/parsel/selector.py 2018-02-08 16:04:00.000000000 +0100 @@ -61,6 +61,9 @@ o = super(SelectorList, self).__getitem__(pos) return self.__class__(o) if isinstance(pos, slice) else o + def __getstate__(self): + raise TypeError("can't pickle SelectorList objects") + def xpath(self, xpath, namespaces=None, **kwargs): """ Call the ``.xpath()`` method for each element in this list and return @@ -187,6 +190,9 @@ self.root = root self._expr = _expr + def __getstate__(self): + raise TypeError("can't pickle Selector objects") + def _get_root(self, text, base_url=None): return create_root_node(text, self._parser, base_url=base_url) @@ -321,6 +327,8 @@ for an in el.attrib.keys(): if an.startswith('{'): el.attrib[an.split('}', 1)[1]] = el.attrib.pop(an) + # remove namespace declarations + etree.cleanup_namespaces(self.root) def __bool__(self): """ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/parsel/xpathfuncs.py new/parsel-1.4.0/parsel/xpathfuncs.py --- old/parsel-1.2.0/parsel/xpathfuncs.py 
1970-01-01 01:00:00.000000000 +0100 +++ new/parsel-1.4.0/parsel/xpathfuncs.py 2018-02-08 16:04:00.000000000 +0100 @@ -0,0 +1,54 @@ +from lxml import etree + +from six import string_types + + +def set_xpathfunc(fname, func): + """Register a custom extension function to use in XPath expressions. + + The function ``func`` registered under ``fname`` identifier will be called + for every matching node, being passed a ``context`` parameter as well as + any parameters passed from the corresponding XPath expression. + + If ``func`` is ``None``, the extension function will be removed. + + See more `in lxml documentation`_. + + .. _`in lxml documentation`: http://lxml.de/extensions.html#xpath-extension-functions + + """ + ns_fns = etree.FunctionNamespace(None) + if func is not None: + ns_fns[fname] = func + else: + del ns_fns[fname] + + +def setup(): + set_xpathfunc('has-class', has_class) + + +def has_class(context, *classes): + """has-class function. + + Return True if all ``classes`` are present in element's class attr. 
+ + """ + if not context.eval_context.get('args_checked'): + if not classes: + raise ValueError( + 'XPath error: has-class must have at least 1 argument') + for c in classes: + if not isinstance(c, string_types): + raise ValueError( + 'XPath error: has-class arguments must be strings') + context.eval_context['args_checked'] = True + + node_cls = context.context_node.get('class') + if node_cls is None: + return False + node_cls = ' ' + node_cls + ' ' + for cls in classes: + if ' ' + cls + ' ' not in node_cls: + return False + return True diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/parsel.egg-info/PKG-INFO new/parsel-1.4.0/parsel.egg-info/PKG-INFO --- old/parsel-1.2.0/parsel.egg-info/PKG-INFO 2017-05-17 22:25:33.000000000 +0200 +++ new/parsel-1.4.0/parsel.egg-info/PKG-INFO 2018-02-08 16:04:49.000000000 +0100 @@ -1,20 +1,23 @@ Metadata-Version: 1.1 Name: parsel -Version: 1.2.0 +Version: 1.4.0 Summary: Parsel is a library to extract data from HTML and XML using XPath and CSS selectors Home-page: https://github.com/scrapy/parsel Author: Scrapy project Author-email: info@scrapy.org License: BSD +Description-Content-Type: UNKNOWN Description: =============================== Parsel =============================== - .. image:: https://img.shields.io/travis/scrapy/parsel.svg + .. image:: https://img.shields.io/travis/scrapy/parsel/master.svg :target: https://travis-ci.org/scrapy/parsel + :alt: Build Status .. image:: https://img.shields.io/pypi/v/parsel.svg :target: https://pypi.python.org/pypi/parsel + :alt: PyPI Version .. 
image:: https://img.shields.io/codecov/c/github/scrapy/parsel/master.svg :target: http://codecov.io/github/scrapy/parsel?branch=master @@ -62,14 +65,42 @@ History ------- - 1.2.0 (2017-05-XX) + 1.4.0 (2018-02-08) ~~~~~~~~~~~~~~~~~~ - * Add :meth:`~parsel.selector.SelectorList.get` and :meth:`~parsel.selector.SelectorList.getall` - methods as aliases for :meth:`~parsel.selector.SelectorList.extract_first` - and :meth:`~parsel.selector.SelectorList.extract` respectively - * Add default value parameter to :meth:`~parsel.selector.SelectorList.re_first` method - * Add :meth:`~parsel.selector.Selector.re_first` method to :class:`parsel.selector.Selector` class + * ``Selector`` and ``SelectorList`` can't be pickled because + pickling/unpickling doesn't work for ``lxml.html.HtmlElement``; + parsel now raises TypeError explicitly instead of allowing pickle to + silently produce wrong output. This is technically backwards-incompatible + if you're using Python < 3.6. + + 1.3.1 (2017-12-28) + ~~~~~~~~~~~~~~~~~~ + + * Fix artifact uploads to pypi. + + 1.3.0 (2017-12-28) + ~~~~~~~~~~~~~~~~~~ + + * ``has-class`` XPath extension function; + * ``parsel.xpathfuncs.set_xpathfunc`` is a simplified way to register + XPath extensions; + * ``Selector.remove_namespaces`` now removes namespace declarations; + * Python 3.3 support is dropped; + * ``make htmlview`` command for easier Parsel docs development. + * CI: PyPy installation is fixed; parsel now runs tests for PyPy3 as well. 
+ + + 1.2.0 (2017-05-17) + ~~~~~~~~~~~~~~~~~~ + + * Add ``SelectorList.get`` and ``SelectorList.getall`` + methods as aliases for ``SelectorList.extract_first`` + and ``SelectorList.extract`` respectively + * Add default value parameter to ``SelectorList.re_first`` method + * Add ``Selector.re_first`` method + * Add ``replace_entities`` argument on ``.re()`` and ``.re_first()`` + to turn off replacing of character entity references * Bug fix: detect ``None`` result from lxml parsing and fallback with an empty document * Rearrange XML/HTML examples in the selectors usage docs * Travis CI: @@ -175,7 +206,6 @@ Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/parsel.egg-info/SOURCES.txt new/parsel-1.4.0/parsel.egg-info/SOURCES.txt --- old/parsel-1.2.0/parsel.egg-info/SOURCES.txt 2017-05-17 22:25:33.000000000 +0200 +++ new/parsel-1.4.0/parsel.egg-info/SOURCES.txt 2018-02-08 16:04:49.000000000 +0100 @@ -18,6 +18,7 @@ parsel/csstranslator.py parsel/selector.py parsel/utils.py +parsel/xpathfuncs.py parsel.egg-info/PKG-INFO parsel.egg-info/SOURCES.txt parsel.egg-info/dependency_links.txt @@ -26,4 +27,5 @@ parsel.egg-info/top_level.txt tests/requirements.txt tests/test_selector.py -tests/test_selector_csstranslator.py \ No newline at end of file +tests/test_selector_csstranslator.py +tests/test_xpathfuncs.py \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/setup.py new/parsel-1.4.0/setup.py --- old/parsel-1.2.0/setup.py 2017-05-17 22:25:05.000000000 +0200 +++ new/parsel-1.4.0/setup.py 2018-02-08 
16:04:00.000000000 +0100 @@ -16,7 +16,7 @@ setup( name='parsel', - version='1.2.0', + version='1.4.0', description="Parsel is a library to extract data from HTML and XML using XPath and CSS selectors", long_description=readme + '\n\n' + history, author="Scrapy project", @@ -48,7 +48,6 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/tests/test_selector.py new/parsel-1.4.0/tests/test_selector.py --- old/parsel-1.2.0/tests/test_selector.py 2017-05-17 22:25:05.000000000 +0200 +++ new/parsel-1.4.0/tests/test_selector.py 2018-02-08 16:04:00.000000000 +0100 @@ -3,6 +3,8 @@ import weakref import six import unittest +import pickle + from parsel import Selector @@ -10,6 +12,19 @@ sscls = Selector + def test_pickle_selector(self): + sel = self.sscls(text=u'<html><body><p>some text</p></body></html>') + self.assertRaises(TypeError, lambda s: pickle.dumps(s, protocol=2), sel) + + def test_pickle_selector_list(self): + sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') + sel_list = sel.css('li') + empty_sel_list = sel.css('p') + self.assertIsInstance(sel_list, self.sscls.selectorlist_cls) + self.assertIsInstance(empty_sel_list, self.sscls.selectorlist_cls) + self.assertRaises(TypeError, lambda s: pickle.dumps(s, protocol=2), sel_list) + self.assertRaises(TypeError, lambda s: pickle.dumps(s, protocol=2), empty_sel_list) + def test_simple_selection(self): """Simple selector tests""" body = u"<p><input name='a'value='1'/><input name='b'value='2'/></p>" @@ -588,8 +603,10 @@ """ sel = self.sscls(text=xml, type='xml') self.assertEqual(len(sel.xpath("//link")), 0) + 
self.assertEqual(len(sel.xpath("./namespace::*")), 3) sel.remove_namespaces() self.assertEqual(len(sel.xpath("//link")), 2) + self.assertEqual(len(sel.xpath("./namespace::*")), 1) def test_remove_attributes_namespaces(self): xml = u"""<?xml version="1.0" encoding="UTF-8"?> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.2.0/tests/test_xpathfuncs.py new/parsel-1.4.0/tests/test_xpathfuncs.py --- old/parsel-1.2.0/tests/test_xpathfuncs.py 1970-01-01 01:00:00.000000000 +0100 +++ new/parsel-1.4.0/tests/test_xpathfuncs.py 2018-02-08 16:04:00.000000000 +0100 @@ -0,0 +1,97 @@ +# coding: utf-8 + +from parsel import Selector +from parsel.xpathfuncs import set_xpathfunc +import unittest + + +class XPathFuncsTestCase(unittest.TestCase): + def test_has_class_simple(self): + body = u""" + <p class="foo bar-baz">First</p> + <p class="foo">Second</p> + <p class="bar">Third</p> + <p>Fourth</p> + """ + sel = Selector(text=body) + self.assertEqual( + [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], + [u'First', u'Second']) + self.assertEqual( + [x.extract() for x in sel.xpath('//p[has-class("bar")]/text()')], + [u'Third']) + self.assertEqual( + [x.extract() for x in sel.xpath('//p[has-class("foo","bar")]/text()')], + []) + self.assertEqual( + [x.extract() for x in sel.xpath('//p[has-class("foo","bar-baz")]/text()')], + [u'First']) + + def test_has_class_error_no_args(self): + body = u""" + <p CLASS="foo">First</p> + """ + sel = Selector(text=body) + self.assertRaisesRegexp( + ValueError, 'has-class must have at least 1 argument', + sel.xpath, 'has-class()') + + def test_has_class_error_invalid_arg_type(self): + body = u""" + <p CLASS="foo">First</p> + """ + sel = Selector(text=body) + self.assertRaisesRegexp( + ValueError, 'has-class arguments must be strings', + sel.xpath, 'has-class(.)') + + def test_has_class_error_invalid_unicode(self): + body = u""" + <p CLASS="foo">First</p> + """ + sel = Selector(text=body) 
+ self.assertRaisesRegexp( + ValueError, 'All strings must be XML compatible', + sel.xpath, u'has-class("héllö")'.encode('utf-8')) + + def test_has_class_unicode(self): + body = u""" + <p CLASS="fóó">First</p> + """ + sel = Selector(text=body) + self.assertEqual( + [x.extract() for x in sel.xpath(u'//p[has-class("fóó")]/text()')], + [u'First']) + + def test_has_class_uppercase(self): + body = u""" + <p CLASS="foo">First</p> + """ + sel = Selector(text=body) + self.assertEqual( + [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], + [u'First']) + + def test_set_xpathfunc(self): + + def myfunc(ctx): + myfunc.call_count += 1 + + myfunc.call_count = 0 + + body = u""" + <p CLASS="foo">First</p> + """ + sel = Selector(text=body) + self.assertRaisesRegexp( + ValueError, 'Unregistered function in myfunc', + sel.xpath, 'myfunc()') + + set_xpathfunc('myfunc', myfunc) + sel.xpath('myfunc()') + self.assertEqual(myfunc.call_count, 1) + + set_xpathfunc('myfunc', None) + self.assertRaisesRegexp( + ValueError, 'Unregistered function in myfunc', + sel.xpath, 'myfunc()')