commit python-itemloaders for openSUSE:Factory
Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-itemloaders for openSUSE:Factory checked in at 2024-06-05 17:42:16 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-itemloaders (Old) and /work/SRC/openSUSE:Factory/.python-itemloaders.new.24587 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "python-itemloaders" Wed Jun 5 17:42:16 2024 rev:6 rq:1178615 version:1.3.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-itemloaders/python-itemloaders.changes 2024-04-21 20:29:29.514789591 +0200 +++ /work/SRC/openSUSE:Factory/.python-itemloaders.new.24587/python-itemloaders.changes 2024-06-05 17:43:00.175422463 +0200 @@ -1,0 +2,9 @@ +Tue Jun 4 20:38:34 UTC 2024 - Dirk Müller <dmueller@suse.com> + +- update to 1.3.0: + * Added support for method chaining to the `add_*` and + `replace_*` methods + * Added type hints and `py.typed` + * Made the docs builds reproducible + +------------------------------------------------------------------- Old: ---- itemloaders-1.2.0-gh.tar.gz New: ---- itemloaders-1.3.0-gh.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-itemloaders.spec ++++++ --- /var/tmp/diff_new_pack.FztGX7/_old 2024-06-05 17:43:00.863447520 +0200 +++ /var/tmp/diff_new_pack.FztGX7/_new 2024-06-05 17:43:00.863447520 +0200 @@ -18,7 +18,7 @@ %{?sle15_python_module_pythons} Name: python-itemloaders -Version: 1.2.0 +Version: 1.3.0 Release: 0 Summary: Base library for scrapy's ItemLoader License: BSD-3-Clause ++++++ itemloaders-1.2.0-gh.tar.gz -> itemloaders-1.3.0-gh.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/.bumpversion.cfg new/itemloaders-1.3.0/.bumpversion.cfg --- old/itemloaders-1.2.0/.bumpversion.cfg 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/.bumpversion.cfg 2024-05-30 13:05:17.000000000 +0200 @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.2.0 +current_version = 1.3.0 commit = True tag = True diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/docs/conf.py new/itemloaders-1.3.0/docs/conf.py --- old/itemloaders-1.2.0/docs/conf.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/docs/conf.py 2024-05-30 13:05:17.000000000 +0200 @@ -12,7 +12,6 @@ # serve to show the default. import sys -from datetime import datetime from os import path import sphinx_rtd_theme @@ -50,7 +49,7 @@ # General information about the project. project = "itemloaders" -copyright = "2020–{}, Zyte Group Ltd".format(datetime.now().year) +copyright = "Zyte Group Ltd" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/docs/release-notes.rst new/itemloaders-1.3.0/docs/release-notes.rst --- old/itemloaders-1.2.0/docs/release-notes.rst 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/docs/release-notes.rst 2024-05-30 13:05:17.000000000 +0200 @@ -5,6 +5,20 @@ Release notes ============= +.. _release-1.3.0: + +itemloaders 1.3.0 (2024-05-30) +------------------------------ + +- Added support for method chaining to the ``add_*`` and ``replace_*`` + methods, so you can now write code such as + ``loader.add_xpath("name", "//body/text()").add_value("url", "http://example.com")`` + (:gh:`81`) + +- Added type hints and ``py.typed`` (:gh:`80`, :gh:`83`) + +- Made the docs builds reproducible (:gh:`82`) + .. _release-1.2.0: itemloaders 1.2.0 (2024-04-18) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/itemloaders/__init__.py new/itemloaders-1.3.0/itemloaders/__init__.py --- old/itemloaders-1.2.0/itemloaders/__init__.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/itemloaders/__init__.py 2024-05-30 13:05:17.000000000 +0200 @@ -4,24 +4,43 @@ See documentation in docs/topics/loaders.rst """ +from __future__ import annotations + from contextlib import suppress +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + List, + MutableMapping, + Optional, + Pattern, + Union, +) from itemadapter import ItemAdapter +from parsel import Selector from parsel.utils import extract_regex, flatten from itemloaders.common import wrap_loader_context from itemloaders.processors import Identity from itemloaders.utils import arg_to_iter +if TYPE_CHECKING: + # typing.Self requires Python 3.11 + from typing_extensions import Self + -def unbound_method(method): +def unbound_method(method: Callable[..., Any]) -> Callable[..., Any]: """ Allow to use single-argument functions as input or output processors (no need to define an unused first 'self' argument) """ with suppress(AttributeError): if "." not in method.__qualname__: - return method.__func__ + return method.__func__ # type: ignore[attr-defined, no-any-return] return method @@ -96,40 +115,46 @@ .. _parsel: https://parsel.readthedocs.io/en/latest/ """ - default_item_class = dict - default_input_processor = Identity() - default_output_processor = Identity() - - def __init__(self, item=None, selector=None, parent=None, **context): - self.selector = selector + default_item_class: type = dict + default_input_processor: Callable[..., Any] = Identity() + default_output_processor: Callable[..., Any] = Identity() + + def __init__( + self, + item: Any = None, + selector: Optional[Selector] = None, + parent: Optional[ItemLoader] = None, + **context: Any, + ): + self.selector: Optional[Selector] = selector context.update(selector=selector) if item is None: item = self.default_item_class() self._local_item = item context["item"] = item - self.context = context - self.parent = parent - self._local_values = {} + self.context: MutableMapping[str, Any] = context + self.parent: Optional[ItemLoader] = parent + self._local_values: Dict[str, List[Any]] = {} # values from initial item for field_name, value in ItemAdapter(item).items(): self._values.setdefault(field_name, []) self._values[field_name] += arg_to_iter(value) @property - def _values(self): + def _values(self) -> Dict[str, List[Any]]: if self.parent is not None: return self.parent._values else: return self._local_values @property - def item(self): + def item(self) -> Any: if self.parent is not None: return self.parent.item else: return self._local_item - def nested_xpath(self, xpath, **context): + def nested_xpath(self, xpath: str, **context: Any) -> Self: """ Create a nested loader with an xpath selector. The supplied selector is applied relative to selector associated @@ -137,12 +162,14 @@ with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`, :meth:`add_value`, :meth:`replace_value`, etc. will behave as expected. """ + self._check_selector_method() + assert self.selector selector = self.selector.xpath(xpath) context.update(selector=selector) subloader = self.__class__(item=self.item, parent=self, **context) return subloader - def nested_css(self, css, **context): + def nested_css(self, css: str, **context: Any) -> Self: """ Create a nested loader with a css selector. The supplied selector is applied relative to selector associated @@ -150,12 +177,21 @@ with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`, :meth:`add_value`, :meth:`replace_value`, etc. will behave as expected. """ + self._check_selector_method() + assert self.selector selector = self.selector.css(css) context.update(selector=selector) subloader = self.__class__(item=self.item, parent=self, **context) return subloader - def add_value(self, field_name, value, *processors, re=None, **kw): + def add_value( + self, + field_name: Optional[str], + value: Any, + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Self: """ Process and then add the given ``value`` for the given field. @@ -169,6 +205,9 @@ multiple fields may be added. And the processed value should be a dict with field_name mapped to values. + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + Examples:: loader.add_value('name', 'Color TV') @@ -176,6 +215,7 @@ loader.add_value('length', '100') loader.add_value('name', 'name: foo', TakeFirst(), re='name: (.+)') loader.add_value(None, {'name': 'foo', 'sex': 'male'}) + """ value = self.get_value(value, *processors, re=re, **kw) if value is None: @@ -185,11 +225,22 @@ self._add_value(k, v) else: self._add_value(field_name, value) + return self - def replace_value(self, field_name, value, *processors, re=None, **kw): + def replace_value( + self, + field_name: Optional[str], + value: Any, + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Self: """ Similar to :meth:`add_value` but replaces the collected data with the new value instead of adding it. + + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader """ value = self.get_value(value, *processors, re=re, **kw) if value is None: @@ -199,19 +250,26 @@ self._replace_value(k, v) else: self._replace_value(field_name, value) + return self - def _add_value(self, field_name, value): + def _add_value(self, field_name: str, value: Any) -> None: value = arg_to_iter(value) processed_value = self._process_input_value(field_name, value) if processed_value: self._values.setdefault(field_name, []) self._values[field_name] += arg_to_iter(processed_value) - def _replace_value(self, field_name, value): + def _replace_value(self, field_name: str, value: Any) -> None: self._values.pop(field_name, None) self._add_value(field_name, value) - def get_value(self, value, *processors, re=None, **kw): + def get_value( + self, + value: Any, + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Any: """ Process the given ``value`` by the given ``processors`` and keyword arguments. @@ -221,7 +279,7 @@ :param re: a regular expression to use for extracting data from the given value using :func:`~parsel.utils.extract_regex` method, applied before processors - :type re: str or typing.Pattern + :type re: str or typing.Pattern[str] Examples: @@ -249,7 +307,7 @@ ) from e return value - def load_item(self): + def load_item(self) -> Any: """ Populate the item with the data collected so far, and return it. The data collected is first passed through the :ref:`output processors @@ -263,7 +321,7 @@ return adapter.item - def get_output_value(self, field_name): + def get_output_value(self, field_name: str) -> Any: """ Return the collected values parsed using the output processor, for the given field. This method doesn't populate or modify the item at all. @@ -279,11 +337,11 @@ % (field_name, value, type(e).__name__, str(e)) ) from e - def get_collected_values(self, field_name): + def get_collected_values(self, field_name: str) -> List[Any]: """Return the collected values for the given field.""" return self._values.get(field_name, []) - def get_input_processor(self, field_name): + def get_input_processor(self, field_name: str) -> Callable[..., Any]: proc = getattr(self, "%s_in" % field_name, None) if not proc: proc = self._get_item_field_attr( @@ -291,7 +349,7 @@ ) return unbound_method(proc) - def get_output_processor(self, field_name): + def get_output_processor(self, field_name: str) -> Callable[..., Any]: proc = getattr(self, "%s_out" % field_name, None) if not proc: proc = self._get_item_field_attr( @@ -299,11 +357,13 @@ ) return unbound_method(proc) - def _get_item_field_attr(self, field_name, key, default=None): + def _get_item_field_attr( + self, field_name: str, key: Any, default: Any = None + ) -> Any: field_meta = ItemAdapter(self.item).get_field_meta(field_name) return field_meta.get(key, default) - def _process_input_value(self, field_name, value): + def _process_input_value(self, field_name: str, value: Any) -> Any: proc = self.get_input_processor(field_name) _proc = proc proc = wrap_loader_context(proc, self.context) @@ -322,14 +382,21 @@ ) ) from e - def _check_selector_method(self): + def _check_selector_method(self) -> None: if self.selector is None: raise RuntimeError( "To use XPath or CSS selectors, %s " "must be instantiated with a selector" % self.__class__.__name__ ) - def add_xpath(self, field_name, xpath, *processors, re=None, **kw): + def add_xpath( + self, + field_name: Optional[str], + xpath: Union[str, Iterable[str]], + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Self: """ Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a value, which is used to extract a list of strings from the @@ -340,6 +407,9 @@ :param xpath: the XPath to extract data from :type xpath: str + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + Examples:: # HTML snippet: <p class="product-name">Color TV</p> @@ -349,16 +419,33 @@ """ values = self._get_xpathvalues(xpath, **kw) - self.add_value(field_name, values, *processors, re=re, **kw) + return self.add_value(field_name, values, *processors, re=re, **kw) - def replace_xpath(self, field_name, xpath, *processors, re=None, **kw): + def replace_xpath( + self, + field_name: Optional[str], + xpath: Union[str, Iterable[str]], + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Self: """ Similar to :meth:`add_xpath` but replaces collected data instead of adding it. + + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + """ values = self._get_xpathvalues(xpath, **kw) - self.replace_value(field_name, values, *processors, re=re, **kw) + return self.replace_value(field_name, values, *processors, re=re, **kw) - def get_xpath(self, xpath, *processors, re=None, **kw): + def get_xpath( + self, + xpath: Union[str, Iterable[str]], + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Any: """ Similar to :meth:`ItemLoader.get_value` but receives an XPath instead of a value, which is used to extract a list of unicode strings from the @@ -369,7 +456,7 @@ :param re: a regular expression to use for extracting data from the selected XPath region - :type re: str or typing.Pattern + :type re: str or typing.Pattern[str] Examples:: @@ -382,12 +469,22 @@ values = self._get_xpathvalues(xpath, **kw) return self.get_value(values, *processors, re=re, **kw) - def _get_xpathvalues(self, xpaths, **kw): + def _get_xpathvalues( + self, xpaths: Union[str, Iterable[str]], **kw: Any + ) -> List[Any]: self._check_selector_method() + assert self.selector xpaths = arg_to_iter(xpaths) return flatten(self.selector.xpath(xpath, **kw).getall() for xpath in xpaths) - def add_css(self, field_name, css, *processors, re=None, **kw): + def add_css( + self, + field_name: Optional[str], + css: Union[str, Iterable[str]], + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Self: """ Similar to :meth:`ItemLoader.add_value` but receives a CSS selector instead of a value, which is used to extract a list of unicode strings @@ -398,24 +495,45 @@ :param css: the CSS selector to extract data from :type css: str + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + Examples:: # HTML snippet: <p class="product-name">Color TV</p> loader.add_css('name', 'p.product-name') # HTML snippet: <p id="price">the price is $1200</p> loader.add_css('price', 'p#price', re='the price is (.*)') + """ values = self._get_cssvalues(css) - self.add_value(field_name, values, *processors, re=re, **kw) + return self.add_value(field_name, values, *processors, re=re, **kw) - def replace_css(self, field_name, css, *processors, re=None, **kw): + def replace_css( + self, + field_name: Optional[str], + css: Union[str, Iterable[str]], + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Self: """ Similar to :meth:`add_css` but replaces collected data instead of adding it. + + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + """ values = self._get_cssvalues(css) - self.replace_value(field_name, values, *processors, re=re, **kw) + return self.replace_value(field_name, values, *processors, re=re, **kw) - def get_css(self, css, *processors, re=None, **kw): + def get_css( + self, + css: Union[str, Iterable[str]], + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Any: """ Similar to :meth:`ItemLoader.get_value` but receives a CSS selector instead of a value, which is used to extract a list of unicode strings @@ -426,7 +544,7 @@ :param re: a regular expression to use for extracting data from the selected CSS region - :type re: str or typing.Pattern + :type re: str or typing.Pattern[str] Examples:: @@ -438,12 +556,20 @@ values = self._get_cssvalues(css) return self.get_value(values, *processors, re=re, **kw) - def _get_cssvalues(self, csss): + def _get_cssvalues(self, csss: Union[str, Iterable[str]]) -> List[Any]: self._check_selector_method() + assert self.selector csss = arg_to_iter(csss) return flatten(self.selector.css(css).getall() for css in csss) - def add_jmes(self, field_name, jmes, *processors, re=None, **kw): + def add_jmes( + self, + field_name: Optional[str], + jmes: str, + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Self: """ Similar to :meth:`ItemLoader.add_value` but receives a JMESPath selector instead of a value, which is used to extract a list of unicode strings @@ -454,6 +580,9 @@ :param jmes: the JMESPath selector to extract data from :type jmes: str + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + Examples:: # HTML snippet: {"name": "Color TV"} @@ -462,16 +591,32 @@ loader.add_jmes('price', TakeFirst(), re='the price is (.*)') """ values = self._get_jmesvalues(jmes) - self.add_value(field_name, values, *processors, re=re, **kw) + return self.add_value(field_name, values, *processors, re=re, **kw) - def replace_jmes(self, field_name, jmes, *processors, re=None, **kw): + def replace_jmes( + self, + field_name: Optional[str], + jmes: Union[str, Iterable[str]], + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Self: """ Similar to :meth:`add_jmes` but replaces collected data instead of adding it. + + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader """ values = self._get_jmesvalues(jmes) - self.replace_value(field_name, values, *processors, re=re, **kw) + return self.replace_value(field_name, values, *processors, re=re, **kw) - def get_jmes(self, jmes, *processors, re=None, **kw): + def get_jmes( + self, + jmes: Union[str, Iterable[str]], + *processors: Callable[..., Any], + re: Union[str, Pattern[str], None] = None, + **kw: Any, + ) -> Any: """ Similar to :meth:`ItemLoader.get_value` but receives a JMESPath selector instead of a value, which is used to extract a list of unicode strings @@ -494,8 +639,9 @@ values = self._get_jmesvalues(jmes) return self.get_value(values, *processors, re=re, **kw) - def _get_jmesvalues(self, jmess): + def _get_jmesvalues(self, jmess: Union[str, Iterable[str]]) -> List[Any]: self._check_selector_method() + assert self.selector jmess = arg_to_iter(jmess) if not hasattr(self.selector, "jmespath"): raise AttributeError( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/itemloaders/common.py new/itemloaders-1.3.0/itemloaders/common.py --- old/itemloaders-1.2.0/itemloaders/common.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/itemloaders/common.py 2024-05-30 13:05:17.000000000 +0200 @@ -1,11 +1,14 @@ """Common functions used in Item Loaders code""" from functools import partial +from typing import Any, Callable, MutableMapping from itemloaders.utils import get_func_args -def wrap_loader_context(function, context): +def wrap_loader_context( + function: Callable[..., Any], context: MutableMapping[str, Any] +) -> Callable[..., Any]: """Wrap functions that receive loader_context to contain the context "pre-loaded" and expose a interface that receives only one argument """ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/itemloaders/processors.py new/itemloaders-1.3.0/itemloaders/processors.py --- old/itemloaders-1.2.0/itemloaders/processors.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/itemloaders/processors.py 2024-05-30 13:05:17.000000000 +0200 @@ -5,6 +5,7 @@ """ from collections import ChainMap +from typing import Any, Callable, Iterable, List, MutableMapping, Optional from itemloaders.common import wrap_loader_context from itemloaders.utils import arg_to_iter @@ -54,19 +55,22 @@ .. _`parsel selectors`: https://parsel.readthedocs.io/en/latest/parsel.html#parsel.selector.Selector... """ # noqa - def __init__(self, *functions, **default_loader_context): + def __init__(self, *functions: Callable[..., Any], **default_loader_context: Any): self.functions = functions self.default_loader_context = default_loader_context - def __call__(self, value, loader_context=None): + def __call__( + self, value: Any, loader_context: Optional[MutableMapping[str, Any]] = None + ) -> Iterable[Any]: values = arg_to_iter(value) + context: MutableMapping[str, Any] if loader_context: context = ChainMap(loader_context, self.default_loader_context) else: context = self.default_loader_context wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions] for func in wrapped_funcs: - next_values = [] + next_values: List[Any] = [] for v in values: try: next_values += arg_to_iter(func(v)) @@ -109,12 +113,15 @@ <itemloaders.ItemLoader.context>` attribute. """ - def __init__(self, *functions, **default_loader_context): + def __init__(self, *functions: Callable[..., Any], **default_loader_context: Any): self.functions = functions self.stop_on_none = default_loader_context.get("stop_on_none", True) self.default_loader_context = default_loader_context - def __call__(self, value, loader_context=None): + def __call__( + self, value: Any, loader_context: Optional[MutableMapping[str, Any]] = None + ) -> Any: + context: MutableMapping[str, Any] if loader_context: context = ChainMap(loader_context, self.default_loader_context) else: @@ -148,7 +155,7 @@ 'one' """ - def __call__(self, values): + def __call__(self, values: Any) -> Any: for value in values: if value is not None and value != "": return value @@ -168,7 +175,7 @@ ['one', 'two', 'three'] """ - def __call__(self, values): + def __call__(self, values: Any) -> Any: return values @@ -198,13 +205,15 @@ ['bar'] """ - def __init__(self, json_path): - self.json_path = json_path - import jmespath + def __init__(self, json_path: str): + self.json_path: str = json_path + import jmespath.parser + + self.compiled_path: jmespath.parser.ParsedResult = jmespath.compile( + self.json_path + ) - self.compiled_path = jmespath.compile(self.json_path) - - def __call__(self, value): + def __call__(self, value: Any) -> Any: """Query value for the jmespath query and return answer :param value: a data structure (dict, list) to extract from :return: Element extracted according to jmespath query @@ -231,8 +240,8 @@ 'one<br>two<br>three' """ - def __init__(self, separator=" "): + def __init__(self, separator: str = " "): self.separator = separator - def __call__(self, values): + def __call__(self, values: Any) -> str: return self.separator.join(values) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/itemloaders/utils.py new/itemloaders-1.3.0/itemloaders/utils.py --- old/itemloaders-1.2.0/itemloaders/utils.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/itemloaders/utils.py 2024-05-30 13:05:17.000000000 +0200 @@ -5,10 +5,10 @@ import inspect from functools import partial -from typing import Generator +from typing import Any, Callable, Generator, Iterable, List -def arg_to_iter(arg): +def arg_to_iter(arg: Any) -> Iterable[Any]: """Return an iterable based on *arg*. If *arg* is a list, a tuple or a generator, it will be returned as is. @@ -25,12 +25,12 @@ return [arg] -def get_func_args(func, stripself=False): +def get_func_args(func: Callable[..., Any], stripself: bool = False) -> List[str]: """Return the argument name list of a callable object""" if not callable(func): raise TypeError(f"func must be callable, got {type(func).__name__!r}") - args = [] + args: List[str] = [] try: sig = inspect.signature(func) except ValueError: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/setup.cfg new/itemloaders-1.3.0/setup.cfg --- old/itemloaders-1.2.0/setup.cfg 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/setup.cfg 2024-05-30 13:05:17.000000000 +0200 @@ -1,8 +1,15 @@ [flake8] -ignore = E266, E501, W503 +ignore = E266, E501, E704, W503 max-line-length = 100 select = B,C,E,F,W,T4,B9 exclude = .git,__pycache__,.venv [isort] profile = black + +[mypy] + +[mypy-tests.*] +# Allow test functions to be untyped +allow_untyped_defs = true +check_untyped_defs = true diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/setup.py new/itemloaders-1.3.0/setup.py --- old/itemloaders-1.2.0/setup.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/setup.py 2024-05-30 13:05:17.000000000 +0200 @@ -5,7 +5,7 @@ setup( name="itemloaders", - version="1.2.0", + version="1.3.0", url="https://github.com/scrapy/itemloaders", project_urls={ "Documentation": "https://itemloaders.readthedocs.io/", @@ -18,6 +18,9 @@ author_email="opensource@zyte.com", license="BSD", packages=find_packages(exclude=("tests", "tests.*")), + package_data={ + "itemadapter": ["py.typed"], + }, include_package_data=True, zip_safe=False, classifiers=[ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/tests/test_base_loader.py new/itemloaders-1.3.0/tests/test_base_loader.py --- old/itemloaders-1.2.0/tests/test_base_loader.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/tests/test_base_loader.py 2024-05-30 13:05:17.000000000 +0200 @@ -300,17 +300,17 @@ il.add_value("name", ["mar", "ta"]) self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"]) - class TakeFirstItemLoader(CustomItemLoader): + class TakeFirstItemLoader1(CustomItemLoader): name_out = Join() - il = TakeFirstItemLoader() + il = TakeFirstItemLoader1() il.add_value("name", ["mar", "ta"]) self.assertEqual(il.get_output_value("name"), "Mar Ta") - class TakeFirstItemLoader(CustomItemLoader): + class TakeFirstItemLoader2(CustomItemLoader): name_out = Join("<br>") - il = TakeFirstItemLoader() + il = TakeFirstItemLoader2() il.add_value("name", ["mar", "ta"]) self.assertEqual(il.get_output_value("name"), "Mar<br>Ta") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/tests/test_loader_initialization.py new/itemloaders-1.3.0/tests/test_loader_initialization.py --- old/itemloaders-1.2.0/tests/test_loader_initialization.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/tests/test_loader_initialization.py 2024-05-30 13:05:17.000000000 +0200 @@ -1,12 +1,21 @@ import unittest +from typing import Any, Protocol from itemloaders import ItemLoader +class InitializationTestProtocol(Protocol): + item_class: Any + + def assertEqual(self, first: Any, second: Any, msg: Any = ...) -> None: ... + + def assertIsInstance(self, obj: object, cls: type, msg: Any = None) -> None: ... + + class InitializationTestMixin: - item_class = None + item_class: Any = None - def test_keep_single_value(self): + def test_keep_single_value(self: InitializationTestProtocol) -> None: """Loaded item should contain values from the initial item""" input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) @@ -14,7 +23,7 @@ self.assertIsInstance(loaded_item, self.item_class) self.assertEqual(dict(loaded_item), {"name": ["foo"]}) - def test_keep_list(self): + def test_keep_list(self: InitializationTestProtocol) -> None: """Loaded item should contain values from the initial item""" input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) @@ -22,7 +31,9 @@ self.assertIsInstance(loaded_item, self.item_class) self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]}) - def test_add_value_singlevalue_singlevalue(self): + def test_add_value_singlevalue_singlevalue( + self: InitializationTestProtocol, + ) -> None: """Values added after initialization should be appended""" input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) @@ -31,7 +42,7 @@ self.assertIsInstance(loaded_item, self.item_class) self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]}) - def test_add_value_singlevalue_list(self): + def test_add_value_singlevalue_list(self: InitializationTestProtocol) -> None: """Values added after initialization should be appended""" input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) @@ -40,7 +51,7 @@ self.assertIsInstance(loaded_item, self.item_class) self.assertEqual(dict(loaded_item), {"name": ["foo", "item", "loader"]}) - def test_add_value_list_singlevalue(self): + def test_add_value_list_singlevalue(self: InitializationTestProtocol) -> None: """Values added after initialization should be appended""" input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) @@ -49,7 +60,7 @@ self.assertIsInstance(loaded_item, self.item_class) self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "qwerty"]}) - def test_add_value_list_list(self): + def test_add_value_list_list(self: InitializationTestProtocol) -> None: """Values added after initialization should be appended""" input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) @@ -58,7 +69,7 @@ self.assertIsInstance(loaded_item, self.item_class) self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "item", "loader"]}) - def test_get_output_value_singlevalue(self): + def test_get_output_value_singlevalue(self: InitializationTestProtocol) -> None: """Getting output value must not remove value from item""" input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) @@ -67,7 +78,7 @@ self.assertIsInstance(loaded_item, self.item_class) self.assertEqual(loaded_item, {"name": ["foo"]}) - def test_get_output_value_list(self): + def test_get_output_value_list(self: InitializationTestProtocol) -> None: """Getting output value must not remove value from item""" input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) @@ -76,13 +87,13 @@ self.assertIsInstance(loaded_item, self.item_class) self.assertEqual(loaded_item, {"name": ["foo", "bar"]}) - def test_values_single(self): + def test_values_single(self: InitializationTestProtocol) -> None: """Values from initial item must be added to loader._values""" input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) self.assertEqual(il._values.get("name"), ["foo"]) - def test_values_list(self): + def test_values_list(self: InitializationTestProtocol) -> None: """Values from initial item must be added to loader._values""" input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/tests/test_nested_items.py new/itemloaders-1.3.0/tests/test_nested_items.py --- old/itemloaders-1.2.0/tests/test_nested_items.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/tests/test_nested_items.py 2024-05-30 13:05:17.000000000 +0200 @@ -1,4 +1,5 @@ import unittest +from typing import Any from itemloaders import ItemLoader @@ -6,7 +7,7 @@ class NestedItemTest(unittest.TestCase): """Test that adding items as values works as expected.""" - def _test_item(self, item): + def _test_item(self, item: Any) -> None: il = ItemLoader() il.add_value("item_list", item) self.assertEqual(il.load_item(), {"item_list": [item]}) @@ -44,7 +45,8 @@ except ImportError: self.skipTest("Cannot import Field or Item from scrapy") - class TestItem(Item): + # needs py.typed in Scrapy + class TestItem(Item): # type: ignore[misc] foo = Field() self._test_item(TestItem(foo="bar")) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/tests/test_nested_loader.py new/itemloaders-1.3.0/tests/test_nested_loader.py --- old/itemloaders-1.2.0/tests/test_nested_loader.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/tests/test_nested_loader.py 2024-05-30 13:05:17.000000000 +0200 @@ -28,6 +28,7 @@ nl = loader.nested_xpath("//header") nl.add_xpath("name", "div/text()") nl.add_css("name_div", "#id") + assert nl.selector nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall()) self.assertEqual(loader.get_output_value("name"), ["marta"]) @@ -49,6 +50,7 @@ nl = loader.nested_css("header") nl.add_xpath("name", "div/text()") nl.add_css("name_div", "#id") + assert nl.selector nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall()) self.assertEqual(loader.get_output_value("name"), ["marta"]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/tests/test_output_processor.py new/itemloaders-1.3.0/tests/test_output_processor.py --- old/itemloaders-1.2.0/tests/test_output_processor.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/tests/test_output_processor.py 2024-05-30 13:05:17.000000000 +0200 @@ -1,4 +1,5 @@ import unittest +from typing import Any, Dict from itemloaders import ItemLoader from itemloaders.processors import Compose, Identity, TakeFirst @@ -6,7 +7,7 @@ class TestOutputProcessorDict(unittest.TestCase): def test_output_processor(self): - class TempDict(dict): + class TempDict(Dict[str, Any]): def __init__(self, *args, **kwargs): super(TempDict, self).__init__(self, *args, **kwargs) self.setdefault("temp", 0.3) @@ -28,7 +29,7 @@ default_input_processor = Identity() default_output_processor = Compose(TakeFirst()) - item = {} + item: Dict[str, Any] = {} item.setdefault("temp", 0.3) loader = TempLoader(item=item) item = loader.load_item() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/tests/test_selector_loader.py new/itemloaders-1.3.0/tests/test_selector_loader.py --- old/itemloaders-1.2.0/tests/test_selector_loader.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/tests/test_selector_loader.py 2024-05-30 13:05:17.000000000 +0200 @@ -273,3 +273,19 @@ self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) loader.replace_jmes("url", "website.url", re=r"http://www\.(.+)") self.assertEqual(loader.get_output_value("url"), ["scrapy.org"]) + + def test_fluent_interface(self): + loader = ItemLoader(selector=self.selector) + item = ( + loader.add_xpath("name", "//body/text()") + .replace_xpath("name", "//div/text()") + .add_css("description", "div::text") + .replace_css("description", "p::text") + .add_value("url", "http://example.com") + .replace_value("url", "http://foo") + .load_item() + ) + self.assertEqual( + item, + {"name": ["marta"], "description": ["paragraph"], "url": ["http://foo"]}, + ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/tests/test_utils_python.py new/itemloaders-1.3.0/tests/test_utils_python.py --- old/itemloaders-1.2.0/tests/test_utils_python.py 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/tests/test_utils_python.py 2024-05-30 13:05:17.000000000 +0200 @@ -2,6 +2,7 @@ import operator import platform import unittest +from typing import Any from itemloaders.utils import get_func_args @@ -18,7 +19,7 @@ pass class A: - def __init__(self, a, b, c): + def __init__(self, a: Any, b: Any, c: Any): pass def method(self, a, b, c): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/itemloaders-1.2.0/tox.ini new/itemloaders-1.3.0/tox.ini --- old/itemloaders-1.2.0/tox.ini 2024-04-18 11:51:35.000000000 +0200 +++ new/itemloaders-1.3.0/tox.ini 2024-05-30 13:05:17.000000000 +0200 @@ -45,3 +45,12 @@ commands = python -m build --sdist twine check dist/* + +[testenv:typing] +basepython = python3 +deps = + mypy==1.10.0 + types-attrs==19.1.0 + types-jmespath==1.0.2.20240106 +commands = + mypy --strict --ignore-missing-imports --implicit-reexport {posargs:itemloaders tests}
participants (1)
-
Source-Sync