Hello community, here is the log from the commit of package python-tldextract for openSUSE:Factory checked in at 2020-11-29 12:31:16 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-tldextract (Old) and /work/SRC/openSUSE:Factory/.python-tldextract.new.5913 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "python-tldextract" Sun Nov 29 12:31:16 2020 rev:11 rq:851574 version:3.1.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-tldextract/python-tldextract.changes 2020-10-26 16:22:07.467215571 +0100 +++ /work/SRC/openSUSE:Factory/.python-tldextract.new.5913/python-tldextract.changes 2020-11-29 12:32:49.770269270 +0100 @@ -1,0 +2,11 @@ +Sat Nov 28 20:13:57 UTC 2020 - Mia Herkt <mia@0x0.st> + +- Update to 3.1.0: + * Features + + Prefer to cache in XDG cache directory in user folder, + vs. in Python install folder + (https://github.com/john-kurkowski/tldextract/issues/213) + + Fix `AttributeError` on `--update` + (https://github.com/john-kurkowski/tldextract/issues/215) + +------------------------------------------------------------------- Old: ---- tldextract-3.0.2.tar.gz New: ---- tldextract-3.1.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-tldextract.spec ++++++ --- /var/tmp/diff_new_pack.rdcRbz/_old 2020-11-29 12:32:56.978276560 +0100 +++ /var/tmp/diff_new_pack.rdcRbz/_new 2020-11-29 12:32:56.982276565 +0100 @@ -19,7 +19,7 @@ %define skip_python2 1 %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-tldextract -Version: 3.0.2 +Version: 3.1.0 Release: 0 Summary: Python module to separate the TLD of a URL License: BSD-3-Clause ++++++ tldextract-3.0.2.tar.gz -> tldextract-3.1.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tldextract-3.0.2/CHANGELOG.md 
new/tldextract-3.1.0/CHANGELOG.md --- old/tldextract-3.0.2/CHANGELOG.md 2020-10-25 05:07:32.000000000 +0100 +++ new/tldextract-3.1.0/CHANGELOG.md 2020-11-22 21:38:24.000000000 +0100 @@ -3,6 +3,13 @@ After upgrading, update your cache file by deleting it or via `tldextract --update`. +## 3.1.0 (2020-11-22) + +* Features + * Prefer to cache in XDG cache directory in user folder, vs. in Python install folder ([#213](https://github.com/john-kurkowski/tldextract/issues/213)) +* Bugfixes + * Fix `AttributeError` on `--update` ([#215](https://github.com/john-kurkowski/tldextract/issues/215)) + ## 3.0.2 (2020-10-24) * Bugfixes diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tldextract-3.0.2/PKG-INFO new/tldextract-3.1.0/PKG-INFO --- old/tldextract-3.0.2/PKG-INFO 2020-10-25 05:08:44.401558900 +0100 +++ new/tldextract-3.1.0/PKG-INFO 2020-11-22 21:38:49.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: tldextract -Version: 3.0.2 +Version: 3.1.0 Summary: Accurately separate the TLD from the registered domain and subdomains of a URL, using the Public Suffix List. By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well. Home-page: https://github.com/john-kurkowski/tldextract Author: John Kurkowski diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tldextract-3.0.2/README.md new/tldextract-3.1.0/README.md --- old/tldextract-3.0.2/README.md 2020-10-24 04:57:22.000000000 +0200 +++ new/tldextract-3.1.0/README.md 2020-11-22 17:28:50.000000000 +0100 @@ -105,16 +105,14 @@ ### Note About Caching Beware when first running the module, it updates its TLD list with a live HTTP -request. This updated TLD set is cached indefinitely in -`/path/to/tldextract/.tld_set`. +request. This updated TLD set is usually cached indefinitely in `$HOME/.cache/python-tldextract`. 
+To control the cache's location, set TLDEXTRACT_CACHE environment variable or set the +cache_dir path in TLDExtract initialization. (Arguably runtime bootstrapping like that shouldn't be the default behavior, like for production systems. But I want you to have the latest TLDs, especially when I haven't kept this code up to date.) -To avoid this fetch or control the cache's location, use your own extract -callable by setting TLDEXTRACT_CACHE environment variable or by setting the -cache_dir path in TLDExtract initialization. ```python # extract callable that falls back to the included TLD snapshot, no live HTTP fetching diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tldextract-3.0.2/tests/test_cache.py new/tldextract-3.1.0/tests/test_cache.py --- old/tldextract-3.0.2/tests/test_cache.py 2020-10-24 06:07:50.000000000 +0200 +++ new/tldextract-3.1.0/tests/test_cache.py 2020-11-22 17:28:50.000000000 +0100 @@ -1,6 +1,12 @@ """Test the caching functionality""" +import os.path +import sys +import types + import pytest -from tldextract.cache import DiskCache + +import tldextract.cache +from tldextract.cache import DiskCache, get_pkg_unique_identifier, get_cache_dir def test_disk_cache(tmpdir): @@ -15,3 +21,45 @@ cache.set("testing", "foo", "baz") assert cache.get("testing", "foo") == "baz" + + +def test_get_pkg_unique_identifier(monkeypatch): + monkeypatch.setattr(sys, "version_info", (3, 8, 1, "final", 0)) + monkeypatch.setattr(sys, "prefix", "/home/john/.pyenv/versions/myvirtualenv") + + mock_version_module = types.ModuleType('tldextract._version', 'mocked module') + mock_version_module.version = "1.2.3" + monkeypatch.setitem(sys.modules, "tldextract._version", mock_version_module) + + assert get_pkg_unique_identifier() == "3.8.1.final__myvirtualenv__f01a7b__tldextract-1.2.3" + + +def test_get_cache_dir(monkeypatch): + pkg_identifier = "3.8.1.final__myvirtualenv__f01a7b__tldextract-1.2.3" + 
monkeypatch.setattr(tldextract.cache, "get_pkg_unique_identifier", lambda: pkg_identifier) + + # with no HOME set, fallback to attempting to use package directory itself + monkeypatch.delenv("HOME", raising=False) + monkeypatch.delenv("XDG_CACHE_HOME", raising=False) + monkeypatch.delenv("TLDEXTRACT_CACHE", raising=False) + assert get_cache_dir().endswith("tldextract/.suffix_cache/") + + # with home set, but not anything else specified, use XDG_CACHE_HOME default + monkeypatch.setenv("HOME", "/home/john") + monkeypatch.delenv("XDG_CACHE_HOME", raising=False) + monkeypatch.delenv("TLDEXTRACT_CACHE", raising=False) + assert get_cache_dir() == os.path.join("/home/john", ".cache/python-tldextract", pkg_identifier) + + # if XDG_CACHE_HOME is set, use it + monkeypatch.setenv("HOME", "/home/john") + monkeypatch.setenv("XDG_CACHE_HOME", "/my/alt/cache") + monkeypatch.delenv("TLDEXTRACT_CACHE", raising=False) + + assert get_cache_dir() == os.path.join("/my/alt/cache/python-tldextract", pkg_identifier) + + # if TLDEXTRACT_CACHE is set, use it + monkeypatch.setenv("HOME", "/home/john") + monkeypatch.setenv("XDG_CACHE_HOME", "/my/alt/cache") + monkeypatch.setenv("TLDEXTRACT_CACHE", "/alt-tld-cache") + + assert get_cache_dir() == "/alt-tld-cache" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tldextract-3.0.2/tldextract/_version.py new/tldextract-3.1.0/tldextract/_version.py --- old/tldextract-3.0.2/tldextract/_version.py 2020-10-25 05:08:44.000000000 +0100 +++ new/tldextract-3.1.0/tldextract/_version.py 2020-11-22 21:38:49.000000000 +0100 @@ -1,4 +1,4 @@ # coding: utf-8 # file generated by setuptools_scm # don't change, don't track in version control -version = '3.0.2' +version = '3.1.0' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tldextract-3.0.2/tldextract/cache.py new/tldextract-3.1.0/tldextract/cache.py --- old/tldextract-3.0.2/tldextract/cache.py 2020-10-25 05:05:50.000000000 
+0100 +++ new/tldextract-3.1.0/tldextract/cache.py 2020-11-22 17:28:50.000000000 +0100 @@ -1,9 +1,11 @@ """Helpers """ import errno +import hashlib import json import logging import os import os.path +import sys from hashlib import md5 from filelock import FileLock @@ -13,6 +15,59 @@ _DID_LOG_UNABLE_TO_CACHE = False +def get_pkg_unique_identifier(): + """ + Generate an identifier unique to the python version, tldextract version, and python instance + + This will prevent interference between virtualenvs and issues that might arise when installing + a new version of tldextract + """ + try: + # pylint: disable=import-outside-toplevel + from tldextract._version import version + except ImportError: + version = "dev" + + tldextract_version = "tldextract-" + version + python_env_name = os.path.basename(sys.prefix) + # just to handle the edge case of two identically named python environments + python_binary_path_short_hash = hashlib.md5(sys.prefix.encode("utf-8")).hexdigest()[:6] + python_version = ".".join([str(v) for v in sys.version_info[:-1]]) + identifier_parts = [ + python_version, + python_env_name, + python_binary_path_short_hash, + tldextract_version + ] + pkg_identifier = "__".join(identifier_parts) + + return pkg_identifier + + +def get_cache_dir(): + """ + Get a cache dir that we have permission to write to + + Try to follow the XDG standard, but if that doesn't work fallback to the package directory + http://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html + """ + cache_dir = os.environ.get("TLDEXTRACT_CACHE", None) + if cache_dir is not None: + return cache_dir + + xdg_cache_home = os.getenv("XDG_CACHE_HOME", None) + if xdg_cache_home is None: + user_home = os.getenv("HOME", None) + if user_home: + xdg_cache_home = os.path.join(user_home, ".cache") + + if xdg_cache_home is not None: + return os.path.join(xdg_cache_home, "python-tldextract", get_pkg_unique_identifier()) + + # fallback to trying to use package directory itself + return 
os.path.join(os.path.dirname(__file__), ".suffix_cache/") + + class DiskCache: """Disk _cache that only works for jsonable values""" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tldextract-3.0.2/tldextract/cli.py new/tldextract-3.1.0/tldextract/cli.py --- old/tldextract-3.0.2/tldextract/cli.py 2020-10-25 04:56:29.000000000 +0100 +++ new/tldextract-3.1.0/tldextract/cli.py 2020-11-22 21:21:42.000000000 +0100 @@ -1,38 +1,56 @@ -'''tldextract CLI''' +"""tldextract CLI""" import argparse import logging import sys -from .tldextract import TLDExtract from ._version import version as __version__ +from .tldextract import TLDExtract def main(): - '''tldextract CLI main command.''' + """tldextract CLI main command.""" logging.basicConfig() parser = argparse.ArgumentParser( - prog='tldextract', - description='Parse hostname from a url or fqdn') + prog="tldextract", description="Parse hostname from a url or fqdn" + ) - parser.add_argument('--version', action='version', version='%(prog)s ' + __version__) - parser.add_argument('input', metavar='fqdn|url', - type=str, nargs='*', help='fqdn or url') - - parser.add_argument('-u', '--update', default=False, action='store_true', - help='force fetch the latest TLD definitions') - parser.add_argument('-c', '--cache_dir', - help='use an alternate TLD definition caching folder') - parser.add_argument('-p', '--private_domains', default=False, action='store_true', - help='Include private domains') + parser.add_argument( + "--version", action="version", version="%(prog)s " + __version__ + ) + parser.add_argument( + "input", metavar="fqdn|url", type=str, nargs="*", help="fqdn or url" + ) + + parser.add_argument( + "-u", + "--update", + default=False, + action="store_true", + help="force fetch the latest TLD definitions", + ) + parser.add_argument( + "-c", "--cache_dir", help="use an alternate TLD definition caching folder" + ) + parser.add_argument( + "-p", + "--private_domains", + 
default=False, + action="store_true", + help="Include private domains", + ) args = parser.parse_args() - tld_extract = TLDExtract(include_psl_private_domains=args.private_domains) + obj_kwargs = { + "include_psl_private_domains": args.private_domains, + } if args.cache_dir: - tld_extract.cache_file = args.cache_file + obj_kwargs["cache_dir"] = args.cache_dir + + tld_extract = TLDExtract(**obj_kwargs) if args.update: tld_extract.update(True) @@ -42,4 +60,4 @@ return for i in args.input: - print(' '.join(tld_extract(i))) + print(" ".join(tld_extract(i))) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tldextract-3.0.2/tldextract/tldextract.py new/tldextract-3.1.0/tldextract/tldextract.py --- old/tldextract-3.0.2/tldextract/tldextract.py 2020-10-25 05:05:50.000000000 +0100 +++ new/tldextract-3.1.0/tldextract/tldextract.py 2020-11-22 17:28:50.000000000 +0100 @@ -56,14 +56,13 @@ import idna -from .cache import DiskCache +from .cache import DiskCache, get_cache_dir from .remote import IP_RE, SCHEME_RE, looks_like_ip from .suffix_list import get_suffix_lists LOG = logging.getLogger("tldextract") -CACHE_DIR_DEFAULT = os.path.join(os.path.dirname(__file__), ".suffix_cache/") -CACHE_DIR = os.path.expanduser(os.environ.get("TLDEXTRACT_CACHE", CACHE_DIR_DEFAULT)) + CACHE_TIMEOUT = os.environ.get("TLDEXTRACT_CACHE_TIMEOUT") PUBLIC_SUFFIX_LIST_URLS = ( @@ -131,7 +130,7 @@ # TODO: Agreed with Pylint: too-many-arguments def __init__( # pylint: disable=too-many-arguments self, - cache_dir=CACHE_DIR, + cache_dir=get_cache_dir(), suffix_list_urls=PUBLIC_SUFFIX_LIST_URLS, fallback_to_snapshot=True, include_psl_private_domains=False, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tldextract-3.0.2/tldextract.egg-info/PKG-INFO new/tldextract-3.1.0/tldextract.egg-info/PKG-INFO --- old/tldextract-3.0.2/tldextract.egg-info/PKG-INFO 2020-10-25 05:08:44.000000000 +0100 +++ 
new/tldextract-3.1.0/tldextract.egg-info/PKG-INFO 2020-11-22 21:38:49.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: tldextract -Version: 3.0.2 +Version: 3.1.0 Summary: Accurately separate the TLD from the registered domain and subdomains of a URL, using the Public Suffix List. By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well. Home-page: https://github.com/john-kurkowski/tldextract Author: John Kurkowski