![](https://seccdn.libravatar.org/avatar/e2145bc5cf53dda95c308a3c75e8fef3.jpg?s=120&d=mm&r=g)
Hello community, here is the log from the commit of package python-fsspec for openSUSE:Factory checked in at 2019-07-31 14:29:36 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-fsspec (Old) and /work/SRC/openSUSE:Factory/.python-fsspec.new.4126 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "python-fsspec" Wed Jul 31 14:29:36 2019 rev:2 rq:719840 version:0.3.6 Changes: -------- --- /work/SRC/openSUSE:Factory/python-fsspec/python-fsspec.changes 2019-07-23 22:43:06.662748034 +0200 +++ /work/SRC/openSUSE:Factory/.python-fsspec.new.4126/python-fsspec.changes 2019-07-31 14:29:37.218070632 +0200 @@ -1,0 +2,6 @@ +Tue Jul 30 14:02:33 UTC 2019 - pgajdos@suse.com + +- version update to 0.3.6 + * no upstream change log found + +------------------------------------------------------------------- Old: ---- filesystem_spec-0.3.3.tar.gz New: ---- filesystem_spec-0.3.6.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-fsspec.spec ++++++ --- /var/tmp/diff_new_pack.peHjWC/_old 2019-07-31 14:29:37.746070204 +0200 +++ /var/tmp/diff_new_pack.peHjWC/_new 2019-07-31 14:29:37.746070204 +0200 @@ -12,18 +12,19 @@ # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. -# Please submit bugfixes or comments via http://bugs.opensuse.org/ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# %{?!python_module:%define python_module() python-%{**} python3-%{**}} %define skip_python2 1 Name: python-fsspec -Version: 0.3.3 +Version: 0.3.6 Release: 0 -License: BSD-3-Clause Summary: Filesystem specification package -Url: http://github.com/intake/filesystem_spec +License: BSD-3-Clause Group: Development/Languages/Python +Url: http://github.com/intake/filesystem_spec Source: https://codeload.github.com/intake/filesystem_spec/tar.gz/%{version}#/filesystem_spec-%{version}.tar.gz BuildRequires: %{python_module setuptools} # SECTION test requirements ++++++ filesystem_spec-0.3.3.tar.gz -> filesystem_spec-0.3.6.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/.travis.yml new/filesystem_spec-0.3.6/.travis.yml --- old/filesystem_spec-0.3.3/.travis.yml 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/.travis.yml 2019-07-25 16:18:34.000000000 +0200 @@ -1,4 +1,5 @@ sudo: required +dist: xenial os: - linux services: @@ -25,11 +26,9 @@ install: - source ci/install.sh - - conda install requests paramiko pyftpdlib cloudpickle pyarrow -y -c defaults -c conda-forge - - pip install hadoop-test-cluster==0.1.0 script: - - py.test -vv fsspec + - py.test --cov=fsspec -v - language: generic env: TEST=S3FS @@ -58,7 +57,7 @@ - pip install -e . --no-deps script: - - GCSFS_RECORD_MODE=none py.test -vv gcsfs + - GCSFS_RECORD_MODE=none py.test -vv gcsfs diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/README.md new/filesystem_spec-0.3.6/README.md --- old/filesystem_spec-0.3.3/README.md 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/README.md 2019-07-25 16:18:34.000000000 +0200 @@ -19,7 +19,8 @@ To produce a template or specification for a file-system interface, that specific implementations should follow, so that applications making use of them can rely on a common behaviour and not have to worry about the specific -internal implementation decisions with any given backend. +internal implementation decisions with any given backend. Many such implementations are included in this package, +or in sister projects such as `s3fs` and `gcsfs`. In addition, if this is well-designed, then additional functionality, such as a key-value store or FUSE mounting of the file-system implementation may be available for all implementations "for free". diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/ci/install.sh new/filesystem_spec-0.3.6/ci/install.sh --- old/filesystem_spec-0.3.3/ci/install.sh 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/ci/install.sh 2019-07-25 16:18:34.000000000 +0200 @@ -1,4 +1,7 @@ #!/usr/bin/env bash +# install FUSE +sudo apt-get install libfuse-dev + # Install conda wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh bash miniconda.sh -b -p $HOME/miniconda @@ -7,6 +10,8 @@ conda update conda # Install dependencies -conda create -n test -c conda-forge python=3.7 pip pytest paramiko requests +conda create -n test -c conda-forge python=3.7 pip pytest paramiko requests zstandard python-snappy lz4 distributed \ + dask pyarrow pyftpdlib cloudpickle pyarrow pytest-cov -y -c defaults -c conda-forge +pip install hadoop-test-cluster==0.1.0 fusepy source activate test pip install . --no-deps diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/docs/source/index.rst new/filesystem_spec-0.3.6/docs/source/index.rst --- old/filesystem_spec-0.3.3/docs/source/index.rst 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/docs/source/index.rst 2019-07-25 16:18:34.000000000 +0200 @@ -39,8 +39,18 @@ --------------- This repo contains several file-system implementations, see :ref:`implementations`. However, -the external projects ``s3fs`` and ``gcsfs`` are also developing compatibility with ``fsspec`` and -will eventually depend upon it. +the external projects ``s3fs`` and ``gcsfs`` depend on ``fsspec`` and share the same behaviours. +``Dask`` and ``Intake`` use ``fsspec`` internally for their IO needs. + +The current list of known implementations can be found as follows + +.. code-block:: python + + from fsspec.registry import known_implementations + known_implementations + +These are only imported on request, which may fail if a required dependency is missing. The dictionary +``fsspec.registry`` contains all imported implementations, and can be mutated by user code, if necessary. .. toctree:: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/_version.py new/filesystem_spec-0.3.6/fsspec/_version.py --- old/filesystem_spec-0.3.3/fsspec/_version.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/_version.py 2019-07-25 16:18:34.000000000 +0200 @@ -23,9 +23,9 @@ # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). - git_refnames = " (tag: 0.3.3)" - git_full = "af267857779f8d4f8b02133a8a1433cfaa391737" - git_date = "2019-07-18 08:27:46 -0400" + git_refnames = " (tag: 0.3.6)" + git_full = "da99945ac4a6d3460a2f024a6371e97c2fa16659" + git_date = "2019-07-25 10:18:34 -0400" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/compression.py new/filesystem_spec-0.3.6/fsspec/compression.py --- old/filesystem_spec-0.3.3/fsspec/compression.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/compression.py 2019-07-25 16:18:34.000000000 +0200 @@ -1,9 +1,8 @@ """Helper functions for a standard streaming compression API""" -from __future__ import print_function, division, absolute_import - from bz2 import BZ2File from gzip import GzipFile from zipfile import ZipFile +from fsspec.spec import AbstractBufferedFile def noop_file(file, **kwargs): @@ -12,7 +11,11 @@ def unzip(infile, mode='rb', filename=None, **kwargs): if 'r' not in mode: - raise ValueError("zip only supported in read mode") + filename = filename or "file" + z = ZipFile(infile, mode='w', **kwargs) + fo = z.open(filename, mode='w') + fo.close = lambda closer=fo.close: closer() or z.close() + return fo z = ZipFile(infile) if filename is None: filename = z.namelist()[0] @@ -36,3 +39,64 @@ compr['xz'] = lzmaffi.LZMAFile except ImportError: pass + + +class SnappyFile(AbstractBufferedFile): + + def __init__(self, infile, mode, **kwargs): + import snappy + self.details = {'size': 999999999} # not true, but OK if we don't seek + super().__init__(fs=None, path='snappy', mode=mode.strip('b') + 'b', + **kwargs) + self.infile = infile + if 'r' in mode: + self.codec = snappy.StreamDecompressor() + else: + self.codec = snappy.StreamCompressor() + + def _upload_chunk(self, final=False): + self.buffer.seek(0) + out = self.codec.add_chunk(self.buffer.read()) + self.infile.write(out) + return True + + def seek(self, loc, whence=0): + raise NotImplementedError("SnappyFile is not seekable") + + def seekable(self): + return False + + def _fetch_range(self, start, end): + """Get the specified set of bytes from remote""" + data = self.infile.read(end - start) + return self.codec.decompress(data) + + +try: + import snappy + snappy.compress + compr['snappy'] = SnappyFile + +except (ImportError, NameError): + pass + +try: + import lz4.frame + compr['lz4'] = lz4.frame.open +except ImportError: + pass + +try: + import zstandard as zstd + + def zstandard_file(infile, mode='rb'): + if 'r' in mode: + cctx = zstd.ZstdDecompressor() + return cctx.stream_reader(infile) + else: + cctx = zstd.ZstdCompressor(level=10) + return cctx.stream_writer(infile) + + compr['zstd'] = zstandard_file +except ImportError: + pass diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/core.py new/filesystem_spec-0.3.6/fsspec/core.py --- old/filesystem_spec-0.3.3/fsspec/core.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/core.py 2019-07-25 16:18:34.000000000 +0200 @@ -210,6 +210,7 @@ def split_protocol(urlpath): + """Return protocol, path pair""" urlpath = stringify_path(urlpath) if "://" in urlpath: protocol, path = urlpath.split("://", 1) @@ -219,6 +220,13 @@ return None, urlpath +def strip_protocol(urlpath): + """Return only path part of full URL, according to appropriate backend""" + protocol, _ = split_protocol(urlpath) + cls = get_filesystem_class(protocol) + return cls._strip_protocol(urlpath) + + def expand_paths_if_needed(paths, mode, num, fs, name_function): """Expand paths if they have a ``*`` in them. @@ -289,8 +297,8 @@ raise ValueError("When specifying a list of paths, all paths must " "share the same protocol") cls = get_filesystem_class(protocol) + optionss = list(map(cls._get_kwargs_from_urls, urlpath)) paths = [cls._strip_protocol(u) for u in urlpath] - optionss = list(map(cls._get_kwargs_from_urls, paths)) options = optionss[0] if not all(o == options for o in optionss): raise ValueError("When specifying a list of paths, all paths must " @@ -304,8 +312,8 @@ protocol = protocol or protocols cls = get_filesystem_class(protocol) - path = cls._strip_protocol(urlpath) options = cls._get_kwargs_from_urls(urlpath) + path = cls._strip_protocol(urlpath) update_storage_options(options, storage_options) fs = cls(**options) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/ftp.py new/filesystem_spec-0.3.6/fsspec/implementations/ftp.py --- old/filesystem_spec-0.3.3/fsspec/implementations/ftp.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/ftp.py 2019-07-25 16:18:34.000000000 +0200 @@ -63,7 +63,10 @@ @staticmethod def _get_kwargs_from_urls(urlpath): - return infer_storage_options(urlpath) + out = infer_storage_options(urlpath) + out.pop('path', None) + out.pop('protocol', None) + return out def invalidate_cache(self, path=None): if path is not None: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/hdfs.py new/filesystem_spec-0.3.6/fsspec/implementations/hdfs.py --- old/filesystem_spec-0.3.3/fsspec/implementations/hdfs.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/hdfs.py 2019-07-25 16:18:34.000000000 +0200 @@ -1,4 +1,6 @@ from ..spec import AbstractFileSystem +from ..utils import infer_storage_options +from pyarrow.hdfs import HadoopFileSystem class PyArrowHDFS(AbstractFileSystem): @@ -9,7 +11,7 @@ """ def __init__(self, host="default", port=0, user=None, kerb_ticket=None, - driver='libhdfs', extra_conf=None): + driver='libhdfs', extra_conf=None, **kwargs): """ Parameters @@ -27,8 +29,9 @@ extra_conf: None or dict Passed on to HadoopFileSystem """ - from pyarrow.hdfs import HadoopFileSystem - self.driver = HadoopFileSystem(host=host, port=port, user=user, + AbstractFileSystem.__init__(self, **kwargs) + self.pars = (host, port, user, kerb_ticket, driver, extra_conf) + self.pahdfs = HadoopFileSystem(host=host, port=port, user=user, kerb_ticket=kerb_ticket, driver=driver, extra_conf=extra_conf) @@ -50,19 +53,95 @@ Returns ------- - arrow HdfsFile file-like instance + HDFSFile file-like instance """ if not autocommit: raise NotImplementedError - return self.driver.open(path, mode, block_size, **kwargs) + return HDFSFile(self, path, mode, block_size, **kwargs) + + def __reduce_ex__(self, protocol): + return PyArrowHDFS, self.pars + + def ls(self, path, detail=True): + out = self.pahdfs.ls(path, detail) + if detail: + for p in out: + p['type'] = p['kind'] + return out + + @staticmethod + def _get_kwargs_from_urls(paths): + ops = infer_storage_options(paths) + out = {} + if ops.get('host', None): + out['host'] = ops['host'] + if ops.get('username', None): + out['user'] = ops['username'] + if ops.get('port', None): + out['port'] = ops['port'] + return out + + @classmethod + def _strip_protocol(cls, path): + ops = infer_storage_options(path) + return ops['path'] + + def __getattribute__(self, item): + if item in ['_open', '__init__', '__getattribute__', '__reduce_ex__', + 'open', 'ls', 'makedirs']: + # all the methods defined in this class. Note `open` here, since + # it calls `_open`, but is actually in superclass + return lambda *args, **kw: getattr(PyArrowHDFS, item)( + self, *args, **kw + ) + if item == '__class__': + return PyArrowHDFS + d = object.__getattribute__(self, '__dict__') + pahdfs = d.get('pahdfs', None) # fs is not immediately defined + if pahdfs is not None and item in [ + 'chmod', 'chown', 'user', + 'df', 'disk_usage', 'download', 'driver', 'exists', + 'extra_conf', 'get_capacity', 'get_space_used', 'host', + 'is_open', 'kerb_ticket', 'strip_protocol', + 'mkdir', 'mv', 'port', 'get_capacity', + 'get_space_used', 'df', 'chmod', 'chown', 'disk_usage', + 'download', 'upload', '_get_kwargs_from_urls', + 'read_parquet', 'rm', 'stat', 'upload', + ]: + return getattr(pahdfs, item) + else: + # attributes of the superclass, while target is being set up + return super().__getattribute__(item) + + +class HDFSFile(object): + """Wrapper around arrow's HdfsFile + + Allows seek beyond EOF and (eventually) commit/discard + """ + + def __init__(self, fs, path, mode, block_size, **kwargs): + self.fs = fs + self.path = path + self.mode = mode + self.block_size = block_size + self.fh = fs.pahdfs.open(path, mode, block_size, **kwargs) + if self.fh.readable(): + self.seek_size = self.size() + + def seek(self, loc, whence=0): + if whence == 0 and self.readable(): + loc = min(loc, self.seek_size) + return self.fh.seek(loc, whence) def __getattr__(self, item): - if item in ['chmod', 'chown', - 'df', 'disk_usage', 'download', 'driver', 'exists', - 'extra_conf', 'get_capacity', 'get_space_used', 'host', - 'info', 'is_open', 'isdir', 'isfile', 'kerb_ticket', - 'ls', 'mkdir', 'mv', 'port', - 'read_parquet', 'rm', 'stat', 'upload', - 'user', 'walk']: - return getattr(self.driver, item) + return getattr(self.fh, item) + + def __reduce_ex__(self, protocol): + return HDFSFile, (self.fs, self.path, self.mode, self.block_size) + + def __enter__(self): + return self + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/http.py new/filesystem_spec-0.3.6/fsspec/implementations/http.py --- old/filesystem_spec-0.3.3/fsspec/implementations/http.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/http.py 2019-07-25 16:18:34.000000000 +0200 @@ -222,6 +222,7 @@ r.raise_for_status() out = r.content self.cache = AllBytes(out) + self.size = len(out) def _fetch_range(self, start, end): """Download a block of data diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/memory.py new/filesystem_spec-0.3.6/fsspec/implementations/memory.py --- old/filesystem_spec-0.3.3/fsspec/implementations/memory.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/memory.py 2019-07-25 16:18:34.000000000 +0200 @@ -106,7 +106,8 @@ return m def copy(self, path1, path2, **kwargs): - self.store[path2] = MemoryFile(self.store[path1].getbuffer()) + self.store[path2] = MemoryFile(self, path2, + self.store[path1].getbuffer()) def cat(self, path): return self.store[path].getvalue() @@ -114,9 +115,6 @@ def _rm(self, path): del self.store[path] - def ukey(self, path): - return hash(self.store[path]) # internal ID of instance - def size(self, path): """Size in bytes of the file at path""" if path not in self.store: @@ -125,17 +123,26 @@ class MemoryFile(BytesIO): - """A BytesIO which can't close and works as a context manager""" + """A BytesIO which can't close and works as a context manager + + Can initialise with data - def __init__(self, fs, path): + No need to provide fs, path if auto-committing (default) + """ + + def __init__(self, fs, path, data=None): self.fs = fs self.path = path + if data: + self.write(data) + self.size = len(data) + self.seek(0) def __enter__(self): return self def close(self): - pass + self.size = self.seek(0, 2) def discard(self): pass diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/sftp.py new/filesystem_spec-0.3.6/fsspec/implementations/sftp.py --- old/filesystem_spec-0.3.3/fsspec/implementations/sftp.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/sftp.py 2019-07-25 16:18:34.000000000 +0200 @@ -11,6 +11,7 @@ Peer-to-peer filesystem over SSH using paramiko. """ + protocol = 'sftp', 'ssh' def __init__(self, host, **ssh_kwargs): """ @@ -27,7 +28,7 @@ May include port, username, password... """ super(SFTPFileSystem, self).__init__(**ssh_kwargs) - self.temppath = ssh_kwargs.get('temppath', '/tmp') + self.temppath = ssh_kwargs.pop('temppath', '/tmp') self.host = host self.ssh_kwargs = ssh_kwargs self._connect() @@ -54,7 +55,10 @@ @staticmethod def _get_kwargs_from_urls(urlpath): - return infer_storage_options(urlpath) + out = infer_storage_options(urlpath) + out.pop('path', None) + out.pop('protocol', None) + return out def mkdir(self, path, mode=511): self.ftp.mkdir(path, mode) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_dask.py new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_dask.py --- old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_dask.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_dask.py 2019-07-25 16:18:34.000000000 +0200 @@ -18,7 +18,7 @@ try: yield client finally: - client.shutdown() + client.close() def test_basic(cli): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_ftp.py new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_ftp.py --- old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_ftp.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_ftp.py 2019-07-25 16:18:34.000000000 +0200 @@ -5,6 +5,7 @@ import time from fsspec.implementations.ftp import FTPFileSystem from fsspec import open_files +import fsspec pytest.importorskip('pyftpdlib') here = os.path.dirname(os.path.abspath(__file__)) @@ -73,6 +74,16 @@ assert fs.cat('/out2') == b'oi' +def test_with_url(ftp_writable): + host, port, user, pw = ftp_writable + fo = fsspec.open("ftp://{}:{}@{}:{}/out".format(user, pw, host, port), 'wb') + with fo as f: + f.write(b'hello') + fo = fsspec.open("ftp://{}:{}@{}:{}/out".format(user, pw, host, port), 'rb') + with fo as f: + assert f.read() == b'hello' + + @pytest.mark.parametrize('cache_type', ['bytes', 'mmap']) def test_write_big(ftp_writable, cache_type): host, port, user, pw = ftp_writable @@ -104,3 +115,7 @@ assert not fs.exists(fn) assert fs.exists(fn) assert fs.cat(fn) == b'not' + + fs.rm(fn) + assert not fs.exists(fn) + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_http.py new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_http.py --- old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_http.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_http.py 2019-07-25 16:18:34.000000000 +0200 @@ -1,15 +1,20 @@ import glob +import os import pytest +import shlex import subprocess import time import fsspec requests = pytest.importorskip('requests') +fn = "test_http.py" +d = os.path.dirname(__file__) +data = open(__file__, 'rb').read() @pytest.fixture(scope='module') def server(): - cmd = "python -m http.server 8000".split() + cmd = shlex.split("python -m http.server 8000 --directory %s" % d) try: P = subprocess.Popen(cmd) retries = 10 @@ -31,14 +36,36 @@ def test_list(server): h = fsspec.filesystem('http') out = h.glob(server + '/*.py') - expected = glob.glob('*.py') - for fn in expected: - assert any(fn in f for f in out) + expected = [os.path.basename(f) for f in glob.glob('%s/*.py' % d)] + for myfile in expected: + assert any(myfile in os.path.basename(f) for f in out) def test_read(server): h = fsspec.filesystem('http') - out = h.glob(server + '/*.py')[0] - expected = glob.glob('*.py')[0] + out = server + '/' + fn with h.open(out, 'rb') as f: - assert f.read() == open(expected, 'rb').read() + assert f.read() == data + with h.open(out, 'rb', block_size=0) as f: + assert f.read() == data + with h.open(out, 'rb', size_policy='head') as f: + assert f.size == len(data) + + +def test_methods(server): + h = fsspec.filesystem('http') + url = server + fn + assert h.exists(url) + assert h.cat(url) == data + + +def test_random_access(server): + h = fsspec.filesystem('http') + url = server + '/' + fn + with h.open(url, 'rb') as f: + assert f.read(5) == data[:5] + # python server does not respect bytes range request + # we actually get all the data + f.seek(5, 1) + assert f.read(5) == data[10:15] + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_local.py new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_local.py --- old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_local.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_local.py 2019-07-25 16:18:34.000000000 +0200 @@ -153,9 +153,6 @@ @pytest.mark.parametrize('fmt', list(compression.compr)) def test_compressions(fmt, mode, tmpdir): tmpdir = str(tmpdir) - if fmt == 'zip': - # zip implemented read-only - pytest.skip() fn = os.path.join(tmpdir, '.tmp.getsize') fs = LocalFileSystem() f = OpenFile(fs, fn, compression=fmt, mode='wb') @@ -266,8 +263,64 @@ fs = LocalFileSystem() assert isinstance(fs, pa.filesystem.FileSystem) + assert fs._get_pyarrow_filesystem() is fs class UnknownFileSystem(object): pass assert not isinstance(UnknownFileSystem(), pa.filesystem.FileSystem) + + +def test_directories(tmpdir): + tmpdir = str(tmpdir) + fs = LocalFileSystem() + fs.mkdir(tmpdir + '/dir') + assert tmpdir + '/dir' in fs.ls(tmpdir) + assert fs.ls(tmpdir, True)[0]['type'] == 'directory' + fs.rmdir(tmpdir + '/dir') + assert not fs.ls(tmpdir) + + +def test_file_ops(tmpdir): + tmpdir = str(tmpdir) + fs = LocalFileSystem() + with pytest.raises(FileNotFoundError): + fs.info(tmpdir + '/nofile') + fs.touch(tmpdir + '/afile') + i1 = fs.ukey(tmpdir + '/afile') + + assert tmpdir + '/afile' in fs.ls(tmpdir) + + with fs.open(tmpdir + '/afile', 'wb') as f: + f.write(b'data') + i2 = fs.ukey(tmpdir + '/afile') + assert i1 != i2 # because file changed + + fs.copy(tmpdir + '/afile', tmpdir + '/afile2') + assert tmpdir + '/afile2' in fs.ls(tmpdir) + + fs.move(tmpdir + '/afile', tmpdir + '/afile3') + assert not fs.exists(tmpdir + '/afile') + assert fs.exists(tmpdir + '/afile3') + + fs.rm(tmpdir, recursive=True) + assert not fs.exists(tmpdir) + + +def test_commit_discard(tmpdir): + tmpdir = str(tmpdir) + fs = LocalFileSystem() + with fs.transaction: + with fs.open(tmpdir + '/afile', 'wb') as f: + assert not fs.exists(tmpdir + '/afile') + f.write(b'data') + assert not fs.exists(tmpdir + '/afile') + assert fs.cat(tmpdir + '/afile') == b'data' + + try: + with fs.transaction: + with fs.open(tmpdir + '/bfile', 'wb') as f: + f.write(b'data') + raise KeyboardInterrupt + except KeyboardInterrupt: + assert not fs.exists(tmpdir + '/bfile') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_memory.py new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_memory.py --- old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_memory.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_memory.py 2019-07-25 16:18:34.000000000 +0200 @@ -15,8 +15,8 @@ def test_1(m): m.touch('/somefile') # NB: is found with or without initial / m.touch('afiles/and/anothers') - assert m.find('') == ['somefile', 'afiles/and/anothers'] - assert list(m.get_mapper('')) == ['somefile', 'afiles/and/anothers'] + assert m.find('') == ['afiles/and/anothers', 'somefile'] + assert list(m.get_mapper('')) == ['afiles/and/anothers', 'somefile'] def test_ls(m): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_sftp.py new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_sftp.py --- old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_sftp.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_sftp.py 2019-07-25 16:18:34.000000000 +0200 @@ -54,6 +54,19 @@ assert not f.exists('/home/someuser') +@pytest.mark.parametrize('protocol', ['sftp', 'ssh']) +def test_with_url(protocol, ssh): + fo = fsspec.open(protocol + "://{username}:{password}@{host}:{port}" + "/home/someuserout".format(**ssh), 'wb') + with fo as f: + f.write(b'hello') + fo = fsspec.open(protocol + "://{username}:{password}@{host}:{port}" + "/home/someuserout".format(**ssh), 'rb') + with fo as f: + assert f.read() == b'hello' + + + def test_transaction(ssh): f = fsspec.get_filesystem_class('sftp')(**ssh) f.mkdirs('/home/someuser/deeper') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_webhdfs.py new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_webhdfs.py --- old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_webhdfs.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_webhdfs.py 2019-07-25 16:18:34.000000000 +0200 @@ -1,9 +1,11 @@ +import pickle import pytest import subprocess import time requests = pytest.importorskip("requests") from fsspec.implementations.webhdfs import WebHDFS +import fsspec @pytest.fixture(scope='module') @@ -29,10 +31,28 @@ subprocess.check_output(cmd0) +def test_pickle(hdfs_cluster): + w = WebHDFS(hdfs_cluster, user='testuser') + w2 = pickle.loads(pickle.dumps(w)) + assert w == w2 + + def test_simple(hdfs_cluster): w = WebHDFS(hdfs_cluster, user='testuser') home = w.home_directory() assert home == '/user/testuser' + with pytest.raises(PermissionError): + w.mkdir('/root') + + +def test_url(hdfs_cluster): + url = 'webhdfs://testuser@localhost:50070/user/testuser/myfile' + fo = fsspec.open(url, 'wb', data_proxy={'worker.example.com': 'localhost'}) + with fo as f: + f.write(b'hello') + fo = fsspec.open(url, 'rb', data_proxy={'worker.example.com': 'localhost'}) + with fo as f: + assert f.read() == b'hello' def test_workflow(hdfs_cluster): @@ -55,11 +75,18 @@ w = WebHDFS(hdfs_cluster, user='testuser', data_proxy={'worker.example.com': 'localhost'}) fn = '/user/testuser/testrun/afile' - w.mkdir('/user/testuser/testrun') + w.mkdirs('/user/testuser/testrun') with w.transaction: with w.open(fn, 'wb') as f: f.write(b'hello') assert not w.exists(fn) assert w.exists(fn) + assert w.ukey(fn) + files = w.ls('/user/testuser/testrun', True) + summ = w.content_summary('/user/testuser/testrun') + assert summ['length'] == files[0]['size'] + assert summ['fileCount'] == 1 + w.rm('/user/testuser/testrun', recursive=True) assert not w.exists(fn) + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_zip.py new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_zip.py --- old/filesystem_spec-0.3.3/fsspec/implementations/tests/test_zip.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/tests/test_zip.py 2019-07-25 16:18:34.000000000 +0200 @@ -1,6 +1,7 @@ import zipfile from contextlib import contextmanager import os +import pickle import pytest import sys import tempfile @@ -40,3 +41,10 @@ m = fs.get_mapper('') assert list(m) == ['a', 'b', 'deeply/nested/path'] assert m['b'] == data['b'] + + +def test_pickle(): + with tempzip(data) as z: + fs = fsspec.get_filesystem_class('zip')(fo=z) + fs2 = pickle.loads(pickle.dumps(fs)) + assert fs2.cat('b') == b'hello' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/webhdfs.py new/filesystem_spec-0.3.6/fsspec/implementations/webhdfs.py --- old/filesystem_spec-0.3.3/fsspec/implementations/webhdfs.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/webhdfs.py 2019-07-25 16:18:34.000000000 +0200 @@ -6,6 +6,7 @@ from urllib.parse import quote import uuid from ..spec import AbstractFileSystem, AbstractBufferedFile +from ..utils import infer_storage_options import logging logger = logging.getLogger('webhdfs') @@ -30,6 +31,7 @@ """ tempdir = '/tmp' + protocol = 'webhdfs', 'webHDFS' def __init__(self, host, port=50070, kerberos=False, token=None, user=None, proxy_to=None, kerb_kwargs=None, data_proxy=None, @@ -155,6 +157,19 @@ info['size'] = info['length'] return info + @classmethod + def _strip_protocol(cls, path): + return infer_storage_options(path)['path'] + + @staticmethod + def _get_kwargs_from_urls(urlpath): + out = infer_storage_options(urlpath) + out.pop('path', None) + out.pop('protocol', None) + if 'username' in out: + out['user'] = out.pop('username') + return out + def info(self, path): out = self._call('GETFILESTATUS', path=path) info = out.json()['FileStatus'] @@ -177,7 +192,7 @@ out = self._call('GETCONTENTSUMMARY', path=path) return out.json()['ContentSummary'] - def get_file_checksum(self, path): + def ukey(self, path): """Checksum info of file, giving method and result""" out = self._call('GETFILECHECKSUM', path=path, redirect=False) location = self._apply_proxy(out.headers['Location']) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/implementations/zip.py new/filesystem_spec-0.3.6/fsspec/implementations/zip.py --- old/filesystem_spec-0.3.3/fsspec/implementations/zip.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/implementations/zip.py 2019-07-25 16:18:34.000000000 +0200 @@ -8,7 +8,9 @@ class ZipFileSystem(AbstractFileSystem): """Read contents of ZIP archive as a file-system - Keeps file object open while instance lives + Keeps file object open while instance lives. + + This class is pickleable, but not necessarily thread-safe """ root_marker = "" @@ -28,6 +30,7 @@ AbstractFileSystem.__init__(self) if mode != 'r': raise ValueError("Only read from zip files accepted") + self.in_fo = fo if isinstance(fo, str): files = open_files(fo) if len(files) != 1: @@ -40,6 +43,11 @@ self.kwargs = storage_options self.dir_cache = None + @classmethod + def _strip_protocol(cls, path): + # zip file paths are always relative to the archive root + return super()._strip_protocol(path).lstrip('/') + def _get_dirs(self): if self.dir_cache is None: files = self.zip.infolist() @@ -83,6 +91,9 @@ else: return list(sorted(f['name'] for f in out)) + def __reduce_ex__(self, protocol): + return ZipFileSystem, (self.in_fo, ) + def cat(self, path): return self.zip.read(path) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/registry.py new/filesystem_spec-0.3.6/fsspec/registry.py --- old/filesystem_spec-0.3.3/fsspec/registry.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/registry.py 2019-07-25 16:18:34.000000000 +0200 @@ -21,6 +21,8 @@ 'err': 'Please install gcsfs to access Google Storage'}, 'sftp': {'class': 'fsspec.implementations.sftp.SFTPFileSystem', 'err': 'SFTPFileSystem requires "paramiko" to be installed'}, + 'ssh': {'class': 'fsspec.implementations.sftp.SFTPFileSystem', + 'err': 'SFTPFileSystem requires "paramiko" to be installed'}, 'ftp': {'class': 'fsspec.implementations.ftp.FTPFileSystem'}, 'hdfs': {'class': 'fsspec.implementations.hdfs.PyArrowHDFS', 'err': 'pyarrow and local java libraries required for HDFS'}, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/spec.py new/filesystem_spec-0.3.6/fsspec/spec.py --- old/filesystem_spec-0.3.3/fsspec/spec.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/spec.py 2019-07-25 16:18:34.000000000 +0200 @@ -36,7 +36,7 @@ from here. """ _singleton = [None] # will contain the newest instance - _cache = None + _cache = {} cachable = True # this class can be cached, instances reused _cached = False blocksize = 2**22 @@ -53,9 +53,6 @@ The instance will skip init if instance.cached = True. """ - if cls._cache is None and cls.cachable: - # set up instance cache, if using - cls._cache = {} # TODO: defer to a class-specific tokeniser? do_cache = storage_options.pop('do_cache', True) @@ -68,6 +65,7 @@ if self.cachable: # store for caching - can hold memory cls._cache[token] = self + self.storage_options = storage_options return self def __init__(self, *args, **storage_options): @@ -333,7 +331,7 @@ try: listing = self.ls(path, True) - except FileNotFoundError: + except (FileNotFoundError, IOError): return [], [], [] for info in listing: @@ -373,7 +371,7 @@ # walk works on directories, but find should also return [path] # when path happens to be a file out.append(path) - return out + return sorted(out) def du(self, path, total=True, maxdepth=None): """Space used by files within a path @@ -426,7 +424,7 @@ elif self.exists(path): return [path] else: - raise FileNotFoundError(path) + return [] # glob of non-existent returns empty elif '/' in path[:ind]: ind2 = path[:ind].rindex('/') root = path[:ind2 + 1] @@ -525,14 +523,7 @@ rpath = self._strip_protocol(rpath) if recursive: rpaths = self.find(rpath) - rootdir = os.path.basename(rpath.rstrip('/')) - if os.path.isdir(lpath): - # copy rpath inside lpath directory - lpath2 = os.path.join(lpath, rootdir) - else: - # copy rpath as lpath directory - lpath2 = lpath - lpaths = [os.path.join(lpath2, path[len(rpath):].lstrip('/')) + lpaths = [os.path.join(lpath, path[len(rpath):].lstrip('/')) for path in rpaths] for lpath in lpaths: dirname = os.path.dirname(lpath) @@ -542,7 +533,6 @@ rpaths = [rpath] lpaths = [lpath] for lpath, rpath in zip(lpaths, rpaths): - print(lpath, rpath) with self.open(rpath, 'rb') as f1: with open(lpath, 'wb') as f2: data = True @@ -632,7 +622,7 @@ @classmethod def _parent(cls, path): - path = path.rstrip('/') + path = cls._strip_protocol(path.rstrip('/')) if '/' in path: return cls.root_marker + path.rsplit('/', 1)[0] else: @@ -1125,8 +1115,9 @@ else: assert self.buffer.tell() == 0 - self.fs.invalidate_cache(self.path) - self.fs.invalidate_cache(self.fs._parent(self.path)) + if self.fs is not None: + self.fs.invalidate_cache(self.path) + self.fs.invalidate_cache(self.fs._parent(self.path)) self.closed = True diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/tests/test_api.py new/filesystem_spec-0.3.6/fsspec/tests/test_api.py --- old/filesystem_spec-0.3.3/fsspec/tests/test_api.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/tests/test_api.py 2019-07-25 16:18:34.000000000 +0200 @@ -1,7 +1,8 @@ """Tests the spec, using memoryfs""" +import os import pickle -from fsspec.implementations.memory import MemoryFileSystem +from fsspec.implementations.memory import MemoryFileSystem, MemoryFile def test_idempotent(): @@ -36,3 +37,77 @@ assert MemoryFileSystem._strip_protocol('other://stuff') == "other://stuff" assert MemoryFileSystem._get_kwargs_from_urls("memory://user@thing") == {} + + +def test_get_put(tmpdir): + tmpdir = str(tmpdir) + fn = os.path.join(tmpdir, 'one') + open(fn, 'wb').write(b'one') + os.mkdir(os.path.join(tmpdir, 'dir')) + fn2 = os.path.join(tmpdir, 'dir', 'two') + open(fn2, 'wb').write(b'two') + + fs = MemoryFileSystem() + fs.put(fn, '/afile') + assert fs.cat('/afile') == b'one' + + fs.store['/bfile'] = MemoryFile(fs, '/bfile', b'data') + fn3 = os.path.join(tmpdir, 'three') + fs.get('/bfile', fn3) + assert open(fn3, 'rb').read() == b'data' + + fs.put(tmpdir, '/more', recursive=True) + assert fs.find('/more') == ['/more/dir/two', '/more/one', '/more/three'] + + for f in [fn, fn2, fn3]: + os.remove(f) + os.rmdir(os.path.join(tmpdir, 'dir')) + + fs.get('/more/', tmpdir + '/', recursive=True) + assert open(fn3, 'rb').read() == b'data' + assert open(fn, 'rb').read() == b'one' + + +def test_du(): + fs = MemoryFileSystem() + fs.store = { + '/dir/afile': MemoryFile(fs, '/afile', b'a'), + '/dir/dirb/afile': MemoryFile(fs, '/afile', b'bb'), + '/dir/dirb/bfile': MemoryFile(fs, '/afile', b'ccc'), + } + assert fs.du('/dir') == 6 + assert fs.du('/dir', total=False)['/dir/dirb/afile'] == 2 + assert fs.du('/dir', maxdepth=0) == 1 + + +def test_head_tail(): + fs = MemoryFileSystem() + with fs.open('/myfile', 'wb') as f: + f.write(b'I had a nice big cabbage') + assert fs.head('/myfile', 5) == b'I had' + assert fs.tail('/myfile', 7) == b'cabbage' + + +def test_move(): + fs = MemoryFileSystem() + with fs.open('/myfile', 'wb') as f: + f.write(b'I had a nice big cabbage') + fs.move('/myfile', '/otherfile') + assert not fs.exists('/myfile') + assert fs.info('/otherfile') + assert isinstance(fs.ukey('/otherfile'), str) + + +def test_read_block_delimiter(): + fs = MemoryFileSystem() + with fs.open('/myfile', 'wb') as f: + f.write(b'some\n' + b'lines\n' + b'of\n' + b'text') + assert fs.read_block('/myfile', 0, 2, b'\n') == b'some\n' + assert fs.read_block('/myfile', 2, 6, b'\n') == b'lines\n' + assert fs.read_block('/myfile', 6, 2, b'\n') == b'' + assert fs.read_block('/myfile', 2, 9, b'\n') == b'lines\nof\n' + assert fs.read_block('/myfile', 12, 6, b'\n') == b'text' + assert fs.read_block('/myfile', 0, None) == fs.cat('/myfile') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/tests/test_file.py new/filesystem_spec-0.3.6/fsspec/tests/test_file.py --- old/filesystem_spec-0.3.3/fsspec/tests/test_file.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/tests/test_file.py 2019-07-25 16:18:34.000000000 +0200 @@ -73,6 +73,8 @@ f2 = ftp.open('/out2', 'rb') assert hash(f2) != hash(f) assert f != f2 + f2 = ftp.open('/out', 'wb') + assert hash(f2) != hash(f) def test_file_text_attributes(ftp_writable): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-0.3.3/fsspec/tests/test_fuse.py new/filesystem_spec-0.3.6/fsspec/tests/test_fuse.py --- old/filesystem_spec-0.3.3/fsspec/tests/test_fuse.py 2019-07-18 14:27:46.000000000 +0200 +++ new/filesystem_spec-0.3.6/fsspec/tests/test_fuse.py 2019-07-25 16:18:34.000000000 +0200 @@ -10,7 +10,7 @@ tmpdir = str(tmpdir) fs = MemoryFileSystem() fs.touch('/mounted/testfile') - run(fs, '/mounted/', tmpdir, False) + th = run(fs, '/mounted/', tmpdir, False) timeout = 10 while True: try: @@ -22,6 +22,7 @@ timeout -= 1 time.sleep(1) assert timeout > 0, "Timeout" + fn = os.path.join(tmpdir, 'test') with open(fn, 'wb') as f: f.write(b'data') @@ -41,3 +42,11 @@ os.rmdir(fn + '/inner') os.rmdir(fn) assert not fs.pseudo_dirs + + # should not normally kill a thread like this, but FUSE blocks, so we + # cannot have thread listen for event. Alternative may be to .join() but + # send a SIGINT + th._tstate_lock.release() + th._stop() + th.join() + fs.store.clear()