Hello community,
here is the log from the commit of package python-minio for openSUSE:Factory checked in at 2019-10-02 14:55:32
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-minio (Old)
and /work/SRC/openSUSE:Factory/.python-minio.new.2352 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-minio"
Wed Oct 2 14:55:32 2019 rev:7 rq:734320 version:5.0.1
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-minio/python-minio.changes 2019-09-13 14:58:32.309277795 +0200
+++ /work/SRC/openSUSE:Factory/.python-minio.new.2352/python-minio.changes 2019-10-02 14:55:36.291332169 +0200
@@ -1,0 +2,8 @@
+Tue Oct 1 12:55:45 UTC 2019 - Marketa Calabkova
+
+- Update to 5.0.1
+ * BREAKING API CHANGE: re-implement select_object_content
+ * Remove white-space characters before parsing XML
+ * Do not encode ~ in V4 S3 signing
+
+-------------------------------------------------------------------
Old:
----
minio-4.0.21.tar.gz
New:
----
minio-5.0.1.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-minio.spec ++++++
--- /var/tmp/diff_new_pack.dDiIIA/_old 2019-10-02 14:55:37.383329304 +0200
+++ /var/tmp/diff_new_pack.dDiIIA/_new 2019-10-02 14:55:37.399329262 +0200
@@ -18,7 +18,7 @@
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
Name: python-minio
-Version: 4.0.21
+Version: 5.0.1
Release: 0
Summary: Minio library for Amazon S3 compatible cloud storage
License: Apache-2.0
++++++ minio-4.0.21.tar.gz -> minio-5.0.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/PKG-INFO new/minio-5.0.1/PKG-INFO
--- old/minio-4.0.21/PKG-INFO 2019-08-28 21:41:17.000000000 +0200
+++ new/minio-5.0.1/PKG-INFO 2019-09-18 23:08:50.000000000 +0200
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: minio
-Version: 4.0.21
+Version: 5.0.1
Summary: MinIO Python Library for Amazon S3 Compatible Cloud Storage for Python
Home-page: https://github.com/minio/minio-py
Author: MinIO, Inc.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/docs/API.md new/minio-5.0.1/docs/API.md
--- old/minio-4.0.21/docs/API.md 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/docs/API.md 2019-09-18 23:07:19.000000000 +0200
@@ -690,7 +690,6 @@
|``obj``| _SelectObjectReader_ |Select_object_reader object. |
-
__Example__
@@ -736,7 +735,7 @@
# Get the stats
print(data.stats())
-except CRCValidationError as err:
+except SelectCRCValidationError as err:
print(err)
except ResponseError as err:
print(err)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/examples/select_object_content.py new/minio-5.0.1/examples/select_object_content.py
--- old/minio-4.0.21/examples/select_object_content.py 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/examples/select_object_content.py 2019-09-18 23:07:19.000000000 +0200
@@ -17,12 +17,12 @@
from minio import Minio
from minio.error import ResponseError
-from minio.select_object_reader import CRCValidationError
-from minio.select_object_options import (SelectObjectOptions, CSVInput,
- JSONInput, RequestProgress,
- ParquetInput, InputSerialization,
- OutputSerialization, CSVOutput,
- JsonOutput)
+from minio.select.errors import SelectCRCValidationError, SelectMessageError
+from minio.select.options import (SelectObjectOptions, CSVInput,
+ JSONInput, RequestProgress,
+ ParquetInput, InputSerialization,
+ OutputSerialization, CSVOutput,
+ JsonOutput)
client = Minio('s3.amazonaws.com',
access_key='YOUR-ACCESSKEY',
@@ -71,7 +71,11 @@
# Get the stats
print(data.stats())
-except CRCValidationError as err:
+except SelectMessageError as err:
print(err)
+
+except SelectCRCValidationError as err:
+ print(err)
+
except ResponseError as err:
print(err)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/__init__.py new/minio-5.0.1/minio/__init__.py
--- old/minio-4.0.21/minio/__init__.py 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/minio/__init__.py 2019-09-18 23:07:19.000000000 +0200
@@ -29,7 +29,7 @@
__title__ = 'minio-py'
__author__ = 'MinIO, Inc.'
-__version__ = '4.0.21'
+__version__ = '5.0.1'
__license__ = 'Apache 2.0'
__copyright__ = 'Copyright 2015, 2016, 2017, 2018, 2019 MinIO, Inc.'
@@ -38,6 +38,3 @@
from .post_policy import PostPolicy
from .copy_conditions import CopyConditions
from .definitions import Bucket, Object
-from .select_object_reader import SelectObjectReader
-
-
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/api.py new/minio-5.0.1/minio/api.py
--- old/minio-4.0.21/minio/api.py 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/minio/api.py 2019-09-18 23:07:19.000000000 +0200
@@ -77,8 +77,7 @@
is_valid_bucket_notification_config, is_valid_policy_type,
mkdir_p, dump_http, amzprefix_user_metadata,
is_supported_header,is_amz_header)
-from .helpers import (MAX_MULTIPART_OBJECT_SIZE,
- MAX_PART_SIZE,
+from .helpers import (MAX_PART_SIZE,
MAX_POOL_SIZE,
MIN_PART_SIZE,
DEFAULT_PART_SIZE,
@@ -94,7 +93,7 @@
xml_marshal_select)
from .fold_case_dict import FoldCaseDict
from .thread_pool import ThreadPool
-from .select_object_reader import SelectObjectReader
+from .select import SelectObjectReader
# Comment format.
_COMMENTS = '({0}; {1})'
@@ -664,13 +663,13 @@
# Verify if we wrote data properly.
if total_written < content_size:
- msg = 'Data written {0} bytes is smaller than the' \
+ msg = 'Data written {0} bytes is smaller than the ' \
'specified size {1} bytes'.format(total_written,
content_size)
raise InvalidSizeError(msg)
if total_written > content_size:
- msg = 'Data written {0} bytes is in excess than the' \
+ msg = 'Data written {0} bytes is in excess than the ' \
'specified size {1} bytes'.format(total_written,
content_size)
raise InvalidSizeError(msg)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/helpers.py new/minio-5.0.1/minio/helpers.py
--- old/minio-4.0.21/minio/helpers.py 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/minio/helpers.py 2019-09-18 23:07:19.000000000 +0200
@@ -55,17 +55,6 @@
MIN_PART_SIZE = 5 * 1024 * 1024 # 5MiB
DEFAULT_PART_SIZE = MIN_PART_SIZE # Currently its 5MiB
-
-# Select Object Content
-READ_SIZE_SELECT = 32 * 1024 # Buffer size
-SQL = 'SQL' # Value for ExpressionType
-EVENT_RECORDS = 'Records' # Event Type is Records
-EVENT_PROGRESS = 'Progress' # Event Type Progress
-EVENT_STATS = 'Stats' # Event Type Stats
-EVENT = 'event' # Message Type is event
-EVENT_END = 'End' # Event Type is End
-ERROR = 'error' # Message Type is error
-
_VALID_BUCKETNAME_REGEX = re.compile('^[a-z0-9][a-z0-9\\.\\-]+[a-z0-9]$')
_ALLOWED_HOSTNAME_REGEX = re.compile(
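'^((?!-)(?!_)[A-Z_\\d-]{1,63}(?<!-)(?<!_)\\.)*((?!_)(?!-)[A-Z_\\d-]{1,63}(?<!-)(?<!_))$',
[... diff content lost in extraction: the remainder of this hunk through the opening license-header lines of new/minio-5.0.1/minio/select/__init__.py ...]
+# http://www.apache.org/licenses/LICENSE-2.0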
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+:copyright: (c) 2019 by MinIO, Inc.
+:license: Apache 2.0, see LICENSE for more details.
+"""
+
+__title__ = 'minio-py'
+__author__ = 'MinIO, Inc.'
+__version__ = '0.0.1'
+__license__ = 'Apache 2.0'
+__copyright__ = 'Copyright 2019 MinIO, Inc.'
+
+from .reader import *
+from .helpers import *
+from .errors import *
+from .options import *
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select/errors.py new/minio-5.0.1/minio/select/errors.py
--- old/minio-4.0.21/minio/select/errors.py 1970-01-01 01:00:00.000000000 +0100
+++ new/minio-5.0.1/minio/select/errors.py 2019-09-18 23:07:19.000000000 +0200
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C)
+# 2019 MinIO, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+minio.select.errors
+~~~~~~~~~~~~~~~
+
+This module implements the error classes for SelectObject responses.
+
+:copyright: (c) 2019 by MinIO, Inc.
+:license: Apache 2.0, see LICENSE for more details.
+
+"""
+
+class SelectMessageError(Exception):
+ '''
+ Raised in case of message type 'error'
+ '''
+
+class SelectCRCValidationError(Exception):
+ '''
+ Raised in case of CRC mismatch
+ '''
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select/helpers.py new/minio-5.0.1/minio/select/helpers.py
--- old/minio-4.0.21/minio/select/helpers.py 1970-01-01 01:00:00.000000000 +0100
+++ new/minio-5.0.1/minio/select/helpers.py 2019-09-18 23:07:19.000000000 +0200
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C)
+# 2019 MinIO, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+minio.select.helpers
+~~~~~~~~~~~~~~~
+
+This module implements the helper functions for SelectObject responses.
+
+:copyright: (c) 2019 by MinIO, Inc.
+:license: Apache 2.0, see LICENSE for more details.
+
+"""
+
+import codecs
+from binascii import crc32
+
+SQL = 'SQL' # Value for ExpressionType
+EVENT_RECORDS = 'Records' # Event Type is Records
+EVENT_PROGRESS = 'Progress' # Event Type Progress
+EVENT_STATS = 'Stats' # Event Type Stats
+EVENT_CONT = 'Cont' # Event Type continue
+EVENT_END = 'End' # Event Type is End
+EVENT_CONTENT_TYPE = "text/xml" # Event content xml type
+EVENT = 'event' # Message Type is event
+ERROR = 'error' # Message Type is error
+
+def calculate_crc(value):
+ '''
+ Returns the CRC using crc32
+ '''
+ return crc32(value) & 0xffffffff
+
+def validate_crc(current_value, expected_value):
+ '''
+ Validate through CRC check
+ '''
+ crc_current = calculate_crc(current_value)
+ crc_expected = byte_int(expected_value)
+ if crc_current == crc_expected:
+ return True
+ return False
+
+def byte_int(data_bytes):
+ '''
+ Convert bytes to big-endian integer
+ '''
+ return int(codecs.encode(data_bytes, 'hex'), 16)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select/options.py new/minio-5.0.1/minio/select/options.py
--- old/minio-4.0.21/minio/select/options.py 1970-01-01 01:00:00.000000000 +0100
+++ new/minio-5.0.1/minio/select/options.py 2019-09-18 23:07:19.000000000 +0200
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C)
+# 2019 MinIO, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+minio.select.options
+~~~~~~~~~~~~~~~
+
+This module implements the SelectOption definition for SelectObject API.
+
+:copyright: (c) 2019 by MinIO, Inc.
+:license: Apache 2.0, see LICENSE for more details.
+
+"""
+
+from .helpers import (SQL)
+
+class CSVInput:
+ """
+ CSVInput: Input Format as CSV.
+ """
+ def __init__(self, FileHeaderInfo=None, RecordDelimiter="\n",
+ FieldDelimiter=",", QuoteCharacter='"',
+ QuoteEscapeCharacter='"', Comments="#",
+ AllowQuotedRecordDelimiter=False):
+ self.FileHeaderInfo = FileHeaderInfo
+ self.RecordDelimiter = RecordDelimiter
+ self.FieldDelimiter = FieldDelimiter
+ self.QuoteCharacter = QuoteCharacter
+ self.QuoteEscapeCharacter = QuoteEscapeCharacter
+ self.Comments = Comments
+ self.AllowQuotedRecordDelimiter = AllowQuotedRecordDelimiter
+
+class JSONInput:
+ """
+ JSONInput: Input format as JSON.
+ """
+ def __init__(self, Type=None):
+ self.Type = Type
+
+
+class ParquetInput:
+ """
+ ParquetInput: Input format as Parquet
+ """
+
+
+class InputSerialization:
+ """
+ InputSerialization: nput Format.
+ """
+ def __init__(self, compression_type="NONE", csv=None, json=None, par=None):
+ self.compression_type = compression_type
+ self.csv_input = csv
+ self.json_input = json
+ self.parquet_input = par
+
+
+class CSVOutput:
+ """
+ CSVOutput: Output as CSV.
+
+ """
+ def __init__(self, QuoteFields="ASNEEDED", RecordDelimiter="\n",
+ FieldDelimiter=",", QuoteCharacter='"',
+ QuoteEscapeCharacter='"'):
+ self.QuoteFields = QuoteFields
+ self.RecordDelimiter = RecordDelimiter
+ self.FieldDelimiter = FieldDelimiter
+ self.QuoteCharacter = QuoteCharacter
+ self.QuoteEscapeCharacter = QuoteEscapeCharacter
+
+
+class JsonOutput:
+ """
+ JsonOutput- Output as JSON.
+ """
+ def __init__(self, RecordDelimiter="\n"):
+ self.RecordDelimiter = RecordDelimiter
+
+
+class OutputSerialization:
+ """
+ OutputSerialization: Output Format.
+ """
+ def __init__(self, csv=None, json=None):
+ self.csv_output = csv
+ self.json_output = json
+
+
+class RequestProgress:
+ """
+ RequestProgress: Sends progress message.
+ """
+ def __init__(self, enabled=False):
+ self.enabled = enabled
+
+
+class SelectObjectOptions:
+ """
+ SelectObjectOptions: Options for select object
+ """
+ expression_type = SQL
+
+ def __init__(self, expression, input_serialization,
+ output_serialization, request_progress):
+ self.expression = expression
+ self.in_ser = input_serialization
+ self.out_ser = output_serialization
+ self.req_progress = request_progress
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select/reader.py new/minio-5.0.1/minio/select/reader.py
--- old/minio-4.0.21/minio/select/reader.py 1970-01-01 01:00:00.000000000 +0100
+++ new/minio-5.0.1/minio/select/reader.py 2019-09-18 23:07:19.000000000 +0200
@@ -0,0 +1,229 @@
+# -*- coding: utf-8 -*-
+# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C)
+# 2019 MinIO, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+minio.select.reader
+~~~~~~~~~~~~~~~
+
+This module implements the reader for SelectObject response body.
+
+:copyright: (c) 2019 by MinIO, Inc.
+:license: Apache 2.0, see LICENSE for more details.
+
+"""
+
+from __future__ import absolute_import
+
+import io
+import sys
+
+from binascii import crc32
+from xml.etree import cElementTree
+from xml.etree.cElementTree import ParseError
+
+from .helpers import (EVENT_RECORDS, EVENT_PROGRESS,
+ EVENT_STATS, EVENT_CONT,
+ EVENT, EVENT_CONTENT_TYPE,
+ EVENT_END, ERROR)
+
+from .helpers import (validate_crc, calculate_crc, byte_int)
+from .errors import (SelectMessageError, SelectCRCValidationError)
+
+def _extract_header(header_bytes):
+ """
+ populates the header map after reading the header in bytes
+ """
+ header_map = {}
+ header_byte_parsed = 0
+ # While loop ends when all the headers present are read
+ # header contains multipe headers
+ while header_byte_parsed < len(header_bytes):
+ header_name_byte_length = byte_int(header_bytes[header_byte_parsed:header_byte_parsed+1])
+ header_byte_parsed += 1
+ header_name = \
+ header_bytes[header_byte_parsed:
+ header_byte_parsed+header_name_byte_length]
+ header_byte_parsed += header_name_byte_length
+ # Header Value Type is of 1 bytes and is skipped
+ header_byte_parsed += 1
+ value_string_byte_length = \
+ byte_int(header_bytes[header_byte_parsed:
+ header_byte_parsed+2])
+ header_byte_parsed += 2
+ header_value = \
+ header_bytes[header_byte_parsed:
+ header_byte_parsed+value_string_byte_length]
+ header_byte_parsed += value_string_byte_length
+ header_map[header_name.decode("utf-8").lstrip(":")] = \
+ header_value.decode("utf-8").lstrip(":")
+ return header_map
+
+def _parse_stats(stats):
+ """
+ Parses stats XML and populates the stat dict.
+ """
+ stat = {}
+ for attribute in cElementTree.fromstring(stats):
+ if attribute.tag == 'BytesScanned':
+ stat['BytesScanned'] = attribute.text
+ elif attribute.tag == 'BytesProcessed':
+ stat['BytesProcessed'] = attribute.text
+ elif attribute.tag == 'BytesReturned':
+ stat['BytesReturned'] = attribute.text
+
+ return stat
+
+class SelectObjectReader(object):
+ """
+ SelectObjectReader returns a Reader that upon read
+ returns queried data, but stops when the response ends.
+ LimitedRandomReader is compatible with BufferedIOBase.
+ """
+ def __init__(self, response):
+ self.response = response
+ self.remaining_bytes = bytes()
+ self.stat = {}
+ self.prog = {}
+
+ def readable(self):
+ return True
+
+ def writeable(self):
+ return False
+
+ def close(self):
+ self.response.close()
+
+ def stats(self):
+ return self.stat
+
+ def progress(self):
+ return self.prog
+
+ def __extract_message(self):
+ """
+ Process the response sent from server.
+ https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
+ """
+
+ crc_bytes = io.BytesIO()
+ total_bytes_len = self.response.read(4)
+ if len(total_bytes_len) == 0:
+ return {}
+
+ total_length = byte_int(total_bytes_len)
+ header_bytes_len = self.response.read(4)
+ if len(header_bytes_len) == 0:
+ return {}
+
+ header_len = byte_int(header_bytes_len)
+
+ crc_bytes.write(total_bytes_len)
+ crc_bytes.write(header_bytes_len)
+
+ prelude_bytes_crc = self.response.read(4)
+ if not validate_crc(crc_bytes.getvalue(), prelude_bytes_crc):
+ raise SelectCRCValidationError(
+ {"Checksum Mismatch, PreludeCRC of " +
+ str(calculate_crc(crc_bytes.getvalue())) +
+ " does not equal expected CRC of " +
+ str(byte_int(prelude_bytes_crc))})
+
+ crc_bytes.write(prelude_bytes_crc)
+
+ header_bytes = self.response.read(header_len)
+ if len(header_bytes) == 0:
+ raise SelectMessageError(
+ "Premature truncation of select message header"+
+ ", server is sending corrupt message?")
+
+ crc_bytes.write(header_bytes)
+
+ header_map = _extract_header(header_bytes)
+ payload_length = total_length - header_len - int(16)
+ payload_bytes = b''
+ event_type = header_map["event-type"]
+ if header_map["message-type"] == ERROR:
+ raise SelectMessageError(
+ header_map["error-code"] + ":\"" + \
+ header_map["error-message"] + "\"")
+ elif header_map["message-type"] == EVENT:
+ if event_type == EVENT_END:
+ pass
+ elif event_type == EVENT_CONT:
+ pass
+ elif event_type == EVENT_STATS:
+ content_type = header_map["content-type"]
+ if content_type != EVENT_CONTENT_TYPE:
+ raise SelectMessageError(
+ "Unrecognized content-type {0}".format(content_type))
+ else:
+ payload_bytes = self.response.read(payload_length)
+ self.stat = _parse_stats(payload_bytes)
+
+ elif event_type == EVENT_RECORDS:
+ payload_bytes = self.response.read(payload_length)
+ else:
+ raise SelectMessageError(
+ "Unrecognized message-type {0}".format(header_map["message-type"])
+ )
+
+ crc_bytes.write(payload_bytes)
+
+ message_crc = self.response.read(4)
+ if len(message_crc) == 0:
+ return {}
+
+ if not validate_crc(crc_bytes.getvalue(),
+ message_crc):
+ raise SelectCRCValidationError(
+ {"Checksum Mismatch, MessageCRC of " +
+ str(calculate_crc(crc_bytes.getvalue())) +
+ " does not equal expected CRC of " +
+ str(byte_int(message_crc))})
+
+ message = {event_type: payload_bytes}
+ return message
+
+ def stream(self, num_bytes=32*1024):
+ """
+ extract each record from the response body ... and buffer it.
+ send only up to requested bytes such as message[:num_bytes]
+ rest is buffered and added to the next iteration.
+
+ caller should call self.close() to close the stream.
+ """
+ while not self.response.isclosed():
+ if len(self.remaining_bytes) == 0:
+ message = self.__extract_message()
+ if EVENT_RECORDS in message:
+ self.remaining_bytes = message.get(EVENT_RECORDS, b'')
+ else:
+ # For all other events continue
+ continue
+
+ result = self.remaining_bytes
+ if num_bytes < len(self.remaining_bytes):
+ result = self.remaining_bytes[:num_bytes]
+ self.remaining_bytes = self.remaining_bytes[len(result):]
+
+ if result == b'':
+ break
+ if sys.version_info.major == 3:
+ yield result.decode('utf-8', errors='ignore')
+ else:
+ # Python 2.x needs explicit conversion.
+ yield result.decode('utf-8', errors='ignore').encode('utf-8')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select_object_options.py new/minio-5.0.1/minio/select_object_options.py
--- old/minio-4.0.21/minio/select_object_options.py 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/minio/select_object_options.py 1970-01-01 01:00:00.000000000 +0100
@@ -1,121 +0,0 @@
-# -*- coding: utf-8 -*-
-# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C)
-# 2019 MinIO, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-
-This module creates the request for Select
-
-:copyright: (c) 2019 by MinIO, Inc.
-:license: Apache 2.0, see LICENSE for more details.
-
-"""
-from .helpers import (SQL)
-
-
-class CSVInput:
- """
- CSVInput: Input Format as CSV.
- """
- def __init__(self, FileHeaderInfo=None, RecordDelimiter="\n",
- FieldDelimiter=",", QuoteCharacter='"',
- QuoteEscapeCharacter='"', Comments="#",
- AllowQuotedRecordDelimiter=False):
- self.FileHeaderInfo = FileHeaderInfo
- self.RecordDelimiter = RecordDelimiter
- self.FieldDelimiter = FieldDelimiter
- self.QuoteCharacter = QuoteCharacter
- self.QuoteEscapeCharacter = QuoteEscapeCharacter
- self.Comments = Comments
- self.AllowQuotedRecordDelimiter = AllowQuotedRecordDelimiter
-
-
-class JSONInput:
- """
- JSONInput: Input format as JSON.
- """
- def __init__(self, Type=None):
- self.Type = Type
-
-
-class ParquetInput:
- """
- ParquetInput: Input format as Parquet
- """
-
-
-class InputSerialization:
- """
- InputSerialization: nput Format.
- """
- def __init__(self, compression_type="NONE", csv=None, json=None, par=None):
- self.compression_type = compression_type
- self.csv_input = csv
- self.json_input = json
- self.parquet_input = par
-
-
-class CSVOutput:
- """
- CSVOutput: Output as CSV.
-
- """
- def __init__(self, QuoteFields="ASNEEDED", RecordDelimiter="\n",
- FieldDelimiter=",", QuoteCharacter='"',
- QuoteEscapeCharacter='"'):
- self.QuoteFields = QuoteFields
- self.RecordDelimiter = RecordDelimiter
- self.FieldDelimiter = FieldDelimiter
- self.QuoteCharacter = QuoteCharacter
- self.QuoteEscapeCharacter = QuoteEscapeCharacter
-
-
-class JsonOutput:
- """
- JsonOutput- Output as JSON.
- """
- def __init__(self, RecordDelimiter="\n"):
- self.RecordDelimiter = RecordDelimiter
-
-
-class OutputSerialization:
- """
- OutputSerialization: Output Format.
- """
- def __init__(self, csv=None, json=None):
- self.csv_output = csv
- self.json_output = json
-
-
-class RequestProgress:
- """
- RequestProgress: Sends progress message.
- """
- def __init__(self, enabled=False):
- self.enabled = enabled
-
-
-class SelectObjectOptions:
- """
- SelectObjectOptions: Options for select object
- """
- expression_type = SQL
-
- def __init__(self, expression, input_serialization,
- output_serialization, request_progress):
- self.expression = expression
- self.in_ser = input_serialization
- self.out_ser = output_serialization
- self.req_progress = request_progress
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/select_object_reader.py new/minio-5.0.1/minio/select_object_reader.py
--- old/minio-4.0.21/minio/select_object_reader.py 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/minio/select_object_reader.py 1970-01-01 01:00:00.000000000 +0100
@@ -1,294 +0,0 @@
-# -*- coding: utf-8 -*-
-# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C)
-# 2015, 2016, 2017, 2018, 2019 MinIO, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import io
-import codecs
-
-from binascii import crc32
-from xml.etree import cElementTree
-from .error import InvalidXMLError
-from xml.etree.cElementTree import ParseError
-
-from .helpers import (READ_SIZE_SELECT, EVENT_RECORDS,
- EVENT_PROGRESS, EVENT_STATS, EVENT, EVENT_END, ERROR)
-
-
-class CRCValidationError(Exception):
- '''
- Raised in case of CRC mismatch
- '''
-
-
-def calculate_crc(value):
- '''
- Returns the CRC using crc32
- '''
- return crc32(value) & 0xffffffff
-
-
-def validate_crc(current_value, expected_value):
- '''
- Validate through CRC check
- '''
- crc_current = calculate_crc(current_value)
- crc_expected = byte_int(expected_value)
- if crc_current == crc_expected:
- return True
- return False
-
-
-def byte_int(data_bytes):
- '''
- Convert bytes to big-endian integer
- '''
- return int(codecs.encode(data_bytes, 'hex'), 16)
-
-
-class SelectObjectReader(object):
- """
- SelectObjectReader returns a Reader that upon read
- returns queried data, but stops when the response ends.
- LimitedRandomReader is compatible with BufferedIOBase.
- """
- def __init__(self, response):
- self.response = response
- self.remaining_bytes = bytearray()
- self.stat = {}
- self.prog = {}
-
- def readable(self):
- return True
-
- def writeable(self):
- return False
-
- @property
- def closed(self):
- return self.response.isclosed()
-
- def close(self):
- self.response.close()
-
- def stats(self):
- return self.stat
-
- def progress(self):
- return self.prog
-
- def __extract_message(self):
- """
- Process the response sent from server.
- https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
- """
- rec = bytearray()
- read_buffer = READ_SIZE_SELECT
- # Messages read in chunks of read_buffer bytes
- chunked_message = self.response.read(read_buffer)
- total_byte_parsed = 0
- if len(chunked_message) == 0:
- self.close()
- return b''
-
- # The first 4 bytes gives the total_byte_length and then
- # complete message is extracted
- while total_byte_parsed < read_buffer:
- # Case 1 - If the total_byte_length is partially read
- # in the chunked message , then complete the total_byte_length
- # by reading the required bytes from response and then
- # generate the complete message
- if read_buffer - total_byte_parsed <= 4:
- value = chunked_message[total_byte_parsed:
- total_byte_parsed +
- (read_buffer - total_byte_parsed) +
- 1]
- rem_bytes = self.response.read(4 - (read_buffer -
- total_byte_parsed))
- message = value + rem_bytes + \
- self.response.read(byte_int(value+rem_bytes)-4)
- end_status = self.__decode_message(message, rec)
- total_byte_parsed = 0
- break
- else:
- total_byte_length = chunked_message[total_byte_parsed: total_byte_parsed + 4]
- # Case 2 - Incomplete message in chunked message ,
- # so creating the complete message by reading the
- # total_byte_length- len_read from the response message.
- if total_byte_parsed + byte_int(total_byte_length) > read_buffer:
- len_read = len(chunked_message[total_byte_parsed:])
- message = chunked_message[total_byte_parsed:] + \
- self.response.read(byte_int(total_byte_length)-len_read)
- end_status = self.__decode_message(message, rec)
- total_byte_parsed += byte_int(total_byte_length)
- # Case 3- the complete message is present in chunked
- # messsage.
- else:
- message = chunked_message[total_byte_parsed:
- total_byte_parsed +
- byte_int(total_byte_length)]
- total_byte_parsed += byte_int(total_byte_length)
- end_status = self.__decode_message(message, rec)
- if end_status:
- break
- return rec
-
- def __extract_header(self, header, header_length):
- """
- populates the header map after reading the header
- """
- header_map = {}
- header_byte_parsed = 0
- # While loop ends when all the headers present are read
- # header contains multipe headers
- while header_byte_parsed < header_length:
- header_name_byte_length = \
- byte_int(header[header_byte_parsed: header_byte_parsed+1])
- header_byte_parsed += 1
- header_name = \
- header[header_byte_parsed:
- header_byte_parsed+header_name_byte_length]
- header_byte_parsed += header_name_byte_length
- # Header Value Type is of 1 bytes and is skipped
- header_byte_parsed += 1
- value_string_byte_length = \
- byte_int(header[header_byte_parsed:
- header_byte_parsed+2])
- header_byte_parsed += 2
- header_value = \
- header[header_byte_parsed:
- header_byte_parsed+value_string_byte_length]
- header_byte_parsed += value_string_byte_length
- header_map[header_name.decode("utf-8").lstrip(":")] = \
- header_value.decode("utf-8").lstrip(":")
- return header_map
-
- def __read_stats(self, stats):
- """
- pupulates the stat dict.
- """
- root = cElementTree.fromstring(stats)
- for attribute in root:
- if attribute.tag == 'BytesScanned':
- self.stat['BytesScanned'] = attribute.text
- elif attribute.tag == 'BytesProcessed':
- self.stat['BytesProcessed'] = attribute.text
- elif attribute.tag == 'BytesReturned':
- self.stat['BytesReturned'] = attribute.text
-
- def __parse_message(self, header_map, payload, payload_length, record):
- '''
- Parses the message
- '''
- if header_map["message-type"] == ERROR:
- error = header_map["error-code"] + ":\"" +\
- header_map["error-message"] + "\""
- if header_map["message-type"] == EVENT:
- # Fetch the content-type
- content_type = header_map["content-type"]
- # Fetch the event-type
- event_type = header_map["event-type"]
- if event_type == EVENT_RECORDS:
- record += payload[0:payload_length]
- elif event_type == EVENT_PROGRESS:
- if content_type == "text/xml":
- progress = payload[0:payload_length]
- elif event_type == EVENT_STATS:
- if content_type == "text/xml":
- self.__read_stats(payload[0:payload_length])
-
- def __decode_message(self, message, rec):
- end_status = False
- total_byte_length = message[0:4] # total_byte_length is of 4 bytes
- headers_byte_length = message[4: 8] # headers_byte_length is 4 bytes
- prelude_crc = message[8:12] # prelude_crc is of 4 bytes
- header = message[12:12+byte_int(headers_byte_length)]
- payload_length = byte_int(total_byte_length) - \
- byte_int(headers_byte_length) - int(16)
- payload = message[12 + byte_int(headers_byte_length):
- 12 + byte_int(headers_byte_length) + payload_length]
- message_crc = message[12 + byte_int(headers_byte_length) +
- payload_length: 12 +
- byte_int(headers_byte_length) +
- payload_length + 4]
-
- if not validate_crc(total_byte_length + headers_byte_length,
- prelude_crc):
- raise CRCValidationError(
- {"Checksum Mismatch, MessageCRC of " +
- str(calculate_crc(total_byte_length +
- headers_byte_length)) +
- " does not equal expected CRC of " +
- str(byte_int(prelude_crc))})
-
- if not validate_crc(message[0:len(message)-4], message_crc):
- raise CRCValidationError(
- {"Checksum Mismatch, MessageCRC of " +
- str(calculate_crc(message)) +
- " does not equal expected CRC of " +
- str(byte_int(message_crc))})
-
- header_map = self.__extract_header(header, byte_int(headers_byte_length))
-
- if header_map["message-type"] == EVENT:
- # Parse message only when event-type is Records,
- # Progress, Stats. Break the loop if event type is End
- # Do nothing if event type is Cont
- if header_map["event-type"] == EVENT_RECORDS or \
- header_map["event-type"] == EVENT_PROGRESS or \
- header_map["event-type"] == EVENT_STATS:
- self.__parse_message(header_map, payload,
- payload_length, rec)
-
- if header_map["event-type"] == EVENT_END:
- end_status = True
- if header_map["message-type"] == ERROR:
- self.__parse_message(header_map, payload, payload_length, rec)
- end_status = True
- return end_status
-
- def __read(self, num_bytes):
- """
- extract each record from the response body ... and buffer it.
- send only up to requested bytes such as message[:num_bytes]
- rest is buffered and added to the next iteration.
- """
- if len(self.remaining_bytes) == 0:
- res = self.__extract_message()
- if len(res) == 0:
- return b''
- else:
- self.remaining_bytes = res
-
- if num_bytes < len(self.remaining_bytes):
- result = self.remaining_bytes[:num_bytes]
- del self.remaining_bytes[:num_bytes]
- return result
- else:
- left_in_buffer = self.remaining_bytes[:len(self.remaining_bytes)]
- del self.remaining_bytes[:len(left_in_buffer)]
- return left_in_buffer
-
- def stream(self, num_bytes):
- """
- streams the response
- """
- while True:
- x = self.__read(num_bytes)
- if x == b'':
- break
- elif len(x) < num_bytes:
- x += self.__read(num_bytes-len(x))
- yield x.decode('utf-8') if isinstance(x, bytearray) else x
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio/signer.py new/minio-5.0.1/minio/signer.py
--- old/minio-4.0.21/minio/signer.py 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/minio/signer.py 2019-09-18 23:07:19.000000000 +0200
@@ -251,7 +251,9 @@
:param headers: HTTP header dictionary.
:param content_sha256: Content sha256 hexdigest string.
"""
- lines = [method, parsed_url.path, parsed_url.query]
+ # Should not encode ~. Decode it back if present.
+ parsed_url_path = parsed_url.path.replace("%7E", "~")
+ lines = [method, parsed_url_path, parsed_url.query]
# Headers added to canonical request.
header_lines = []
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio.egg-info/PKG-INFO new/minio-5.0.1/minio.egg-info/PKG-INFO
--- old/minio-4.0.21/minio.egg-info/PKG-INFO 2019-08-28 21:41:17.000000000 +0200
+++ new/minio-5.0.1/minio.egg-info/PKG-INFO 2019-09-18 23:08:50.000000000 +0200
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: minio
-Version: 4.0.21
+Version: 5.0.1
Summary: MinIO Python Library for Amazon S3 Compatible Cloud Storage for Python
Home-page: https://github.com/minio/minio-py
Author: MinIO, Inc.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/minio.egg-info/SOURCES.txt new/minio-5.0.1/minio.egg-info/SOURCES.txt
--- old/minio-4.0.21/minio.egg-info/SOURCES.txt 2019-08-28 21:41:17.000000000 +0200
+++ new/minio-5.0.1/minio.egg-info/SOURCES.txt 2019-09-18 23:08:50.000000000 +0200
@@ -49,8 +49,6 @@
minio/helpers.py
minio/parsers.py
minio/post_policy.py
-minio/select_object_options.py
-minio/select_object_reader.py
minio/signer.py
minio/sse.py
minio/thread_pool.py
@@ -60,6 +58,11 @@
minio.egg-info/dependency_links.txt
minio.egg-info/requires.txt
minio.egg-info/top_level.txt
+minio/select/__init__.py
+minio/select/errors.py
+minio/select/helpers.py
+minio/select/options.py
+minio/select/reader.py
tests/__init__.py
tests/functional_test.sh
tests/unit_test.sh
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/setup.py new/minio-5.0.1/setup.py
--- old/minio-4.0.21/setup.py 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/setup.py 2019-09-18 23:07:19.000000000 +0200
@@ -37,6 +37,7 @@
packages = [
'minio',
+ 'minio.select',
]
requires = [
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/tests/functional/tests.py new/minio-5.0.1/tests/functional/tests.py
--- old/minio-4.0.21/tests/functional/tests.py 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/tests/functional/tests.py 2019-09-18 23:07:19.000000000 +0200
@@ -16,13 +16,13 @@
# limitations under the License.
from __future__ import division
+from __future__ import absolute_import
import os
import io
import csv
import sys
-from io import BytesIO
from sys import exit
import uuid
import shutil
@@ -44,10 +44,10 @@
from minio.error import (APINotImplemented, NoSuchBucketPolicy, ResponseError,
PreconditionFailed, BucketAlreadyOwnedByYou,
BucketAlreadyExists, InvalidBucketError)
-from minio.select_object_options import (SelectObjectOptions, CSVInput,
- RequestProgress, InputSerialization,
- OutputSerialization, CSVOutput)
-from minio.select_object_reader import (calculate_crc)
+from minio.select.options import (SelectObjectOptions, CSVInput,
+ RequestProgress, InputSerialization,
+ OutputSerialization, CSVOutput)
+from minio.select.helpers import (calculate_crc)
from minio.sse import SSE_C
from minio.sse import copy_SSE_C
@@ -291,8 +291,8 @@
try:
client.make_bucket(bucket_name)
content = io.BytesIO(b"col1,col2,col3\none,two,three\nX,Y,Z\n")
- expected_crc = calculate_crc(content.getbuffer())
- client.put_object(bucket_name, csvfile, content, content.getbuffer().nbytes)
+ expected_crc = calculate_crc(content.getvalue())
+ client.put_object(bucket_name, csvfile, content, len(content.getvalue()))
options = SelectObjectOptions(
expression="select * from s3object",
@@ -319,11 +319,11 @@
)
data = client.select_object_content(bucket_name, csvfile, options)
# Get the records
- records = ""
+ records = io.BytesIO()
for d in data.stream(10*1024):
- records += d
- generated_crc = calculate_crc(str.encode(records))
+ records.write(d.encode('utf-8'))
+ generated_crc = calculate_crc(records.getvalue())
if expected_crc != generated_crc:
raise ValueError('Data mismatch Expected : "col1,col2,col3\none,two,three\nX,Y,Z\n"',
'Received {}', records)
@@ -2062,10 +2062,8 @@
log_output = LogOutput(client.get_bucket_notification, 'test_get_bucket_notification')
test_get_bucket_notification(client, log_output)
- # getBuffer() of io.BytesIO is supported in Python3.
- if sys.version_info.major == 3:
- log_output = LogOutput(client.select_object_content, 'test_select_object_content')
- test_select_object_content(client, log_output)
+ log_output = LogOutput(client.select_object_content, 'test_select_object_content')
+ test_select_object_content(client, log_output)
else:
# Quick mode tests
@@ -2114,10 +2112,8 @@
log_output = LogOutput(client.copy_object, 'test_copy_object_no_copy_condition')
test_copy_object_no_copy_condition(client, log_output)
- # getBuffer() of io.BytesIO is supported in Python3.
- if sys.version_info.major == 3:
- log_output = LogOutput(client.select_object_content, 'test_select_object_content')
- test_select_object_content(client, log_output)
+ log_output = LogOutput(client.select_object_content, 'test_select_object_content')
+ test_select_object_content(client, log_output)
if secure:
log_output = LogOutput(client.copy_object, 'test_copy_object_with_sse')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/minio-4.0.21/tests/unit/generate_xml_test.py new/minio-5.0.1/tests/unit/generate_xml_test.py
--- old/minio-4.0.21/tests/unit/generate_xml_test.py 2019-08-28 21:40:03.000000000 +0200
+++ new/minio-5.0.1/tests/unit/generate_xml_test.py 2019-09-18 23:07:19.000000000 +0200
@@ -21,13 +21,12 @@
from minio.xml_marshal import (xml_marshal_bucket_constraint,
xml_marshal_complete_multipart_upload,
xml_marshal_select)
-from minio.select_object_options import (SelectObjectOptions,
- CSVInput,
- RequestProgress,
- InputSerialization,
- OutputSerialization,
- CSVOutput)
-
+from minio.select.options import (SelectObjectOptions,
+ CSVInput,
+ RequestProgress,
+ InputSerialization,
+ OutputSerialization,
+ CSVOutput)
class GenerateRequestTest(TestCase):
def test_generate_bucket_constraint(self):