Hello community, here is the log from the commit of package python-sklearn-pandas for openSUSE:Factory checked in at 2018-09-04 22:56:28 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-sklearn-pandas (Old) and /work/SRC/openSUSE:Factory/.python-sklearn-pandas.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "python-sklearn-pandas" Tue Sep 4 22:56:28 2018 rev:2 rq:632817 version:1.7.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-sklearn-pandas/python-sklearn-pandas.changes 2018-06-15 14:37:27.239702383 +0200 +++ /work/SRC/openSUSE:Factory/.python-sklearn-pandas.new/python-sklearn-pandas.changes 2018-09-04 22:56:29.393066440 +0200 @@ -1,0 +2,14 @@ +Sun Sep 2 16:30:43 UTC 2018 - arun@gmx.de + +- specfile: + * remove devel from noarch + * be more specific in %files section + +- update to version 1.7.0: + * Fix issues with unicode names in get_names (#160). + * Update to build using numpy==1.14 and python==3.6 (#154). + * Add strategy and replacement parameters to CategoricalImputer to + allow imputing with values other than the mode (#144), (#161). + * Preserve input data types when no transform is supplied (#138). 
+ +------------------------------------------------------------------- Old: ---- sklearn-pandas-1.6.0.tar.gz New: ---- sklearn-pandas-1.7.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-sklearn-pandas.spec ++++++ --- /var/tmp/diff_new_pack.QnTBTq/_old 2018-09-04 22:56:30.097068844 +0200 +++ /var/tmp/diff_new_pack.QnTBTq/_new 2018-09-04 22:56:30.101068858 +0200 @@ -18,17 +18,21 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-sklearn-pandas -Version: 1.6.0 +Version: 1.7.0 Release: 0 Summary: Pandas integration with sklearn -License: Zlib and BSD-2-Clause +License: Zlib AND BSD-2-Clause Group: Development/Languages/Python -Url: https://github.com/paulgb/sklearn-pandas +URL: https://github.com/paulgb/sklearn-pandas Source: https://files.pythonhosted.org/packages/source/s/sklearn-pandas/sklearn-pandas-%{version}.tar.gz -BuildRequires: %{python_module devel} BuildRequires: %{python_module setuptools} BuildRequires: fdupes BuildRequires: python-rpm-macros +Requires: python-numpy >= 1.6.1 +Requires: python-pandas >= 0.11.0 +Requires: python-scikit-learn >= 0.15.0 +Requires: python-scipy >= 0.14 +BuildArch: noarch # SECTION test requirements BuildRequires: %{python_module mock} BuildRequires: %{python_module numpy >= 1.6.1} @@ -37,12 +41,6 @@ BuildRequires: %{python_module scikit-learn >= 0.15.0} BuildRequires: %{python_module scipy >= 0.14} # /SECTION -Requires: python-numpy >= 1.6.1 -Requires: python-pandas >= 0.11.0 -Requires: python-scikit-learn >= 0.15.0 -Requires: python-scipy >= 0.14 -BuildArch: noarch - %python_subpackages %description @@ -62,6 +60,6 @@ %files %{python_files} %doc README.rst %license LICENSE -%{python_sitelib}/* +%{python_sitelib}/sklearn_pandas* %changelog ++++++ sklearn-pandas-1.6.0.tar.gz -> sklearn-pandas-1.7.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' 
old/sklearn-pandas-1.6.0/PKG-INFO new/sklearn-pandas-1.7.0/PKG-INFO --- old/sklearn-pandas-1.6.0/PKG-INFO 2017-10-28 15:46:31.000000000 +0200 +++ new/sklearn-pandas-1.7.0/PKG-INFO 2018-08-15 14:16:05.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: sklearn-pandas -Version: 1.6.0 +Version: 1.7.0 Summary: Pandas integration with sklearn Home-page: https://github.com/paulgb/sklearn-pandas Author: Israel Saeta Pérez diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sklearn-pandas-1.6.0/README.rst new/sklearn-pandas-1.7.0/README.rst --- old/sklearn-pandas-1.6.0/README.rst 2017-10-28 15:45:16.000000000 +0200 +++ new/sklearn-pandas-1.7.0/README.rst 2018-08-15 14:15:41.000000000 +0200 @@ -57,7 +57,7 @@ >>> data = pd.DataFrame({'pet': ['cat', 'dog', 'dog', 'fish', 'cat', 'dog', 'cat', 'fish'], ... 'children': [4., 6, 3, 3, 2, 3, 5, 4], - ... 'salary': [90, 24, 44, 27, 32, 59, 36, 27]}) + ... 'salary': [90., 24, 44, 27, 32, 59, 36, 27]}) Transformation Mapping ---------------------- @@ -106,7 +106,7 @@ >>> sample = pd.DataFrame({'pet': ['cat'], 'children': [5.]}) >>> np.round(mapper.transform(sample), 2) - array([[ 1. , 0. , 0. , 1.04]]) + array([[1. , 0. , 0. , 1.04]]) Output features names @@ -190,14 +190,14 @@ ... ], df_out=True) >>> np.round(mapper_df.fit_transform(data.copy()), 2) pet_cat pet_dog pet_fish children - 0 1.0 0.0 0.0 0.21 - 1 0.0 1.0 0.0 1.88 - 2 0.0 1.0 0.0 -0.63 - 3 0.0 0.0 1.0 -0.63 - 4 1.0 0.0 0.0 -1.46 - 5 0.0 1.0 0.0 -0.63 - 6 1.0 0.0 0.0 1.04 - 7 0.0 0.0 1.0 0.21 + 0 1 0 0 0.21 + 1 0 1 0 1.88 + 2 0 1 0 -0.63 + 3 0 0 1 -0.63 + 4 1 0 0 -1.46 + 5 0 1 0 -0.63 + 6 1 0 0 1.04 + 7 0 0 1 0.21 The names for the columns are the same ones present in the ``transformed_names_`` attribute. @@ -251,14 +251,14 @@ ... ('children', None) ... 
]) >>> np.round(mapper3.fit_transform(data.copy())) - array([[ 1., 0., 0., 4.], - [ 0., 1., 0., 6.], - [ 0., 1., 0., 3.], - [ 0., 0., 1., 3.], - [ 1., 0., 0., 2.], - [ 0., 1., 0., 3.], - [ 1., 0., 0., 5.], - [ 0., 0., 1., 4.]]) + array([[1., 0., 0., 4.], + [0., 1., 0., 6.], + [0., 1., 0., 3.], + [0., 0., 1., 3.], + [1., 0., 0., 2.], + [0., 1., 0., 3.], + [1., 0., 0., 5.], + [0., 0., 1., 4.]]) Applying a default transformer ****************************** @@ -329,11 +329,11 @@ ... 'col3': [0, 0, 0, None, None] ... }) >>> mapper6.fit_transform(data6) - array([[ 1., 1., 0.], - [ 1., 0., 0.], - [ 1., 1., 0.], - [ 2., 1., 0.], - [ 3., 1., 0.]]) + array([[1., 1., 0.], + [1., 0., 0.], + [1., 1., 0.], + [2., 1., 0.], + [3., 1., 0.]]) Feature selection and other supervised transformations @@ -344,14 +344,14 @@ >>> from sklearn.feature_selection import SelectKBest, chi2 >>> mapper_fs = DataFrameMapper([(['children','salary'], SelectKBest(chi2, k=1))]) >>> mapper_fs.fit_transform(data[['children','salary']], data['pet']) - array([[ 90.], - [ 24.], - [ 44.], - [ 27.], - [ 32.], - [ 59.], - [ 36.], - [ 27.]]) + array([[90.], + [24.], + [44.], + [27.], + [32.], + [59.], + [36.], + [27.]]) Working with sparse features **************************** @@ -385,11 +385,11 @@ ********************** Since the ``scikit-learn`` ``Imputer`` transformer currently only works with -numbers, ``sklearn-pandas`` provides an equivalent helper transformer that do -work with strings, substituting null values with the most frequent value in -that column. +numbers, ``sklearn-pandas`` provides an equivalent helper transformer that +works with strings, substituting null values with the most frequent value in +that column. Alternatively, you can specify a fixed value to use. 
-Example: +Example: imputing with the mode: >>> from sklearn_pandas import CategoricalImputer >>> data = np.array(['a', 'b', 'b', np.nan], dtype=object) @@ -397,10 +397,27 @@ >>> imputer.fit_transform(data) array(['a', 'b', 'b', 'b'], dtype=object) +Example: imputing with a fixed value: + + >>> from sklearn_pandas import CategoricalImputer + >>> data = np.array(['a', 'b', 'b', np.nan], dtype=object) + >>> imputer = CategoricalImputer(strategy='fixed_value', replacement='a') + >>> imputer.fit_transform(data) + array(['a', 'b', 'b', 'a'], dtype=object) + Changelog --------- +1.7.0 (2018-08-15) +****************** +* Fix issues with unicode names in ``get_names`` (#160). +* Update to build using ``numpy==1.14`` and ``python==3.6`` (#154). +* Add ``strategy`` and ``replacement`` parameters to ``CategoricalImputer`` to allow imputing + with values other than the mode (#144). +* Preserve input data types when no transform is supplied (#138). + + 1.6.0 (2017-10-28) ****************** * Add column name to exception during fit/transform (#110). 
@@ -477,7 +494,9 @@ Other contributors: +* Ariel Rossanigo (@arielrossanigo) * Arnau Gil Amat (@arnau126) +* Assaf Ben-David (@AssafBenDavid) * Cal Paterson (@calpaterson) * @defvorfu * Gustavo Sena Mafra (@gsmafra) @@ -486,6 +505,8 @@ * Jimmy Wan (@jimmywan) * Olivier Grisel (@ogrisel) * Paul Butler (@paulgb) +* Richard Miller (@rwjmiller) * Ritesh Agrawal (@ragrawal) +* Timothy Sweetser (@hacktuarial) * Vitaley Zaretskey (@vzaretsk) * Zac Stewart (@zacstewart) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sklearn-pandas-1.6.0/setup.cfg new/sklearn-pandas-1.7.0/setup.cfg --- old/sklearn-pandas-1.6.0/setup.cfg 2017-10-28 15:46:31.000000000 +0200 +++ new/sklearn-pandas-1.7.0/setup.cfg 2018-08-15 14:16:05.000000000 +0200 @@ -4,4 +4,5 @@ [egg_info] tag_build = tag_date = 0 +tag_svn_revision = 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sklearn-pandas-1.6.0/sklearn_pandas/__init__.py new/sklearn-pandas-1.7.0/sklearn_pandas/__init__.py --- old/sklearn-pandas-1.6.0/sklearn_pandas/__init__.py 2017-10-28 15:42:48.000000000 +0200 +++ new/sklearn-pandas-1.7.0/sklearn_pandas/__init__.py 2018-08-15 14:15:41.000000000 +0200 @@ -1,4 +1,4 @@ -__version__ = '1.6.0' +__version__ = '1.7.0' from .dataframe_mapper import DataFrameMapper # NOQA from .cross_validation import cross_val_score, GridSearchCV, RandomizedSearchCV # NOQA diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sklearn-pandas-1.6.0/sklearn_pandas/categorical_imputer.py new/sklearn-pandas-1.7.0/sklearn_pandas/categorical_imputer.py --- old/sklearn-pandas-1.6.0/sklearn_pandas/categorical_imputer.py 2017-04-29 18:57:39.000000000 +0200 +++ new/sklearn-pandas-1.7.0/sklearn_pandas/categorical_imputer.py 2018-08-05 17:20:13.000000000 +0200 @@ -33,6 +33,18 @@ copy : boolean, optional (default=True) If True, a copy of X will be created. 
+ strategy : string, optional (default = 'mode') + If set to 'mode', replace all instances of `missing_values` + with the modal value. If set to 'fixed_value', replace with + the value specified via `replacement`. + + replacement : string, optional (default=None) + The value that all instances of `missing_values` are replaced + with if `strategy` is set to 'fixed_value'. This is useful if + you don't want to impute with the mode, or if there are multiple + modes in your data and you want to choose a particular one. If + `strategy` is set to 'mode', this parameter is ignored. + Attributes ---------- fill_ : str @@ -40,9 +52,29 @@ """ - def __init__(self, missing_values='NaN', copy=True): + def __init__( + self, + missing_values='NaN', + strategy='mode', + replacement=None, + copy=True + ): self.missing_values = missing_values self.copy = copy + self.replacement = replacement + self.strategy = strategy + + strategies = ['fixed_value', 'mode'] + if self.strategy not in strategies: + raise ValueError( + 'Strategy {0} not in {1}'.format(self.strategy, strategies) + ) + + if self.strategy == 'fixed_value' and self.replacement is None: + raise ValueError( + 'Please specify a value for \'replacement\'' + 'when using the fixed_value strategy.' 
+ ) def fit(self, X, y=None): """ @@ -63,9 +95,13 @@ mask = _get_mask(X, self.missing_values) X = X[~mask] - - modes = pd.Series(X).mode() + if self.strategy == 'mode': + modes = pd.Series(X).mode() + elif self.strategy == 'fixed_value': + modes = np.array([self.replacement]) if modes.shape[0] == 0: + raise ValueError('Data is empty or all values are null') + elif modes.shape[0] > 1: raise ValueError('No value is repeated more than ' 'once in the column') else: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sklearn-pandas-1.6.0/sklearn_pandas/dataframe_mapper.py new/sklearn-pandas-1.7.0/sklearn_pandas/dataframe_mapper.py --- old/sklearn-pandas-1.6.0/sklearn_pandas/dataframe_mapper.py 2017-10-22 19:44:45.000000000 +0200 +++ new/sklearn-pandas-1.7.0/sklearn_pandas/dataframe_mapper.py 2018-08-05 19:04:13.000000000 +0200 @@ -253,13 +253,26 @@ else: names = _get_feature_names(transformer) if names is not None and len(names) == num_cols: - return [name + '_' + str(o) for o in names] + return ['%s_%s' % (name, o) for o in names] # otherwise, return name concatenated with '_1', '_2', etc. else: return [name + '_' + str(o) for o in range(num_cols)] else: return [name] + def get_dtypes(self, extracted): + dtypes_features = [self.get_dtype(ex) for ex in extracted] + return [dtype for dtype_feature in dtypes_features + for dtype in dtype_feature] + + def get_dtype(self, ex): + if isinstance(ex, np.ndarray) or sparse.issparse(ex): + return [ex.dtype] * ex.shape[1] + elif isinstance(ex, pd.DataFrame): + return list(ex.dtypes) + else: + raise TypeError(type(ex)) + def transform(self, X): """ Transform the given data. Assumes that fit has already been called. 
@@ -323,8 +336,15 @@ else: index = None - return pd.DataFrame(stacked, - columns=self.transformed_names_, - index=index) + # output different data types, if appropriate + dtypes = self.get_dtypes(extracted) + df_out = pd.DataFrame( + stacked, + columns=self.transformed_names_, + index=index) + # preserve types + for col, dtype in zip(self.transformed_names_, dtypes): + df_out[col] = df_out[col].astype(dtype) + return df_out else: return stacked diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sklearn-pandas-1.6.0/sklearn_pandas.egg-info/PKG-INFO new/sklearn-pandas-1.7.0/sklearn_pandas.egg-info/PKG-INFO --- old/sklearn-pandas-1.6.0/sklearn_pandas.egg-info/PKG-INFO 2017-10-28 15:46:31.000000000 +0200 +++ new/sklearn-pandas-1.7.0/sklearn_pandas.egg-info/PKG-INFO 2018-08-15 14:15:57.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: sklearn-pandas -Version: 1.6.0 +Version: 1.7.0 Summary: Pandas integration with sklearn Home-page: https://github.com/paulgb/sklearn-pandas Author: Israel Saeta Pérez