diff --git a/doc/datasets/rcv1.rst b/doc/datasets/rcv1.rst index ded38584cedab330a8b121480a9e498b866ece89..280e90f8b82fbe279e77c21116e103b159cbb31b 100644 --- a/doc/datasets/rcv1.rst +++ b/doc/datasets/rcv1.rst @@ -35,7 +35,7 @@ Each sample can be identified by its ID, ranging (with gaps) from 2286 to 810596 array([2286, 2287, 2288], dtype=int32) ``target_names``: -The target values are the topics of each sample. Each sample belongs to at least one topic, and to up to 17 topics. +The target values are the topics of each sample. Each sample belongs to at least one topic, and to up to 17 topics. There are 103 topics, each represented by a string. Their corpus frequencies span five orders of magnitude, from 5 occurrences for 'GMIL', to 381327 for 'CCAT':: >>> rcv1.target_names[:3].tolist() # doctest: +SKIP diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 15cf04b4a9e96388866f2ccd2d05cf5ffa215fc6..fc12805df4a5275cf2742cbe893ab32928326b57 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -10,7 +10,7 @@ Generalized Linear models. # Mathieu Blondel <mathieu@mblondel.org> # Lars Buitinck <L.J.Buitinck@uva.nl> # Maryan Morel <maryan.morel@polytechnique.edu> -# +# Giorgio Patrini <giorgio.patrini@anu.edu.au> # License: BSD 3 clause from __future__ import division @@ -26,20 +26,16 @@ from scipy import sparse from ..externals import six from ..externals.joblib import Parallel, delayed from ..base import BaseEstimator, ClassifierMixin, RegressorMixin -from ..utils import as_float_array, check_array, check_X_y, deprecated -from ..utils import check_random_state, column_or_1d +from ..utils import check_array, check_X_y, deprecated, as_float_array +from ..utils.validation import FLOAT_DTYPES +from ..utils import check_random_state from ..utils.extmath import safe_sparse_dot from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale from ..utils.fixes import sparse_lsqr from ..utils.seq_dataset import ArrayDataset, CSRDataset from ..utils.validation import check_is_fitted from ..exceptions import NotFittedError - - -# -# TODO: intercept for all models -# We should define a common function to center data instead of -# repeating the same code inside each fit method. +from ..preprocessing.data import normalize as f_normalize # TODO: bayesian_ridge_regression and bayesian_regression_ard # should be squashed into its respective objects. @@ -71,6 +67,8 @@ def make_dataset(X, y, sample_weight, random_state=None): return dataset, intercept_decay +@deprecated("sparse_center_data will be removed in " + "0.20. Use utilities in preprocessing.data instead") def sparse_center_data(X, y, fit_intercept, normalize=False): """ Compute information needed to center data to have mean zero along @@ -87,10 +85,9 @@ def sparse_center_data(X, y, fit_intercept, normalize=False): else: X = sp.csc_matrix(X, copy=normalize, dtype=np.float64) - X_mean, X_var = mean_variance_axis(X, axis=0) + X_offset, X_var = mean_variance_axis(X, axis=0) if normalize: # transform variance to std in-place - # XXX: currently scaled to variance=n_samples to match center_data X_var *= X.shape[0] X_std = np.sqrt(X_var, X_var) del X_var @@ -98,22 +95,23 @@ def sparse_center_data(X, y, fit_intercept, normalize=False): inplace_column_scale(X, 1. / X_std) else: X_std = np.ones(X.shape[1]) - y_mean = y.mean(axis=0) - y = y - y_mean + y_offset = y.mean(axis=0) + y = y - y_offset else: - X_mean = np.zeros(X.shape[1]) + X_offset = np.zeros(X.shape[1]) X_std = np.ones(X.shape[1]) - y_mean = 0. 
if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype) + y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype) - return X, y, X_mean, y_mean, X_std + return X, y, X_offset, y_offset, X_std +@deprecated("center_data will be removed in " + "0.20. Use utilities in preprocessing.data instead") def center_data(X, y, fit_intercept, normalize=False, copy=True, sample_weight=None): """ Centers data to have mean zero along axis 0. This is here because nearly all linear models will want their data to be centered. - If sample_weight is not None, then the weighted mean of X and y is zero, and not the mean itself """ @@ -122,26 +120,95 @@ def center_data(X, y, fit_intercept, normalize=False, copy=True, if isinstance(sample_weight, numbers.Number): sample_weight = None if sp.issparse(X): - X_mean = np.zeros(X.shape[1]) + X_offset = np.zeros(X.shape[1]) X_std = np.ones(X.shape[1]) else: - X_mean = np.average(X, axis=0, weights=sample_weight) - X -= X_mean + X_offset = np.average(X, axis=0, weights=sample_weight) + X -= X_offset + # XXX: currently scaled to variance=n_samples if normalize: - # XXX: currently scaled to variance=n_samples X_std = np.sqrt(np.sum(X ** 2, axis=0)) X_std[X_std == 0] = 1 X /= X_std else: X_std = np.ones(X.shape[1]) - y_mean = np.average(y, axis=0, weights=sample_weight) - y = y - y_mean + y_offset = np.average(y, axis=0, weights=sample_weight) + y = y - y_offset else: - X_mean = np.zeros(X.shape[1]) + X_offset = np.zeros(X.shape[1]) X_std = np.ones(X.shape[1]) - y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype) - return X, y, X_mean, y_mean, X_std + y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype) + return X, y, X_offset, y_offset, X_std + + +def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True, + sample_weight=None, return_mean=False): + """ + Centers data to have mean zero along axis 0. If fit_intercept=False or if + the X is a sparse matrix, no centering is done, but normalization can still + be applied. The function returns the statistics necessary to reconstruct + the input data, which are X_offset, y_offset, X_scale, such that the output + + X = (X - X_offset) / X_scale + + X_scale is the L2 norm of X - X_offset. If sample_weight is not None, + then the weighted mean of X and y is zero, and not the mean itself. If + return_mean=True, the mean, eventually weighted, is returned, independently + of whether X was centered (option used for optimization with sparse data in + coordinate_descend). + + This is here because nearly all linear models will want their data to be + centered. + """ + + if isinstance(sample_weight, numbers.Number): + sample_weight = None + + X = check_array(X, copy=copy, accept_sparse=['csr', 'csc'], + dtype=FLOAT_DTYPES) + + if fit_intercept: + if sp.issparse(X): + X_offset, X_var = mean_variance_axis(X, axis=0) + if not return_mean: + X_offset = np.zeros(X.shape[1]) + + if normalize: + + # TODO: f_normalize could be used here as well but the function + # inplace_csr_row_normalize_l2 must be changed such that it + # can return also the norms computed internally + + # transform variance to norm in-place + X_var *= X.shape[0] + X_scale = np.sqrt(X_var, X_var) + del X_var + X_scale[X_scale == 0] = 1 + inplace_column_scale(X, 1. 
/ X_scale) + else: + X_scale = np.ones(X.shape[1]) + + else: + X_offset = np.average(X, axis=0, weights=sample_weight) + X -= X_offset + if normalize: + X, X_scale = f_normalize(X, axis=0, copy=False, + return_norm=True) + else: + X_scale = np.ones(X.shape[1]) + y_offset = np.average(y, axis=0, weights=sample_weight) + y = y - y_offset + else: + X_offset = np.zeros(X.shape[1]) + X_scale = np.ones(X.shape[1]) + y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype) + + return X, y, X_offset, y_offset, X_scale + +# TODO: _rescale_data should be factored into _preprocess_data. +# Currently, the fact that sag implements its own way to deal with +# sample_weight makes the refactoring tricky. def _rescale_data(X, y, sample_weight): """Rescale data so as to support sample_weight""" @@ -200,14 +267,14 @@ class LinearModel(six.with_metaclass(ABCMeta, BaseEstimator)): """ return self._decision_function(X) - _center_data = staticmethod(center_data) + _preprocess_data = staticmethod(_preprocess_data) - def _set_intercept(self, X_mean, y_mean, X_std): + def _set_intercept(self, X_offset, y_offset, X_scale): """Set the intercept_ """ if self.fit_intercept: - self.coef_ = self.coef_ / X_std - self.intercept_ = y_mean - np.dot(X_mean, self.coef_.T) + self.coef_ = self.coef_ / X_scale + self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T) else: self.intercept_ = 0. @@ -360,6 +427,13 @@ class LinearRegression(LinearModel, RegressorMixin): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If True, X will be copied; else, it may be overwritten. @@ -435,13 +509,12 @@ class LinearRegression(LinearModel, RegressorMixin): X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], y_numeric=True, multi_output=True) - if ((sample_weight is not None) and np.atleast_1d( - sample_weight).ndim > 1): - sample_weight = column_or_1d(sample_weight, warn=True) + if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1: + raise ValueError("Sample weights must be 1D array or scalar") - X, y, X_mean, y_mean, X_std = self._center_data( - X, y, self.fit_intercept, self.normalize, self.copy_X, - sample_weight=sample_weight) + X, y, X_offset, y_offset, X_scale = self._preprocess_data( + X, y, fit_intercept=self.fit_intercept, normalize=self.normalize, + copy=self.copy_X, sample_weight=sample_weight) if sample_weight is not None: # Sample weight can be implemented via a simple rescaling. 
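The new ``_preprocess_data`` helper above returns the statistics needed to undo the preprocessing. A minimal sketch of its contract on dense input; the import path follows the test module changed later in this patch, and the lines are marked ``# doctest: +SKIP`` because the helper is private and not part of the public API::

    >>> import numpy as np
    >>> from sklearn.linear_model.base import _preprocess_data  # doctest: +SKIP
    >>> X = np.array([[1., 2.], [3., 4.], [5., 12.]])
    >>> y = np.array([1., 2., 3.])
    >>> Xt, yt, X_offset, y_offset, X_scale = _preprocess_data(
    ...     X, y, fit_intercept=True, normalize=True)  # doctest: +SKIP
    >>> np.allclose(Xt, (X - X_offset) / X_scale)  # doctest: +SKIP
    True
    >>> np.allclose(X_scale, np.sqrt(np.sum((X - X_offset) ** 2, axis=0)))  # doctest: +SKIP
    True
    >>> np.allclose(yt, y - y_offset)  # doctest: +SKIP
    True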
@@ -466,7 +539,7 @@ class LinearRegression(LinearModel, RegressorMixin): if y.ndim == 1: self.coef_ = np.ravel(self.coef_) - self._set_intercept(X_mean, y_mean, X_std) + self._set_intercept(X_offset, y_offset, X_scale) return self @@ -476,15 +549,16 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy): if sparse.isspmatrix(X): precompute = False - X, y, X_mean, y_mean, X_std = sparse_center_data( - X, y, fit_intercept, normalize) + X, y, X_offset, y_offset, X_scale = _preprocess_data( + X, y, fit_intercept=fit_intercept, normalize=normalize, + return_mean=True) else: # copy was done in fit if necessary - X, y, X_mean, y_mean, X_std = center_data( - X, y, fit_intercept, normalize, copy=copy) + X, y, X_offset, y_offset, X_scale = _preprocess_data( + X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy) if hasattr(precompute, '__array__') and ( - fit_intercept and not np.allclose(X_mean, np.zeros(n_features)) - or normalize and not np.allclose(X_std, np.ones(n_features))): + fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or + normalize and not np.allclose(X_scale, np.ones(n_features))): warnings.warn("Gram matrix was provided but X was centered" " to fit intercept, " "or X was normalized : recomputing Gram matrix.", @@ -521,4 +595,4 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy): order='F') np.dot(y.T, X, out=Xy.T) - return X, y, X_mean, y_mean, X_std, precompute, Xy + return X, y, X_offset, y_offset, X_scale, precompute, Xy diff --git a/sklearn/linear_model/bayes.py b/sklearn/linear_model/bayes.py index 1cf541721a29397b0d0083169b42ffa006eb1784..79f913b63448964598448b987b7a7a176d791403 100644 --- a/sklearn/linear_model/bayes.py +++ b/sklearn/linear_model/bayes.py @@ -65,6 +65,13 @@ class BayesianRidge(LinearModel, RegressorMixin): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If True, X will be copied; else, it may be overwritten. @@ -135,11 +142,11 @@ class BayesianRidge(LinearModel, RegressorMixin): self : returns an instance of self. """ X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True) - X, y, X_mean, y_mean, X_std = self._center_data( + X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, self.fit_intercept, self.normalize, self.copy_X) n_samples, n_features = X.shape - ### Initialization of the values of the parameters + # Initialization of the values of the parameters alpha_ = 1. / np.var(y) lambda_ = 1. 
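Returning to the LinearRegression hunk above: sample weights are folded in by ``_rescale_data``, i.e. by multiplying the rows of X and y by the square root of the weights (the same identity the updated tests check below). A small sketch with ``fit_intercept=False``, where the equivalence is easiest to see; the data are arbitrary::

    >>> import numpy as np
    >>> from sklearn.linear_model import LinearRegression
    >>> rng = np.random.RandomState(0)
    >>> X, y = rng.rand(5, 2), rng.rand(5)
    >>> w = 1.0 + rng.rand(5)
    >>> coef_weighted = LinearRegression(fit_intercept=False).fit(
    ...     X, y, sample_weight=w).coef_
    >>> coef_rescaled = LinearRegression(fit_intercept=False).fit(
    ...     X * np.sqrt(w)[:, np.newaxis], y * np.sqrt(w)).coef_
    >>> np.allclose(coef_weighted, coef_rescaled)
    True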
@@ -156,10 +163,10 @@ class BayesianRidge(LinearModel, RegressorMixin): U, S, Vh = linalg.svd(X, full_matrices=False) eigen_vals_ = S ** 2 - ### Convergence loop of the bayesian ridge regression + # Convergence loop of the bayesian ridge regression for iter_ in range(self.n_iter): - ### Compute mu and sigma + # Compute mu and sigma # sigma_ = lambda_ / alpha_ * np.eye(n_features) + np.dot(X.T, X) # coef_ = sigma_^-1 * XT * y if n_samples > n_features: @@ -178,28 +185,28 @@ class BayesianRidge(LinearModel, RegressorMixin): logdet_sigma_[:n_samples] += alpha_ * eigen_vals_ logdet_sigma_ = - np.sum(np.log(logdet_sigma_)) - ### Update alpha and lambda + # Update alpha and lambda rmse_ = np.sum((y - np.dot(X, coef_)) ** 2) - gamma_ = (np.sum((alpha_ * eigen_vals_) - / (lambda_ + alpha_ * eigen_vals_))) - lambda_ = ((gamma_ + 2 * lambda_1) - / (np.sum(coef_ ** 2) + 2 * lambda_2)) - alpha_ = ((n_samples - gamma_ + 2 * alpha_1) - / (rmse_ + 2 * alpha_2)) - - ### Compute the objective function + gamma_ = (np.sum((alpha_ * eigen_vals_) / + (lambda_ + alpha_ * eigen_vals_))) + lambda_ = ((gamma_ + 2 * lambda_1) / + (np.sum(coef_ ** 2) + 2 * lambda_2)) + alpha_ = ((n_samples - gamma_ + 2 * alpha_1) / + (rmse_ + 2 * alpha_2)) + + # Compute the objective function if self.compute_score: s = lambda_1 * log(lambda_) - lambda_2 * lambda_ s += alpha_1 * log(alpha_) - alpha_2 * alpha_ - s += 0.5 * (n_features * log(lambda_) - + n_samples * log(alpha_) - - alpha_ * rmse_ - - (lambda_ * np.sum(coef_ ** 2)) - - logdet_sigma_ - - n_samples * log(2 * np.pi)) + s += 0.5 * (n_features * log(lambda_) + + n_samples * log(alpha_) - + alpha_ * rmse_ - + (lambda_ * np.sum(coef_ ** 2)) - + logdet_sigma_ - + n_samples * log(2 * np.pi)) self.scores_.append(s) - ### Check for convergence + # Check for convergence if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol: if verbose: print("Convergence after ", str(iter_), " iterations") @@ -210,7 +217,7 @@ class BayesianRidge(LinearModel, RegressorMixin): self.lambda_ = lambda_ self.coef_ = coef_ - self._set_intercept(X_mean, y_mean, X_std) + self._set_intercept(X_offset, y_offset, X_scale) return self @@ -269,6 +276,13 @@ class ARDRegression(LinearModel, RegressorMixin): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True. If True, X will be copied; else, it may be overwritten. @@ -351,10 +365,10 @@ class ARDRegression(LinearModel, RegressorMixin): n_samples, n_features = X.shape coef_ = np.zeros(n_features) - X, y, X_mean, y_mean, X_std = self._center_data( + X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, self.fit_intercept, self.normalize, self.copy_X) - ### Launch the convergence loop + # Launch the convergence loop keep_lambda = np.ones(n_features, dtype=bool) lambda_1 = self.lambda_1 @@ -363,51 +377,51 @@ class ARDRegression(LinearModel, RegressorMixin): alpha_2 = self.alpha_2 verbose = self.verbose - ### Initialization of the values of the parameters + # Initialization of the values of the parameters alpha_ = 1. 
/ np.var(y) lambda_ = np.ones(n_features) self.scores_ = list() coef_old_ = None - ### Iterative procedure of ARDRegression + # Iterative procedure of ARDRegression for iter_ in range(self.n_iter): - ### Compute mu and sigma (using Woodbury matrix identity) + # Compute mu and sigma (using Woodbury matrix identity) sigma_ = pinvh(np.eye(n_samples) / alpha_ + np.dot(X[:, keep_lambda] * np.reshape(1. / lambda_[keep_lambda], [1, -1]), X[:, keep_lambda].T)) - sigma_ = np.dot(sigma_, X[:, keep_lambda] - * np.reshape(1. / lambda_[keep_lambda], [1, -1])) - sigma_ = - np.dot(np.reshape(1. / lambda_[keep_lambda], [-1, 1]) - * X[:, keep_lambda].T, sigma_) + sigma_ = np.dot(sigma_, X[:, keep_lambda] * + np.reshape(1. / lambda_[keep_lambda], [1, -1])) + sigma_ = - np.dot(np.reshape(1. / lambda_[keep_lambda], [-1, 1]) * + X[:, keep_lambda].T, sigma_) sigma_.flat[::(sigma_.shape[1] + 1)] += 1. / lambda_[keep_lambda] coef_[keep_lambda] = alpha_ * np.dot( sigma_, np.dot(X[:, keep_lambda].T, y)) - ### Update alpha and lambda + # Update alpha and lambda rmse_ = np.sum((y - np.dot(X, coef_)) ** 2) gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_) - lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) - / ((coef_[keep_lambda]) ** 2 - + 2. * lambda_2)) - alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) - / (rmse_ + 2. * alpha_2)) + lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) / + ((coef_[keep_lambda]) ** 2 + + 2. * lambda_2)) + alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) / + (rmse_ + 2. * alpha_2)) - ### Prune the weights with a precision over a threshold + # Prune the weights with a precision over a threshold keep_lambda = lambda_ < self.threshold_lambda coef_[~keep_lambda] = 0 - ### Compute the objective function + # Compute the objective function if self.compute_score: s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum() s += alpha_1 * log(alpha_) - alpha_2 * alpha_ - s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) - + np.sum(np.log(lambda_))) + s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) + + np.sum(np.log(lambda_))) s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum()) self.scores_.append(s) - ### Check for convergence + # Check for convergence if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol: if verbose: print("Converged after %s iterations" % iter_) @@ -418,5 +432,5 @@ class ARDRegression(LinearModel, RegressorMixin): self.alpha_ = alpha_ self.sigma_ = sigma_ self.lambda_ = lambda_ - self._set_intercept(X_mean, y_mean, X_std) + self._set_intercept(X_offset, y_offset, X_scale) return self diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 1711fbe481be5e8a278d158cb83cc8f02bb5e340..cdad75b2f8c9d368aeb35badf4274fc4e251fb15 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -14,7 +14,7 @@ from scipy import sparse from .base import LinearModel, _pre_fit from ..base import RegressorMixin -from .base import center_data, sparse_center_data +from .base import _preprocess_data from ..utils import check_array, check_X_y, deprecated from ..utils.validation import check_random_state from ..model_selection import check_cv @@ -65,7 +65,14 @@ def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True, Whether to fit an intercept or not normalize : boolean, optional, default False - If ``True``, the regressors X will be normalized before regression. + If True, the regressors X will be normalized before regression. 
+ This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. @@ -80,16 +87,17 @@ def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True, copy=(copy_X and fit_intercept and not X_sparse)) if not X_sparse: # X can be touched inplace thanks to the above line - X, y, _, _, _ = center_data(X, y, fit_intercept, - normalize, copy=False) + X, y, _, _, _ = _preprocess_data(X, y, fit_intercept, + normalize, copy=False) Xy = safe_sparse_dot(X.T, y, dense_output=True) if sparse_center: # Workaround to find alpha_max for sparse matrices. # since we should not destroy the sparsity of such matrices. - _, _, X_mean, _, X_std = sparse_center_data(X, y, fit_intercept, - normalize) - mean_dot = X_mean * np.sum(y) + _, _, X_offset, _, X_scale = _preprocess_data(X, y, fit_intercept, + normalize, + return_mean=True) + mean_dot = X_offset * np.sum(y) if Xy.ndim == 1: Xy = Xy[:, np.newaxis] @@ -98,7 +106,7 @@ def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True, if fit_intercept: Xy -= mean_dot[:, np.newaxis] if normalize: - Xy /= X_std[:, np.newaxis] + Xy /= X_scale[:, np.newaxis] alpha_max = (np.sqrt(np.sum(Xy ** 2, axis=1)).max() / (n_samples * l1_ratio)) @@ -383,17 +391,17 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, # MultiTaskElasticNet does not support sparse matrices if not multi_output and sparse.isspmatrix(X): - if 'X_mean' in params: + if 'X_offset' in params: # As sparse matrices are not actually centered we need this # to be passed to the CD solver. - X_sparse_scaling = params['X_mean'] / params['X_std'] + X_sparse_scaling = params['X_offset'] / params['X_scale'] else: X_sparse_scaling = np.zeros(n_features) # X should be normalized and fit already if function is called # from ElasticNet.fit if check_input: - X, y, X_mean, y_mean, X_std, precompute, Xy = \ + X, y, X_offset, y_offset, X_scale, precompute, Xy = \ _pre_fit(X, y, Xy, precompute, normalize=False, fit_intercept=False, copy=False) if alphas is None: @@ -529,7 +537,14 @@ class ElasticNet(LinearModel, RegressorMixin): data is assumed to be already centered. normalize : boolean, optional, default False - If ``True``, the regressors X will be normalized before regression. + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. precompute : True | False | array-like Whether to use a precomputed Gram matrix to speed up @@ -642,6 +657,12 @@ class ElasticNet(LinearModel, RegressorMixin): "well. 
You are advised to use the LinearRegression " "estimator", stacklevel=2) + if (isinstance(self.precompute, six.string_types) and + self.precompute == 'auto'): + warnings.warn("Setting precompute to 'auto', was found to be " + "slower even when n_samples > n_features. Hence " + "it will be removed in 0.18.", + DeprecationWarning, stacklevel=2) # We expect X and y to be already float64 Fortran ordered arrays # when bypassing checks if check_input: @@ -652,7 +673,7 @@ class ElasticNet(LinearModel, RegressorMixin): multi_output=True, y_numeric=True) y = check_array(y, dtype=np.float64, order='F', copy=False, ensure_2d=False) - X, y, X_mean, y_mean, X_std, precompute, Xy = \ + X, y, X_offset, y_offset, X_scale, precompute, Xy = \ _pre_fit(X, y, None, self.precompute, self.normalize, self.fit_intercept, copy=False) if y.ndim == 1: @@ -689,7 +710,7 @@ class ElasticNet(LinearModel, RegressorMixin): precompute=precompute, Xy=this_Xy, fit_intercept=False, normalize=False, copy_X=True, verbose=False, tol=self.tol, positive=self.positive, - X_mean=X_mean, X_std=X_std, return_n_iter=True, + X_offset=X_offset, X_scale=X_scale, return_n_iter=True, coef_init=coef_[k], max_iter=self.max_iter, random_state=self.random_state, selection=self.selection, @@ -702,7 +723,7 @@ class ElasticNet(LinearModel, RegressorMixin): self.n_iter_ = self.n_iter_[0] self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_]) - self._set_intercept(X_mean, y_mean, X_std) + self._set_intercept(X_offset, y_offset, X_scale) # return self for chaining fit and predict calls return self @@ -741,8 +762,9 @@ class ElasticNet(LinearModel, RegressorMixin): """ check_is_fitted(self, 'n_iter_') if sparse.isspmatrix(X): - return np.ravel(safe_sparse_dot(self.coef_, X.T, dense_output=True) - + self.intercept_) + return np.ravel(safe_sparse_dot(self.coef_, X.T, + dense_output=True) + + self.intercept_) else: return super(ElasticNet, self)._decision_function(X) @@ -777,7 +799,14 @@ class Lasso(ElasticNet): (e.g. data is expected to be already centered). normalize : boolean, optional, default False - If ``True``, the regressors X will be normalized before regression. + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. 
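The paragraph added to every ``normalize`` docstring in this patch recommends standardizing explicitly rather than relying on ``normalize=True``. A minimal sketch of the two options for ``Lasso`` (the alpha value is arbitrary)::

    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> from sklearn.linear_model import Lasso
    >>> # recommended: standardize explicitly, keep normalize=False (the default)
    >>> standardized = make_pipeline(StandardScaler(), Lasso(alpha=0.1))
    >>> # built-in normalization: centers X and scales columns to unit L2 norm
    >>> l2_normalized = Lasso(alpha=0.1, normalize=True)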
@@ -934,14 +963,14 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None, # Fall back to default enet_multitask precompute = False - X_train, y_train, X_mean, y_mean, X_std, precompute, Xy = \ + X_train, y_train, X_offset, y_offset, X_scale, precompute, Xy = \ _pre_fit(X_train, y_train, None, precompute, normalize, fit_intercept, copy=False) path_params = path_params.copy() path_params['Xy'] = Xy - path_params['X_mean'] = X_mean - path_params['X_std'] = X_std + path_params['X_offset'] = X_offset + path_params['X_scale'] = X_scale path_params['precompute'] = precompute path_params['copy_X'] = False path_params['alphas'] = alphas @@ -958,14 +987,14 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None, if y.ndim == 1: # Doing this so that it becomes coherent with multioutput. coefs = coefs[np.newaxis, :, :] - y_mean = np.atleast_1d(y_mean) + y_offset = np.atleast_1d(y_offset) y_test = y_test[:, np.newaxis] if normalize: - nonzeros = np.flatnonzero(X_std) - coefs[:, nonzeros] /= X_std[nonzeros][:, np.newaxis] + nonzeros = np.flatnonzero(X_scale) + coefs[:, nonzeros] /= X_scale[nonzeros][:, np.newaxis] - intercepts = y_mean[:, np.newaxis] - np.dot(X_mean, coefs) + intercepts = y_offset[:, np.newaxis] - np.dot(X_offset, coefs) if sparse.issparse(X_test): n_order, n_features, n_alphas = coefs.shape # Work around for sparse matices since coefs is a 3-D numpy array. @@ -1069,7 +1098,7 @@ class LinearModelCV(six.with_metaclass(ABCMeta, LinearModel)): X = check_array(X, 'csc', copy=False) if sparse.isspmatrix(X): if (hasattr(reference_to_old_X, "data") and - not np.may_share_memory(reference_to_old_X.data, X.data)): + not np.may_share_memory(reference_to_old_X.data, X.data)): # X is a sparse matrix and has been copied copy_X = False elif not np.may_share_memory(reference_to_old_X, X): @@ -1256,7 +1285,14 @@ class LassoCV(LinearModelCV, RegressorMixin): (e.g. data is expected to be already centered). normalize : boolean, optional, default False - If ``True``, the regressors X will be normalized before regression. + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. @@ -1403,7 +1439,14 @@ class ElasticNetCV(LinearModelCV, RegressorMixin): (e.g. data is expected to be already centered). normalize : boolean, optional, default False - If ``True``, the regressors X will be normalized before regression. + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. @@ -1528,7 +1571,14 @@ class MultiTaskElasticNet(Lasso): (e.g. 
data is expected to be already centered). normalize : boolean, optional, default False - If ``True``, the regressors X will be normalized before regression. + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. @@ -1650,7 +1700,7 @@ class MultiTaskElasticNet(Lasso): raise ValueError("X and y have inconsistent dimensions (%d != %d)" % (n_samples, y.shape[0])) - X, y, X_mean, y_mean, X_std = center_data( + X, y, X_offset, y_offset, X_scale = _preprocess_data( X, y, self.fit_intercept, self.normalize, copy=False) if not self.warm_start or self.coef_ is None: @@ -1671,7 +1721,7 @@ class MultiTaskElasticNet(Lasso): self.coef_, l1_reg, l2_reg, X, y, self.max_iter, self.tol, check_random_state(self.random_state), random) - self._set_intercept(X_mean, y_mean, X_std) + self._set_intercept(X_offset, y_offset, X_scale) if self.dual_gap_ > self.eps_: warnings.warn('Objective did not converge, you might want' @@ -1707,7 +1757,14 @@ class MultiTaskLasso(MultiTaskElasticNet): (e.g. data is expected to be already centered). normalize : boolean, optional, default False - If ``True``, the regressors X will be normalized before regression. + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. @@ -1837,7 +1894,14 @@ class MultiTaskElasticNetCV(LinearModelCV, RegressorMixin): (e.g. data is expected to be already centered). normalize : boolean, optional, default False - If ``True``, the regressors X will be normalized before regression. + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. @@ -1995,7 +2059,14 @@ class MultiTaskLassoCV(LinearModelCV, RegressorMixin): (e.g. data is expected to be already centered). normalize : boolean, optional, default False - If ``True``, the regressors X will be normalized before regression. + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. 
+ When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index 8f6531b7d7f5962f6e3eecd169e4b4f137251eb6..70c93c7be5bc8593ee707e5daa662c80a4f16d27 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -295,9 +295,9 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, # We have degenerate vectors in our active set. # We'll 'drop for good' the last regressor added. - # Note: this case is very rare. It is no longer triggered by the - # test suite. The `equality_tolerance` margin added in 0.16.0 to - # get early stopping to work consistently on all versions of + # Note: this case is very rare. It is no longer triggered by + # the test suite. The `equality_tolerance` margin added in 0.16 + # to get early stopping to work consistently on all versions of # Python including 32 bit Python under Windows seems to make it # very difficult to trigger the 'drop for good' strategy. warnings.warn('Regressors in active set degenerate. ' @@ -512,7 +512,14 @@ class Lars(LinearModel, RegressorMixin): Sets the verbosity amount normalize : boolean, optional, default False - If ``True``, the regressors X will be normalized before regression. + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up @@ -628,7 +635,7 @@ class Lars(LinearModel, RegressorMixin): X, y = check_X_y(X, y, y_numeric=True, multi_output=True) n_features = X.shape[1] - X, y, X_mean, y_mean, X_std = self._center_data(X, y, + X, y, X_offset, y_offset, X_scale = self._preprocess_data(X, y, self.fit_intercept, self.normalize, self.copy_X) @@ -695,7 +702,7 @@ class Lars(LinearModel, RegressorMixin): if n_targets == 1: self.alphas_ = self.alphas_[0] self.n_iter_ = self.n_iter_[0] - self._set_intercept(X_mean, y_mean, X_std) + self._set_intercept(X_offset, y_offset, X_scale) return self @@ -739,6 +746,13 @@ class LassoLars(Lars): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If True, X will be copied; else, it may be overwritten. 
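All of these estimators recover the intercept through the renamed ``_set_intercept(X_offset, y_offset, X_scale)`` shown in base.py above. A numeric sketch of that identity, with made-up preprocessing statistics and a coefficient vector learnt on the preprocessed data::

    >>> import numpy as np
    >>> X_offset, X_scale, y_offset = np.array([3., 6.]), np.array([2., 4.]), 2.
    >>> coef_preprocessed = np.array([1., -0.5])  # learnt on (X - X_offset) / X_scale
    >>> coef = coef_preprocessed / X_scale
    >>> intercept = y_offset - np.dot(X_offset, coef.T)
    >>> X_new = np.array([[1., 2.], [5., 10.]])
    >>> np.allclose(np.dot((X_new - X_offset) / X_scale, coef_preprocessed) + y_offset,
    ...             np.dot(X_new, coef) + intercept)
    True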
@@ -889,6 +903,13 @@ def _lars_path_residues(X_train, y_train, X_test, y_test, Gram=None, normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. max_iter : integer, optional Maximum number of iterations to perform. @@ -968,6 +989,13 @@ class LarsCV(Lars): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. @@ -1171,6 +1199,13 @@ class LassoLarsCV(LarsCV): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up @@ -1299,6 +1334,13 @@ class LassoLarsIC(LassoLars): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. copy_X : boolean, optional, default True If True, X will be copied; else, it may be overwritten. @@ -1402,7 +1444,7 @@ class LassoLarsIC(LassoLars): self.fit_path = True X, y = check_X_y(X, y, y_numeric=True) - X, y, Xmean, ymean, Xstd = LinearModel._center_data( + X, y, Xmean, ymean, Xstd = LinearModel._preprocess_data( X, y, self.fit_intercept, self.normalize, self.copy_X) max_iter = self.max_iter diff --git a/sklearn/linear_model/omp.py b/sklearn/linear_model/omp.py index 967122765c438ff51514eef35679776908c77530..269e18fe7029b4dcb976f2c58f19a2bf347859c2 100644 --- a/sklearn/linear_model/omp.py +++ b/sklearn/linear_model/omp.py @@ -557,8 +557,15 @@ class OrthogonalMatchingPursuit(LinearModel, RegressorMixin): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). - normalize : boolean, optional - If False, the regressors X are assumed to be already normalized. 
+ normalize : boolean, optional, default False + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to `False`. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. precompute : {True, False, 'auto'}, default 'auto' Whether to use a precomputed Gram and Xy matrix to speed up @@ -629,7 +636,7 @@ class OrthogonalMatchingPursuit(LinearModel, RegressorMixin): X, y = check_X_y(X, y, multi_output=True, y_numeric=True) n_features = X.shape[1] - X, y, X_mean, y_mean, X_std, Gram, Xy = \ + X, y, X_offset, y_offset, X_scale, Gram, Xy = \ _pre_fit(X, y, None, self.precompute, self.normalize, self.fit_intercept, copy=True) @@ -657,7 +664,7 @@ class OrthogonalMatchingPursuit(LinearModel, RegressorMixin): copy_Gram=True, copy_Xy=True, return_n_iter=True) self.coef_ = coef_.T - self._set_intercept(X_mean, y_mean, X_std) + self._set_intercept(X_offset, y_offset, X_scale) return self @@ -690,6 +697,13 @@ def _omp_path_residues(X_train, y_train, X_test, y_test, copy=True, normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to `False`. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. max_iter : integer, optional Maximum numbers of iterations to perform, therefore maximum features @@ -748,8 +762,15 @@ class OrthogonalMatchingPursuitCV(LinearModel, RegressorMixin): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). - normalize : boolean, optional - If False, the regressors X are assumed to be already normalized. + normalize : boolean, optional, default False + If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to `False`. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. 
max_iter : integer, optional Maximum numbers of iterations to perform, therefore maximum features diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py index 962438545856d7f8fc9baf7e58db5f9a0783ce33..e4c9ab20d8dd3dfa7dbf93e5bd73c149645d5d0d 100644 --- a/sklearn/linear_model/randomized_l1.py +++ b/sklearn/linear_model/randomized_l1.py @@ -15,7 +15,7 @@ from scipy.sparse import issparse from scipy import sparse from scipy.interpolate import interp1d -from .base import center_data +from .base import _preprocess_data from ..base import BaseEstimator, TransformerMixin from ..externals import six from ..externals.joblib import Memory, Parallel, delayed @@ -71,7 +71,7 @@ class BaseRandomizedLinearModel(six.with_metaclass(ABCMeta, BaseEstimator, def __init__(self): pass - _center_data = staticmethod(center_data) + _preprocess_data = staticmethod(_preprocess_data) def fit(self, X, y): """Fit the model using X, y as training data. @@ -94,9 +94,8 @@ class BaseRandomizedLinearModel(six.with_metaclass(ABCMeta, BaseEstimator, X = as_float_array(X, copy=False) n_samples, n_features = X.shape - X, y, X_mean, y_mean, X_std = self._center_data(X, y, - self.fit_intercept, - self.normalize) + X, y, X_offset, y_offset, X_scale = \ + self._preprocess_data(X, y, self.fit_intercept, self.normalize) estimator_func, params = self._make_estimator_and_params(X, y) memory = self.memory @@ -223,8 +222,15 @@ class RandomizedLasso(BaseRandomizedLinearModel): verbose : boolean or integer, optional Sets the verbosity amount - normalize : boolean, optional, default True + normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. precompute : True | False | 'auto' Whether to use a precomputed Gram matrix to speed up @@ -405,8 +411,15 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): verbose : boolean or integer, optional Sets the verbosity amount - normalize : boolean, optional, default=True + normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. 
tol : float, optional, default=1e-3 tolerance for stopping criteria of LogisticRegression @@ -500,11 +513,11 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): fit_intercept=self.fit_intercept) return _randomized_logistic, params - def _center_data(self, X, y, fit_intercept, normalize=False): + def _preprocess_data(self, X, y, fit_intercept, normalize=False): """Center the data in X but not in y""" - X, _, Xmean, _, X_std = center_data(X, y, fit_intercept, - normalize=normalize) - return X, y, Xmean, y, X_std + X, _, X_offset, _, X_scale = _preprocess_data(X, y, fit_intercept, + normalize=normalize) + return X, y, X_offset, y, X_scale ############################################################################### diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 955fa01bdcd41cee92d5246aa02e100cc995063a..913f862cf6c60adbae68d7dd9a7e370f796efce6 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -280,7 +280,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', If True and if X is sparse, the method also returns the intercept, and the solver is automatically changed to 'sag'. This is only a temporary fix for fitting the intercept with sparse data. For dense - data, use sklearn.linear_model.center_data before your regression. + data, use sklearn.linear_model._preprocess_data before your regression. .. versionadded:: 0.17 @@ -463,7 +463,7 @@ class _BaseRidge(six.with_metaclass(ABCMeta, LinearModel)): np.atleast_1d(sample_weight).ndim > 1): raise ValueError("Sample weights must be 1D array or scalar") - X, y, X_mean, y_mean, X_std = self._center_data( + X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, self.fit_intercept, self.normalize, self.copy_X, sample_weight=sample_weight) @@ -474,14 +474,14 @@ class _BaseRidge(six.with_metaclass(ABCMeta, LinearModel)): max_iter=self.max_iter, tol=self.tol, solver=self.solver, random_state=self.random_state, return_n_iter=True, return_intercept=True) - self.intercept_ += y_mean + self.intercept_ += y_offset else: self.coef_, self.n_iter_ = ridge_regression( X, y, alpha=self.alpha, sample_weight=sample_weight, max_iter=self.max_iter, tol=self.tol, solver=self.solver, random_state=self.random_state, return_n_iter=True, return_intercept=False) - self._set_intercept(X_mean, y_mean, X_std) + self._set_intercept(X_offset, y_offset, X_scale) return self @@ -521,6 +521,13 @@ class Ridge(_BaseRidge, RegressorMixin): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag'} Solver to use in the computational routines: @@ -663,6 +670,13 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. 
The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag'} Solver to use in the computational routines: @@ -921,7 +935,7 @@ class _RidgeGCV(LinearModel): n_samples, n_features = X.shape - X, y, X_mean, y_mean, X_std = LinearModel._center_data( + X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data( X, y, self.fit_intercept, self.normalize, self.copy_X, sample_weight=sample_weight) @@ -989,7 +1003,7 @@ class _RidgeGCV(LinearModel): self.dual_coef_ = C[best] self.coef_ = safe_sparse_dot(self.dual_coef_.T, X) - self._set_intercept(X_mean, y_mean, X_std) + self._set_intercept(X_offset, y_offset, X_scale) if self.store_cv_values: if len(y.shape) == 1: @@ -1085,6 +1099,13 @@ class RidgeCV(_BaseRidgeCV, RegressorMixin): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or @@ -1179,6 +1200,13 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV): normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. + This parameter is ignored when `fit_intercept` is set to False. + When the regressors are normalized, note that this makes the + hyperparameters learnt more robust and almost independent of the number + of samples. The same property is not valid for standardized data. + However, if you wish to standardize, please use + `preprocessing.StandardScaler` before calling `fit` on an estimator + with `normalize=False`. 
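The scale that ``normalize=True`` divides by is the column-wise L2 norm of the centered data, which equals the standard deviation times sqrt(n_samples); the updated tests further below spell out the same relation as ``expected_X_norm = np.std(X, axis=0) * np.sqrt(X.shape[0])``. A quick check of that identity on arbitrary data::

    >>> import numpy as np
    >>> rng = np.random.RandomState(0)
    >>> X = rng.rand(200, 2)
    >>> Xc = X - X.mean(axis=0)
    >>> l2_norm = np.sqrt((Xc ** 2).sum(axis=0))
    >>> np.allclose(l2_norm, np.std(X, axis=0) * np.sqrt(X.shape[0]))
    True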
scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index f8a2d531e0c21d53facf00f9af73b52160403eec..4c7b326f24f448996b6e35401827df2495a2dec3 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -6,20 +6,25 @@ import numpy as np from scipy import sparse from scipy import linalg +from itertools import product + from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_equal +from sklearn.utils.testing import ignore_warnings from sklearn.linear_model.base import LinearRegression -from sklearn.linear_model.base import center_data -from sklearn.linear_model.base import sparse_center_data +from sklearn.linear_model.base import _preprocess_data +from sklearn.linear_model.base import sparse_center_data, center_data from sklearn.linear_model.base import _rescale_data from sklearn.utils import check_random_state from sklearn.utils.testing import assert_greater from sklearn.datasets.samples_generator import make_sparse_uncorrelated from sklearn.datasets.samples_generator import make_regression +rng = np.random.RandomState(0) + def test_linear_regression(): # Test LinearRegression on a simple dataset. @@ -93,8 +98,6 @@ def test_raises_value_error_if_sample_weights_greater_than_1d(): n_sampless = [2, 3] n_featuress = [3, 2] - rng = np.random.RandomState(42) - for n_samples, n_features in zip(n_sampless, n_featuress): X = rng.randn(n_samples, n_features) y = rng.randn(n_samples) @@ -181,73 +184,69 @@ def test_linear_regression_sparse_multiple_outcome(random_state=0): assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3) -def test_center_data(): +def test_preprocess_data(): n_samples = 200 n_features = 2 - rng = check_random_state(0) X = rng.rand(n_samples, n_features) y = rng.rand(n_samples) expected_X_mean = np.mean(X, axis=0) - # XXX: currently scaled to variance=n_samples - expected_X_std = np.std(X, axis=0) * np.sqrt(X.shape[0]) + expected_X_norm = np.std(X, axis=0) * np.sqrt(X.shape[0]) expected_y_mean = np.mean(y, axis=0) - Xt, yt, X_mean, y_mean, X_std = center_data(X, y, fit_intercept=False, - normalize=False) + Xt, yt, X_mean, y_mean, X_norm = \ + _preprocess_data(X, y, fit_intercept=False, normalize=False) assert_array_almost_equal(X_mean, np.zeros(n_features)) assert_array_almost_equal(y_mean, 0) - assert_array_almost_equal(X_std, np.ones(n_features)) + assert_array_almost_equal(X_norm, np.ones(n_features)) assert_array_almost_equal(Xt, X) assert_array_almost_equal(yt, y) - Xt, yt, X_mean, y_mean, X_std = center_data(X, y, fit_intercept=True, - normalize=False) + Xt, yt, X_mean, y_mean, X_norm = \ + _preprocess_data(X, y, fit_intercept=True, normalize=False) assert_array_almost_equal(X_mean, expected_X_mean) assert_array_almost_equal(y_mean, expected_y_mean) - assert_array_almost_equal(X_std, np.ones(n_features)) + assert_array_almost_equal(X_norm, np.ones(n_features)) assert_array_almost_equal(Xt, X - expected_X_mean) assert_array_almost_equal(yt, y - expected_y_mean) - Xt, yt, X_mean, y_mean, X_std = center_data(X, y, fit_intercept=True, - normalize=True) + Xt, yt, X_mean, y_mean, X_norm = \ + _preprocess_data(X, y, fit_intercept=True, normalize=True) assert_array_almost_equal(X_mean, expected_X_mean) assert_array_almost_equal(y_mean, expected_y_mean) - 
-    assert_array_almost_equal(X_std, expected_X_std)
-    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_std)
+    assert_array_almost_equal(X_norm, expected_X_norm)
+    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
 
-def test_center_data_multioutput():
+def test_preprocess_data_multioutput():
     n_samples = 200
     n_features = 3
     n_outputs = 2
-    rng = check_random_state(0)
     X = rng.rand(n_samples, n_features)
     y = rng.rand(n_samples, n_outputs)
     expected_y_mean = np.mean(y, axis=0)
 
-    args = [(center_data, X), (sparse_center_data, sparse.csc_matrix(X))]
-    for center, X in args:
-        _, yt, _, y_mean, _ = center(X, y, fit_intercept=False,
-                                     normalize=False)
+    args = [X, sparse.csc_matrix(X)]
+    for X in args:
+        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=False,
+                                               normalize=False)
         assert_array_almost_equal(y_mean, np.zeros(n_outputs))
         assert_array_almost_equal(yt, y)
 
-        _, yt, _, y_mean, _ = center(X, y, fit_intercept=True,
-                                     normalize=False)
+        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
+                                               normalize=False)
         assert_array_almost_equal(y_mean, expected_y_mean)
         assert_array_almost_equal(yt, y - y_mean)
 
-        _, yt, _, y_mean, _ = center(X, y, fit_intercept=True,
-                                     normalize=True)
+        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
+                                               normalize=True)
         assert_array_almost_equal(y_mean, expected_y_mean)
         assert_array_almost_equal(yt, y - y_mean)
 
 
-def test_center_data_weighted():
+def test_preprocess_data_weighted():
     n_samples = 200
     n_features = 2
-    rng = check_random_state(0)
     X = rng.rand(n_samples, n_features)
     y = rng.rand(n_samples)
     sample_weight = rng.rand(n_samples)
@@ -256,75 +255,72 @@ def test_center_data_weighted():
     # XXX: if normalize=True, should we expect a weighted standard deviation?
     # Currently not weighted, but calculated with respect to weighted mean
-    # XXX: currently scaled to variance=n_samples
-    expected_X_std = (np.sqrt(X.shape[0]) *
-                      np.mean((X - expected_X_mean) ** 2, axis=0) ** .5)
+    expected_X_norm = (np.sqrt(X.shape[0]) *
+                       np.mean((X - expected_X_mean) ** 2, axis=0) ** .5)
 
-    Xt, yt, X_mean, y_mean, X_std = center_data(X, y, fit_intercept=True,
-                                                normalize=False,
-                                                sample_weight=sample_weight)
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=True, normalize=False,
+                         sample_weight=sample_weight)
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
-    assert_array_almost_equal(X_std, np.ones(n_features))
+    assert_array_almost_equal(X_norm, np.ones(n_features))
     assert_array_almost_equal(Xt, X - expected_X_mean)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
-    Xt, yt, X_mean, y_mean, X_std = center_data(X, y, fit_intercept=True,
-                                                normalize=True,
-                                                sample_weight=sample_weight)
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=True, normalize=True,
+                         sample_weight=sample_weight)
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
-    assert_array_almost_equal(X_std, expected_X_std)
-    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_std)
+    assert_array_almost_equal(X_norm, expected_X_norm)
+    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
 
-def test_sparse_center_data():
+def test_sparse_preprocess_data_with_return_mean():
     n_samples = 200
     n_features = 2
-    rng = check_random_state(0)
     # random_state not supported yet in sparse.rand
     X = sparse.rand(n_samples, n_features, density=.5)  # , random_state=rng
     X = X.tolil()
     y = rng.rand(n_samples)
     XA = X.toarray()
-    # XXX: currently scaled to variance=n_samples
-    expected_X_std = np.std(XA, axis=0) * np.sqrt(X.shape[0])
+    expected_X_norm = np.std(XA, axis=0) * np.sqrt(X.shape[0])
 
-    Xt, yt, X_mean, y_mean, X_std = sparse_center_data(X, y,
-                                                       fit_intercept=False,
-                                                       normalize=False)
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=False, normalize=False,
+                         return_mean=True)
     assert_array_almost_equal(X_mean, np.zeros(n_features))
     assert_array_almost_equal(y_mean, 0)
-    assert_array_almost_equal(X_std, np.ones(n_features))
+    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
     assert_array_almost_equal(yt, y)
 
-    Xt, yt, X_mean, y_mean, X_std = sparse_center_data(X, y,
-                                                       fit_intercept=True,
-                                                       normalize=False)
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=True, normalize=False,
+                         return_mean=True)
     assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
     assert_array_almost_equal(y_mean, np.mean(y, axis=0))
-    assert_array_almost_equal(X_std, np.ones(n_features))
+    assert_array_almost_equal(X_norm, np.ones(n_features))
     assert_array_almost_equal(Xt.A, XA)
     assert_array_almost_equal(yt, y - np.mean(y, axis=0))
 
-    Xt, yt, X_mean, y_mean, X_std = sparse_center_data(X, y,
-                                                       fit_intercept=True,
-                                                       normalize=True)
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=True, normalize=True,
+                         return_mean=True)
     assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
     assert_array_almost_equal(y_mean, np.mean(y, axis=0))
-    assert_array_almost_equal(X_std, expected_X_std)
-    assert_array_almost_equal(Xt.A, XA / expected_X_std)
+    assert_array_almost_equal(X_norm, expected_X_norm)
+    assert_array_almost_equal(Xt.A, XA / expected_X_norm)
     assert_array_almost_equal(yt, y - np.mean(y, axis=0))
 
 
-def test_csr_sparse_center_data():
-    # Test output format of sparse_center_data, when input is csr
+def test_csr_preprocess_data():
+    # Test output format of _preprocess_data, when input is csr
     X, y = make_regression()
     X[X < 2.5] = 0.0
     csr = sparse.csr_matrix(X)
-    csr_, y, _, _, _ = sparse_center_data(csr, y, True)
+    csr_, y, _, _, _ = _preprocess_data(csr, y, True)
     assert_equal(csr_.getformat(), 'csr')
@@ -332,7 +328,6 @@ def test_rescale_data():
     n_samples = 200
     n_features = 2
 
-    rng = np.random.RandomState(0)
     sample_weight = 1.0 + rng.rand(n_samples)
     X = rng.rand(n_samples, n_features)
     y = rng.rand(n_samples)
@@ -341,3 +336,74 @@
     rescaled_y2 = y * np.sqrt(sample_weight)
     assert_array_almost_equal(rescaled_X, rescaled_X2)
     assert_array_almost_equal(rescaled_y, rescaled_y2)
+
+
+@ignore_warnings  # all deprecation warnings
+def test_deprecation_center_data():
+    n_samples = 200
+    n_features = 2
+
+    w = 1.0 + rng.rand(n_samples)
+    X = rng.rand(n_samples, n_features)
+    y = rng.rand(n_samples)
+
+    param_grid = product([True, False], [True, False], [True, False],
+                         [None, w])
+
+    for (fit_intercept, normalize, copy, sample_weight) in param_grid:
+
+        XX = X.copy()  # such that we can try copy=False as well
+
+        X1, y1, X1_mean, X1_var, y1_mean = \
+            center_data(XX, y, fit_intercept=fit_intercept,
+                        normalize=normalize, copy=copy,
+                        sample_weight=sample_weight)
+
+        XX = X.copy()
+
+        X2, y2, X2_mean, X2_var, y2_mean = \
+            _preprocess_data(XX, y, fit_intercept=fit_intercept,
+                             normalize=normalize, copy=copy,
+                             sample_weight=sample_weight)
+
+        assert_array_almost_equal(X1, X2)
+        assert_array_almost_equal(y1, y2)
+        assert_array_almost_equal(X1_mean, X2_mean)
+        assert_array_almost_equal(X1_var, X2_var)
+        assert_array_almost_equal(y1_mean, y2_mean)
+
+    # Sparse cases
+    X = sparse.csr_matrix(X)
+
+    for (fit_intercept, normalize, copy, sample_weight) in param_grid:
+
+        X1, y1, X1_mean, X1_var, y1_mean = \
+            center_data(X, y, fit_intercept=fit_intercept, normalize=normalize,
+                        copy=copy, sample_weight=sample_weight)
+
+        X2, y2, X2_mean, X2_var, y2_mean = \
+            _preprocess_data(X, y, fit_intercept=fit_intercept,
+                             normalize=normalize, copy=copy,
+                             sample_weight=sample_weight, return_mean=False)
+
+        assert_array_almost_equal(X1.toarray(), X2.toarray())
+        assert_array_almost_equal(y1, y2)
+        assert_array_almost_equal(X1_mean, X2_mean)
+        assert_array_almost_equal(X1_var, X2_var)
+        assert_array_almost_equal(y1_mean, y2_mean)
+
+    for (fit_intercept, normalize) in product([True, False], [True, False]):
+
+        X1, y1, X1_mean, X1_var, y1_mean = \
+            sparse_center_data(X, y, fit_intercept=fit_intercept,
+                               normalize=normalize)
+
+        X2, y2, X2_mean, X2_var, y2_mean = \
+            _preprocess_data(X, y, fit_intercept=fit_intercept,
+                             normalize=normalize, return_mean=True)
+
+        assert_array_almost_equal(X1.toarray(), X2.toarray())
+        assert_array_almost_equal(y1, y2)
+        assert_array_almost_equal(X1_mean, X2_mean)
+        assert_array_almost_equal(X1_var, X2_var)
+        assert_array_almost_equal(y1_mean, y2_mean)
diff --git a/sklearn/linear_model/tests/test_randomized_l1.py b/sklearn/linear_model/tests/test_randomized_l1.py
index 4057ebdfa2ffdb24f96501d15e81db191210114c..95bffc0fbd33bd862ba7e14286001d0e80120636 100644
--- a/sklearn/linear_model/tests/test_randomized_l1.py
+++ b/sklearn/linear_model/tests/test_randomized_l1.py
@@ -14,7 +14,7 @@ from sklearn.linear_model.randomized_l1 import (lasso_stability_path,
 from sklearn.datasets import load_diabetes, load_iris
 from sklearn.feature_selection import f_regression, f_classif
 from sklearn.preprocessing import StandardScaler
-from sklearn.linear_model.base import center_data
+from sklearn.linear_model.base import _preprocess_data
 
 diabetes = load_diabetes()
 X = diabetes.data
@@ -111,7 +111,7 @@ def test_randomized_logistic_sparse():
     # center here because sparse matrices are usually not centered
     # labels should not be centered
-    X, _, _, _, _ = center_data(X, y, True, True)
+    X, _, _, _, _ = _preprocess_data(X, y, True, True)
 
     X_sp = sparse.csr_matrix(X)
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 44a1b267f2d75c867e71683631326002c170352a..ff54bba9716c46a35e90ccf3e972d2a7c77744b7 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -227,21 +227,21 @@ def test_toy_ridge_object():
     # TODO: test also n_samples > n_features
     X = np.array([[1], [2]])
     Y = np.array([1, 2])
-    clf = Ridge(alpha=0.0)
-    clf.fit(X, Y)
+    reg = Ridge(alpha=0.0)
+    reg.fit(X, Y)
     X_test = [[1], [2], [3], [4]]
-    assert_almost_equal(clf.predict(X_test), [1., 2, 3, 4])
+    assert_almost_equal(reg.predict(X_test), [1., 2, 3, 4])
 
-    assert_equal(len(clf.coef_.shape), 1)
-    assert_equal(type(clf.intercept_), np.float64)
+    assert_equal(len(reg.coef_.shape), 1)
+    assert_equal(type(reg.intercept_), np.float64)
 
     Y = np.vstack((Y, Y)).T
 
-    clf.fit(X, Y)
+    reg.fit(X, Y)
     X_test = [[1], [2], [3], [4]]
 
-    assert_equal(len(clf.coef_.shape), 2)
-    assert_equal(type(clf.intercept_), np.ndarray)
+    assert_equal(len(reg.coef_.shape), 2)
+    assert_equal(type(reg.intercept_), np.ndarray)
 
 
 def test_ridge_vs_lstsq():
@@ -417,16 +417,16 @@ def _test_multi_ridge_diabetes(filter_):
 def _test_ridge_classifiers(filter_):
     n_classes = np.unique(y_iris).shape[0]
     n_features = X_iris.shape[1]
-    for clf in (RidgeClassifier(), RidgeClassifierCV()):
-        clf.fit(filter_(X_iris), y_iris)
-        assert_equal(clf.coef_.shape, (n_classes, n_features))
-        y_pred = clf.predict(filter_(X_iris))
+    for reg in (RidgeClassifier(), RidgeClassifierCV()):
+        reg.fit(filter_(X_iris), y_iris)
+        assert_equal(reg.coef_.shape, (n_classes, n_features))
+        y_pred = reg.predict(filter_(X_iris))
         assert_greater(np.mean(y_iris == y_pred), .79)
 
     cv = KFold(5)
-    clf = RidgeClassifierCV(cv=cv)
-    clf.fit(filter_(X_iris), y_iris)
-    y_pred = clf.predict(filter_(X_iris))
+    reg = RidgeClassifierCV(cv=cv)
+    reg.fit(filter_(X_iris), y_iris)
+    y_pred = reg.predict(filter_(X_iris))
     assert_true(np.mean(y_iris == y_pred) >= 0.8)
 
 
@@ -481,63 +481,63 @@ def test_class_weights():
                   [1.0, 1.0], [1.0, 0.0]])
     y = [1, 1, 1, -1, -1]
 
-    clf = RidgeClassifier(class_weight=None)
-    clf.fit(X, y)
-    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))
+    reg = RidgeClassifier(class_weight=None)
+    reg.fit(X, y)
+    assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([1]))
 
     # we give a small weights to class 1
-    clf = RidgeClassifier(class_weight={1: 0.001})
-    clf.fit(X, y)
+    reg = RidgeClassifier(class_weight={1: 0.001})
+    reg.fit(X, y)
 
     # now the hyperplane should rotate clock-wise and
     # the prediction on this point should shift
-    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))
+    assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([-1]))
 
     # check if class_weight = 'balanced' can handle negative labels.
-    clf = RidgeClassifier(class_weight='balanced')
-    clf.fit(X, y)
-    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))
+    reg = RidgeClassifier(class_weight='balanced')
+    reg.fit(X, y)
+    assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([1]))
 
     # class_weight = 'balanced', and class_weight = None should return
     # same values when y has equal number of all labels
     X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0]])
     y = [1, 1, -1, -1]
-    clf = RidgeClassifier(class_weight=None)
-    clf.fit(X, y)
-    clfa = RidgeClassifier(class_weight='balanced')
-    clfa.fit(X, y)
-    assert_equal(len(clfa.classes_), 2)
-    assert_array_almost_equal(clf.coef_, clfa.coef_)
-    assert_array_almost_equal(clf.intercept_, clfa.intercept_)
+    reg = RidgeClassifier(class_weight=None)
+    reg.fit(X, y)
+    rega = RidgeClassifier(class_weight='balanced')
+    rega.fit(X, y)
+    assert_equal(len(rega.classes_), 2)
+    assert_array_almost_equal(reg.coef_, rega.coef_)
+    assert_array_almost_equal(reg.intercept_, rega.intercept_)
 
 
 def test_class_weight_vs_sample_weight():
     """Check class_weights resemble sample_weights behavior."""
-    for clf in (RidgeClassifier, RidgeClassifierCV):
+    for reg in (RidgeClassifier, RidgeClassifierCV):
 
         # Iris is balanced, so no effect expected for using 'balanced' weights
-        clf1 = clf()
-        clf1.fit(iris.data, iris.target)
-        clf2 = clf(class_weight='balanced')
-        clf2.fit(iris.data, iris.target)
-        assert_almost_equal(clf1.coef_, clf2.coef_)
+        reg1 = reg()
+        reg1.fit(iris.data, iris.target)
+        reg2 = reg(class_weight='balanced')
+        reg2.fit(iris.data, iris.target)
+        assert_almost_equal(reg1.coef_, reg2.coef_)
 
         # Inflate importance of class 1, check against user-defined weights
         sample_weight = np.ones(iris.target.shape)
        sample_weight[iris.target == 1] *= 100
         class_weight = {0: 1., 1: 100., 2: 1.}
-        clf1 = clf()
-        clf1.fit(iris.data, iris.target, sample_weight)
-        clf2 = clf(class_weight=class_weight)
-        clf2.fit(iris.data, iris.target)
-        assert_almost_equal(clf1.coef_, clf2.coef_)
+        reg1 = reg()
+        reg1.fit(iris.data, iris.target, sample_weight)
+        reg2 = reg(class_weight=class_weight)
+        reg2.fit(iris.data, iris.target)
+        assert_almost_equal(reg1.coef_, reg2.coef_)
 
         # Check that sample_weight and class_weight are multiplicative
-        clf1 = clf()
-        clf1.fit(iris.data, iris.target, sample_weight ** 2)
-        clf2 = clf(class_weight=class_weight)
-        clf2.fit(iris.data, iris.target, sample_weight)
-        assert_almost_equal(clf1.coef_, clf2.coef_)
+        reg1 = reg()
+        reg1.fit(iris.data, iris.target, sample_weight ** 2)
+        reg2 = reg(class_weight=class_weight)
+        reg2.fit(iris.data, iris.target, sample_weight)
+        assert_almost_equal(reg1.coef_, reg2.coef_)
 
 
 def test_class_weights_cv():
@@ -546,14 +546,14 @@
                   [1.0, 1.0], [1.0, 0.0]])
     y = [1, 1, 1, -1, -1]
 
-    clf = RidgeClassifierCV(class_weight=None, alphas=[.01, .1, 1])
-    clf.fit(X, y)
+    reg = RidgeClassifierCV(class_weight=None, alphas=[.01, .1, 1])
+    reg.fit(X, y)
 
     # we give a small weights to class 1
-    clf = RidgeClassifierCV(class_weight={1: 0.001}, alphas=[.01, .1, 1, 10])
-    clf.fit(X, y)
+    reg = RidgeClassifierCV(class_weight={1: 0.001}, alphas=[.01, .1, 1, 10])
+    reg.fit(X, y)
 
-    assert_array_equal(clf.predict([[-.2, 2]]), np.array([-1]))
+    assert_array_equal(reg.predict([[-.2, 2]]), np.array([-1]))
 
 
 def test_ridgecv_store_cv_values():
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 094100cd7024cf85e655ea6ddad701791d667853..d447e715c0f27c3678fe78d6084fb8ce30804fd6 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -1228,7 +1228,7 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
         return XP
 
 
-def normalize(X, norm='l2', axis=1, copy=True):
+def normalize(X, norm='l2', axis=1, copy=True, return_norm=False):
     """Scale input vectors individually to unit norm (vector length).
 
     Read more in the :ref:`User Guide <preprocessing_normalization>`.
@@ -1253,6 +1253,9 @@ def normalize(X, norm='l2', axis=1, copy=True):
         copy (if the input is already a numpy array or a scipy.sparse
         CSR matrix and if axis is 1).
 
+    return_norm : boolean, default False
+        whether to return the computed norms
+
     See also
     --------
     :class:`sklearn.preprocessing.Normalizer` to perform normalization
@@ -1297,7 +1300,10 @@ def normalize(X, norm='l2', axis=1, copy=True):
     if axis == 0:
         X = X.T
 
-    return X
+    if return_norm:
+        return X, norms
+    else:
+        return X
 
 
 class Normalizer(BaseEstimator, TransformerMixin):
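
Note (illustration only, not part of the patch): a minimal sketch of how the ``return_norm`` flag added to ``sklearn.preprocessing.normalize`` above can be used; the input values here are made up. With the default ``return_norm=False`` the call behaves as before, while ``return_norm=True`` also hands back the norms that each sample was divided by::

    import numpy as np
    from sklearn.preprocessing import normalize

    X = np.array([[3.0, 4.0],
                  [1.0, 0.0]])

    # Default behaviour is unchanged: only the row-normalized array comes back.
    X_unit = normalize(X, norm='l2')

    # With return_norm=True the per-sample l2 norms are returned as well, so the
    # original matrix can be recovered as X_unit * norms[:, np.newaxis].
    X_unit, norms = normalize(X, norm='l2', return_norm=True)
    print(norms)      # [5. 1.]
    print(X_unit[0])  # [0.6 0.8]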