diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 8c75ef7c696024ed028ff99476ac775f1bdfa121..7ae174cc45ef648b0b347c6f9ce1da1ae16c4ebd 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -713,7 +713,7 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin): ------- self """ - X = check_array(X, ensure_min_samples=2) + X = check_array(X, ensure_min_samples=2, estimator=self) memory = self.memory if isinstance(memory, six.string_types): memory = Memory(cachedir=memory, verbose=0) @@ -870,7 +870,7 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform): self """ X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], - ensure_min_features=2) + ensure_min_features=2, estimator=self) return AgglomerativeClustering.fit(self, X.T, **params) @property diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py index 6957507a336a01851edd2b04275ad66f6d76b1c0..9e8e07063399d77a2b6438d69bf52de0acd99534 100644 --- a/sklearn/covariance/graph_lasso_.py +++ b/sklearn/covariance/graph_lasso_.py @@ -336,7 +336,8 @@ class GraphLasso(EmpiricalCovariance): def fit(self, X, y=None): # Covariance does not make sense for a single feature - X = check_array(X, ensure_min_features=2, ensure_min_samples=2) + X = check_array(X, ensure_min_features=2, ensure_min_samples=2, + estimator=self) if self.assume_centered: self.location_ = np.zeros(X.shape[1]) @@ -570,7 +571,7 @@ class GraphLassoCV(GraphLasso): Data from which to compute the covariance estimate """ # Covariance does not make sense for a single feature - X = check_array(X, ensure_min_features=2) + X = check_array(X, ensure_min_features=2, estimator=self) if self.assume_centered: self.location_ = np.zeros(X.shape[1]) else: diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index c48feaa066d5dc67f5e4bf88ca81d3c28c2bb844..e442f278f31063ca908d88aedcb9af36b03a1383 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -419,7 +419,7 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin, "the estimator initialisation or set_params method.", DeprecationWarning) self.tol = tol - X, y = check_X_y(X, y, ensure_min_samples=2) + X, y = check_X_y(X, y, ensure_min_samples=2, estimator=self) self.classes_ = unique_labels(y) if self.priors is None: # estimate priors from sample diff --git a/sklearn/linear_model/omp.py b/sklearn/linear_model/omp.py index 589939ed7858f90b5b95008636af7f00d090552c..3b87b9cf6c41057c2938ea6c9b6495604ea75e6b 100644 --- a/sklearn/linear_model/omp.py +++ b/sklearn/linear_model/omp.py @@ -832,7 +832,8 @@ class OrthogonalMatchingPursuitCV(LinearModel, RegressorMixin): self : object returns an instance of self. """ - X, y = check_X_y(X, y, y_numeric=True, ensure_min_features=2) + X, y = check_X_y(X, y, y_numeric=True, ensure_min_features=2, + estimator=self) X = as_float_array(X, copy=False, force_all_finite=False) cv = check_cv(self.cv, X, y, classifier=False) max_iter = (min(max(int(0.1 * X.shape[1]), 5), X.shape[1]) diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py index 652f0375215cc46e4a930c810a1bfd2cf94ca8ab..aa02d0c5a643783e429cb771c3b4075b88ebf837 100644 --- a/sklearn/linear_model/randomized_l1.py +++ b/sklearn/linear_model/randomized_l1.py @@ -89,7 +89,7 @@ class BaseRandomizedLinearModel(six.with_metaclass(ABCMeta, BaseEstimator, Returns an instance of self. """ X, y = check_X_y(X, y, ['csr', 'csc'], y_numeric=True, - ensure_min_samples=2) + ensure_min_samples=2, estimator=self) X = as_float_array(X, copy=False) n_samples, n_features = X.shape diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index c67064b5bb288d0f4677f2b246a579d2640187aa..26825d8deb9337d3b035515bc50721a38be8fa17 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -452,7 +452,7 @@ class SpectralEmbedding(BaseEstimator): Returns the instance itself. """ - X = check_array(X, ensure_min_samples=2) + X = check_array(X, ensure_min_samples=2, estimator=self) random_state = check_random_state(self.random_state) if isinstance(self.affinity, six.string_types): diff --git a/sklearn/mixture/gmm.py b/sklearn/mixture/gmm.py index 42c9aadb93403c0566523449b98e2adfc1740c2e..de11daf147106873fcd377d55bca5e9cc4658b80 100644 --- a/sklearn/mixture/gmm.py +++ b/sklearn/mixture/gmm.py @@ -462,7 +462,8 @@ class GMM(BaseEstimator): """ # initialization step - X = check_array(X, dtype=np.float64, ensure_min_samples=2) + X = check_array(X, dtype=np.float64, ensure_min_samples=2, + estimator=self) if X.shape[0] < self.n_components: raise ValueError( 'GMM estimation with %s components, but got only %s samples' % diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 4481ef914d1011b25b5fb7b477f42354a68ef1e2..d3ac1a18630c396b9cd97118be7f22708e72da33 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -357,6 +357,15 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, # list of accepted types. dtype = dtype[0] + if estimator is not None: + if isinstance(estimator, six.string_types): + estimator_name = estimator + else: + estimator_name = estimator.__class__.__name__ + else: + estimator_name = "Estimator" + context = " by %s" % estimator_name if estimator is not None else "" + if sp.issparse(array): array = _ensure_sparse_format(array, accept_sparse, dtype, copy, force_all_finite) @@ -379,8 +388,8 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, if dtype_numeric and array.dtype.kind == "O": array = array.astype(np.float64) if not allow_nd and array.ndim >= 3: - raise ValueError("Found array with dim %d. Expected <= 2" % - array.ndim) + raise ValueError("Found array with dim %d. %s expected <= 2." + % (array.ndim, estimator_name)) if force_all_finite: _assert_all_finite(array) @@ -389,23 +398,21 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, n_samples = _num_samples(array) if n_samples < ensure_min_samples: raise ValueError("Found array with %d sample(s) (shape=%s) while a" - " minimum of %d is required." - % (n_samples, shape_repr, ensure_min_samples)) + " minimum of %d is required%s." + % (n_samples, shape_repr, ensure_min_samples, + context)) if ensure_min_features > 0 and array.ndim == 2: n_features = array.shape[1] if n_features < ensure_min_features: raise ValueError("Found array with %d feature(s) (shape=%s) while" - " a minimum of %d is required." - % (n_features, shape_repr, ensure_min_features)) + " a minimum of %d is required%s." + % (n_features, shape_repr, ensure_min_features, + context)) if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig: - msg = ("Data with input dtype %s was converted to %s" - % (dtype_orig, array.dtype)) - if estimator is not None: - if not isinstance(estimator, six.string_types): - estimator = estimator.__class__.__name__ - msg += " by %s" % estimator + msg = ("Data with input dtype %s was converted to %s%s." + % (dtype_orig, array.dtype, context)) warnings.warn(msg, DataConversionWarning) return array