diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 616b13f1e5327454df5001e86f521b5023b0866c..0d83972faaf7ba634cb9a9a4fc0fcd269a53f2e5 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -99,6 +99,11 @@ Enhancements
      A ``TypeError`` will be raised for any other kwargs. :issue:`8028`
      by :user:`Alexander Booth <alexandercbooth>`.
 
+   - :class:`model_selection.GridSearchCV`, :class:`model_selection.RandomizedSearchCV`
+     and :func:`model_selection.cross_val_score` now allow estimators with callable
+     kernels, which were previously prohibited. :issue:`8005` by `Andreas Müller`_.
+
+
 Bug fixes
 .........
 
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 1ce28755075a4f3f376f82bc862f2b3e096f2ded..49d1d566bd5085053580a8db8d2b62eb91663c89 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -443,15 +443,6 @@ def test_grid_search_precomputed_kernel_error_nonsquare():
     assert_raises(ValueError, cv.fit, K_train, y_train)
 
 
-def test_grid_search_precomputed_kernel_error_kernel_function():
-    # Test that grid search returns an error when using a kernel_function
-    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
-    kernel_function = lambda x1, x2: np.dot(x1, x2.T)
-    clf = SVC(kernel=kernel_function)
-    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
-    assert_raises(ValueError, cv.fit, X_, y_)
-
-
 class BrokenClassifier(BaseEstimator):
     """Broken classifier that cannot be fit twice"""
 
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index 31c5fc8257528a0079aaf80e47cdc3de079b1aed..830a079a0fc6d68c185c534a0df47076e3ee1137 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -310,7 +310,12 @@ def test_cross_val_score_precomputed():
     score_precomputed = cross_val_score(svm, linear_kernel, y)
     svm = SVC(kernel="linear")
     score_linear = cross_val_score(svm, X, y)
-    assert_array_equal(score_precomputed, score_linear)
+    assert_array_almost_equal(score_precomputed, score_linear)
+
+    # test with a callable kernel
+    svm = SVC(kernel=lambda x, y: np.dot(x, y.T))
+    score_callable = cross_val_score(svm, X, y)
+    assert_array_almost_equal(score_precomputed, score_callable)
 
     # Error raised for non-square X
     svm = SVC(kernel="precomputed")
diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
index 3e416b08213392cfa2d1c46cf7f60c3587338f34..cff4c35a58b46a588776920a343ca930b44738f6 100644
--- a/sklearn/svm/base.py
+++ b/sklearn/svm/base.py
@@ -104,8 +104,7 @@ class BaseLibSVM(six.with_metaclass(ABCMeta, BaseEstimator)):
     @property
     def _pairwise(self):
         # Used by cross_val_score.
-        kernel = self.kernel
-        return kernel == "precomputed" or callable(kernel)
+        return self.kernel == "precomputed"
 
     def fit(self, X, y, sample_weight=None):
         """Fit the SVM model according to the given training data.
diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py
index d34c62b185649f537a646771d145f422f668e3e4..3123bb1778ce36d1bde905326d7be883d978a5c6 100644
--- a/sklearn/utils/metaestimators.py
+++ b/sklearn/utils/metaestimators.py
@@ -81,31 +81,62 @@ def if_delegate_has_method(delegate):
 
 
 def _safe_split(estimator, X, y, indices, train_indices=None):
-    """Create subset of dataset and properly handle kernels."""
-    from ..gaussian_process.kernels import Kernel as GPKernel
+    """Create subset of dataset and properly handle kernels.
 
-    if (hasattr(estimator, 'kernel') and callable(estimator.kernel) and
-            not isinstance(estimator.kernel, GPKernel)):
-        # cannot compute the kernel values with custom function
-        raise ValueError("Cannot use a custom kernel function. "
-                         "Precompute the kernel matrix instead.")
+    Slice X, y according to indices for cross-validation, but take care of
+    precomputed kernel matrices or pairwise affinities / distances.
 
-    if not hasattr(X, "shape"):
-        if getattr(estimator, "_pairwise", False):
+    If ``estimator._pairwise is True``, X needs to be square and
+    we slice rows and columns. If ``train_indices`` is not None,
+    we slice rows using ``indices`` (assumed to be the test set) and columns
+    using ``train_indices``, indicating the training set.
+
+    Labels y will always be sliced only along the first axis.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator to determine whether we should slice only rows or rows and
+        columns.
+
+    X : array-like, sparse matrix or iterable
+        Data to be sliced. If ``estimator._pairwise is True``,
+        this needs to be a square array-like or sparse matrix.
+
+    y : array-like, sparse matrix or iterable
+        Targets to be sliced.
+
+    indices : array of int
+        Rows to select from X and y.
+        If ``estimator._pairwise is True`` and ``train_indices is None``
+        then ``indices`` will also be used to slice columns.
+
+    train_indices : array of int or None, default=None
+        If ``estimator._pairwise is True`` and ``train_indices is not None``,
+        then ``train_indices`` will be used to slice the columns of X.
+
+    Returns
+    -------
+    X_sliced : array-like, sparse matrix or list
+        Sliced data.
+
+    y_sliced : array-like, sparse matrix or list
+        Sliced targets.
+
+    """
+    if getattr(estimator, "_pairwise", False):
+        if not hasattr(X, "shape"):
             raise ValueError("Precomputed kernels or affinity matrices have "
                              "to be passed as arrays or sparse matrices.")
-        X_subset = [X[index] for index in indices]
-    else:
-        if getattr(estimator, "_pairwise", False):
-            # X is a precomputed square kernel matrix
-            if X.shape[0] != X.shape[1]:
-                raise ValueError("X should be a square kernel matrix")
-            if train_indices is None:
-                X_subset = X[np.ix_(indices, indices)]
-            else:
-                X_subset = X[np.ix_(indices, train_indices)]
+        # X is a precomputed square kernel matrix
+        if X.shape[0] != X.shape[1]:
+            raise ValueError("X should be a square kernel matrix")
+        if train_indices is None:
+            X_subset = X[np.ix_(indices, indices)]
         else:
-            X_subset = safe_indexing(X, indices)
+            X_subset = X[np.ix_(indices, train_indices)]
+    else:
+        X_subset = safe_indexing(X, indices)
 
     if y is not None:
         y_subset = safe_indexing(y, indices)
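
Example usage (not part of the diff): the snippet below is a minimal sketch of what this change enables, passing an ``SVC`` with a callable kernel directly to ``cross_val_score`` and ``GridSearchCV`` instead of precomputing the kernel matrix. The dataset sizes, the ``linear_kernel`` helper and the parameter grid are arbitrary illustrative choices, not values taken from the changeset::

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.model_selection import GridSearchCV, cross_val_score
    from sklearn.svm import SVC

    X, y = make_classification(n_samples=200, n_features=20, random_state=0)

    def linear_kernel(x1, x2):
        # custom kernel evaluated on the fly; equivalent to kernel="linear"
        return np.dot(x1, x2.T)

    svm = SVC(kernel=linear_kernel)

    # Previously both calls below raised "ValueError: Cannot use a custom
    # kernel function. Precompute the kernel matrix instead." from
    # _safe_split. With BaseLibSVM._pairwise now True only for
    # kernel="precomputed", _safe_split slices X row-wise as for any
    # other estimator.
    scores = cross_val_score(svm, X, y)
    search = GridSearchCV(svm, {'C': [0.1, 1.0]}).fit(X, y)
    print(scores, search.best_params_)

The design point, visible in the ``sklearn/svm/base.py`` hunk, is that a callable kernel no longer marks the estimator as pairwise: the raw feature matrix passes through cross-validation splitting unchanged and the kernel is evaluated inside the SVM itself, so only ``kernel="precomputed"`` still requires the square-matrix slicing handled by ``_safe_split``.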