diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 08b5f4957ada4c190dab40b336d5e1c11fe72700..0def13318d4aadae94ca26fae1004492a6680675 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -412,7 +412,7 @@ Partial Least Squares Cross Validation ================ -.. automodule:: sklearn.cross_val +.. automodule:: sklearn.cross_validation :no-members: :no-inherited-members: @@ -422,14 +422,14 @@ Cross Validation :toctree: generated/ :template: class.rst - cross_val.LeaveOneOut - cross_val.LeavePOut - cross_val.KFold - cross_val.StratifiedKFold - cross_val.LeaveOneLabelOut - cross_val.LeavePLabelOut - cross_val.Bootstrap - cross_val.ShuffleSplit + cross_validation.LeaveOneOut + cross_validation.LeavePOut + cross_validation.KFold + cross_validation.StratifiedKFold + cross_validation.LeaveOneLabelOut + cross_validation.LeavePLabelOut + cross_validation.Bootstrap + cross_validation.ShuffleSplit Grid Search diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index fc89659d14e2f41bacd803106e28bd782327b192..9c6581ff763a206690ba8581981ed4aa3c19083b 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -2,7 +2,7 @@ Cross-Validation ================ -.. currentmodule:: sklearn.cross_val +.. currentmodule:: sklearn.cross_validation Learning the parameters of a prediction function and testing it on the same data yields a methodological bias. 
To avoid over-fitting, we have to define two @@ -43,12 +43,12 @@ cross-validation procedure does not waste much data as only one sample is removed from the learning set:: >>> import numpy as np - >>> from sklearn.cross_val import LeaveOneOut + >>> from sklearn.cross_validation import LeaveOneOut >>> X = np.array([[0., 0.], [1., 1.], [-1., -1.], [2., 2.]]) >>> Y = np.array([0, 1, 0, 1]) >>> loo = LeaveOneOut(len(Y)) >>> print loo - sklearn.cross_val.LeaveOneOut(n=4) + sklearn.cross_validation.LeaveOneOut(n=4) >>> for train, test in loo: print train, test [False True True True] [ True False False False] [ True False True True] [False True False False] @@ -67,12 +67,12 @@ integer indices. It can be obtained by setting the parameter indices to True when creating the cross-validation procedure:: >>> import numpy as np - >>> from sklearn.cross_val import LeaveOneOut + >>> from sklearn.cross_validation import LeaveOneOut >>> X = np.array([[0., 0.], [1., 1.], [-1., -1.], [2., 2.]]) >>> Y = np.array([0, 1, 0, 1]) >>> loo = LeaveOneOut(len(Y), indices=True) >>> print loo - sklearn.cross_val.LeaveOneOut(n=4) + sklearn.cross_validation.LeaveOneOut(n=4) >>> for train, test in loo: print train, test [1 2 3] [0] [0 2 3] [1] @@ -89,12 +89,12 @@ possible training/test sets by removing *P* samples from the complete set. Example of Leave-2-Out:: - >>> from sklearn.cross_val import LeavePOut + >>> from sklearn.cross_validation import LeavePOut >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.]] >>> Y = [0, 1, 0, 1] >>> loo = LeavePOut(len(Y), 2) >>> print loo - sklearn.cross_val.LeavePOut(n=4, p=2) + sklearn.cross_validation.LeavePOut(n=4, p=2) >>> for train, test in loo: print train,test [False False True True] [ True True False False] [False True False True] [ True False True False] @@ -124,12 +124,12 @@ and the fold left out is used for test. 
Example of 2-fold:: - >>> from sklearn.cross_val import KFold + >>> from sklearn.cross_validation import KFold >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.]] >>> Y = [0, 1, 0, 1] >>> loo = KFold(len(Y), 2) >>> print loo - sklearn.cross_val.KFold(n=4, k=2) + sklearn.cross_validation.KFold(n=4, k=2) >>> for train, test in loo: print train,test [False False True True] [ True True False False] [ True True False False] [False False True True] @@ -146,12 +146,12 @@ class as in the complete set. Example of stratified 2-fold:: - >>> from sklearn.cross_val import StratifiedKFold + >>> from sklearn.cross_validation import StratifiedKFold >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.], [3., 3.], [4., 4.], [0., 1.]] >>> Y = [0, 0, 0, 1, 1, 1, 0] >>> skf = StratifiedKFold(Y, 2) >>> print skf - sklearn.cross_val.StratifiedKFold(labels=[0 0 0 1 1 1 0], k=2) + sklearn.cross_validation.StratifiedKFold(labels=[0 0 0 1 1 1 0], k=2) >>> for train, test in skf: print train, test [False True False False True False True] [ True False True True False True False] [ True False True True False True False] [False True False False True False True] @@ -174,13 +174,13 @@ For example, in the cases of multiple experiments, *LOLO* can be used to create a cross-validation based on the different experiments: we create a training set using the samples of all the experiments except one:: - >>> from sklearn.cross_val import LeaveOneLabelOut + >>> from sklearn.cross_validation import LeaveOneLabelOut >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.]] >>> Y = [0, 1, 0, 1] >>> labels = [1, 1, 2, 2] >>> loo = LeaveOneLabelOut(labels) >>> print loo - sklearn.cross_val.LeaveOneLabelOut(labels=[1, 1, 2, 2]) + sklearn.cross_validation.LeaveOneLabelOut(labels=[1, 1, 2, 2]) >>> for train, test in loo: print train,test [False False True True] [ True True False False] [ True True False False] [False False True True] @@ -200,13 +200,13 @@ related to *P* labels for each training/test set. 
Example of Leave-2-Label Out:: - >>> from sklearn.cross_val import LeavePLabelOut + >>> from sklearn.cross_validation import LeavePLabelOut >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.], [3., 3.], [4., 4.]] >>> Y = [0, 1, 0, 1, 0, 1] >>> labels = [1, 1, 2, 2, 3, 3] >>> loo = LeavePLabelOut(labels, 2) >>> print loo - sklearn.cross_val.LeavePLabelOut(labels=[1, 1, 2, 2, 3, 3], p=2) + sklearn.cross_validation.LeavePLabelOut(labels=[1, 1, 2, 2, 3, 3], p=2) >>> for train, test in loo: print train,test [False False False False True True] [ True True True True False False] [False False True True False False] [ True True False False True True] @@ -228,8 +228,8 @@ generator. Here is a usage example:: - >>> from sklearn import cross_val - >>> ss = cross_val.ShuffleSplit(5, n_iterations=3, test_fraction=0.25, + >>> from sklearn import cross_validation + >>> ss = cross_validation.ShuffleSplit(5, n_iterations=3, test_fraction=0.25, ... random_state=0) >>> len(ss) 3 @@ -268,8 +268,8 @@ smaller than the total dataset if it is very large. .. _Bootstrapping: http://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29 - >>> from sklearn import cross_val - >>> bs = cross_val.Bootstrap(9, random_state=0) + >>> from sklearn import cross_validation + >>> bs = cross_validation.Bootstrap(9, random_state=0) >>> len(bs) 3 >>> print bs @@ -291,15 +291,15 @@ function:: >>> from sklearn import datasets >>> from sklearn import svm - >>> from sklearn import cross_val + >>> from sklearn import cross_validation >>> iris = datasets.load_iris() >>> n_samples = iris.data.shape[0] >>> clf = svm.SVC(kernel='linear') - >>> cv = cross_val.ShuffleSplit(n_samples, n_iterations=3, + >>> cv = cross_validation.ShuffleSplit(n_samples, n_iterations=3, ... test_fraction=0.3, random_state=0) - >>> cross_val.cross_val_score(clf, iris.data, iris.target, cv=cv) + >>> cross_validation.cross_val_score(clf, iris.data, iris.target, cv=cv) ... 
# doctest: +ELLIPSIS array([ 0.97..., 0.95..., 0.95...]) @@ -308,7 +308,7 @@ method of the estimator. It is possible to change this by passing a custom scoring function, e.g. from the metrics module:: >>> from sklearn import metrics - >>> cross_val.cross_val_score(clf, iris.data, iris.target, cv=cv, + >>> cross_validation.cross_val_score(clf, iris.data, iris.target, cv=cv, ... score_func=metrics.f1_score) ... # doctest: +ELLIPSIS array([ 0.95..., 1. ..., 1. ...]) @@ -317,7 +317,7 @@ It is also possible to directly pass a number of folds instead of a CV iterator. In that case a :class:`KFold` or :class:`StratifiedKFold` instance is automatically created:: - >>> cross_val.cross_val_score(clf, iris.data, iris.target, cv=5) + >>> cross_validation.cross_val_score(clf, iris.data, iris.target, cv=5) ... # doctest: +ELLIPSIS array([ 1. ..., 0.96..., 0.9 ..., 0.96..., 1. ...]) diff --git a/examples/applications/face_recognition.py b/examples/applications/face_recognition.py index a9e3efe1fa6d747079b297260b305a1807e8b4af..81418b21a7ae4c5ee0c27b915155ca94797aa394 100644 --- a/examples/applications/face_recognition.py +++ b/examples/applications/face_recognition.py @@ -36,7 +36,7 @@ from time import time import logging import pylab as pl -from sklearn.cross_val import StratifiedKFold +from sklearn.cross_validation import StratifiedKFold from sklearn.datasets import fetch_lfw_people from sklearn.grid_search import GridSearchCV from sklearn.metrics import classification_report diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py index c23b944db31ae44f507665490fd8e8f7702439a4..28d64b0d7c50fcd80c30548dd9d9f162c2934de9 100644 --- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py +++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py @@ -29,7 +29,7 @@ from sklearn.linear_model import BayesianRidge from sklearn.pipeline import Pipeline 
from sklearn.grid_search import GridSearchCV from sklearn.externals.joblib import Memory -from sklearn.cross_val import KFold +from sklearn.cross_validation import KFold ############################################################################### # Generate data diff --git a/examples/gaussian_process/gp_diabetes_dataset.py b/examples/gaussian_process/gp_diabetes_dataset.py index fbf0b791b6c033ba4fd436eed7ffae12ffbb8ffd..4c0996826d5e84877519c207f30f8f86b45a5853 100644 --- a/examples/gaussian_process/gp_diabetes_dataset.py +++ b/examples/gaussian_process/gp_diabetes_dataset.py @@ -25,7 +25,7 @@ print __doc__ from sklearn import datasets from sklearn.gaussian_process import GaussianProcess -from sklearn.cross_val import cross_val_score, KFold +from sklearn.cross_validation import cross_val_score, KFold # Load the dataset from scikit's data sets diabetes = datasets.load_diabetes() @@ -44,7 +44,7 @@ gp.theta0 = gp.theta # Given correlation parameter = MLE gp.thetaL, gp.thetaU = None, None # None bounds deactivate MLE # Perform a cross-validation estimate of the coefficient of determination using -# the cross_val module using all CPUs available on the machine +# the cross_validation module using all CPUs available on the machine K = 20 # folds R2 = cross_val_score(gp, X, y=y, cv=KFold(y.size, K), n_jobs=1).mean() print("The %d-Folds estimate of the coefficient of determination is R2 = %s" diff --git a/examples/grid_search_digits.py b/examples/grid_search_digits.py index 3ebd27d7694885908dd40b5ba30579f609e890e8..eb3be3a5127243e44645ed044457b330a3eed650 100644 --- a/examples/grid_search_digits.py +++ b/examples/grid_search_digits.py @@ -17,7 +17,7 @@ from pprint import pprint import numpy as np from sklearn import datasets -from sklearn.cross_val import StratifiedKFold +from sklearn.cross_validation import StratifiedKFold from sklearn.grid_search import GridSearchCV from sklearn.metrics import classification_report from sklearn.metrics import precision_score diff --git 
a/examples/mixture/plot_gmm_classifier.py b/examples/mixture/plot_gmm_classifier.py index c71ce06d2f49c624d9594f152536547ef2af9645..e0be6e0164dec2c4b1d283129dbf5747e5b616b1 100644 --- a/examples/mixture/plot_gmm_classifier.py +++ b/examples/mixture/plot_gmm_classifier.py @@ -31,7 +31,7 @@ import matplotlib as mpl import numpy as np from sklearn import datasets -from sklearn.cross_val import StratifiedKFold +from sklearn.cross_validation import StratifiedKFold from sklearn.mixture import GMM def make_ellipses(gmm, ax): diff --git a/examples/plot_permutation_test_for_classification.py b/examples/plot_permutation_test_for_classification.py index 1ec9205af9761bc91c90df6a384ceb7f925dd437..fa4d7b72e6c5b8c38eee27b6dbecf338a32c7a35 100644 --- a/examples/plot_permutation_test_for_classification.py +++ b/examples/plot_permutation_test_for_classification.py @@ -20,7 +20,7 @@ import numpy as np import pylab as pl from sklearn.svm import SVC -from sklearn.cross_val import StratifiedKFold, permutation_test_score +from sklearn.cross_validation import StratifiedKFold, permutation_test_score from sklearn import datasets from sklearn.metrics import zero_one_score diff --git a/examples/plot_rfe_with_cross_validation.py b/examples/plot_rfe_with_cross_validation.py index 382c9efc349175866be83822eba393430ce7f7f2..d7232f5aaacc40f516bbe44c4a2e734852730796 100644 --- a/examples/plot_rfe_with_cross_validation.py +++ b/examples/plot_rfe_with_cross_validation.py @@ -10,7 +10,7 @@ print __doc__ import numpy as np from sklearn.svm import SVC -from sklearn.cross_val import StratifiedKFold +from sklearn.cross_validation import StratifiedKFold from sklearn.feature_selection import RFECV from sklearn.datasets import samples_generator from sklearn.metrics import zero_one diff --git a/examples/plot_roc_crossval.py b/examples/plot_roc_crossval.py index 46a45fbdb76ddb79fa2f84d9aede2038b3bca091..0fd71098291bd457566172055aab4f2ebfd13d05 100644 --- a/examples/plot_roc_crossval.py +++ 
b/examples/plot_roc_crossval.py @@ -15,7 +15,7 @@ import pylab as pl from sklearn import svm, datasets from sklearn.metrics import roc_curve, auc -from sklearn.cross_val import StratifiedKFold +from sklearn.cross_validation import StratifiedKFold ################################################################################ # Data IO and generation diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index e1e3ac04b08f06dae04f1b65a9d1e9213664b7d0..3ecfb10ac8c7ad24dcd476b2672f377e068f17ff 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -10,7 +10,7 @@ print __doc__ import numpy as np import pylab as pl -from sklearn import svm, datasets, feature_selection, cross_val +from sklearn import svm, datasets, feature_selection, cross_validation from sklearn.pipeline import Pipeline ################################################################################ @@ -42,7 +42,7 @@ percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100) for percentile in percentiles: clf.set_params(anova__percentile=percentile) # Compute cross-validation score using all CPUs - this_scores = cross_val.cross_val_score(clf, X, y, n_jobs=1) + this_scores = cross_validation.cross_val_score(clf, X, y, n_jobs=1) score_means.append(this_scores.mean()) score_stds.append(this_scores.std()) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 22867642d297fd466787a64b311ca6e890d3ad83..8c1e5c2f5fd2bb755688b7b8ae3d4c3edd94583e 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -41,7 +41,7 @@ except: pass -__all__ = ['check_build', 'cross_val', 'ball_tree', 'cluster', 'covariance', +__all__ = ['check_build', 'cross_validation', 'ball_tree', 'cluster', 'covariance', 'datasets', 'decomposition', 'feature_extraction', 'feature_selection', 'gaussian_process', 'grid_search', 'hmm', 'lda', 'linear_model', diff --git a/sklearn/cross_val.py b/sklearn/cross_validation.py similarity index 96% rename from sklearn/cross_val.py rename to 
sklearn/cross_validation.py index f8b33b9c4189ac5c4fd7cae2264c2b5091849682..fdcfbd92959cfe5bf308d2f588535c58ca3a7264 100644 --- a/sklearn/cross_val.py +++ b/sklearn/cross_validation.py @@ -41,14 +41,14 @@ class LeaveOneOut(object): Examples ======== - >>> from sklearn import cross_val + >>> from sklearn import cross_validation >>> X = np.array([[1, 2], [3, 4]]) >>> y = np.array([1, 2]) - >>> loo = cross_val.LeaveOneOut(2) + >>> loo = cross_validation.LeaveOneOut(2) >>> len(loo) 2 >>> print loo - sklearn.cross_val.LeaveOneOut(n=2) + sklearn.cross_validation.LeaveOneOut(n=2) >>> for train_index, test_index in loo: ... print "TRAIN:", train_index, "TEST:", test_index ... X_train, X_test = X[train_index], X[test_index] @@ -118,14 +118,14 @@ class LeavePOut(object): Examples ======== - >>> from sklearn import cross_val + >>> from sklearn import cross_validation >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) >>> y = np.array([1, 2, 3, 4]) - >>> lpo = cross_val.LeavePOut(4, 2) + >>> lpo = cross_validation.LeavePOut(4, 2) >>> len(lpo) 6 >>> print lpo - sklearn.cross_val.LeavePOut(n=4, p=2) + sklearn.cross_validation.LeavePOut(n=4, p=2) >>> for train_index, test_index in lpo: ... print "TRAIN:", train_index, "TEST:", test_index ... X_train, X_test = X[train_index], X[test_index] @@ -194,14 +194,14 @@ class KFold(object): Examples -------- - >>> from sklearn import cross_val + >>> from sklearn import cross_validation >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]]) >>> y = np.array([1, 2, 3, 4]) - >>> kf = cross_val.KFold(4, k=2) + >>> kf = cross_validation.KFold(4, k=2) >>> len(kf) 2 >>> print kf - sklearn.cross_val.KFold(n=4, k=2) + sklearn.cross_validation.KFold(n=4, k=2) >>> for train_index, test_index in kf: ... print "TRAIN:", train_index, "TEST:", test_index ... 
X_train, X_test = X[train_index], X[test_index] @@ -284,14 +284,14 @@ class StratifiedKFold(object): Examples -------- - >>> from sklearn import cross_val + >>> from sklearn import cross_validation >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]]) >>> y = np.array([0, 0, 1, 1]) - >>> skf = cross_val.StratifiedKFold(y, k=2) + >>> skf = cross_validation.StratifiedKFold(y, k=2) >>> len(skf) 2 >>> print skf - sklearn.cross_val.StratifiedKFold(labels=[0 0 1 1], k=2) + sklearn.cross_validation.StratifiedKFold(labels=[0 0 1 1], k=2) >>> for train_index, test_index in skf: ... print "TRAIN:", train_index, "TEST:", test_index ... X_train, X_test = X[train_index], X[test_index] @@ -372,15 +372,15 @@ class LeaveOneLabelOut(object): Examples ---------- - >>> from sklearn import cross_val + >>> from sklearn import cross_validation >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) >>> y = np.array([1, 2, 1, 2]) >>> labels = np.array([1, 1, 2, 2]) - >>> lol = cross_val.LeaveOneLabelOut(labels) + >>> lol = cross_validation.LeaveOneLabelOut(labels) >>> len(lol) 2 >>> print lol - sklearn.cross_val.LeaveOneLabelOut(labels=[1 1 2 2]) + sklearn.cross_validation.LeaveOneLabelOut(labels=[1 1 2 2]) >>> for train_index, test_index in lol: ... print "TRAIN:", train_index, "TEST:", test_index ... X_train, X_test = X[train_index], X[test_index] @@ -457,15 +457,15 @@ class LeavePLabelOut(object): Examples ---------- - >>> from sklearn import cross_val + >>> from sklearn import cross_validation >>> X = np.array([[1, 2], [3, 4], [5, 6]]) >>> y = np.array([1, 2, 1]) >>> labels = np.array([1, 2, 3]) - >>> lpl = cross_val.LeavePLabelOut(labels, p=2) + >>> lpl = cross_validation.LeavePLabelOut(labels, p=2) >>> len(lpl) 3 >>> print lpl - sklearn.cross_val.LeavePLabelOut(labels=[1 2 3], p=2) + sklearn.cross_validation.LeavePLabelOut(labels=[1 2 3], p=2) >>> for train_index, test_index in lpl: ... print "TRAIN:", train_index, "TEST:", test_index ... 
X_train, X_test = X[train_index], X[test_index] @@ -569,8 +569,8 @@ class Bootstrap(object): Examples -------- - >>> from sklearn import cross_val - >>> bs = cross_val.Bootstrap(9, random_state=0) + >>> from sklearn import cross_validation + >>> bs = cross_validation.Bootstrap(9, random_state=0) >>> len(bs) 3 >>> print bs @@ -675,8 +675,8 @@ class ShuffleSplit(object): Examples ---------- - >>> from sklearn import cross_val - >>> rs = cross_val.ShuffleSplit(4, n_iterations=3, test_fraction=.25, + >>> from sklearn import cross_validation + >>> rs = cross_validation.ShuffleSplit(4, n_iterations=3, test_fraction=.25, ... random_state=0) >>> len(rs) 3 @@ -885,7 +885,7 @@ def permutation_test_score(estimator, X, y, score_func, cv=None, cv : integer or crossvalidation generator, optional If an integer is passed, it is the number of fold (default 3). Specific crossvalidation objects can be passed, see - sklearn.cross_val module for the list of possible objects + sklearn.cross_validation module for the list of possible objects n_jobs: integer, optional The number of CPUs to use to do the computation. -1 means diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index cd710c5d2708cfa88e4fa83588a60a6c9a38847a..fc39789a68f22cbed441a2d7e943349a0c340c7f 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -10,7 +10,7 @@ import numpy as np from ..base import BaseEstimator from ..base import clone from ..base import is_classifier -from ..cross_val import check_cv +from ..cross_validation import check_cv class RFE(BaseEstimator): @@ -210,7 +210,7 @@ class RFECV(RFE): If int, it is the number of folds. If None, 3-fold cross-validation is performed by default. Specific cross-validation objects can also be passed, see - `scikits.learn.cross_val module` for details. + `sklearn.cross_validation module` for details. loss_function : function, optional (default=None) The loss function to minimize by cross-validation.
If None, then the diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 63292db3b2ab65db2f32d2b49ca6cce4021e361f..1c92572cb5273c4cdeaa65cdd90bf15898d033ea 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -11,7 +11,7 @@ import numpy as np import scipy.sparse as sp from .externals.joblib import Parallel, delayed, logger -from .cross_val import check_cv +from .cross_validation import check_cv from .base import BaseEstimator, is_classifier, clone from .utils.fixes import product @@ -183,7 +183,7 @@ class GridSearchCV(BaseEstimator): cv : integer or crossvalidation generator, optional If an integer is passed, it is the number of fold (default 3). Specific crossvalidation objects can be passed, see - sklearn.cross_val module for the list of possible objects + sklearn.cross_validation module for the list of possible objects refit: boolean refit the best estimator with the entire dataset diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 49b662073002deef1c3aedae09b776c504ce8cac..7b887a9a974b7b2f4daba0ff48be3820e3592d4e 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -9,7 +9,7 @@ import numpy as np from .base import LinearModel from ..utils import as_float_array -from ..cross_val import check_cv +from ..cross_validation import check_cv from . import cd_fast @@ -546,7 +546,7 @@ class LassoCV(LinearModelCV): cv : integer or crossvalidation generator, optional If an integer is passed, it is the number of fold (default 3). Specific crossvalidation objects can be passed, see - sklearn.cross_val module for the list of possible objects + sklearn.cross_validation module for the list of possible objects Notes ----- @@ -601,7 +601,7 @@ class ElasticNetCV(LinearModelCV): cv : integer or crossvalidation generator, optional If an integer is passed, it is the number of fold (default 3). 
Specific crossvalidation objects can be passed, see - sklearn.cross_val module for the list of possible objects + sklearn.cross_validation module for the list of possible objects Notes diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index 47aabaf34d8654e0334772edf94f094d18651112..1d135a62b571321879061807183b99514d741810 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -17,7 +17,7 @@ from scipy.linalg.lapack import get_lapack_funcs from .base import LinearModel from ..utils import arrayfuncs, as_float_array from ..utils import deprecated -from ..cross_val import check_cv +from ..cross_validation import check_cv from ..externals.joblib import Parallel, delayed @@ -642,7 +642,7 @@ class LarsCV(LARS): Maximum number of iterations to perform. cv : crossvalidation generator, optional - see sklearn.cross_val module. If None is passed, default to + see sklearn.cross_validation module. If None is passed, default to a 5-fold strategy n_jobs : integer, optional @@ -775,7 +775,7 @@ class LassoLarsCV(LarsCV): Maximum number of iterations to perform. cv : crossvalidation generator, optional - see sklearn.cross_val module. If None is passed, default to + see sklearn.cross_validation module. 
If None is passed, default to a 5-fold strategy n_jobs : integer, optional diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 85761972f4e4c410e7fb881dc42f1ed2c8eea062..7296618ede120470a0a2787b9bbde68e8b587798 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -16,7 +16,7 @@ from sklearn.linear_model.ridge import RidgeClassifier from sklearn.linear_model.ridge import RidgeClassifierCV -from sklearn.cross_val import KFold +from sklearn.cross_validation import KFold diabetes = datasets.load_diabetes() diff --git a/sklearn/tests/test_cross_val.py b/sklearn/tests/test_cross_validation.py similarity index 74% rename from sklearn/tests/test_cross_val.py rename to sklearn/tests/test_cross_validation.py index e8917cc2e8ee99e4cc74d1e6dd73d658c4a79868..8c41d83d971250f02e87962a20e126470c643277 100644 --- a/sklearn/tests/test_cross_val.py +++ b/sklearn/tests/test_cross_validation.py @@ -1,4 +1,4 @@ -"""Test the cross_val module""" +"""Test the cross_validation module""" import numpy as np from scipy.sparse import coo_matrix @@ -14,12 +14,12 @@ from ..metrics import f1_score from ..metrics import mean_square_error from ..metrics import r2_score from ..metrics import explained_variance_score -from ..cross_val import StratifiedKFold +from ..cross_validation import StratifiedKFold from ..svm import SVC from ..linear_model import Ridge from ..svm.sparse import SVC as SparseSVC -from .. import cross_val -from ..cross_val import permutation_test_score +from .. 
import cross_validation +from ..cross_validation import permutation_test_score from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_equal @@ -50,9 +50,9 @@ y = np.arange(10) / 2 def test_kfold(): # Check that errors are raise if there is not enough samples - assert_raises(AssertionError, cross_val.KFold, 3, 4) + assert_raises(AssertionError, cross_validation.KFold, 3, 4) y = [0, 0, 1, 1, 2] - assert_raises(AssertionError, cross_val.StratifiedKFold, y, 3) + assert_raises(AssertionError, cross_validation.StratifiedKFold, y, 3) def test_cross_val_score(): @@ -60,10 +60,10 @@ def test_cross_val_score(): for a in range(-10, 10): clf.a = a # Smoke test - scores = cross_val.cross_val_score(clf, X, y) + scores = cross_validation.cross_val_score(clf, X, y) assert_array_equal(scores, clf.score(X, y)) - scores = cross_val.cross_val_score(clf, X_sparse, y) + scores = cross_validation.cross_val_score(clf, X_sparse, y) assert_array_equal(scores, clf.score(X_sparse, y)) @@ -72,18 +72,18 @@ def test_cross_val_score_with_score_func_classification(): clf = SVC(kernel='linear') # Default score (should be the accuracy score) - scores = cross_val.cross_val_score(clf, iris.data, iris.target, cv=5) + scores = cross_validation.cross_val_score(clf, iris.data, iris.target, cv=5) assert_array_almost_equal(scores, [1., 0.97, 0.90, 0.97, 1.], 2) # Correct classification score (aka. 
zero / one score) - should be the # same as the default estimator score - zo_scores = cross_val.cross_val_score(clf, iris.data, iris.target, + zo_scores = cross_validation.cross_val_score(clf, iris.data, iris.target, score_func=zero_one_score, cv=5) assert_array_almost_equal(zo_scores, [1., 0.97, 0.90, 0.97, 1.], 2) # F1 score (class are balanced so f1_score should be equal to zero/one # score - f1_scores = cross_val.cross_val_score(clf, iris.data, iris.target, + f1_scores = cross_validation.cross_val_score(clf, iris.data, iris.target, score_func=f1_score, cv=5) assert_array_almost_equal(f1_scores, [1., 0.97, 0.90, 0.97, 1.], 2) @@ -94,23 +94,23 @@ def test_cross_val_score_with_score_func_regression(): reg = Ridge() # Default score of the Ridge regression estimator - scores = cross_val.cross_val_score(reg, X, y, cv=5) + scores = cross_validation.cross_val_score(reg, X, y, cv=5) assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2) # R2 score (aka. determination coefficient) - should be the # same as the default estimator score - r2_scores = cross_val.cross_val_score(reg, X, y, score_func=r2_score, + r2_scores = cross_validation.cross_val_score(reg, X, y, score_func=r2_score, cv=5) assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2) # Mean squared error - mse_scores = cross_val.cross_val_score(reg, X, y, cv=5, + mse_scores = cross_validation.cross_val_score(reg, X, y, cv=5, score_func=mean_square_error) expected_mse = [4578.47, 3319.02, 1646.29, 1639.58, 10092.00] assert_array_almost_equal(mse_scores, expected_mse, 2) # Explained variance - ev_scores = cross_val.cross_val_score(reg, X, y, cv=5, + ev_scores = cross_validation.cross_val_score(reg, X, y, cv=5, score_func=explained_variance_score) assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2) @@ -159,12 +159,12 @@ def test_cross_val_generator_with_indices(): X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) y = np.array([1, 1, 2, 2]) labels = np.array([1, 2, 3, 
4]) - loo = cross_val.LeaveOneOut(4, indices=True) - lpo = cross_val.LeavePOut(4, 2, indices=True) - kf = cross_val.KFold(4, 2, indices=True) - skf = cross_val.StratifiedKFold(y, 2, indices=True) - lolo = cross_val.LeaveOneLabelOut(labels, indices=True) - lopo = cross_val.LeavePLabelOut(labels, 2, indices=True) + loo = cross_validation.LeaveOneOut(4, indices=True) + lpo = cross_validation.LeavePOut(4, 2, indices=True) + kf = cross_validation.KFold(4, 2, indices=True) + skf = cross_validation.StratifiedKFold(y, 2, indices=True) + lolo = cross_validation.LeaveOneLabelOut(labels, indices=True) + lopo = cross_validation.LeavePLabelOut(labels, 2, indices=True) for cv in [loo, lpo, kf, skf, lolo, lopo]: for train, test in cv: X_train, X_test = X[train], X[test] @@ -172,7 +172,7 @@ def test_cross_val_generator_with_indices(): def test_bootstrap_errors(): - assert_raises(ValueError, cross_val.Bootstrap, 10, n_train=100) - assert_raises(ValueError, cross_val.Bootstrap, 10, n_test=100) - assert_raises(ValueError, cross_val.Bootstrap, 10, n_train=1.1) - assert_raises(ValueError, cross_val.Bootstrap, 10, n_test=1.1) + assert_raises(ValueError, cross_validation.Bootstrap, 10, n_train=100) + assert_raises(ValueError, cross_validation.Bootstrap, 10, n_test=100) + assert_raises(ValueError, cross_validation.Bootstrap, 10, n_train=1.1) + assert_raises(ValueError, cross_validation.Bootstrap, 10, n_test=1.1) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 4d4dac86e033b2c93aa914a604ac0c0175233394..cbcca0394cfb0735944165c17f6d1bb0f809e151 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -296,7 +296,7 @@ def resample(*arrays, **options): See also -------- - :class:`sklearn.cross_val.Bootstrap` + :class:`sklearn.cross_validation.Bootstrap` :func:`sklearn.utils.shuffle` """ random_state = check_random_state(options.pop('random_state', None))