diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index 08b5f4957ada4c190dab40b336d5e1c11fe72700..0def13318d4aadae94ca26fae1004492a6680675 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -412,7 +412,7 @@ Partial Least Squares
 Cross Validation
 ================
 
-.. automodule:: sklearn.cross_val
+.. automodule:: sklearn.cross_validation
    :no-members:
    :no-inherited-members:
 
@@ -422,14 +422,14 @@ Cross Validation
    :toctree: generated/
    :template: class.rst
 
-   cross_val.LeaveOneOut
-   cross_val.LeavePOut
-   cross_val.KFold
-   cross_val.StratifiedKFold
-   cross_val.LeaveOneLabelOut
-   cross_val.LeavePLabelOut
-   cross_val.Bootstrap
-   cross_val.ShuffleSplit
+   cross_validation.LeaveOneOut
+   cross_validation.LeavePOut
+   cross_validation.KFold
+   cross_validation.StratifiedKFold
+   cross_validation.LeaveOneLabelOut
+   cross_validation.LeavePLabelOut
+   cross_validation.Bootstrap
+   cross_validation.ShuffleSplit
 
 
 Grid Search
diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index fc89659d14e2f41bacd803106e28bd782327b192..9c6581ff763a206690ba8581981ed4aa3c19083b 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -2,7 +2,7 @@
 Cross-Validation
 ================
 
-.. currentmodule:: sklearn.cross_val
+.. currentmodule:: sklearn.cross_validation
 
 Learning the parameters of a prediction function and testing it on the same
 data yields a methodological bias. To avoid over-fitting, we have to define two
@@ -43,12 +43,12 @@ cross-validation procedure does not waste much data as only one sample
 is removed from the learning set::
 
   >>> import numpy as np
-  >>> from sklearn.cross_val import LeaveOneOut
+  >>> from sklearn.cross_validation import LeaveOneOut
   >>> X = np.array([[0., 0.], [1., 1.], [-1., -1.], [2., 2.]])
   >>> Y = np.array([0, 1, 0, 1])
   >>> loo = LeaveOneOut(len(Y))
   >>> print loo
-  sklearn.cross_val.LeaveOneOut(n=4)
+  sklearn.cross_validation.LeaveOneOut(n=4)
   >>> for train, test in loo: print train, test
   [False  True  True  True] [ True False False False]
   [ True False  True  True] [False  True False False]
@@ -67,12 +67,12 @@ integer indices. It can be obtained by setting the parameter indices to True
 when creating the cross-validation procedure::
 
   >>> import numpy as np
-  >>> from sklearn.cross_val import LeaveOneOut
+  >>> from sklearn.cross_validation import LeaveOneOut
   >>> X = np.array([[0., 0.], [1., 1.], [-1., -1.], [2., 2.]])
   >>> Y = np.array([0, 1, 0, 1])
   >>> loo = LeaveOneOut(len(Y), indices=True)
   >>> print loo
-  sklearn.cross_val.LeaveOneOut(n=4)
+  sklearn.cross_validation.LeaveOneOut(n=4)
   >>> for train, test in loo: print train, test
   [1 2 3] [0]
   [0 2 3] [1]
@@ -89,12 +89,12 @@ possible training/test sets by removing *P* samples from the complete set.
 
 Example of Leave-2-Out::
 
-  >>> from sklearn.cross_val import LeavePOut
+  >>> from sklearn.cross_validation import LeavePOut
   >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.]]
   >>> Y = [0, 1, 0, 1]
   >>> loo = LeavePOut(len(Y), 2)
   >>> print loo
-  sklearn.cross_val.LeavePOut(n=4, p=2)
+  sklearn.cross_validation.LeavePOut(n=4, p=2)
   >>> for train, test in loo: print train,test
   [False False  True  True] [ True  True False False]
   [False  True False  True] [ True False  True False]
@@ -124,12 +124,12 @@ and the fold left out is used for test.
 
 Example of 2-fold::
 
-  >>> from sklearn.cross_val import KFold
+  >>> from sklearn.cross_validation import KFold
   >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.]]
   >>> Y = [0, 1, 0, 1]
   >>> loo = KFold(len(Y), 2)
   >>> print loo
-  sklearn.cross_val.KFold(n=4, k=2)
+  sklearn.cross_validation.KFold(n=4, k=2)
   >>> for train, test in loo: print train,test
   [False False  True  True] [ True  True False False]
   [ True  True False False] [False False  True  True]
@@ -146,12 +146,12 @@ class as in the complete set.
 
 Example of stratified 2-fold::
 
-  >>> from sklearn.cross_val import StratifiedKFold
+  >>> from sklearn.cross_validation import StratifiedKFold
   >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.], [3., 3.], [4., 4.], [0., 1.]]
   >>> Y = [0, 0, 0, 1, 1, 1, 0]
   >>> skf = StratifiedKFold(Y, 2)
   >>> print skf
-  sklearn.cross_val.StratifiedKFold(labels=[0 0 0 1 1 1 0], k=2)
+  sklearn.cross_validation.StratifiedKFold(labels=[0 0 0 1 1 1 0], k=2)
   >>> for train, test in skf: print train, test
   [False  True False False  True False  True] [ True False  True  True False  True False]
   [ True False  True  True False  True False] [False  True False False  True False  True]
@@ -174,13 +174,13 @@ For example, in the cases of multiple experiments, *LOLO* can be used to
 create a cross-validation based on the different experiments: we create
 a training set using the samples of all the experiments except one::
 
-  >>> from sklearn.cross_val import LeaveOneLabelOut
+  >>> from sklearn.cross_validation import LeaveOneLabelOut
   >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.]]
   >>> Y = [0, 1, 0, 1]
   >>> labels = [1, 1, 2, 2]
   >>> loo = LeaveOneLabelOut(labels)
   >>> print loo
-  sklearn.cross_val.LeaveOneLabelOut(labels=[1, 1, 2, 2])
+  sklearn.cross_validation.LeaveOneLabelOut(labels=[1, 1, 2, 2])
   >>> for train, test in loo: print train,test
   [False False  True  True] [ True  True False False]
   [ True  True False False] [False False  True  True]
@@ -200,13 +200,13 @@ related to *P* labels for each training/test set.
 
 Example of Leave-2-Label Out::
 
-  >>> from sklearn.cross_val import LeavePLabelOut
+  >>> from sklearn.cross_validation import LeavePLabelOut
   >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.], [3., 3.], [4., 4.]]
   >>> Y = [0, 1, 0, 1, 0, 1]
   >>> labels = [1, 1, 2, 2, 3, 3]
   >>> loo = LeavePLabelOut(labels, 2)
   >>> print loo
-  sklearn.cross_val.LeavePLabelOut(labels=[1, 1, 2, 2, 3, 3], p=2)
+  sklearn.cross_validation.LeavePLabelOut(labels=[1, 1, 2, 2, 3, 3], p=2)
   >>> for train, test in loo: print train,test
   [False False False False  True  True] [ True  True  True  True False False]
   [False False  True  True False False] [ True  True False False  True  True]
@@ -228,8 +228,8 @@ generator.
 
 Here is a usage example::
 
-  >>> from sklearn import cross_val
-  >>> ss = cross_val.ShuffleSplit(5, n_iterations=3, test_fraction=0.25,
+  >>> from sklearn import cross_validation
+  >>> ss = cross_validation.ShuffleSplit(5, n_iterations=3, test_fraction=0.25,
   ...     random_state=0)
   >>> len(ss)
   3
@@ -268,8 +268,8 @@ smaller than the total dataset if it is very large.
 
 .. _Bootstrapping: http://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29
 
-  >>> from sklearn import cross_val
-  >>> bs = cross_val.Bootstrap(9, random_state=0)
+  >>> from sklearn import cross_validation
+  >>> bs = cross_validation.Bootstrap(9, random_state=0)
   >>> len(bs)
   3
   >>> print bs
@@ -291,15 +291,15 @@ function::
 
   >>> from sklearn import datasets
   >>> from sklearn import svm
-  >>> from sklearn import cross_val
+  >>> from sklearn import cross_validation
 
   >>> iris = datasets.load_iris()
   >>> n_samples = iris.data.shape[0]
   >>> clf = svm.SVC(kernel='linear')
-  >>> cv = cross_val.ShuffleSplit(n_samples, n_iterations=3,
+  >>> cv = cross_validation.ShuffleSplit(n_samples, n_iterations=3,
   ...     test_fraction=0.3, random_state=0)
 
-  >>> cross_val.cross_val_score(clf, iris.data, iris.target, cv=cv)
+  >>> cross_validation.cross_val_score(clf, iris.data, iris.target, cv=cv)
   ...                                                     # doctest: +ELLIPSIS
   array([ 0.97...,  0.95...,  0.95...])
 
@@ -308,7 +308,7 @@ method of the estimator. It is possible to change this by passing a custom
 scoring function, e.g. from the metrics module::
 
   >>> from sklearn import metrics
-  >>> cross_val.cross_val_score(clf, iris.data, iris.target, cv=cv,
+  >>> cross_validation.cross_val_score(clf, iris.data, iris.target, cv=cv,
   ...     score_func=metrics.f1_score)
   ...                                                     # doctest: +ELLIPSIS
   array([ 0.95...,  1.  ...,  1.  ...])
@@ -317,7 +317,7 @@ It is also possible to directly pass a number of folds instead of a
 CV iterator.  In that case a :class:`KFold` or :class:`StratifiedKFold`
 instance is automatically created::
 
-  >>> cross_val.cross_val_score(clf, iris.data, iris.target, cv=5)
+  >>> cross_validation.cross_val_score(clf, iris.data, iris.target, cv=5)
   ...                                                     # doctest: +ELLIPSIS
   array([ 1.  ...,  0.96...,  0.9 ...,  0.96...,  1.  ...])
 
diff --git a/examples/applications/face_recognition.py b/examples/applications/face_recognition.py
index a9e3efe1fa6d747079b297260b305a1807e8b4af..81418b21a7ae4c5ee0c27b915155ca94797aa394 100644
--- a/examples/applications/face_recognition.py
+++ b/examples/applications/face_recognition.py
@@ -36,7 +36,7 @@ from time import time
 import logging
 import pylab as pl
 
-from sklearn.cross_val import StratifiedKFold
+from sklearn.cross_validation import StratifiedKFold
 from sklearn.datasets import fetch_lfw_people
 from sklearn.grid_search import GridSearchCV
 from sklearn.metrics import classification_report
diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
index c23b944db31ae44f507665490fd8e8f7702439a4..28d64b0d7c50fcd80c30548dd9d9f162c2934de9 100644
--- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
+++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
@@ -29,7 +29,7 @@ from sklearn.linear_model import BayesianRidge
 from sklearn.pipeline import Pipeline
 from sklearn.grid_search import GridSearchCV
 from sklearn.externals.joblib import Memory
-from sklearn.cross_val import KFold
+from sklearn.cross_validation import KFold
 
 ###############################################################################
 # Generate data
diff --git a/examples/gaussian_process/gp_diabetes_dataset.py b/examples/gaussian_process/gp_diabetes_dataset.py
index fbf0b791b6c033ba4fd436eed7ffae12ffbb8ffd..4c0996826d5e84877519c207f30f8f86b45a5853 100644
--- a/examples/gaussian_process/gp_diabetes_dataset.py
+++ b/examples/gaussian_process/gp_diabetes_dataset.py
@@ -25,7 +25,7 @@ print __doc__
 
 from sklearn import datasets
 from sklearn.gaussian_process import GaussianProcess
-from sklearn.cross_val import cross_val_score, KFold
+from sklearn.cross_validation import cross_val_score, KFold
 
 # Load the dataset from scikit's data sets
 diabetes = datasets.load_diabetes()
@@ -44,7 +44,7 @@ gp.theta0 = gp.theta # Given correlation parameter = MLE
 gp.thetaL, gp.thetaU = None, None # None bounds deactivate MLE
 
 # Perform a cross-validation estimate of the coefficient of determination using
-# the cross_val module using all CPUs available on the machine
+# the cross_validation module (set n_jobs=-1 to use all CPUs on the machine)
 K = 20 # folds
 R2 = cross_val_score(gp, X, y=y, cv=KFold(y.size, K), n_jobs=1).mean()
 print("The %d-Folds estimate of the coefficient of determination is R2 = %s"
diff --git a/examples/grid_search_digits.py b/examples/grid_search_digits.py
index 3ebd27d7694885908dd40b5ba30579f609e890e8..eb3be3a5127243e44645ed044457b330a3eed650 100644
--- a/examples/grid_search_digits.py
+++ b/examples/grid_search_digits.py
@@ -17,7 +17,7 @@ from pprint import pprint
 import numpy as np
 
 from sklearn import datasets
-from sklearn.cross_val import StratifiedKFold
+from sklearn.cross_validation import StratifiedKFold
 from sklearn.grid_search import GridSearchCV
 from sklearn.metrics import classification_report
 from sklearn.metrics import precision_score
diff --git a/examples/mixture/plot_gmm_classifier.py b/examples/mixture/plot_gmm_classifier.py
index c71ce06d2f49c624d9594f152536547ef2af9645..e0be6e0164dec2c4b1d283129dbf5747e5b616b1 100644
--- a/examples/mixture/plot_gmm_classifier.py
+++ b/examples/mixture/plot_gmm_classifier.py
@@ -31,7 +31,7 @@ import matplotlib as mpl
 import numpy as np
 
 from sklearn import datasets
-from sklearn.cross_val import StratifiedKFold
+from sklearn.cross_validation import StratifiedKFold
 from sklearn.mixture import GMM
 
 def make_ellipses(gmm, ax):
diff --git a/examples/plot_permutation_test_for_classification.py b/examples/plot_permutation_test_for_classification.py
index 1ec9205af9761bc91c90df6a384ceb7f925dd437..fa4d7b72e6c5b8c38eee27b6dbecf338a32c7a35 100644
--- a/examples/plot_permutation_test_for_classification.py
+++ b/examples/plot_permutation_test_for_classification.py
@@ -20,7 +20,7 @@ import numpy as np
 import pylab as pl
 
 from sklearn.svm import SVC
-from sklearn.cross_val import StratifiedKFold, permutation_test_score
+from sklearn.cross_validation import StratifiedKFold, permutation_test_score
 from sklearn import datasets
 from sklearn.metrics import zero_one_score
 
diff --git a/examples/plot_rfe_with_cross_validation.py b/examples/plot_rfe_with_cross_validation.py
index 382c9efc349175866be83822eba393430ce7f7f2..d7232f5aaacc40f516bbe44c4a2e734852730796 100644
--- a/examples/plot_rfe_with_cross_validation.py
+++ b/examples/plot_rfe_with_cross_validation.py
@@ -10,7 +10,7 @@ print __doc__
 
 import numpy as np
 from sklearn.svm import SVC
-from sklearn.cross_val import StratifiedKFold
+from sklearn.cross_validation import StratifiedKFold
 from sklearn.feature_selection import RFECV
 from sklearn.datasets import samples_generator
 from sklearn.metrics import zero_one
diff --git a/examples/plot_roc_crossval.py b/examples/plot_roc_crossval.py
index 46a45fbdb76ddb79fa2f84d9aede2038b3bca091..0fd71098291bd457566172055aab4f2ebfd13d05 100644
--- a/examples/plot_roc_crossval.py
+++ b/examples/plot_roc_crossval.py
@@ -15,7 +15,7 @@ import pylab as pl
 
 from sklearn import svm, datasets
 from sklearn.metrics import roc_curve, auc
-from sklearn.cross_val import StratifiedKFold
+from sklearn.cross_validation import StratifiedKFold
 
 ################################################################################
 # Data IO and generation
diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py
index e1e3ac04b08f06dae04f1b65a9d1e9213664b7d0..3ecfb10ac8c7ad24dcd476b2672f377e068f17ff 100644
--- a/examples/svm/plot_svm_anova.py
+++ b/examples/svm/plot_svm_anova.py
@@ -10,7 +10,7 @@ print __doc__
 
 import numpy as np
 import pylab as pl
-from sklearn import svm, datasets, feature_selection, cross_val
+from sklearn import svm, datasets, feature_selection, cross_validation
 from sklearn.pipeline import Pipeline
 
 ################################################################################
@@ -42,7 +42,7 @@ percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)
 for percentile in percentiles:
     clf.set_params(anova__percentile=percentile)
     # Compute cross-validation score using all CPUs
-    this_scores = cross_val.cross_val_score(clf, X, y, n_jobs=1)
+    this_scores = cross_validation.cross_val_score(clf, X, y, n_jobs=1)
     score_means.append(this_scores.mean())
     score_stds.append(this_scores.std())
 
diff --git a/sklearn/__init__.py b/sklearn/__init__.py
index 22867642d297fd466787a64b311ca6e890d3ad83..8c1e5c2f5fd2bb755688b7b8ae3d4c3edd94583e 100644
--- a/sklearn/__init__.py
+++ b/sklearn/__init__.py
@@ -41,7 +41,7 @@ except:
     pass
 
 
-__all__ = ['check_build', 'cross_val', 'ball_tree', 'cluster', 'covariance',
+__all__ = ['check_build', 'cross_validation', 'ball_tree', 'cluster', 'covariance',
            'datasets', 'decomposition', 'feature_extraction',
            'feature_selection',
            'gaussian_process', 'grid_search', 'hmm', 'lda', 'linear_model',
diff --git a/sklearn/cross_val.py b/sklearn/cross_validation.py
similarity index 96%
rename from sklearn/cross_val.py
rename to sklearn/cross_validation.py
index f8b33b9c4189ac5c4fd7cae2264c2b5091849682..fdcfbd92959cfe5bf308d2f588535c58ca3a7264 100644
--- a/sklearn/cross_val.py
+++ b/sklearn/cross_validation.py
@@ -41,14 +41,14 @@ class LeaveOneOut(object):
 
     Examples
     ========
-    >>> from sklearn import cross_val
+    >>> from sklearn import cross_validation
     >>> X = np.array([[1, 2], [3, 4]])
     >>> y = np.array([1, 2])
-    >>> loo = cross_val.LeaveOneOut(2)
+    >>> loo = cross_validation.LeaveOneOut(2)
     >>> len(loo)
     2
     >>> print loo
-    sklearn.cross_val.LeaveOneOut(n=2)
+    sklearn.cross_validation.LeaveOneOut(n=2)
     >>> for train_index, test_index in loo:
     ...    print "TRAIN:", train_index, "TEST:", test_index
     ...    X_train, X_test = X[train_index], X[test_index]
@@ -118,14 +118,14 @@ class LeavePOut(object):
 
     Examples
     ========
-    >>> from sklearn import cross_val
+    >>> from sklearn import cross_validation
     >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
     >>> y = np.array([1, 2, 3, 4])
-    >>> lpo = cross_val.LeavePOut(4, 2)
+    >>> lpo = cross_validation.LeavePOut(4, 2)
     >>> len(lpo)
     6
     >>> print lpo
-    sklearn.cross_val.LeavePOut(n=4, p=2)
+    sklearn.cross_validation.LeavePOut(n=4, p=2)
     >>> for train_index, test_index in lpo:
     ...    print "TRAIN:", train_index, "TEST:", test_index
     ...    X_train, X_test = X[train_index], X[test_index]
@@ -194,14 +194,14 @@ class KFold(object):
 
     Examples
     --------
-    >>> from sklearn import cross_val
+    >>> from sklearn import cross_validation
     >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
     >>> y = np.array([1, 2, 3, 4])
-    >>> kf = cross_val.KFold(4, k=2)
+    >>> kf = cross_validation.KFold(4, k=2)
     >>> len(kf)
     2
     >>> print kf
-    sklearn.cross_val.KFold(n=4, k=2)
+    sklearn.cross_validation.KFold(n=4, k=2)
     >>> for train_index, test_index in kf:
     ...    print "TRAIN:", train_index, "TEST:", test_index
     ...    X_train, X_test = X[train_index], X[test_index]
@@ -284,14 +284,14 @@ class StratifiedKFold(object):
 
     Examples
     --------
-    >>> from sklearn import cross_val
+    >>> from sklearn import cross_validation
     >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
     >>> y = np.array([0, 0, 1, 1])
-    >>> skf = cross_val.StratifiedKFold(y, k=2)
+    >>> skf = cross_validation.StratifiedKFold(y, k=2)
     >>> len(skf)
     2
     >>> print skf
-    sklearn.cross_val.StratifiedKFold(labels=[0 0 1 1], k=2)
+    sklearn.cross_validation.StratifiedKFold(labels=[0 0 1 1], k=2)
     >>> for train_index, test_index in skf:
     ...    print "TRAIN:", train_index, "TEST:", test_index
     ...    X_train, X_test = X[train_index], X[test_index]
@@ -372,15 +372,15 @@ class LeaveOneLabelOut(object):
 
     Examples
     ----------
-    >>> from sklearn import cross_val
+    >>> from sklearn import cross_validation
     >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
     >>> y = np.array([1, 2, 1, 2])
     >>> labels = np.array([1, 1, 2, 2])
-    >>> lol = cross_val.LeaveOneLabelOut(labels)
+    >>> lol = cross_validation.LeaveOneLabelOut(labels)
     >>> len(lol)
     2
     >>> print lol
-    sklearn.cross_val.LeaveOneLabelOut(labels=[1 1 2 2])
+    sklearn.cross_validation.LeaveOneLabelOut(labels=[1 1 2 2])
     >>> for train_index, test_index in lol:
     ...    print "TRAIN:", train_index, "TEST:", test_index
     ...    X_train, X_test = X[train_index], X[test_index]
@@ -457,15 +457,15 @@ class LeavePLabelOut(object):
 
     Examples
     ----------
-    >>> from sklearn import cross_val
+    >>> from sklearn import cross_validation
     >>> X = np.array([[1, 2], [3, 4], [5, 6]])
     >>> y = np.array([1, 2, 1])
     >>> labels = np.array([1, 2, 3])
-    >>> lpl = cross_val.LeavePLabelOut(labels, p=2)
+    >>> lpl = cross_validation.LeavePLabelOut(labels, p=2)
     >>> len(lpl)
     3
     >>> print lpl
-    sklearn.cross_val.LeavePLabelOut(labels=[1 2 3], p=2)
+    sklearn.cross_validation.LeavePLabelOut(labels=[1 2 3], p=2)
     >>> for train_index, test_index in lpl:
     ...    print "TRAIN:", train_index, "TEST:", test_index
     ...    X_train, X_test = X[train_index], X[test_index]
@@ -569,8 +569,8 @@ class Bootstrap(object):
 
     Examples
     --------
-    >>> from sklearn import cross_val
-    >>> bs = cross_val.Bootstrap(9, random_state=0)
+    >>> from sklearn import cross_validation
+    >>> bs = cross_validation.Bootstrap(9, random_state=0)
     >>> len(bs)
     3
     >>> print bs
@@ -675,8 +675,8 @@ class ShuffleSplit(object):
 
     Examples
     ----------
-    >>> from sklearn import cross_val
-    >>> rs = cross_val.ShuffleSplit(4, n_iterations=3, test_fraction=.25,
+    >>> from sklearn import cross_validation
+    >>> rs = cross_validation.ShuffleSplit(4, n_iterations=3, test_fraction=.25,
     ...                             random_state=0)
     >>> len(rs)
     3
@@ -885,7 +885,7 @@ def permutation_test_score(estimator, X, y, score_func, cv=None,
     cv : integer or crossvalidation generator, optional
         If an integer is passed, it is the number of fold (default 3).
         Specific crossvalidation objects can be passed, see
-        sklearn.cross_val module for the list of possible objects
+        sklearn.cross_validation module for the list of possible objects
 
     n_jobs: integer, optional
         The number of CPUs to use to do the computation. -1 means
diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py
index cd710c5d2708cfa88e4fa83588a60a6c9a38847a..fc39789a68f22cbed441a2d7e943349a0c340c7f 100644
--- a/sklearn/feature_selection/rfe.py
+++ b/sklearn/feature_selection/rfe.py
@@ -10,7 +10,7 @@ import numpy as np
 from ..base import BaseEstimator
 from ..base import clone
 from ..base import is_classifier
-from ..cross_val import check_cv
+from ..cross_validation import check_cv
 
 
 class RFE(BaseEstimator):
@@ -210,7 +210,7 @@ class RFECV(RFE):
         If int, it is the number of folds.
         If None, 3-fold cross-validation is performed by default.
         Specific cross-validation objects can also be passed, see
-        `scikits.learn.cross_val module` for details.
+        `sklearn.cross_validation module` for details.
 
     loss_function : function, optional (default=None)
         The loss function to minimize by cross-validation. If None, then the
diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py
index 63292db3b2ab65db2f32d2b49ca6cce4021e361f..1c92572cb5273c4cdeaa65cdd90bf15898d033ea 100644
--- a/sklearn/grid_search.py
+++ b/sklearn/grid_search.py
@@ -11,7 +11,7 @@ import numpy as np
 import scipy.sparse as sp
 
 from .externals.joblib import Parallel, delayed, logger
-from .cross_val import check_cv
+from .cross_validation import check_cv
 from .base import BaseEstimator, is_classifier, clone
 from .utils.fixes import product
 
@@ -183,7 +183,7 @@ class GridSearchCV(BaseEstimator):
     cv : integer or crossvalidation generator, optional
         If an integer is passed, it is the number of fold (default 3).
         Specific crossvalidation objects can be passed, see 
-        sklearn.cross_val module for the list of possible objects
+        sklearn.cross_validation module for the list of possible objects
 
     refit: boolean
         refit the best estimator with the entire dataset
diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
index 49b662073002deef1c3aedae09b776c504ce8cac..7b887a9a974b7b2f4daba0ff48be3820e3592d4e 100644
--- a/sklearn/linear_model/coordinate_descent.py
+++ b/sklearn/linear_model/coordinate_descent.py
@@ -9,7 +9,7 @@ import numpy as np
 
 from .base import LinearModel
 from ..utils import as_float_array
-from ..cross_val import check_cv
+from ..cross_validation import check_cv
 from . import cd_fast
 
 
@@ -546,7 +546,7 @@ class LassoCV(LinearModelCV):
     cv : integer or crossvalidation generator, optional
         If an integer is passed, it is the number of fold (default 3).
         Specific crossvalidation objects can be passed, see
-        sklearn.cross_val module for the list of possible objects
+        sklearn.cross_validation module for the list of possible objects
 
     Notes
     -----
@@ -601,7 +601,7 @@ class ElasticNetCV(LinearModelCV):
     cv : integer or crossvalidation generator, optional
         If an integer is passed, it is the number of fold (default 3).
         Specific crossvalidation objects can be passed, see
-        sklearn.cross_val module for the list of possible objects
+        sklearn.cross_validation module for the list of possible objects
 
 
     Notes
diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py
index 47aabaf34d8654e0334772edf94f094d18651112..1d135a62b571321879061807183b99514d741810 100644
--- a/sklearn/linear_model/least_angle.py
+++ b/sklearn/linear_model/least_angle.py
@@ -17,7 +17,7 @@ from scipy.linalg.lapack import get_lapack_funcs
 from .base import LinearModel
 from ..utils import arrayfuncs, as_float_array
 from ..utils import deprecated
-from ..cross_val import check_cv
+from ..cross_validation import check_cv
 from ..externals.joblib import Parallel, delayed
 
 
@@ -642,7 +642,7 @@ class LarsCV(LARS):
         Maximum number of iterations to perform.
 
     cv : crossvalidation generator, optional
-        see sklearn.cross_val module. If None is passed, default to
+        see sklearn.cross_validation module. If None is passed, default to
         a 5-fold strategy
 
     n_jobs : integer, optional
@@ -775,7 +775,7 @@ class LassoLarsCV(LarsCV):
         Maximum number of iterations to perform.
 
     cv : crossvalidation generator, optional
-        see sklearn.cross_val module. If None is passed, default to
+        see sklearn.cross_validation module. If None is passed, default to
         a 5-fold strategy
 
     n_jobs : integer, optional
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 85761972f4e4c410e7fb881dc42f1ed2c8eea062..7296618ede120470a0a2787b9bbde68e8b587798 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -16,7 +16,7 @@ from sklearn.linear_model.ridge import RidgeClassifier
 from sklearn.linear_model.ridge import RidgeClassifierCV
 
 
-from sklearn.cross_val import KFold
+from sklearn.cross_validation import KFold
 
 diabetes = datasets.load_diabetes()
 
diff --git a/sklearn/tests/test_cross_val.py b/sklearn/tests/test_cross_validation.py
similarity index 74%
rename from sklearn/tests/test_cross_val.py
rename to sklearn/tests/test_cross_validation.py
index e8917cc2e8ee99e4cc74d1e6dd73d658c4a79868..8c41d83d971250f02e87962a20e126470c643277 100644
--- a/sklearn/tests/test_cross_val.py
+++ b/sklearn/tests/test_cross_validation.py
@@ -1,4 +1,4 @@
-"""Test the cross_val module"""
+"""Test the cross_validation module"""
 
 import numpy as np
 from scipy.sparse import coo_matrix
@@ -14,12 +14,12 @@ from ..metrics import f1_score
 from ..metrics import mean_square_error
 from ..metrics import r2_score
 from ..metrics import explained_variance_score
-from ..cross_val import StratifiedKFold
+from ..cross_validation import StratifiedKFold
 from ..svm import SVC
 from ..linear_model import Ridge
 from ..svm.sparse import SVC as SparseSVC
-from .. import cross_val
-from ..cross_val import permutation_test_score
+from .. import cross_validation
+from ..cross_validation import permutation_test_score
 
 from numpy.testing import assert_array_almost_equal
 from numpy.testing import assert_array_equal
@@ -50,9 +50,9 @@ y = np.arange(10) / 2
 
 def test_kfold():
     # Check that errors are raise if there is not enough samples
-    assert_raises(AssertionError, cross_val.KFold, 3, 4)
+    assert_raises(AssertionError, cross_validation.KFold, 3, 4)
     y = [0, 0, 1, 1, 2]
-    assert_raises(AssertionError, cross_val.StratifiedKFold, y, 3)
+    assert_raises(AssertionError, cross_validation.StratifiedKFold, y, 3)
 
 
 def test_cross_val_score():
@@ -60,10 +60,10 @@ def test_cross_val_score():
     for a in range(-10, 10):
         clf.a = a
         # Smoke test
-        scores = cross_val.cross_val_score(clf, X, y)
+        scores = cross_validation.cross_val_score(clf, X, y)
         assert_array_equal(scores, clf.score(X, y))
 
-        scores = cross_val.cross_val_score(clf, X_sparse, y)
+        scores = cross_validation.cross_val_score(clf, X_sparse, y)
         assert_array_equal(scores, clf.score(X_sparse, y))
 
 
@@ -72,18 +72,18 @@ def test_cross_val_score_with_score_func_classification():
     clf = SVC(kernel='linear')
 
     # Default score (should be the accuracy score)
-    scores = cross_val.cross_val_score(clf, iris.data, iris.target, cv=5)
+    scores = cross_validation.cross_val_score(clf, iris.data, iris.target, cv=5)
     assert_array_almost_equal(scores, [1., 0.97, 0.90, 0.97, 1.], 2)
 
     # Correct classification score (aka. zero / one score) - should be the
     # same as the default estimator score
-    zo_scores = cross_val.cross_val_score(clf, iris.data, iris.target,
+    zo_scores = cross_validation.cross_val_score(clf, iris.data, iris.target,
                                           score_func=zero_one_score, cv=5)
     assert_array_almost_equal(zo_scores, [1., 0.97, 0.90, 0.97, 1.], 2)
 
     # F1 score (class are balanced so f1_score should be equal to zero/one
     # score
-    f1_scores = cross_val.cross_val_score(clf, iris.data, iris.target,
+    f1_scores = cross_validation.cross_val_score(clf, iris.data, iris.target,
                                           score_func=f1_score, cv=5)
     assert_array_almost_equal(f1_scores, [1., 0.97, 0.90, 0.97, 1.], 2)
 
@@ -94,23 +94,23 @@ def test_cross_val_score_with_score_func_regression():
     reg = Ridge()
 
     # Default score of the Ridge regression estimator
-    scores = cross_val.cross_val_score(reg, X, y, cv=5)
+    scores = cross_validation.cross_val_score(reg, X, y, cv=5)
     assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
 
     # R2 score (aka. determination coefficient) - should be the
     # same as the default estimator score
-    r2_scores = cross_val.cross_val_score(reg, X, y, score_func=r2_score,
+    r2_scores = cross_validation.cross_val_score(reg, X, y, score_func=r2_score,
                                           cv=5)
     assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
 
     # Mean squared error
-    mse_scores = cross_val.cross_val_score(reg, X, y, cv=5,
+    mse_scores = cross_validation.cross_val_score(reg, X, y, cv=5,
                                            score_func=mean_square_error)
     expected_mse = [4578.47, 3319.02, 1646.29, 1639.58, 10092.00]
     assert_array_almost_equal(mse_scores, expected_mse, 2)
 
     # Explained variance
-    ev_scores = cross_val.cross_val_score(reg, X, y, cv=5,
+    ev_scores = cross_validation.cross_val_score(reg, X, y, cv=5,
                                           score_func=explained_variance_score)
     assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
 
@@ -159,12 +159,12 @@ def test_cross_val_generator_with_indices():
     X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
     y = np.array([1, 1, 2, 2])
     labels = np.array([1, 2, 3, 4])
-    loo = cross_val.LeaveOneOut(4, indices=True)
-    lpo = cross_val.LeavePOut(4, 2, indices=True)
-    kf = cross_val.KFold(4, 2, indices=True)
-    skf = cross_val.StratifiedKFold(y, 2, indices=True)
-    lolo = cross_val.LeaveOneLabelOut(labels, indices=True)
-    lopo = cross_val.LeavePLabelOut(labels, 2, indices=True)
+    loo = cross_validation.LeaveOneOut(4, indices=True)
+    lpo = cross_validation.LeavePOut(4, 2, indices=True)
+    kf = cross_validation.KFold(4, 2, indices=True)
+    skf = cross_validation.StratifiedKFold(y, 2, indices=True)
+    lolo = cross_validation.LeaveOneLabelOut(labels, indices=True)
+    lopo = cross_validation.LeavePLabelOut(labels, 2, indices=True)
     for cv in [loo, lpo, kf, skf, lolo, lopo]:
         for train, test in cv:
             X_train, X_test = X[train], X[test]
@@ -172,7 +172,7 @@ def test_cross_val_generator_with_indices():
 
 
 def test_bootstrap_errors():
-    assert_raises(ValueError, cross_val.Bootstrap, 10, n_train=100)
-    assert_raises(ValueError, cross_val.Bootstrap, 10, n_test=100)
-    assert_raises(ValueError, cross_val.Bootstrap, 10, n_train=1.1)
-    assert_raises(ValueError, cross_val.Bootstrap, 10, n_test=1.1)
+    assert_raises(ValueError, cross_validation.Bootstrap, 10, n_train=100)
+    assert_raises(ValueError, cross_validation.Bootstrap, 10, n_test=100)
+    assert_raises(ValueError, cross_validation.Bootstrap, 10, n_train=1.1)
+    assert_raises(ValueError, cross_validation.Bootstrap, 10, n_test=1.1)
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 4d4dac86e033b2c93aa914a604ac0c0175233394..cbcca0394cfb0735944165c17f6d1bb0f809e151 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -296,7 +296,7 @@ def resample(*arrays, **options):
 
     See also
     --------
-    :class:`sklearn.cross_val.Bootstrap`
+    :class:`sklearn.cross_validation.Bootstrap`
     :func:`sklearn.utils.shuffle`
     """
     random_state = check_random_state(options.pop('random_state', None))