diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 09b4a7090deffa41905b53f3368fda08bb3405bc..8df8604b77b26140669e361e534f02b82dfdd0fa 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -3,172 +3,9 @@ The :mod:`sklearn.utils` module includes various utilites.
 """
 
 import numpy as np
-import scipy.sparse as sp
 import warnings
 
-
-def assert_all_finite(X):
-    """Throw a ValueError if X contains NaN or infinity.
-
-    Input MUST be an np.ndarray instance or a scipy.sparse matrix."""
-
-    # First try an O(n) time, O(1) space solution for the common case that
-    # there everything is finite; fall back to O(n) space np.isfinite to
-    # prevent false positives from overflow in sum method.
-    if X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum()) \
-      and not np.isfinite(X).all():
-            raise ValueError("array contains NaN or infinity")
-
-
-def safe_asarray(X, dtype=None, order=None):
-    """Convert X to an array or sparse matrix.
-
-    Prevents copying X when possible; sparse matrices are passed through."""
-    if not sp.issparse(X):
-        X = np.asarray(X, dtype, order)
-    assert_all_finite(X)
-    return X
-
-
-def as_float_array(X, copy=True):
-    """Converts an array-like to an array of floats
-
-    The new dtype will be np.float32 or np.float64, depending on the original
-    type. The function can create a copy or modify the argument depending
-    on the argument copy.
-
-    Parameters
-    ----------
-    X : array
-
-    copy : bool, optional
-        If True, a copy of X will be created. If False, a copy may still be
-        returned if X's dtype is not a floating point type.
-
-    Returns
-    -------
-    X : array
-        An array of type np.float
-    """
-    if isinstance(X, np.matrix):
-        X = X.A
-    elif not isinstance(X, np.ndarray) and not sp.issparse(X):
-        return safe_asarray(X, dtype=np.float64)
-    if X.dtype in [np.float32, np.float64]:
-        return X.copy() if copy else X
-    if X.dtype == np.int32:
-        X = X.astype(np.float32)
-    else:
-        X = X.astype(np.float64)
-    return X
-
-
-def array2d(X, dtype=None, order=None):
-    """Returns at least 2-d array with data from X"""
-    return np.asarray(np.atleast_2d(X), dtype=dtype, order=order)
-
-
-def atleast2d_or_csr(X):
-    """Like numpy.atleast_2d, but converts sparse matrices to CSR format
-
-    Also, converts np.matrix to np.ndarray.
-    """
-    X = X.tocsr() if sp.issparse(X) else array2d(X)
-    assert_all_finite(X)
-    return X
-
-
-def check_random_state(seed):
-    """Turn seed into a np.random.RandomState instance
-
-    If seed is None, return the RandomState singleton used by np.random.
-    If seed is an int, return a new RandomState instance seeded with seed.
-    If seed is already a RandomState instance, return it.
-    Otherwise raise ValueError.
-    """
-    if seed is None or seed is np.random:
-        return np.random.mtrand._rand
-    if isinstance(seed, int):
-        return np.random.RandomState(seed)
-    if isinstance(seed, np.random.RandomState):
-        return seed
-    raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
-                     ' instance' % seed)
-
-
-def _num_samples(x):
-    """Return number of samples in array-like x."""
-    if not hasattr(x, '__len__') and not hasattr(x, 'shape'):
-        raise TypeError("Expected sequence or array-like, got %r" % x)
-    return x.shape[0] if hasattr(x, 'shape') else len(x)
-
-
-def check_arrays(*arrays, **options):
-    """Checked that all arrays have consistent first dimensions
-
-    Parameters
-    ----------
-    *arrays : sequence of arrays or scipy.sparse matrices with same shape[0]
-        Python lists or tuples occurring in arrays are converted to 1D numpy
-        arrays.
-
-    sparse_format : 'csr' or 'csc', None by default
-        If not None, any scipy.sparse matrix is converted to
-        Compressed Sparse Rows or Compressed Sparse Columns representations.
-
-    copy : boolean, False by default
-        If copy is True, ensure that returned arrays are copies of the original
-        (if not already converted to another format earlier in the process).
-    """
-    sparse_format = options.pop('sparse_format', None)
-    if sparse_format not in (None, 'csr', 'csc'):
-        raise ValueError('Unexpected sparse format: %r' % sparse_format)
-    copy = options.pop('copy', False)
-    if options:
-        raise ValueError("Unexpected kw arguments: %r" % options.keys())
-
-    if len(arrays) == 0:
-        return None
-
-    n_samples = _num_samples(arrays[0])
-
-    checked_arrays = []
-    for array in arrays:
-        array_orig = array
-        if array is None:
-            # special case: ignore optional y=None kwarg pattern
-            checked_arrays.append(array)
-            continue
-
-        size = _num_samples(array)
-
-        if size != n_samples:
-            raise ValueError("Found array with dim %d. Expected %d" % (
-                size, n_samples))
-
-        if sp.issparse(array):
-            if sparse_format == 'csr':
-                array = array.tocsr()
-            elif sparse_format == 'csc':
-                array = array.tocsc()
-        else:
-            array = np.asarray(array)
-
-        if copy and array is array_orig:
-            array = array.copy()
-        checked_arrays.append(array)
-
-    return checked_arrays
-
-
-def warn_if_not_float(X, estimator='This algorithm'):
-    """Warning utility function to check that data type is floating point"""
-    if not isinstance(estimator, basestring):
-        estimator = estimator.__class__.__name__
-    if X.dtype.kind != 'f':
-        warnings.warn("%s assumes floating point values as input, "
-                      "got %s" % (estimator, X.dtype))
-
+from validation import *
 
 class deprecated(object):
     """Decorator to mark a function or class as deprecated.
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
new file mode 100644
index 0000000000000000000000000000000000000000..d251c1d3d214470629b9dd0511e358fd66cc0dfd
--- /dev/null
+++ b/sklearn/utils/validation.py
@@ -0,0 +1,169 @@
+"""
+Utilities for input validation
+"""
+
+import numpy as np
+import scipy.sparse as sp
+import warnings
+
+
+def assert_all_finite(X):
+    """Throw a ValueError if X contains NaN or infinity.
+
+    Input MUST be an np.ndarray instance or a scipy.sparse matrix."""
+
+    # First try an O(n) time, O(1) space solution for the common case that
+    # there everything is finite; fall back to O(n) space np.isfinite to
+    # prevent false positives from overflow in sum method.
+    if X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum()) \
+      and not np.isfinite(X).all():
+            raise ValueError("array contains NaN or infinity")
+
+
+def safe_asarray(X, dtype=None, order=None):
+    """Convert X to an array or sparse matrix.
+
+    Prevents copying X when possible; sparse matrices are passed through."""
+    if not sp.issparse(X):
+        X = np.asarray(X, dtype, order)
+    assert_all_finite(X)
+    return X
+
+
+def as_float_array(X, copy=True):
+    """Converts an array-like to an array of floats
+
+    The new dtype will be np.float32 or np.float64, depending on the original
+    type. The function can create a copy or modify the argument depending
+    on the argument copy.
+
+    Parameters
+    ----------
+    X : array
+
+    copy : bool, optional
+        If True, a copy of X will be created. If False, a copy may still be
+        returned if X's dtype is not a floating point type.
+
+    Returns
+    -------
+    X : array
+        An array of type np.float
+    """
+    if isinstance(X, np.matrix):
+        X = X.A
+    elif not isinstance(X, np.ndarray) and not sp.issparse(X):
+        return safe_asarray(X, dtype=np.float64)
+    if X.dtype in [np.float32, np.float64]:
+        return X.copy() if copy else X
+    if X.dtype == np.int32:
+        X = X.astype(np.float32)
+    else:
+        X = X.astype(np.float64)
+    return X
+
+
+def array2d(X, dtype=None, order=None):
+    """Returns at least 2-d array with data from X"""
+    return np.asarray(np.atleast_2d(X), dtype=dtype, order=order)
+
+
+def atleast2d_or_csr(X):
+    """Like numpy.atleast_2d, but converts sparse matrices to CSR format
+
+    Also, converts np.matrix to np.ndarray.
+    """
+    X = X.tocsr() if sp.issparse(X) else array2d(X)
+    assert_all_finite(X)
+    return X
+
+def _num_samples(x):
+    """Return number of samples in array-like x."""
+    if not hasattr(x, '__len__') and not hasattr(x, 'shape'):
+        raise TypeError("Expected sequence or array-like, got %r" % x)
+    return x.shape[0] if hasattr(x, 'shape') else len(x)
+
+
+def check_arrays(*arrays, **options):
+    """Checked that all arrays have consistent first dimensions
+
+    Parameters
+    ----------
+    *arrays : sequence of arrays or scipy.sparse matrices with same shape[0]
+        Python lists or tuples occurring in arrays are converted to 1D numpy
+        arrays.
+
+    sparse_format : 'csr' or 'csc', None by default
+        If not None, any scipy.sparse matrix is converted to
+        Compressed Sparse Rows or Compressed Sparse Columns representations.
+
+    copy : boolean, False by default
+        If copy is True, ensure that returned arrays are copies of the original
+        (if not already converted to another format earlier in the process).
+    """
+    sparse_format = options.pop('sparse_format', None)
+    if sparse_format not in (None, 'csr', 'csc'):
+        raise ValueError('Unexpected sparse format: %r' % sparse_format)
+    copy = options.pop('copy', False)
+    if options:
+        raise ValueError("Unexpected kw arguments: %r" % options.keys())
+
+    if len(arrays) == 0:
+        return None
+
+    n_samples = _num_samples(arrays[0])
+
+    checked_arrays = []
+    for array in arrays:
+        array_orig = array
+        if array is None:
+            # special case: ignore optional y=None kwarg pattern
+            checked_arrays.append(array)
+            continue
+
+        size = _num_samples(array)
+
+        if size != n_samples:
+            raise ValueError("Found array with dim %d. Expected %d" % (
+                size, n_samples))
+
+        if sp.issparse(array):
+            if sparse_format == 'csr':
+                array = array.tocsr()
+            elif sparse_format == 'csc':
+                array = array.tocsc()
+        else:
+            array = np.asarray(array)
+
+        if copy and array is array_orig:
+            array = array.copy()
+        checked_arrays.append(array)
+
+    return checked_arrays
+
+
+def warn_if_not_float(X, estimator='This algorithm'):
+    """Warning utility function to check that data type is floating point"""
+    if not isinstance(estimator, basestring):
+        estimator = estimator.__class__.__name__
+    if X.dtype.kind != 'f':
+        warnings.warn("%s assumes floating point values as input, "
+                      "got %s" % (estimator, X.dtype))
+
+
+def check_random_state(seed):
+    """Turn seed into a np.random.RandomState instance
+
+    If seed is None, return the RandomState singleton used by np.random.
+    If seed is an int, return a new RandomState instance seeded with seed.
+    If seed is already a RandomState instance, return it.
+    Otherwise raise ValueError.
+    """
+    if seed is None or seed is np.random:
+        return np.random.mtrand._rand
+    if isinstance(seed, int):
+        return np.random.RandomState(seed)
+    if isinstance(seed, np.random.RandomState):
+        return seed
+    raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
+                     ' instance' % seed)