diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py
index c915cbcee71b731465187fa5045f0412f161e26c..2d003429815c91b1ae415a4cd661e2dafd48702c 100644
--- a/sklearn/linear_model/base.py
+++ b/sklearn/linear_model/base.py
@@ -158,7 +158,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
     coordinate_descend).
 
     This is here because nearly all linear models will want their data to be
-    centered.
+    centered. This function also systematically makes y consistent with X.dtype.
     """
 
     if isinstance(sample_weight, numbers.Number):
@@ -166,12 +166,13 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
 
     X = check_array(X, copy=copy, accept_sparse=['csr', 'csc'],
                     dtype=FLOAT_DTYPES)
+    y = np.asarray(y, dtype=X.dtype)
 
     if fit_intercept:
         if sp.issparse(X):
             X_offset, X_var = mean_variance_axis(X, axis=0)
             if not return_mean:
-                X_offset[:] = 0
+                X_offset[:] = X.dtype.type(0)
 
             if normalize:
 
@@ -201,7 +202,10 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
     else:
         X_offset = np.zeros(X.shape[1], dtype=X.dtype)
         X_scale = np.ones(X.shape[1], dtype=X.dtype)
-        y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
+        if y.ndim == 1:
+            y_offset = X.dtype.type(0)
+        else:
+            y_offset = np.zeros(y.shape[1], dtype=X.dtype)
 
     return X, y, X_offset, y_offset, X_scale
 
@@ -460,7 +464,7 @@ class LinearRegression(LinearModel, RegressorMixin):
             Training data
 
         y : numpy array of shape [n_samples, n_targets]
-            Target values
+            Target values. Will be cast to X's dtype if necessary
 
         sample_weight : numpy array of shape [n_samples]
             Individual weights for each sample
diff --git a/sklearn/linear_model/bayes.py b/sklearn/linear_model/bayes.py
index 82153024e33a7698fa6599b408d12f029d25da32..7ea93706fb1b0ad3de4191d03e8f1d0cec65e467 100644
--- a/sklearn/linear_model/bayes.py
+++ b/sklearn/linear_model/bayes.py
@@ -148,7 +148,7 @@ class BayesianRidge(LinearModel, RegressorMixin):
         X : numpy array of shape [n_samples,n_features]
             Training data
         y : numpy array of shape [n_samples]
-            Target values
+            Target values. Will be cast to X's dtype if necessary
 
         Returns
         -------
@@ -420,7 +420,7 @@ class ARDRegression(LinearModel, RegressorMixin):
             Training vector, where n_samples in the number of samples and
             n_features is the number of features.
         y : array, shape = [n_samples]
-            Target values (integers)
+            Target values (real numbers). Will be cast to X's dtype if necessary
 
         Returns
         -------
diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
index 6a1061f0a906a99d9b4ac661914e97b730ec1858..e1740a6702166bec0504f354e504e1d567ec895c 100644
--- a/sklearn/linear_model/coordinate_descent.py
+++ b/sklearn/linear_model/coordinate_descent.py
@@ -653,7 +653,7 @@ class ElasticNet(LinearModel, RegressorMixin):
             Data
 
         y : ndarray, shape (n_samples,) or (n_samples, n_targets)
-            Target
+            Target. Will be cast to X's dtype if necessary
 
         check_input : boolean, (default=True)
             Allow to bypass several input checking.
@@ -1680,7 +1680,7 @@ class MultiTaskElasticNet(Lasso):
         X : ndarray, shape (n_samples, n_features)
             Data
         y : ndarray, shape (n_samples, n_tasks)
-            Target
+            Target. Will be cast to X's dtype if necessary
 
         Notes
         -----
diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py
index dfd7acb01993eec9f098d0f2e1429cb732b7c5c1..854b463cc70136ccc0a6e2f3a20a752ae5880ea5 100644
--- a/sklearn/linear_model/least_angle.py
+++ b/sklearn/linear_model/least_angle.py
@@ -1455,7 +1455,7 @@ class LassoLarsIC(LassoLars):
             training data.
 
         y : array-like, shape (n_samples,)
-            target values.
+            target values. Will be cast to X's dtype if necessary
 
         copy_X : boolean, optional, default True
             If ``True``, X will be copied; else, it may be overwritten.
diff --git a/sklearn/linear_model/omp.py b/sklearn/linear_model/omp.py
index 7b75c4717e9a12f2ca327eab774c9afa0ce1941d..2a03d31fee03556bb2133d2b38dbf3a2b627d337 100644
--- a/sklearn/linear_model/omp.py
+++ b/sklearn/linear_model/omp.py
@@ -617,7 +617,7 @@ class OrthogonalMatchingPursuit(LinearModel, RegressorMixin):
             Training data.
 
         y : array-like, shape (n_samples,) or (n_samples, n_targets)
-            Target values.
+            Target values. Will be cast to X's dtype if necessary
 
 
         Returns
@@ -835,7 +835,7 @@ class OrthogonalMatchingPursuitCV(LinearModel, RegressorMixin):
             Training data.
 
         y : array-like, shape [n_samples]
-            Target values.
+            Target values. Will be cast to X's dtype if necessary
 
         Returns
         -------
diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py
index ba6a424a96ff2f13188391c73b07c9fd8c2fcf9f..27ec90aa49e6aa30ba397792dafd95dfe91ffd2c 100644
--- a/sklearn/linear_model/randomized_l1.py
+++ b/sklearn/linear_model/randomized_l1.py
@@ -82,7 +82,7 @@ class BaseRandomizedLinearModel(six.with_metaclass(ABCMeta, BaseEstimator,
             Training data.
 
         y : array-like, shape = [n_samples]
-            Target values.
+            Target values. Will be cast to X's dtype if necessary
 
         Returns
         -------
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index 00d28cc6eba971612c45c0ebc841723b3e94937b..e0c7b6f1880375c6f0958890ae9db6083dc45e6e 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -975,7 +975,7 @@ class _RidgeGCV(LinearModel):
             Training data
 
         y : array-like, shape = [n_samples] or [n_samples, n_targets]
-            Target values
+            Target values. Will be cast to X's dtype if necessary
 
         sample_weight : float or array-like of shape [n_samples]
             Sample weight
@@ -1094,7 +1094,7 @@ class _BaseRidgeCV(LinearModel):
             Training data
 
         y : array-like, shape = [n_samples] or [n_samples, n_targets]
-            Target values
+            Target values. Will be cast to X's dtype if necessary
 
         sample_weight : float or array-like of shape [n_samples]
             Sample weight
@@ -1336,7 +1336,7 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
             and n_features is the number of features.
 
         y : array-like, shape (n_samples,)
-            Target values.
+            Target values. Will be cast to X's dtype if necessary
 
         sample_weight : float or numpy array of shape (n_samples,)
             Sample weight.
diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 4c7b326f24f448996b6e35401827df2495a2dec3..ed53e1fbb4aa5d206d0f967707162638ee7e5549 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -324,6 +324,72 @@ def test_csr_preprocess_data():
     assert_equal(csr_.getformat(), 'csr')
 
 
+def test_dtype_preprocess_data():
+    n_samples = 200
+    n_features = 2
+    X = rng.rand(n_samples, n_features)
+    y = rng.rand(n_samples)
+
+    X_32 = np.asarray(X, dtype=np.float32)
+    y_32 = np.asarray(y, dtype=np.float32)
+    X_64 = np.asarray(X, dtype=np.float64)
+    y_64 = np.asarray(y, dtype=np.float64)
+
+    for fit_intercept in [True, False]:
+        for normalize in [True, False]:
+
+            Xt_32, yt_32, X_mean_32, y_mean_32, X_norm_32 = _preprocess_data(
+                X_32, y_32, fit_intercept=fit_intercept, normalize=normalize,
+                return_mean=True)
+
+            Xt_64, yt_64, X_mean_64, y_mean_64, X_norm_64 = _preprocess_data(
+                X_64, y_64, fit_intercept=fit_intercept, normalize=normalize,
+                return_mean=True)
+
+            Xt_3264, yt_3264, X_mean_3264, y_mean_3264, X_norm_3264 = (
+                _preprocess_data(X_32, y_64, fit_intercept=fit_intercept,
+                                 normalize=normalize, return_mean=True))
+
+            Xt_6432, yt_6432, X_mean_6432, y_mean_6432, X_norm_6432 = (
+                _preprocess_data(X_64, y_32, fit_intercept=fit_intercept,
+                                 normalize=normalize, return_mean=True))
+
+            assert_equal(Xt_32.dtype, np.float32)
+            assert_equal(yt_32.dtype, np.float32)
+            assert_equal(X_mean_32.dtype, np.float32)
+            assert_equal(y_mean_32.dtype, np.float32)
+            assert_equal(X_norm_32.dtype, np.float32)
+
+            assert_equal(Xt_64.dtype, np.float64)
+            assert_equal(yt_64.dtype, np.float64)
+            assert_equal(X_mean_64.dtype, np.float64)
+            assert_equal(y_mean_64.dtype, np.float64)
+            assert_equal(X_norm_64.dtype, np.float64)
+
+            assert_equal(Xt_3264.dtype, np.float32)
+            assert_equal(yt_3264.dtype, np.float32)
+            assert_equal(X_mean_3264.dtype, np.float32)
+            assert_equal(y_mean_3264.dtype, np.float32)
+            assert_equal(X_norm_3264.dtype, np.float32)
+
+            assert_equal(Xt_6432.dtype, np.float64)
+            assert_equal(yt_6432.dtype, np.float64)
+            assert_equal(X_mean_6432.dtype, np.float64)
+            assert_equal(y_mean_6432.dtype, np.float64)
+            assert_equal(X_norm_6432.dtype, np.float64)
+
+            assert_equal(X_32.dtype, np.float32)
+            assert_equal(y_32.dtype, np.float32)
+            assert_equal(X_64.dtype, np.float64)
+            assert_equal(y_64.dtype, np.float64)
+
+            assert_array_almost_equal(Xt_32, Xt_64)
+            assert_array_almost_equal(yt_32, yt_64)
+            assert_array_almost_equal(X_mean_32, X_mean_64)
+            assert_array_almost_equal(y_mean_32, y_mean_64)
+            assert_array_almost_equal(X_norm_32, X_norm_64)
+
+
 def test_rescale_data():
     n_samples = 200
     n_features = 2
diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 6a2758cdf3b001e50f37c6c8e567285ef86e736e..7372fbed1ab3ded93a3e5ba42d4a77a5cb1c50bf 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -661,12 +661,11 @@ def test_check_input_false():
     clf = ElasticNet(selection='cyclic', tol=1e-8)
     # Check that no error is raised if data is provided in the right format
     clf.fit(X, y, check_input=False)
+    # With check_input=False, an exhaustive check is not made on y, but y is
+    # still cast in _preprocess_data to X's dtype. So the test should pass
+    # anyway
     X = check_array(X, order='F', dtype='float32')
-    clf.fit(X, y, check_input=True)
-    # Check that an error is raised if data is provided in the wrong dtype,
-    # because of check bypassing
-    assert_raises(ValueError, clf.fit, X, y, check_input=False)
-
+    clf.fit(X, y, check_input=False)
     # With no input checking, providing X in C order should result in false
     # computation
     X = check_array(X, order='C', dtype='float64')