From 5c4b1bb23192a137ac22ced229c50d6b69859ac6 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <t3kcit@gmail.com>
Date: Fri, 9 Dec 2016 12:43:38 -0500
Subject: [PATCH] [MRG+1] Housekeeping Deprecations for v0.19 (#7927)

* remove stuff to be removed in 0.19

* more changes

* remove classes from 0.19 whatsnew

* remove _LearntSelectorMixin

* remove ProjectedGradientNMF, load_lfw_*

* minor fixes

* remove more copy from logistic regression path

* remove lda, qda from __init__.__all__

* remove pg solver in nmf from tests etc

* remove class_weight="auto" from tests

* doctest change for decision_function_shape="ovr"

* remove transform from tree test, minor fixes to tree tests

* some fixes in the tests

* undo changes in functions which still allow 1d input...

* also allow 1d in scale

* more test fixes...

* last test fixes in forest and tree

* svm default value change doctest failures

* pep8

* remove more class_weight="auto" stuff

* minor cosmetic fixes to docstrings for deprecated / removed behavior.

* say that store_covariance has been moved to __init__ in discriminant_analysis
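
As a quick reference, a minimal sketch of the surviving class_weight presets
after the "auto" removal (the choice of estimators is illustrative):

    from sklearn.svm import SVC
    from sklearn.ensemble import RandomForestClassifier

    # class_weight='auto' is gone; 'balanced' weights classes by
    # n_samples / (n_classes * np.bincount(y))
    svc = SVC(class_weight='balanced')

    # forests additionally keep the 'balanced_subsample' preset
    rf = RandomForestClassifier(class_weight='balanced_subsample')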
---
 doc/modules/classes.rst                       |  17 -
 doc/modules/model_persistence.rst             |   2 +-
 doc/modules/pipeline.rst                      |   4 +-
 doc/modules/svm.rst                           |   4 +-
 doc/tutorial/basic/tutorial.rst               |  12 +-
 .../supervised_learning.rst                   |   2 +-
 sklearn/__init__.py                           |   4 +-
 sklearn/base.py                               |   8 -
 .../tests/test_robust_covariance.py           |   4 +-
 sklearn/datasets/__init__.py                  |   4 -
 sklearn/datasets/lfw.py                       |  36 --
 sklearn/datasets/tests/test_lfw.py            |  44 +-
 sklearn/decomposition/__init__.py             |   3 +-
 sklearn/decomposition/nmf.py                  | 424 +-----------------
 sklearn/decomposition/tests/test_nmf.py       | 121 ++---
 sklearn/discriminant_analysis.py              |  51 +--
 sklearn/ensemble/forest.py                    |  28 +-
 sklearn/ensemble/gradient_boosting.py         |  53 +--
 sklearn/ensemble/iforest.py                   |   4 +-
 sklearn/ensemble/tests/test_forest.py         |  12 +-
 .../ensemble/tests/test_gradient_boosting.py  |  10 +-
 sklearn/feature_selection/from_model.py       |  69 +--
 .../tests/test_from_model.py                  |  24 -
 sklearn/grid_search.py                        |   2 +-
 sklearn/lda.py                                |  20 -
 sklearn/linear_model/base.py                  |  22 -
 sklearn/linear_model/coordinate_descent.py    |  17 +-
 sklearn/linear_model/logistic.py              |  40 +-
 sklearn/linear_model/perceptron.py            |   3 +-
 sklearn/linear_model/stochastic_gradient.py   |  25 +-
 .../linear_model/tests/test_least_angle.py    |   2 -
 sklearn/linear_model/tests/test_logistic.py   |  11 -
 sklearn/metrics/base.py                       |   9 -
 sklearn/metrics/regression.py                 |  20 +-
 sklearn/model_selection/_search.py            |   2 +-
 sklearn/pipeline.py                           |   5 -
 sklearn/preprocessing/data.py                 |  99 +---
 sklearn/preprocessing/tests/test_data.py      |  32 +-
 sklearn/qda.py                                |  20 -
 sklearn/svm/base.py                           |  26 +-
 sklearn/svm/classes.py                        |  45 +-
 sklearn/svm/tests/test_svm.py                 |  19 -
 sklearn/tests/test_discriminant_analysis.py   |  37 --
 sklearn/tests/test_multiclass.py              |   2 -
 sklearn/tests/test_pipeline.py                |   9 -
 sklearn/tree/tests/test_tree.py               |  48 +-
 sklearn/tree/tree.py                          |   5 +-
 sklearn/utils/__init__.py                     |  11 +-
 sklearn/utils/class_weight.py                 |  26 +-
 sklearn/utils/estimator_checks.py             |  50 +--
 sklearn/utils/testing.py                      |  24 -
 sklearn/utils/tests/test_class_weight.py      |  69 +--
 sklearn/utils/tests/test_validation.py        |   9 -
 sklearn/utils/validation.py                   |  47 +-
 54 files changed, 234 insertions(+), 1462 deletions(-)
 delete mode 100644 sklearn/lda.py
 delete mode 100644 sklearn/qda.py

diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index 5b44889bfa..78c2e1333d 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -1361,23 +1361,6 @@ Low-level methods
 Recently deprecated
 ===================
 
-To be removed in 0.19
----------------------
-
-.. autosummary::
-   :toctree: generated/
-   :template: deprecated_class.rst
-
-   lda.LDA
-   qda.QDA
-
-.. autosummary::
-   :toctree: generated/
-   :template: deprecated_function.rst
-
-   datasets.load_lfw_pairs
-   datasets.load_lfw_people
-
 
 To be removed in 0.20
 ---------------------
diff --git a/doc/modules/model_persistence.rst b/doc/modules/model_persistence.rst
index b1903c5202..5b83bc28a7 100644
--- a/doc/modules/model_persistence.rst
+++ b/doc/modules/model_persistence.rst
@@ -23,7 +23,7 @@ persistence model, namely `pickle <https://docs.python.org/2/library/pickle.html
   >>> X, y = iris.data, iris.target
   >>> clf.fit(X, y)  # doctest: +NORMALIZE_WHITESPACE
   SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-      decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
+      decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
       max_iter=-1, probability=False, random_state=None, shrinking=True,
       tol=0.001, verbose=False)
 
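These doctest updates follow from the new SVC default
decision_function_shape='ovr'. A minimal sketch of what the setting changes
for callers, on toy four-class data chosen so the two shapes differ:

    import numpy as np
    from sklearn.svm import SVC

    X = np.arange(8, dtype=float).reshape(-1, 1)
    y = np.array([0, 0, 1, 1, 2, 2, 3, 3])

    ovr = SVC(decision_function_shape='ovr').fit(X, y)
    print(ovr.decision_function(X).shape)  # (8, 4): one column per class

    ovo = SVC(decision_function_shape='ovo').fit(X, y)
    print(ovo.decision_function(X).shape)  # (8, 6): one column per class pair
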
diff --git a/doc/modules/pipeline.rst b/doc/modules/pipeline.rst
index 27fd0ce94f..5ce5386343 100644
--- a/doc/modules/pipeline.rst
+++ b/doc/modules/pipeline.rst
@@ -43,7 +43,7 @@ is an estimator object::
     Pipeline(steps=[('reduce_dim', PCA(copy=True, iterated_power='auto',
     n_components=None, random_state=None, svd_solver='auto', tol=0.0,
     whiten=False)), ('clf', SVC(C=1.0, cache_size=200, class_weight=None,
-    coef0=0.0, decision_function_shape=None, degree=3, gamma='auto',
+    coef0=0.0, decision_function_shape='ovr', degree=3, gamma='auto',
     kernel='rbf', max_iter=-1, probability=False, random_state=None,
     shrinking=True, tol=0.001, verbose=False))])
 
@@ -80,7 +80,7 @@ Parameters of the estimators in the pipeline can be accessed using the
     Pipeline(steps=[('reduce_dim', PCA(copy=True, iterated_power='auto',
         n_components=None, random_state=None, svd_solver='auto', tol=0.0,
         whiten=False)), ('clf', SVC(C=10, cache_size=200, class_weight=None,
-        coef0=0.0, decision_function_shape=None, degree=3, gamma='auto',
+        coef0=0.0, decision_function_shape='ovr', degree=3, gamma='auto',
         kernel='rbf', max_iter=-1, probability=False, random_state=None,
         shrinking=True, tol=0.001, verbose=False))])
 
diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst
index 1a7ec4f7f5..8fb0d481eb 100644
--- a/doc/modules/svm.rst
+++ b/doc/modules/svm.rst
@@ -77,7 +77,7 @@ n_features]`` holding the training samples, and an array y of class labels
     >>> clf = svm.SVC()
     >>> clf.fit(X, y)  # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-        decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
+        decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
         max_iter=-1, probability=False, random_state=None, shrinking=True,
         tol=0.001, verbose=False)
 
@@ -516,7 +516,7 @@ test vectors must be provided.
     >>> gram = np.dot(X, X.T)
     >>> clf.fit(gram, y) # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-        decision_function_shape=None, degree=3, gamma='auto',
+        decision_function_shape='ovr', degree=3, gamma='auto',
         kernel='precomputed', max_iter=-1, probability=False,
         random_state=None, shrinking=True, tol=0.001, verbose=False)
     >>> # predict on training examples
diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst
index a55311e015..89600953a8 100644
--- a/doc/tutorial/basic/tutorial.rst
+++ b/doc/tutorial/basic/tutorial.rst
@@ -180,7 +180,7 @@ the last entry of ``digits.data``::
 
   >>> clf.fit(digits.data[:-1], digits.target[:-1])  # doctest: +NORMALIZE_WHITESPACE
   SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
-    decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
+    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
     max_iter=-1, probability=False, random_state=None, shrinking=True,
     tol=0.001, verbose=False)
 
@@ -219,7 +219,7 @@ persistence model, namely `pickle <https://docs.python.org/2/library/pickle.html
   >>> X, y = iris.data, iris.target
   >>> clf.fit(X, y)  # doctest: +NORMALIZE_WHITESPACE
   SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-    decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
+    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
     max_iter=-1, probability=False, random_state=None, shrinking=True,
     tol=0.001, verbose=False)
 
@@ -293,7 +293,7 @@ maintained::
     >>> clf = SVC()
     >>> clf.fit(iris.data, iris.target)  # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-      decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
+      decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
       max_iter=-1, probability=False, random_state=None, shrinking=True,
       tol=0.001, verbose=False)
 
@@ -302,7 +302,7 @@ maintained::
 
     >>> clf.fit(iris.data, iris.target_names[iris.target])  # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-      decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
+      decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
       max_iter=-1, probability=False, random_state=None, shrinking=True,
       tol=0.001, verbose=False)
 
@@ -331,7 +331,7 @@ more than once will overwrite what was learned by any previous ``fit()``::
   >>> clf = SVC()
   >>> clf.set_params(kernel='linear').fit(X, y)  # doctest: +NORMALIZE_WHITESPACE
   SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-    decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
+    decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
     max_iter=-1, probability=False, random_state=None, shrinking=True,
     tol=0.001, verbose=False)
   >>> clf.predict(X_test)
@@ -339,7 +339,7 @@ more than once will overwrite what was learned by any previous ``fit()``::
 
   >>> clf.set_params(kernel='rbf').fit(X, y)  # doctest: +NORMALIZE_WHITESPACE
   SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-    decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
+    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
     max_iter=-1, probability=False, random_state=None, shrinking=True,
     tol=0.001, verbose=False)
   >>> clf.predict(X_test)
diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst
index eaf4dc49a2..0440a80340 100644
--- a/doc/tutorial/statistical_inference/supervised_learning.rst
+++ b/doc/tutorial/statistical_inference/supervised_learning.rst
@@ -455,7 +455,7 @@ classification --:class:`SVC` (Support Vector Classification).
     >>> svc = svm.SVC(kernel='linear')
     >>> svc.fit(iris_X_train, iris_y_train)    # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-        decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
+        decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
         max_iter=-1, probability=False, random_state=None, shrinking=True,
         tol=0.001, verbose=False)
 
diff --git a/sklearn/__init__.py b/sklearn/__init__.py
index 4e80774bc3..26c4fff59d 100644
--- a/sklearn/__init__.py
+++ b/sklearn/__init__.py
@@ -62,10 +62,10 @@ else:
                'ensemble', 'exceptions', 'externals', 'feature_extraction',
                'feature_selection', 'gaussian_process', 'grid_search',
                'isotonic', 'kernel_approximation', 'kernel_ridge',
-               'lda', 'learning_curve', 'linear_model', 'manifold', 'metrics',
+               'learning_curve', 'linear_model', 'manifold', 'metrics',
                'mixture', 'model_selection', 'multiclass', 'multioutput',
                'naive_bayes', 'neighbors', 'neural_network', 'pipeline',
-               'preprocessing', 'qda', 'random_projection', 'semi_supervised',
+               'preprocessing', 'random_projection', 'semi_supervised',
                'svm', 'tree', 'discriminant_analysis',
                # Non-modules:
                'clone']
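
With the lda and qda shims dropped from __all__, discriminant_analysis is the
single home for both classes. A minimal sketch of the updated imports:

    # formerly: from sklearn.lda import LDA / from sklearn.qda import QDA
    from sklearn.discriminant_analysis import (
        LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis)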
diff --git a/sklearn/base.py b/sklearn/base.py
index 5d26d7f8e5..1b79841746 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -10,17 +10,9 @@ import numpy as np
 from scipy import sparse
 from .externals import six
 from .utils.fixes import signature
-from .utils.deprecation import deprecated
-from .exceptions import ChangedBehaviorWarning as _ChangedBehaviorWarning
 from . import __version__
 
 
-@deprecated("ChangedBehaviorWarning has been moved into the sklearn.exceptions"
-            " module. It will not be available here from version 0.19")
-class ChangedBehaviorWarning(_ChangedBehaviorWarning):
-    pass
-
-
 ##############################################################################
 def _first_and_last_element(arr):
     """Returns first and last element of numpy array or sparse matrix."""
diff --git a/sklearn/covariance/tests/test_robust_covariance.py b/sklearn/covariance/tests/test_robust_covariance.py
index be5b65cd76..27e423b410 100644
--- a/sklearn/covariance/tests/test_robust_covariance.py
+++ b/sklearn/covariance/tests/test_robust_covariance.py
@@ -44,14 +44,14 @@ def test_mcd():
 
 def test_fast_mcd_on_invalid_input():
     X = np.arange(100)
-    assert_raise_message(ValueError, 'fast_mcd expects at least 2 samples',
+    assert_raise_message(ValueError, 'Got X with X.ndim=1',
                          fast_mcd, X)
 
 
 def test_mcd_class_on_invalid_input():
     X = np.arange(100)
     mcd = MinCovDet()
-    assert_raise_message(ValueError, 'MinCovDet expects at least 2 samples',
+    assert_raise_message(ValueError, 'Got X with X.ndim=1',
                          mcd.fit, X)
 
 
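The new messages come from the shared array validation rather than an
estimator-specific check: 1-d input now fails with "Got X with X.ndim=1". A
minimal sketch of the caller-side fix, on toy data:

    import numpy as np
    from sklearn.covariance import MinCovDet

    X = np.arange(100, dtype=float)
    # X.ndim == 1 is rejected; reshape to a single feature column
    MinCovDet().fit(X.reshape(-1, 1))
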
diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py
index 0a8cfc62df..c38e99acd3 100644
--- a/sklearn/datasets/__init__.py
+++ b/sklearn/datasets/__init__.py
@@ -18,8 +18,6 @@ from .base import load_sample_image
 from .covtype import fetch_covtype
 from .kddcup99 import fetch_kddcup99
 from .mlcomp import load_mlcomp
-from .lfw import load_lfw_pairs
-from .lfw import load_lfw_people
 from .lfw import fetch_lfw_pairs
 from .lfw import fetch_lfw_people
 from .twenty_newsgroups import fetch_20newsgroups
@@ -74,8 +72,6 @@ __all__ = ['clear_data_home',
            'load_files',
            'load_iris',
            'load_breast_cancer',
-           'load_lfw_pairs',
-           'load_lfw_people',
            'load_linnerud',
            'load_mlcomp',
            'load_sample_image',
diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py
index e191efb37b..d155cfe478 100644
--- a/sklearn/datasets/lfw.py
+++ b/sklearn/datasets/lfw.py
@@ -26,8 +26,6 @@ detector from various online websites.
 from os import listdir, makedirs, remove, rename
 from os.path import join, exists, isdir
 
-from sklearn.utils import deprecated
-
 import logging
 import numpy as np
 
@@ -376,23 +374,6 @@ def _fetch_lfw_pairs(index_file_path, data_folder_path, slice_=None,
     return pairs, target, np.array(['Different persons', 'Same person'])
 
 
-@deprecated("Function 'load_lfw_people' has been deprecated in 0.17 and will "
-            "be removed in 0.19."
-            "Use fetch_lfw_people(download_if_missing=False) instead.")
-def load_lfw_people(download_if_missing=False, **kwargs):
-    """
-    Alias for fetch_lfw_people(download_if_missing=False)
-
-    .. deprecated:: 0.17
-        This function will be removed in 0.19.
-        Use :func:`sklearn.datasets.fetch_lfw_people` with parameter
-        ``download_if_missing=False`` instead.
-
-    Check fetch_lfw_people.__doc__ for the documentation and parameter list.
-    """
-    return fetch_lfw_people(download_if_missing=download_if_missing, **kwargs)
-
-
 def fetch_lfw_pairs(subset='train', data_home=None, funneled=True, resize=0.5,
                     color=False, slice_=(slice(70, 195), slice(78, 172)),
                     download_if_missing=True):
@@ -509,20 +490,3 @@ def fetch_lfw_pairs(subset='train', data_home=None, funneled=True, resize=0.5,
     return Bunch(data=pairs.reshape(len(pairs), -1), pairs=pairs,
                  target=target, target_names=target_names,
                  DESCR="'%s' segment of the LFW pairs dataset" % subset)
-
-
-@deprecated("Function 'load_lfw_pairs' has been deprecated in 0.17 and will "
-            "be removed in 0.19."
-            "Use fetch_lfw_pairs(download_if_missing=False) instead.")
-def load_lfw_pairs(download_if_missing=False, **kwargs):
-    """
-    Alias for fetch_lfw_pairs(download_if_missing=False)
-
-    .. deprecated:: 0.17
-        This function will be removed in 0.19.
-        Use :func:`sklearn.datasets.fetch_lfw_pairs` with parameter
-        ``download_if_missing=False`` instead.
-
-    Check fetch_lfw_pairs.__doc__ for the documentation and parameter list.
-    """
-    return fetch_lfw_pairs(download_if_missing=download_if_missing, **kwargs)
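
With the load_lfw_* aliases removed, the fetch_* functions with
download_if_missing=False are the drop-in replacements. A minimal sketch
(assumes the LFW data is already on disk, so it is illustrative rather than
something to run in CI):

    from sklearn.datasets import fetch_lfw_people, fetch_lfw_pairs

    # formerly load_lfw_people(...) and load_lfw_pairs(...)
    people = fetch_lfw_people(min_faces_per_person=70,
                              download_if_missing=False)
    pairs = fetch_lfw_pairs(subset='train', download_if_missing=False)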
diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py
index 83f7ba16c8..3e5875a060 100644
--- a/sklearn/datasets/tests/test_lfw.py
+++ b/sklearn/datasets/tests/test_lfw.py
@@ -22,14 +22,11 @@ try:
 except ImportError:
     imsave = None
 
-from sklearn.datasets import load_lfw_pairs
-from sklearn.datasets import load_lfw_people
 from sklearn.datasets import fetch_lfw_pairs
 from sklearn.datasets import fetch_lfw_people
 
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_equal
-from sklearn.utils.testing import assert_warns_message
 from sklearn.utils.testing import SkipTest
 from sklearn.utils.testing import raises
 
@@ -115,20 +112,14 @@ def teardown_module():
 
 @raises(IOError)
 def test_load_empty_lfw_people():
-    fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA, download_if_missing=False)
-
-
-def test_load_lfw_people_deprecation():
-    msg = ("Function 'load_lfw_people' has been deprecated in 0.17 and will be "
-           "removed in 0.19."
-           "Use fetch_lfw_people(download_if_missing=False) instead.")
-    assert_warns_message(DeprecationWarning, msg, load_lfw_people,
-                         data_home=SCIKIT_LEARN_DATA)
+    fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA,
+                     download_if_missing=False)
 
 
 def test_load_fake_lfw_people():
     lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
-                                  min_faces_per_person=3, download_if_missing=False)
+                                  min_faces_per_person=3,
+                                  download_if_missing=False)
 
     # The data is croped around the center as a rectangular bounding box
     # around the face. Colors are converted to gray levels:
@@ -144,8 +135,9 @@ def test_load_fake_lfw_people():
 
     # It is possible to ask for the original data without any croping or color
     # conversion and not limit on the number of picture per person
-    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
-                                  resize=None, slice_=None, color=True, download_if_missing=False)
+    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, resize=None,
+                                  slice_=None, color=True,
+                                  download_if_missing=False)
     assert_equal(lfw_people.images.shape, (17, 250, 250, 3))
 
     # the ids and class names are the same as previously
@@ -158,24 +150,19 @@ def test_load_fake_lfw_people():
 
 @raises(ValueError)
 def test_load_fake_lfw_people_too_restrictive():
-    fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100, download_if_missing=False)
+    fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100,
+                     download_if_missing=False)
 
 
 @raises(IOError)
 def test_load_empty_lfw_pairs():
-    fetch_lfw_pairs(data_home=SCIKIT_LEARN_EMPTY_DATA, download_if_missing=False)
-
-
-def test_load_lfw_pairs_deprecation():
-    msg = ("Function 'load_lfw_pairs' has been deprecated in 0.17 and will be "
-           "removed in 0.19."
-           "Use fetch_lfw_pairs(download_if_missing=False) instead.")
-    assert_warns_message(DeprecationWarning, msg, load_lfw_pairs,
-                         data_home=SCIKIT_LEARN_DATA)
+    fetch_lfw_pairs(data_home=SCIKIT_LEARN_EMPTY_DATA,
+                    download_if_missing=False)
 
 
 def test_load_fake_lfw_pairs():
-    lfw_pairs_train = fetch_lfw_pairs(data_home=SCIKIT_LEARN_DATA, download_if_missing=False)
+    lfw_pairs_train = fetch_lfw_pairs(data_home=SCIKIT_LEARN_DATA,
+                                      download_if_missing=False)
 
     # The data is croped around the center as a rectangular bounding box
     # around the face. Colors are converted to gray levels:
@@ -190,8 +177,9 @@ def test_load_fake_lfw_pairs():
 
     # It is possible to ask for the original data without any croping or color
     # conversion
-    lfw_pairs_train = fetch_lfw_pairs(data_home=SCIKIT_LEARN_DATA,
-                                      resize=None, slice_=None, color=True, download_if_missing=False)
+    lfw_pairs_train = fetch_lfw_pairs(data_home=SCIKIT_LEARN_DATA, resize=None,
+                                      slice_=None, color=True,
+                                      download_if_missing=False)
     assert_equal(lfw_pairs_train.pairs.shape, (10, 2, 250, 250, 3))
 
     # the ids and class names are the same as previously
diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py
index 24690c8c8f..faca56b91b 100644
--- a/sklearn/decomposition/__init__.py
+++ b/sklearn/decomposition/__init__.py
@@ -4,7 +4,7 @@ algorithms, including among others PCA, NMF or ICA. Most of the algorithms of
 this module can be regarded as dimensionality reduction techniques.
 """
 
-from .nmf import NMF, ProjectedGradientNMF, non_negative_factorization
+from .nmf import NMF, non_negative_factorization
 from .pca import PCA, RandomizedPCA
 from .incremental_pca import IncrementalPCA
 from .kernel_pca import KernelPCA
@@ -26,7 +26,6 @@ __all__ = ['DictionaryLearning',
            'MiniBatchSparsePCA',
            'NMF',
            'PCA',
-           'ProjectedGradientNMF',
            'RandomizedPCA',
            'SparseCoder',
            'SparsePCA',
diff --git a/sklearn/decomposition/nmf.py b/sklearn/decomposition/nmf.py
index cf5fc431e6..3b71079d99 100644
--- a/sklearn/decomposition/nmf.py
+++ b/sklearn/decomposition/nmf.py
@@ -24,7 +24,6 @@ from ..utils import check_random_state, check_array
 from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
 from ..utils.extmath import fast_dot
 from ..utils.validation import check_is_fitted, check_non_negative
-from ..utils import deprecated
 from ..exceptions import ConvergenceWarning
 from .cdnmf_fast import _update_cdnmf_fast
 
@@ -52,12 +51,6 @@ def trace_dot(X, Y):
     return np.dot(X.ravel(), Y.ravel())
 
 
-def _sparseness(x):
-    """Hoyer's measure of sparsity for a vector"""
-    sqrt_n = np.sqrt(len(x))
-    return (sqrt_n - np.linalg.norm(x, 1) / norm(x)) / (sqrt_n - 1)
-
-
 def _check_init(A, shape, whom):
     A = check_array(A)
     if np.shape(A) != shape:
@@ -80,20 +73,6 @@ def _safe_compute_error(X, W, H):
     return error
 
 
-def _check_string_param(sparseness, solver):
-    allowed_sparseness = (None, 'data', 'components')
-    if sparseness not in allowed_sparseness:
-        raise ValueError(
-            'Invalid sparseness parameter: got %r instead of one of %r' %
-            (sparseness, allowed_sparseness))
-
-    allowed_solver = ('pg', 'cd')
-    if solver not in allowed_solver:
-        raise ValueError(
-            'Invalid solver parameter: got %r instead of one of %r' %
-            (solver, allowed_solver))
-
-
 def _initialize_nmf(X, n_components, init=None, eps=1e-6,
                     random_state=None):
     """Algorithms for NMF initialization.
@@ -345,115 +324,6 @@ def _nls_subproblem(V, W, H, tol, max_iter, alpha=0., l1_ratio=0.,
     return H, grad, n_iter
 
 
-def _update_projected_gradient_w(X, W, H, tolW, nls_max_iter, alpha, l1_ratio,
-                                 sparseness, beta, eta):
-    """Helper function for _fit_projected_gradient"""
-    n_samples, n_features = X.shape
-    n_components_ = H.shape[0]
-
-    if sparseness is None:
-        Wt, gradW, iterW = _nls_subproblem(X.T, H.T, W.T, tolW, nls_max_iter,
-                                           alpha=alpha, l1_ratio=l1_ratio)
-    elif sparseness == 'data':
-        Wt, gradW, iterW = _nls_subproblem(
-            safe_vstack([X.T, np.zeros((1, n_samples))]),
-            safe_vstack([H.T, np.sqrt(beta) * np.ones((1,
-                         n_components_))]),
-            W.T, tolW, nls_max_iter, alpha=alpha, l1_ratio=l1_ratio)
-    elif sparseness == 'components':
-        Wt, gradW, iterW = _nls_subproblem(
-            safe_vstack([X.T,
-                         np.zeros((n_components_, n_samples))]),
-            safe_vstack([H.T,
-                         np.sqrt(eta) * np.eye(n_components_)]),
-            W.T, tolW, nls_max_iter, alpha=alpha, l1_ratio=l1_ratio)
-
-    return Wt.T, gradW.T, iterW
-
-
-def _update_projected_gradient_h(X, W, H, tolH, nls_max_iter, alpha, l1_ratio,
-                                 sparseness, beta, eta):
-    """Helper function for _fit_projected_gradient"""
-    n_samples, n_features = X.shape
-    n_components_ = W.shape[1]
-
-    if sparseness is None:
-        H, gradH, iterH = _nls_subproblem(X, W, H, tolH, nls_max_iter,
-                                          alpha=alpha, l1_ratio=l1_ratio)
-    elif sparseness == 'data':
-        H, gradH, iterH = _nls_subproblem(
-            safe_vstack([X, np.zeros((n_components_, n_features))]),
-            safe_vstack([W,
-                         np.sqrt(eta) * np.eye(n_components_)]),
-            H, tolH, nls_max_iter, alpha=alpha, l1_ratio=l1_ratio)
-    elif sparseness == 'components':
-        H, gradH, iterH = _nls_subproblem(
-            safe_vstack([X, np.zeros((1, n_features))]),
-            safe_vstack([W, np.sqrt(beta) * np.ones((1, n_components_))]),
-            H, tolH, nls_max_iter, alpha=alpha, l1_ratio=l1_ratio)
-
-    return H, gradH, iterH
-
-
-def _fit_projected_gradient(X, W, H, tol, max_iter,
-                            nls_max_iter, alpha, l1_ratio,
-                            sparseness, beta, eta):
-    """Compute Non-negative Matrix Factorization (NMF) with Projected Gradient
-
-    References
-    ----------
-    C.-J. Lin. Projected gradient methods for non-negative matrix
-    factorization. Neural Computation, 19(2007), 2756-2779.
-    http://www.csie.ntu.edu.tw/~cjlin/nmf/
-
-    P. Hoyer. Non-negative Matrix Factorization with Sparseness Constraints.
-    Journal of Machine Learning Research 2004.
-    """
-    gradW = (np.dot(W, np.dot(H, H.T)) -
-             safe_sparse_dot(X, H.T, dense_output=True))
-    gradH = (np.dot(np.dot(W.T, W), H) -
-             safe_sparse_dot(W.T, X, dense_output=True))
-
-    init_grad = squared_norm(gradW) + squared_norm(gradH.T)
-    # max(0.001, tol) to force alternating minimizations of W and H
-    tolW = max(0.001, tol) * np.sqrt(init_grad)
-    tolH = tolW
-
-    for n_iter in range(1, max_iter + 1):
-        # stopping condition
-        # as discussed in paper
-        proj_grad_W = squared_norm(gradW * np.logical_or(gradW < 0, W > 0))
-        proj_grad_H = squared_norm(gradH * np.logical_or(gradH < 0, H > 0))
-
-        if (proj_grad_W + proj_grad_H) / init_grad < tol ** 2:
-            break
-
-        # update W
-        W, gradW, iterW = _update_projected_gradient_w(X, W, H, tolW,
-                                                       nls_max_iter,
-                                                       alpha, l1_ratio,
-                                                       sparseness, beta, eta)
-        if iterW == 1:
-            tolW = 0.1 * tolW
-
-        # update H
-        H, gradH, iterH = _update_projected_gradient_h(X, W, H, tolH,
-                                                       nls_max_iter,
-                                                       alpha, l1_ratio,
-                                                       sparseness, beta, eta)
-        if iterH == 1:
-            tolH = 0.1 * tolH
-
-    H[H == 0] = 0   # fix up negative zeros
-
-    if n_iter == max_iter:
-        W, _, _ = _update_projected_gradient_w(X, W, H, tol, nls_max_iter,
-                                               alpha, l1_ratio, sparseness,
-                                               beta, eta)
-
-    return W, H, n_iter
-
-
 def _update_coordinate_descent(X, W, Ht, l1_reg, l2_reg, shuffle,
                                random_state):
     """Helper function for _fit_coordinate_descent
@@ -604,8 +474,7 @@ def non_negative_factorization(X, W=None, H=None, n_components=None,
                                init='random', update_H=True, solver='cd',
                                tol=1e-4, max_iter=200, alpha=0., l1_ratio=0.,
                                regularization=None, random_state=None,
-                               verbose=0, shuffle=False, nls_max_iter=2000,
-                               sparseness=None, beta=1, eta=0.1):
+                               verbose=0, shuffle=False):
     """Compute Non-negative Matrix Factorization (NMF)
 
     Find two non-negative matrices (W, H) whose product approximates the non-
@@ -668,9 +537,8 @@ def non_negative_factorization(X, W=None, H=None, n_components=None,
         Set to True, both W and H will be estimated from initial guesses.
         Set to False, only W will be estimated.
 
-    solver : 'pg' | 'cd'
+    solver : 'cd'
         Numerical solver to use:
-        'pg' is a (deprecated) Projected Gradient solver.
         'cd' is a Coordinate Descent solver.
 
     tol : float, default: 1e-4
@@ -702,21 +570,6 @@ def non_negative_factorization(X, W=None, H=None, n_components=None,
     shuffle : boolean, default: False
         If true, randomize the order of coordinates in the CD solver.
 
-    nls_max_iter : integer, default: 2000
-        Number of iterations in NLS subproblem.
-        Used only in the deprecated 'pg' solver.
-
-    sparseness : 'data' | 'components' | None, default: None
-        Where to enforce sparsity in the model.
-        Used only in the deprecated 'pg' solver.
-
-    beta : double, default: 1
-        Degree of sparseness, if sparseness is not None. Larger values mean
-        more sparseness. Used only in the deprecated 'pg' solver.
-
-    eta : double, default: 0.1
-        Degree of correctness to maintain, if sparsity is not None. Smaller
-        values mean larger error. Used only in the deprecated 'pg' solver.
 
     Returns
     -------
@@ -743,7 +596,6 @@ def non_negative_factorization(X, W=None, H=None, n_components=None,
 
     X = check_array(X, accept_sparse=('csr', 'csc'))
     check_non_negative(X, "NMF (input X)")
-    _check_string_param(sparseness, solver)
 
     n_samples, n_features = X.shape
     if n_components is None:
@@ -770,23 +622,7 @@ def non_negative_factorization(X, W=None, H=None, n_components=None,
         W, H = _initialize_nmf(X, n_components, init=init,
                                random_state=random_state)
 
-    if solver == 'pg':
-        warnings.warn("'pg' solver will be removed in release 0.19."
-                      " Use 'cd' solver instead.", DeprecationWarning)
-        if update_H:  # fit_transform
-            W, H, n_iter = _fit_projected_gradient(X, W, H, tol,
-                                                   max_iter,
-                                                   nls_max_iter,
-                                                   alpha, l1_ratio,
-                                                   sparseness,
-                                                   beta, eta)
-        else:  # transform
-            W, H, n_iter = _update_projected_gradient_w(X, W, H,
-                                                        tol, nls_max_iter,
-                                                        alpha, l1_ratio,
-                                                        sparseness, beta,
-                                                        eta)
-    elif solver == 'cd':
+    if solver == 'cd':
         W, H, n_iter = _fit_coordinate_descent(X, W, H, tol,
                                                max_iter,
                                                alpha, l1_ratio,
@@ -856,10 +692,9 @@ class NMF(BaseEstimator, TransformerMixin):
 
         - 'custom': use custom matrices W and H
 
-    solver : 'pg' | 'cd'
+    solver : 'cd'
         Numerical solver to use:
-        'pg' is a Projected Gradient solver (deprecated).
-        'cd' is a Coordinate Descent solver (recommended).
+        'cd' is a Coordinate Descent solver.
 
         .. versionadded:: 0.17
            Coordinate Descent solver.
@@ -900,37 +735,6 @@ class NMF(BaseEstimator, TransformerMixin):
         .. versionadded:: 0.17
            *shuffle* parameter used in the Coordinate Descent solver.
 
-    nls_max_iter : integer, default: 2000
-        Number of iterations in NLS subproblem.
-        Used only in the deprecated 'pg' solver.
-
-        .. versionchanged:: 0.17
-           Deprecated Projected Gradient solver. Use Coordinate Descent solver
-           instead.
-
-    sparseness : 'data' | 'components' | None, default: None
-        Where to enforce sparsity in the model.
-        Used only in the deprecated 'pg' solver.
-
-        .. versionchanged:: 0.17
-           Deprecated Projected Gradient solver. Use Coordinate Descent solver
-           instead.
-
-    beta : double, default: 1
-        Degree of sparseness, if sparseness is not None. Larger values mean
-        more sparseness. Used only in the deprecated 'pg' solver.
-
-        .. versionchanged:: 0.17
-           Deprecated Projected Gradient solver. Use Coordinate Descent solver
-           instead.
-
-    eta : double, default: 0.1
-        Degree of correctness to maintain, if sparsity is not None. Smaller
-        values mean larger error. Used only in the deprecated 'pg' solver.
-
-        .. versionchanged:: 0.17
-           Deprecated Projected Gradient solver. Use Coordinate Descent solver
-           instead.
 
     Attributes
     ----------
@@ -952,9 +756,9 @@ class NMF(BaseEstimator, TransformerMixin):
     >>> from sklearn.decomposition import NMF
     >>> model = NMF(n_components=2, init='random', random_state=0)
     >>> model.fit(X) #doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    NMF(alpha=0.0, beta=1, eta=0.1, init='random', l1_ratio=0.0, max_iter=200,
-      n_components=2, nls_max_iter=2000, random_state=0, shuffle=False,
-      solver='cd', sparseness=None, tol=0.0001, verbose=0)
+    NMF(alpha=0.0, init='random', l1_ratio=0.0, max_iter=200,
+      n_components=2, random_state=0, shuffle=False,
+      solver='cd', tol=0.0001, verbose=0)
 
     >>> model.components_
     array([[ 2.09783018,  0.30560234],
@@ -974,10 +778,9 @@ class NMF(BaseEstimator, TransformerMixin):
     computer sciences 92.3: 708-721, 2009.
     """
 
-    def __init__(self, n_components=None, init=None, solver='cd',
-                 tol=1e-4, max_iter=200, random_state=None,
-                 alpha=0., l1_ratio=0., verbose=0, shuffle=False,
-                 nls_max_iter=2000, sparseness=None, beta=1, eta=0.1):
+    def __init__(self, n_components=None, init=None, solver='cd', tol=1e-4,
+                 max_iter=200, random_state=None, alpha=0., l1_ratio=0.,
+                 verbose=0, shuffle=False):
         self.n_components = n_components
         self.init = init
         self.solver = solver
@@ -989,17 +792,6 @@ class NMF(BaseEstimator, TransformerMixin):
         self.verbose = verbose
         self.shuffle = shuffle
 
-        if sparseness is not None:
-            warnings.warn("Controlling regularization through the sparseness,"
-                          " beta and eta arguments is only available"
-                          " for 'pg' solver, which will be removed"
-                          " in release 0.19. Use another solver with L1 or L2"
-                          " regularization instead.", DeprecationWarning)
-        self.nls_max_iter = nls_max_iter
-        self.sparseness = sparseness
-        self.beta = beta
-        self.eta = eta
-
     def fit_transform(self, X, y=None, W=None, H=None):
         """Learn a NMF model for the data X and returns the transformed data.
 
@@ -1029,13 +821,7 @@ class NMF(BaseEstimator, TransformerMixin):
             tol=self.tol, max_iter=self.max_iter, alpha=self.alpha,
             l1_ratio=self.l1_ratio, regularization='both',
             random_state=self.random_state, verbose=self.verbose,
-            shuffle=self.shuffle,
-            nls_max_iter=self.nls_max_iter, sparseness=self.sparseness,
-            beta=self.beta, eta=self.eta)
-
-        if self.solver == 'pg':
-            self.comp_sparseness_ = _sparseness(H.ravel())
-            self.data_sparseness_ = _sparseness(W.ravel())
+            shuffle=self.shuffle)
 
         self.reconstruction_err_ = _safe_compute_error(X, W, H)
 
@@ -1081,9 +867,7 @@ class NMF(BaseEstimator, TransformerMixin):
             tol=self.tol, max_iter=self.max_iter, alpha=self.alpha,
             l1_ratio=self.l1_ratio, regularization='both',
             random_state=self.random_state, verbose=self.verbose,
-            shuffle=self.shuffle,
-            nls_max_iter=self.nls_max_iter, sparseness=self.sparseness,
-            beta=self.beta, eta=self.eta)
+            shuffle=self.shuffle)
 
         return W
 
@@ -1104,185 +888,3 @@ class NMF(BaseEstimator, TransformerMixin):
         """
         check_is_fitted(self, 'n_components_')
         return np.dot(W, self.components_)
-
-
-@deprecated("It will be removed in release 0.19. Use NMF instead."
-            "'pg' solver is still available until release 0.19.")
-class ProjectedGradientNMF(NMF):
-    """Non-Negative Matrix Factorization (NMF)
-
-    Find two non-negative matrices (W, H) whose product approximates the non-
-    negative matrix X. This factorization can be used for example for
-    dimensionality reduction, source separation or topic extraction.
-
-    The objective function is::
-
-        0.5 * ||X - WH||_Fro^2
-        + alpha * l1_ratio * ||vec(W)||_1
-        + alpha * l1_ratio * ||vec(H)||_1
-        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2
-        + 0.5 * alpha * (1 - l1_ratio) * ||H||_Fro^2
-
-    Where::
-
-        ||A||_Fro^2 = \sum_{i,j} A_{ij}^2 (Frobenius norm)
-        ||vec(A)||_1 = \sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)
-
-    The objective function is minimized with an alternating minimization of W
-    and H.
-
-    Read more in the :ref:`User Guide <NMF>`.
-
-    Parameters
-    ----------
-    n_components : int or None
-        Number of components, if n_components is not set all features
-        are kept.
-
-    init :  'random' | 'nndsvd' |  'nndsvda' | 'nndsvdar' | 'custom'
-        Method used to initialize the procedure.
-        Default: 'nndsvdar' if n_components < n_features, otherwise random.
-        Valid options:
-
-        - 'random': non-negative random matrices, scaled with:
-            sqrt(X.mean() / n_components)
-
-        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)
-            initialization (better for sparseness)
-
-        - 'nndsvda': NNDSVD with zeros filled with the average of X
-            (better when sparsity is not desired)
-
-        - 'nndsvdar': NNDSVD with zeros filled with small random values
-            (generally faster, less accurate alternative to NNDSVDa
-            for when sparsity is not desired)
-
-        - 'custom': use custom matrices W and H
-
-    solver : 'pg' | 'cd'
-        Numerical solver to use:
-        'pg' is a Projected Gradient solver (deprecated).
-        'cd' is a Coordinate Descent solver (recommended).
-
-        .. versionadded:: 0.17
-           Coordinate Descent solver.
-
-        .. versionchanged:: 0.17
-           Deprecated Projected Gradient solver.
-
-    tol : double, default: 1e-4
-        Tolerance value used in stopping conditions.
-
-    max_iter : integer, default: 200
-        Number of iterations to compute.
-
-    random_state : integer seed, RandomState instance, or None (default)
-        Random number generator seed control.
-
-    alpha : double, default: 0.
-        Constant that multiplies the regularization terms. Set it to zero to
-        have no regularization.
-
-        .. versionadded:: 0.17
-           *alpha* used in the Coordinate Descent solver.
-
-    l1_ratio : double, default: 0.
-        The regularization mixing parameter, with 0 <= l1_ratio <= 1.
-        For l1_ratio = 0 the penalty is an elementwise L2 penalty
-        (aka Frobenius Norm).
-        For l1_ratio = 1 it is an elementwise L1 penalty.
-        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
-
-        .. versionadded:: 0.17
-           Regularization parameter *l1_ratio* used in the Coordinate Descent
-           solver.
-
-    shuffle : boolean, default: False
-        If true, randomize the order of coordinates in the CD solver.
-
-        .. versionadded:: 0.17
-           *shuffle* parameter used in the Coordinate Descent solver.
-
-    nls_max_iter : integer, default: 2000
-        Number of iterations in NLS subproblem.
-        Used only in the deprecated 'pg' solver.
-
-        .. versionchanged:: 0.17
-           Deprecated Projected Gradient solver. Use Coordinate Descent solver
-           instead.
-
-    sparseness : 'data' | 'components' | None, default: None
-        Where to enforce sparsity in the model.
-        Used only in the deprecated 'pg' solver.
-
-        .. versionchanged:: 0.17
-           Deprecated Projected Gradient solver. Use Coordinate Descent solver
-           instead.
-
-    beta : double, default: 1
-        Degree of sparseness, if sparseness is not None. Larger values mean
-        more sparseness. Used only in the deprecated 'pg' solver.
-
-        .. versionchanged:: 0.17
-           Deprecated Projected Gradient solver. Use Coordinate Descent solver
-           instead.
-
-    eta : double, default: 0.1
-        Degree of correctness to maintain, if sparsity is not None. Smaller
-        values mean larger error. Used only in the deprecated 'pg' solver.
-
-        .. versionchanged:: 0.17
-           Deprecated Projected Gradient solver. Use Coordinate Descent solver
-           instead.
-
-    Attributes
-    ----------
-    components_ : array, [n_components, n_features]
-        Non-negative components of the data.
-
-    reconstruction_err_ : number
-        Frobenius norm of the matrix difference between
-        the training data and the reconstructed data from
-        the fit produced by the model. ``|| X - WH ||_2``
-
-    n_iter_ : int
-        Actual number of iterations.
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
-    >>> from sklearn.decomposition import NMF
-    >>> model = NMF(n_components=2, init='random', random_state=0)
-    >>> model.fit(X) #doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    NMF(alpha=0.0, beta=1, eta=0.1, init='random', l1_ratio=0.0, max_iter=200,
-      n_components=2, nls_max_iter=2000, random_state=0, shuffle=False,
-      solver='cd', sparseness=None, tol=0.0001, verbose=0)
-
-    >>> model.components_
-    array([[ 2.09783018,  0.30560234],
-           [ 2.13443044,  2.13171694]])
-    >>> model.reconstruction_err_ #doctest: +ELLIPSIS
-    0.00115993...
-
-    References
-    ----------
-    C.-J. Lin. Projected gradient methods for non-negative matrix
-    factorization. Neural Computation, 19(2007), 2756-2779.
-    http://www.csie.ntu.edu.tw/~cjlin/nmf/
-
-    Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
-    large scale nonnegative matrix and tensor factorizations."
-    IEICE transactions on fundamentals of electronics, communications and
-    computer sciences 92.3: 708-721, 2009.
-    """
-
-    def __init__(self, n_components=None, solver='pg', init=None,
-                 tol=1e-4, max_iter=200, random_state=None,
-                 alpha=0., l1_ratio=0., verbose=0,
-                 nls_max_iter=2000, sparseness=None, beta=1, eta=0.1):
-        super(ProjectedGradientNMF, self).__init__(
-            n_components=n_components, init=init, solver='pg', tol=tol,
-            max_iter=max_iter, random_state=random_state, alpha=alpha,
-            l1_ratio=l1_ratio, verbose=verbose, nls_max_iter=nls_max_iter,
-            sparseness=sparseness, beta=beta, eta=eta)
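
With the 'pg' solver and its sparseness/beta/eta knobs gone, sparsity is
steered through the coordinate-descent regularization parameters instead. A
minimal sketch under that assumption, on toy data:

    import numpy as np
    from sklearn.decomposition import NMF

    X = np.abs(np.random.RandomState(0).randn(6, 5))

    # formerly ProjectedGradientNMF(..., sparseness='components', ...);
    # an L1-leaning penalty on W and H now plays that role
    model = NMF(n_components=2, solver='cd', alpha=0.1, l1_ratio=0.5,
                random_state=0)
    W = model.fit_transform(X)
    H = model.components_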
diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py
index c431dd3842..bb93ed94f3 100644
--- a/sklearn/decomposition/tests/test_nmf.py
+++ b/sklearn/decomposition/tests/test_nmf.py
@@ -1,7 +1,6 @@
 import numpy as np
 from scipy import linalg
-from sklearn.decomposition import (NMF, ProjectedGradientNMF,
-                                   non_negative_factorization)
+from sklearn.decomposition import NMF, non_negative_factorization
 from sklearn.decomposition import nmf   # For testing internals
 from scipy.sparse import csc_matrix
 
@@ -10,9 +9,7 @@ from sklearn.utils.testing import assert_false
 from sklearn.utils.testing import assert_raise_message, assert_no_warnings
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_almost_equal
-from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_less
-from sklearn.utils.testing import ignore_warnings
 from sklearn.base import clone
 
 
@@ -27,16 +24,13 @@ def test_initialize_nn_output():
         assert_false((W < 0).any() or (H < 0).any())
 
 
-@ignore_warnings
 def test_parameter_checking():
     A = np.ones((2, 2))
     name = 'spam'
-    msg = "Invalid solver parameter: got 'spam' instead of one of"
+    msg = "Invalid solver parameter 'spam'"
     assert_raise_message(ValueError, msg, NMF(solver=name).fit, A)
     msg = "Invalid init parameter: got 'spam' instead of one of"
     assert_raise_message(ValueError, msg, NMF(init=name).fit, A)
-    msg = "Invalid sparseness parameter: got 'spam' instead of one of"
-    assert_raise_message(ValueError, msg, NMF(sparseness=name).fit, A)
 
     msg = "Negative values in data passed to"
     assert_raise_message(ValueError, msg, NMF().fit, -A)
@@ -71,27 +65,22 @@ def test_initialize_variants():
         assert_almost_equal(evl[ref != 0], ref[ref != 0])
 
 
-@ignore_warnings
 def test_nmf_fit_nn_output():
     # Test that the decomposition does not contain negative values
     A = np.c_[5 * np.ones(5) - np.arange(1, 6),
               5 * np.ones(5) + np.arange(1, 6)]
-    for solver in ('pg', 'cd'):
-        for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar'):
-            model = NMF(n_components=2, solver=solver, init=init,
-                        random_state=0)
-            transf = model.fit_transform(A)
-            assert_false((model.components_ < 0).any() or
-                         (transf < 0).any())
+    for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar'):
+        model = NMF(n_components=2, init=init, random_state=0)
+        transf = model.fit_transform(A)
+        assert_false((model.components_ < 0).any() or
+                     (transf < 0).any())
 
 
-@ignore_warnings
 def test_nmf_fit_close():
     # Test that the fit is not too far away
-    for solver in ('pg', 'cd'):
-        pnmf = NMF(5, solver=solver, init='nndsvd', random_state=0)
-        X = np.abs(random_state.randn(6, 5))
-        assert_less(pnmf.fit(X).reconstruction_err_, 0.05)
+    pnmf = NMF(5, init='nndsvd', random_state=0)
+    X = np.abs(random_state.randn(6, 5))
+    assert_less(pnmf.fit(X).reconstruction_err_, 0.05)
 
 
 def test_nls_nn_output():
@@ -109,15 +98,13 @@ def test_nls_close():
     assert_true((np.abs(Ap - A) < 0.01).all())
 
 
-@ignore_warnings
 def test_nmf_transform():
     # Test that NMF.transform returns close values
     A = np.abs(random_state.randn(6, 5))
-    for solver in ('pg', 'cd'):
-        m = NMF(solver=solver, n_components=4, init='nndsvd', random_state=0)
-        ft = m.fit_transform(A)
-        t = m.transform(A)
-        assert_array_almost_equal(ft, t, decimal=2)
+    m = NMF(n_components=4, init='nndsvd', random_state=0)
+    ft = m.fit_transform(A)
+    t = m.transform(A)
+    assert_array_almost_equal(ft, t, decimal=2)
 
 
 def test_nmf_transform_custom_init():
@@ -134,45 +121,23 @@ def test_nmf_transform_custom_init():
     m.transform(A)
 
 
-@ignore_warnings
 def test_nmf_inverse_transform():
     # Test that NMF.inverse_transform returns close values
     random_state = np.random.RandomState(0)
     A = np.abs(random_state.randn(6, 4))
-    for solver in ('pg', 'cd'):
-        m = NMF(solver=solver, n_components=4, init='random', random_state=0)
-        m.fit_transform(A)
-        t = m.transform(A)
-        A_new = m.inverse_transform(t)
-        assert_array_almost_equal(A, A_new, decimal=2)
+    m = NMF(n_components=4, init='random', random_state=0)
+    m.fit_transform(A)
+    t = m.transform(A)
+    A_new = m.inverse_transform(t)
+    assert_array_almost_equal(A, A_new, decimal=2)
 
 
-@ignore_warnings
 def test_n_components_greater_n_features():
     # Smoke test for the case of more components than features.
     A = np.abs(random_state.randn(30, 10))
     NMF(n_components=15, random_state=0, tol=1e-2).fit(A)
 
 
-@ignore_warnings
-def test_projgrad_nmf_sparseness():
-    # Test sparseness
-    # Test that sparsity constraints actually increase sparseness in the
-    # part where they are applied.
-    tol = 1e-2
-    A = np.abs(random_state.randn(10, 10))
-    m = ProjectedGradientNMF(n_components=5, random_state=0, tol=tol).fit(A)
-    data_sp = ProjectedGradientNMF(n_components=5, sparseness='data',
-                                   random_state=0,
-                                   tol=tol).fit(A).data_sparseness_
-    comp_sp = ProjectedGradientNMF(n_components=5, sparseness='components',
-                                   random_state=0,
-                                   tol=tol).fit(A).comp_sparseness_
-    assert_greater(data_sp, m.data_sparseness_)
-    assert_greater(comp_sp, m.comp_sparseness_)
-
-
-@ignore_warnings
 def test_sparse_input():
     # Test that sparse matrices are accepted as input
     from scipy.sparse import csc_matrix
@@ -181,21 +146,18 @@ def test_sparse_input():
     A[:, 2 * np.arange(5)] = 0
     A_sparse = csc_matrix(A)
 
-    for solver in ('pg', 'cd'):
-        est1 = NMF(solver=solver, n_components=5, init='random',
-                   random_state=0, tol=1e-2)
-        est2 = clone(est1)
+    est1 = NMF(n_components=5, init='random', random_state=0, tol=1e-2)
+    est2 = clone(est1)
 
-        W1 = est1.fit_transform(A)
-        W2 = est2.fit_transform(A_sparse)
-        H1 = est1.components_
-        H2 = est2.components_
+    W1 = est1.fit_transform(A)
+    W2 = est2.fit_transform(A_sparse)
+    H1 = est1.components_
+    H2 = est2.components_
 
-        assert_array_almost_equal(W1, W2)
-        assert_array_almost_equal(H1, H2)
+    assert_array_almost_equal(W1, W2)
+    assert_array_almost_equal(H1, H2)
 
 
-@ignore_warnings
 def test_sparse_transform():
     # Test that transform works on sparse data.  Issue #2124
 
@@ -203,34 +165,29 @@ def test_sparse_transform():
     A[A > 1.0] = 0
     A = csc_matrix(A)
 
-    for solver in ('pg', 'cd'):
-        model = NMF(solver=solver, random_state=0, tol=1e-4, n_components=2)
-        A_fit_tr = model.fit_transform(A)
-        A_tr = model.transform(A)
-        assert_array_almost_equal(A_fit_tr, A_tr, decimal=1)
+    model = NMF(random_state=0, tol=1e-4, n_components=2)
+    A_fit_tr = model.fit_transform(A)
+    A_tr = model.transform(A)
+    assert_array_almost_equal(A_fit_tr, A_tr, decimal=1)
 
 
-@ignore_warnings
 def test_non_negative_factorization_consistency():
     # Test that the function is called in the same way, either directly
     # or through the NMF class
     A = np.abs(random_state.randn(10, 10))
     A[:, 2 * np.arange(5)] = 0
 
-    for solver in ('pg', 'cd'):
-        W_nmf, H, _ = non_negative_factorization(
-            A, solver=solver, random_state=1, tol=1e-2)
-        W_nmf_2, _, _ = non_negative_factorization(
-            A, H=H, update_H=False, solver=solver, random_state=1, tol=1e-2)
+    W_nmf, H, _ = non_negative_factorization(A, random_state=1, tol=1e-2)
+    W_nmf_2, _, _ = non_negative_factorization(
+        A, H=H, update_H=False, random_state=1, tol=1e-2)
 
-        model_class = NMF(solver=solver, random_state=1, tol=1e-2)
-        W_cls = model_class.fit_transform(A)
-        W_cls_2 = model_class.transform(A)
-        assert_array_almost_equal(W_nmf, W_cls, decimal=10)
-        assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
+    model_class = NMF(random_state=1, tol=1e-2)
+    W_cls = model_class.fit_transform(A)
+    W_cls_2 = model_class.transform(A)
+    assert_array_almost_equal(W_nmf, W_cls, decimal=10)
+    assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
 
 
-@ignore_warnings
 def test_non_negative_factorization_checking():
     A = np.ones((2, 2))
     # Test parameters checking is public function
diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py
index 628314a013..04180f2843 100644
--- a/sklearn/discriminant_analysis.py
+++ b/sklearn/discriminant_analysis.py
@@ -56,7 +56,8 @@ def _cov(X, shrinkage=None):
             sc = StandardScaler()  # standardize features
             X = sc.fit_transform(X)
             s = ledoit_wolf(X)[0]
-            s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]  # rescale
+            # rescale
+            s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]
         elif shrinkage == 'empirical':
             s = empirical_covariance(X)
         else:
@@ -407,15 +408,15 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin,
         self.coef_ = np.dot(coef, self.scalings_.T)
         self.intercept_ -= np.dot(self.xbar_, self.coef_.T)
 
-    def fit(self, X, y, store_covariance=None, tol=None):
+    def fit(self, X, y):
         """Fit LinearDiscriminantAnalysis model according to the given
            training data and parameters.
 
-           .. versionchanged:: 0.17
-              Deprecated *store_covariance* have been moved to main constructor.
+           .. versionchanged:: 0.19
+              *store_covariance* has been moved to main constructor.
 
-           .. versionchanged:: 0.17
-              Deprecated *tol* have been moved to main constructor.
+           .. versionchanged:: 0.19
+              *tol* has been moved to main constructor.
 
         Parameters
         ----------
@@ -425,20 +426,6 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin,
         y : array, shape (n_samples,)
             Target values.
         """
-        if store_covariance:
-            warnings.warn("The parameter 'store_covariance' is deprecated as "
-                          "of version 0.17 and will be removed in 0.19. The "
-                          "parameter is no longer necessary because the value "
-                          "is set via the estimator initialisation or "
-                          "set_params method.", DeprecationWarning)
-            self.store_covariance = store_covariance
-        if tol:
-            warnings.warn("The parameter 'tol' is deprecated as of version "
-                          "0.17 and will be removed in 0.19. The parameter is "
-                          "no longer necessary because the value is set via "
-                          "the estimator initialisation or set_params method.",
-                          DeprecationWarning)
-            self.tol = tol
         X, y = check_X_y(X, y, ensure_min_samples=2, estimator=self)
         self.classes_ = unique_labels(y)
 
@@ -630,14 +617,14 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin):
         self.store_covariances = store_covariances
         self.tol = tol
 
-    def fit(self, X, y, store_covariances=None, tol=None):
+    def fit(self, X, y):
         """Fit the model according to the given training data and parameters.
 
-            .. versionchanged:: 0.17
-               Deprecated *store_covariance* have been moved to main constructor.
+            .. versionchanged:: 0.19
+               *store_covariances* has been moved to the main constructor.
 
-            .. versionchanged:: 0.17
-               Deprecated *tol* have been moved to main constructor.
+            .. versionchanged:: 0.19
+               *tol* has been moved to the main constructor.
 
         Parameters
         ----------
@@ -648,20 +635,6 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin):
         y : array, shape = [n_samples]
             Target values (integers)
         """
-        if store_covariances:
-            warnings.warn("The parameter 'store_covariances' is deprecated as "
-                          "of version 0.17 and will be removed in 0.19. The "
-                          "parameter is no longer necessary because the value "
-                          "is set via the estimator initialisation or "
-                          "set_params method.", DeprecationWarning)
-            self.store_covariances = store_covariances
-        if tol:
-            warnings.warn("The parameter 'tol' is deprecated as of version "
-                          "0.17 and will be removed in 0.19. The parameter is "
-                          "no longer necessary because the value is set via "
-                          "the estimator initialisation or set_params method.",
-                          DeprecationWarning)
-            self.tol = tol
         X, y = check_X_y(X, y)
         check_classification_targets(y)
         self.classes_, y = np.unique(y, return_inverse=True)
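
With the fit-time keywords removed, ``store_covariance`` and ``tol`` are passed only at construction (or via ``set_params``); a minimal sketch on toy data:

    import numpy as np
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    X = np.array([[-1., -1.], [-2., -1.], [1., 1.], [2., 1.]])
    y = np.array([0, 0, 1, 1])

    # Before: clf.fit(X, y, store_covariance=True, tol=1e-4)
    clf = LinearDiscriminantAnalysis(store_covariance=True, tol=1e-4)
    clf.fit(X, y)
    print(clf.covariance_.shape)  # (2, 2)
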
diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py
index 3d252dfa4f..5ab0a0b191 100644
--- a/sklearn/ensemble/forest.py
+++ b/sklearn/ensemble/forest.py
@@ -53,7 +53,6 @@ from scipy.sparse import hstack as sparse_hstack
 from ..base import ClassifierMixin, RegressorMixin
 from ..externals.joblib import Parallel, delayed
 from ..externals import six
-from ..feature_selection.from_model import _LearntSelectorMixin
 from ..metrics import r2_score
 from ..preprocessing import OneHotEncoder
 from ..tree import (DecisionTreeClassifier, DecisionTreeRegressor,
@@ -124,8 +123,7 @@ def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,
     return tree
 
 
-class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble,
-                                    _LearntSelectorMixin)):
+class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble)):
     """Base class for forests of trees.
 
     Warning: This class should not be used directly. Use derived classes
@@ -473,17 +471,12 @@ class ForestClassifier(six.with_metaclass(ABCMeta, BaseForest,
         y = y_store_unique_indices
 
         if self.class_weight is not None:
-            valid_presets = ('auto', 'balanced', 'subsample', 'balanced_subsample')
+            valid_presets = ('balanced', 'balanced_subsample')
             if isinstance(self.class_weight, six.string_types):
                 if self.class_weight not in valid_presets:
                     raise ValueError('Valid presets for class_weight include '
                                      '"balanced" and "balanced_subsample". Given "%s".'
                                      % self.class_weight)
-                if self.class_weight == "subsample":
-                    warn("class_weight='subsample' is deprecated in 0.17 and"
-                         "will be removed in 0.19. It was replaced by "
-                         "class_weight='balanced_subsample' using the balanced"
-                         "strategy.", DeprecationWarning)
                 if self.warm_start:
                     warn('class_weight presets "balanced" or "balanced_subsample" are '
                          'not recommended for warm_start if the fitted data '
@@ -495,19 +488,14 @@ class ForestClassifier(six.with_metaclass(ABCMeta, BaseForest,
                          'distributions. Pass the resulting weights as the '
                          'class_weight parameter.')
 
-            if (self.class_weight not in ['subsample', 'balanced_subsample'] or
+            if (self.class_weight != 'balanced_subsample' or
                     not self.bootstrap):
-                if self.class_weight == 'subsample':
-                    class_weight = 'auto'
-                elif self.class_weight == "balanced_subsample":
+                if self.class_weight == "balanced_subsample":
                     class_weight = "balanced"
                 else:
                     class_weight = self.class_weight
-                with warnings.catch_warnings():
-                    if class_weight == "auto":
-                        warnings.simplefilter('ignore', DeprecationWarning)
-                    expanded_class_weight = compute_sample_weight(class_weight,
-                                                                  y_original)
+                expanded_class_weight = compute_sample_weight(class_weight,
+                                                              y_original)
 
         return y, expanded_class_weight
 
@@ -1685,9 +1673,7 @@ class RandomTreesEmbedding(BaseForest):
         X_transformed : sparse matrix, shape=(n_samples, n_out)
             Transformed dataset.
         """
-        # ensure_2d=False because there are actually unit test checking we fail
-        # for 1d.
-        X = check_array(X, accept_sparse=['csc'], ensure_2d=False)
+        X = check_array(X, accept_sparse=['csc'])
         if issparse(X):
             # Pre-sort indices to avoid that each individual tree of the
             # ensemble sorts the indices.
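
Only the 'balanced' and 'balanced_subsample' presets survive; a minimal sketch on an imbalanced synthetic problem:

    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(n_samples=100, weights=[0.9, 0.1],
                               random_state=0)

    # 'balanced' reweights classes once on the full training set;
    # 'balanced_subsample' recomputes the weights per bootstrap sample.
    clf = RandomForestClassifier(class_weight='balanced_subsample',
                                 random_state=0).fit(X, y)
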
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 2db5b574ad..9bef9635ea 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -31,7 +31,6 @@ from ..base import BaseEstimator
 from ..base import ClassifierMixin
 from ..base import RegressorMixin
 from ..externals import six
-from ..feature_selection.from_model import _LearntSelectorMixin
 
 from ._gradient_boosting import predict_stages
 from ._gradient_boosting import predict_stage
@@ -55,7 +54,6 @@ from ..utils import check_array
 from ..utils import check_X_y
 from ..utils import column_or_1d
 from ..utils import check_consistent_length
-from ..utils import deprecated
 from ..utils.extmath import logsumexp
 from ..utils.fixes import expit
 from ..utils.fixes import bincount
@@ -715,8 +713,7 @@ class VerboseReporter(object):
                 self.verbose_mod *= 10
 
 
-class BaseGradientBoosting(six.with_metaclass(ABCMeta, BaseEnsemble,
-                                              _LearntSelectorMixin)):
+class BaseGradientBoosting(six.with_metaclass(ABCMeta, BaseEnsemble)):
     """Abstract base class for Gradient Boosting. """
 
     @abstractmethod
@@ -1125,30 +1122,6 @@ class BaseGradientBoosting(six.with_metaclass(ABCMeta, BaseEnsemble,
         predict_stages(self.estimators_, X, self.learning_rate, score)
         return score
 
-    @deprecated(" and will be removed in 0.19")
-    def decision_function(self, X):
-        """Compute the decision function of ``X``.
-
-        Parameters
-        ----------
-        X : array-like of shape = [n_samples, n_features]
-            The input samples.
-
-        Returns
-        -------
-        score : array, shape = [n_samples, n_classes] or [n_samples]
-            The decision function of the input samples. The order of the
-            classes corresponds to that in the attribute `classes_`.
-            Regression and binary classification produce an array of shape
-            [n_samples].
-        """
-
-        self._check_initialized()
-        X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)
-        score = self._decision_function(X)
-        if score.shape[1] == 1:
-            return score.ravel()
-        return score
 
     def _staged_decision_function(self, X):
         """Compute decision function of ``X`` for each iteration.
@@ -1177,30 +1150,6 @@ class BaseGradientBoosting(six.with_metaclass(ABCMeta, BaseEnsemble,
             predict_stage(self.estimators_, i, X, self.learning_rate, score)
             yield score.copy()
 
-    @deprecated(" and will be removed in 0.19")
-    def staged_decision_function(self, X):
-        """Compute decision function of ``X`` for each iteration.
-
-        This method allows monitoring (i.e. determine error on testing set)
-        after each stage.
-
-        Parameters
-        ----------
-        X : array-like of shape = [n_samples, n_features]
-            The input samples.
-
-        Returns
-        -------
-        score : generator of array, shape = [n_samples, k]
-            The decision function of the input samples. The order of the
-            classes corresponds to that in the attribute `classes_`.
-            Regression and binary classification are special cases with
-            ``k == 1``, otherwise ``k==n_classes``.
-        """
-        for dec in self._staged_decision_function(X):
-            # no yield from in Python2.X
-            yield dec
-
     @property
     def feature_importances_(self):
         """Return the feature importances (the higher, the more important the
diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py
index 85b532db69..441d7078aa 100644
--- a/sklearn/ensemble/iforest.py
+++ b/sklearn/ensemble/iforest.py
@@ -154,9 +154,7 @@ class IsolationForest(BaseBagging):
         self : object
             Returns self.
         """
-        # ensure_2d=False because there are actually unit test checking we fail
-        # for 1d.
-        X = check_array(X, accept_sparse=['csc'], ensure_2d=False)
+        X = check_array(X, accept_sparse=['csc'])
         if issparse(X):
             # Pre-sort indices to avoid that each individual tree of the
             # ensemble sorts the indices.
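
``fit`` now validates 2d input, so a single feature has to be reshaped by the caller; a minimal sketch:

    import numpy as np
    from sklearn.ensemble import IsolationForest

    x = np.array([1.0, 1.1, 0.9, 10.0])

    # A 1d array now raises ValueError; reshape to (n_samples, 1) first.
    clf = IsolationForest(random_state=0).fit(x.reshape(-1, 1))
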
diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py
index 5ff4cf851f..9d7f796af6 100644
--- a/sklearn/ensemble/tests/test_forest.py
+++ b/sklearn/ensemble/tests/test_forest.py
@@ -208,12 +208,6 @@ def check_importances(name, criterion, X, y):
     assert_equal(importances.shape[0], 10)
     assert_equal(n_important, 3)
 
-    # XXX: Remove this test in 0.19 after transform support to estimators
-    # is removed.
-    X_new = assert_warns(
-        DeprecationWarning, est.transform, X, threshold="mean")
-    assert_less(0 < X_new.shape[1], X.shape[1])
-
     # Check with parallel
     importances = est.feature_importances_
     est.set_params(n_jobs=2)
@@ -968,11 +962,9 @@ def check_class_weight_balanced_and_bootstrap_multi_output(name):
     clf = ForestClassifier(class_weight=[{-1: 0.5, 1: 1.}, {-2: 1., 2: 1.}],
                            random_state=0)
     clf.fit(X, _y)
-    # smoke test for subsample and balanced subsample
+    # smoke test for balanced subsample
     clf = ForestClassifier(class_weight='balanced_subsample', random_state=0)
     clf.fit(X, _y)
-    clf = ForestClassifier(class_weight='subsample', random_state=0)
-    ignore_warnings(clf.fit)(X, _y)
 
 
 def test_class_weight_balanced_and_bootstrap_multi_output():
@@ -991,7 +983,7 @@ def check_class_weight_errors(name):
     assert_raises(ValueError, clf.fit, X, _y)
 
     # Warning warm_start with preset
-    clf = ForestClassifier(class_weight='auto', warm_start=True,
+    clf = ForestClassifier(class_weight='balanced', warm_start=True,
                            random_state=0)
     assert_warns(UserWarning, clf.fit, X, y)
     assert_warns(UserWarning, clf.fit, X, _y)
diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index 634bc259a1..817122338c 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -299,15 +299,6 @@ def test_feature_importances():
         clf.fit(X, y)
         assert_true(hasattr(clf, 'feature_importances_'))
 
-        # XXX: Remove this test in 0.19 after transform support to estimators
-        # is removed.
-        X_new = assert_warns(
-            DeprecationWarning, clf.transform, X, threshold="mean")
-        assert_less(X_new.shape[1], X.shape[1])
-        feature_mask = (
-            clf.feature_importances_ > clf.feature_importances_.mean())
-        assert_array_almost_equal(X_new, X[:, feature_mask])
-
 
 def test_probability_log():
     # Predict probabilities.
@@ -1073,6 +1064,7 @@ def check_sparse_input(EstimatorClass, X, X_sparse, y):
             np.array(sparse.staged_decision_function(X_sparse)),
             np.array(sparse.staged_decision_function(X)))
 
+
 @skip_if_32bit
 def test_sparse_input():
     ests = (GradientBoostingClassifier, GradientBoostingRegressor)
diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py
index 31a862b601..c0adcdcb6f 100644
--- a/sklearn/feature_selection/from_model.py
+++ b/sklearn/feature_selection/from_model.py
@@ -4,11 +4,9 @@
 import numpy as np
 
 from .base import SelectorMixin
-from ..base import TransformerMixin, BaseEstimator, clone
+from ..base import BaseEstimator, clone
 from ..externals import six
 
-from ..utils import safe_mask, check_array, deprecated
-from ..utils.validation import check_is_fitted
 from ..exceptions import NotFittedError
 from ..utils.fixes import norm
 
@@ -78,71 +76,6 @@ def _calculate_threshold(estimator, importances, threshold):
     return threshold
 
 
-class _LearntSelectorMixin(TransformerMixin):
-    # Note because of the extra threshold parameter in transform, this does
-    # not naturally extend from SelectorMixin
-    """Transformer mixin selecting features based on importance weights.
-
-    This implementation can be mixin on any estimator that exposes a
-    ``feature_importances_`` or ``coef_`` attribute to evaluate the relative
-    importance of individual features for feature selection.
-    """
-    @deprecated('Support to use estimators as feature selectors will be '
-                'removed in version 0.19. Use SelectFromModel instead.')
-    def transform(self, X, threshold=None):
-        """Reduce X to its most important features.
-
-        Uses ``coef_`` or ``feature_importances_`` to determine the most
-        important features.  For models with a ``coef_`` for each class, the
-        absolute sum over the classes is used.
-
-        Parameters
-        ----------
-        X : array or scipy sparse matrix of shape [n_samples, n_features]
-            The input samples.
-
-        threshold : string, float or None, optional (default=None)
-            The threshold value to use for feature selection. Features whose
-            importance is greater or equal are kept while the others are
-            discarded. If "median" (resp. "mean"), then the threshold value is
-            the median (resp. the mean) of the feature importances. A scaling
-            factor (e.g., "1.25*mean") may also be used. If None and if
-            available, the object attribute ``threshold`` is used. Otherwise,
-            "mean" is used by default.
-
-        Returns
-        -------
-        X_r : array of shape [n_samples, n_selected_features]
-            The input samples with only the selected features.
-        """
-        check_is_fitted(self, ('coef_', 'feature_importances_'),
-                        all_or_any=any)
-
-        X = check_array(X, 'csc')
-        importances = _get_feature_importances(self)
-        if len(importances) != X.shape[1]:
-            raise ValueError("X has different number of features than"
-                             " during model fitting.")
-
-        if threshold is None:
-            threshold = getattr(self, 'threshold', None)
-        threshold = _calculate_threshold(self, importances, threshold)
-
-        # Selection
-        try:
-            mask = importances >= threshold
-        except TypeError:
-            # Fails in Python 3.x when threshold is str;
-            # result is array of True
-            raise ValueError("Invalid threshold: all features are discarded.")
-
-        if np.any(mask):
-            mask = safe_mask(X, mask)
-            return X[:, mask]
-        else:
-            raise ValueError("Invalid threshold: all features are discarded.")
-
-
 class SelectFromModel(BaseEstimator, SelectorMixin):
     """Meta-transformer for selecting features based on importance weights.
 
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index 7f303cf8e2..6efb6f405b 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -1,5 +1,4 @@
 import numpy as np
-import scipy.sparse as sp
 
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_equal
@@ -9,7 +8,6 @@ from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_raises
-from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import skip_if_32bit
 
 from sklearn import datasets
@@ -25,28 +23,6 @@ data, y = iris.data, iris.target
 rng = np.random.RandomState(0)
 
 
-def test_transform_linear_model():
-    for clf in (LogisticRegression(C=0.1),
-                LinearSVC(C=0.01, dual=False),
-                SGDClassifier(alpha=0.001, n_iter=50, shuffle=True,
-                              random_state=0)):
-        for thresh in (None, ".09*mean", "1e-5 * median"):
-            for func in (np.array, sp.csr_matrix):
-                X = func(data)
-                clf.set_params(penalty="l1")
-                clf.fit(X, y)
-                X_new = assert_warns(
-                    DeprecationWarning, clf.transform, X, thresh)
-                if isinstance(clf, SGDClassifier):
-                    assert_true(X_new.shape[1] <= X.shape[1])
-                else:
-                    assert_less(X_new.shape[1], X.shape[1])
-                clf.set_params(penalty="l2")
-                clf.fit(X_new, y)
-                pred = clf.predict(X_new)
-                assert_greater(np.mean(pred == y), 0.7)
-
-
 def test_invalid_input():
     clf = SGDClassifier(alpha=0.1, n_iter=10, shuffle=True, random_state=None)
     for threshold in ["gobbledigook", ".5 * gobbledigook"]:
diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py
index 6c0101a559..835ad92021 100644
--- a/sklearn/grid_search.py
+++ b/sklearn/grid_search.py
@@ -741,7 +741,7 @@ class GridSearchCV(BaseSearchCV):
     ...                             # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
     GridSearchCV(cv=None, error_score=...,
            estimator=SVC(C=1.0, cache_size=..., class_weight=..., coef0=...,
-                         decision_function_shape=None, degree=..., gamma=...,
+                         decision_function_shape='ovr', degree=..., gamma=...,
                          kernel='rbf', max_iter=-1, probability=False,
                          random_state=None, shrinking=True, tol=...,
                          verbose=False),
diff --git a/sklearn/lda.py b/sklearn/lda.py
deleted file mode 100644
index 9c3959b6bc..0000000000
--- a/sklearn/lda.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import warnings
-from .discriminant_analysis import LinearDiscriminantAnalysis as _LDA
-
-warnings.warn("lda.LDA has been moved to "
-              "discriminant_analysis.LinearDiscriminantAnalysis "
-              "in 0.17 and will be removed in 0.19", DeprecationWarning)
-
-
-class LDA(_LDA):
-    """
-    Alias for
-    :class:`sklearn.discriminant_analysis.LinearDiscriminantAnalysis`.
-
-    .. deprecated:: 0.17
-        This class will be removed in 0.19.
-        Use
-        :class:`sklearn.discriminant_analysis.LinearDiscriminantAnalysis`
-        instead.
-    """
-    pass
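
Imports from the deleted shim modules move to ``sklearn.discriminant_analysis`` (the ``sklearn.qda`` shim removed further below is the analogous case); a minimal sketch:

    # Before (removed in 0.19):
    #     from sklearn.lda import LDA
    #     from sklearn.qda import QDA
    from sklearn.discriminant_analysis import (
        LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis)
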
diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py
index 1dbb1a4cc7..7ac614a1cd 100644
--- a/sklearn/linear_model/base.py
+++ b/sklearn/linear_model/base.py
@@ -229,22 +229,6 @@ class LinearModel(six.with_metaclass(ABCMeta, BaseEstimator)):
     def fit(self, X, y):
         """Fit model."""
 
-    @deprecated(" and will be removed in 0.19.")
-    def decision_function(self, X):
-        """Decision function of the linear model.
-
-        Parameters
-        ----------
-        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
-            Samples.
-
-        Returns
-        -------
-        C : array, shape = (n_samples,)
-            Returns predicted values.
-        """
-        return self._decision_function(X)
-
     def _decision_function(self, X):
         check_is_fitted(self, "coef_")
 
@@ -478,12 +462,6 @@ class LinearRegression(LinearModel, RegressorMixin):
         self.copy_X = copy_X
         self.n_jobs = n_jobs
 
-    @property
-    @deprecated("``residues_`` is deprecated and will be removed in 0.19")
-    def residues_(self):
-        """Get the residues of the fitted model."""
-        return self._residues
-
     def fit(self, X, y, sample_weight=None):
         """
         Fit linear model.
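
For linear regressors the deprecated ``decision_function`` alias returned the same values as ``predict``, which remains the public entry point; a minimal sketch:

    import numpy as np
    from sklearn.linear_model import LinearRegression

    X = np.array([[0.], [1.], [2.]])
    y = np.array([0., 1., 2.])

    reg = LinearRegression().fit(X, y)
    # Before: reg.decision_function(X)  (deprecated alias)
    print(reg.predict(X))  # [ 0.  1.  2.]
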
diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
index 5871c29738..de33fec9ab 100644
--- a/sklearn/linear_model/coordinate_descent.py
+++ b/sklearn/linear_model/coordinate_descent.py
@@ -15,7 +15,7 @@ from scipy import sparse
 from .base import LinearModel, _pre_fit
 from ..base import RegressorMixin
 from .base import _preprocess_data
-from ..utils import check_array, check_X_y, deprecated
+from ..utils import check_array, check_X_y
 from ..utils.validation import check_random_state
 from ..model_selection import check_cv
 from ..externals.joblib import Parallel, delayed
@@ -746,21 +746,6 @@ class ElasticNet(LinearModel, RegressorMixin):
         """ sparse representation of the fitted ``coef_`` """
         return sparse.csr_matrix(self.coef_)
 
-    @deprecated(" and will be removed in 0.19")
-    def decision_function(self, X):
-        """Decision function of the linear model
-
-        Parameters
-        ----------
-        X : numpy array or scipy.sparse matrix of shape (n_samples, n_features)
-
-        Returns
-        -------
-        T : array, shape (n_samples,)
-            The predicted decision function
-        """
-        return self._decision_function(X)
-
     def _decision_function(self, X):
         """Decision function of the linear model
 
diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py
index e792371383..ac4973f1df 100644
--- a/sklearn/linear_model/logistic.py
+++ b/sklearn/linear_model/logistic.py
@@ -17,7 +17,6 @@ from scipy import optimize, sparse
 
 from .base import LinearClassifierMixin, SparseCoefMixin, BaseEstimator
 from .sag import sag_solver
-from ..feature_selection.from_model import _LearntSelectorMixin
 from ..preprocessing import LabelEncoder, LabelBinarizer
 from ..svm.base import _fit_liblinear
 from ..utils import check_array, check_consistent_length, compute_class_weight
@@ -445,7 +444,7 @@ def _check_solver_option(solver, multi_class, penalty, dual):
 
 def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
                              max_iter=100, tol=1e-4, verbose=0,
-                             solver='lbfgs', coef=None, copy=False,
+                             solver='lbfgs', coef=None,
                              class_weight=None, dual=False, penalty='l2',
                              intercept_scaling=1., multi_class='ovr',
                              random_state=None, check_input=True,
@@ -502,10 +501,6 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
         Initialization value for coefficients of logistic regression.
         Useless for liblinear solver.
 
-    copy : bool, default False
-        Whether or not to produce a copy of the data. A copy is not required
-        anymore. This parameter is deprecated and will be removed in 0.19.
-
     class_weight : dict or 'balanced', optional
         Weights associated with classes in the form ``{class_label: weight}``.
         If not given, all classes are supposed to have weight one.
@@ -579,21 +574,19 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
     -----
     You might get slightly different results with the solver liblinear than
     with the others since this uses LIBLINEAR which penalizes the intercept.
-    """
-    if copy:
-        warnings.warn("A copy is not required anymore. The 'copy' parameter "
-                      "is deprecated and will be removed in 0.19.",
-                      DeprecationWarning)
 
+    .. versionchanged:: 0.19
+        The "copy" parameter was removed.
+    """
     if isinstance(Cs, numbers.Integral):
         Cs = np.logspace(-4, 4, Cs)
 
     _check_solver_option(solver, multi_class, penalty, dual)
 
     # Preprocessing.
-    if check_input or copy:
+    if check_input:
         X = check_array(X, accept_sparse='csr', dtype=np.float64)
-        y = check_array(y, ensure_2d=False, copy=copy, dtype=None)
+        y = check_array(y, ensure_2d=False, dtype=None)
         check_consistent_length(X, y)
     _, n_features = X.shape
     classes = np.unique(y)
@@ -632,8 +625,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
         y_bin[~mask] = -1.
         # for compute_class_weight
 
-        # 'auto' is deprecated and will be removed in 0.19
-        if class_weight in ("auto", "balanced"):
+        if class_weight == "balanced":
             class_weight_ = compute_class_weight(class_weight, mask_classes,
                                                  y_bin)
             sample_weight *= class_weight_[le.fit_transform(y_bin)]
@@ -945,7 +937,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
 
 
 class LogisticRegression(BaseEstimator, LinearClassifierMixin,
-                         _LearntSelectorMixin, SparseCoefMixin):
+                         SparseCoefMixin):
     """Logistic Regression (aka logit, MaxEnt) classifier.
 
     In the multiclass case, the training algorithm uses the one-vs-rest (OvR)
@@ -1011,8 +1003,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin,
         through the fit method) if sample_weight is specified.
 
         .. versionadded:: 0.17
-           *class_weight='balanced'* instead of deprecated
-           *class_weight='auto'*.
+           *class_weight='balanced'*
 
     max_iter : int, default: 100
         Useful only for the newton-cg, sag and lbfgs solvers.
@@ -1238,7 +1229,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin,
                                backend=backend)(
             path_func(X, y, pos_class=class_, Cs=[self.C],
                       fit_intercept=self.fit_intercept, tol=self.tol,
-                      verbose=self.verbose, solver=self.solver, copy=False,
+                      verbose=self.verbose, solver=self.solver,
                       multi_class=self.multi_class, max_iter=self.max_iter,
                       class_weight=self.class_weight, check_input=False,
                       random_state=self.random_state, coef=warm_start_coef_,
@@ -1313,7 +1304,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin,
 
 
 class LogisticRegressionCV(LogisticRegression, BaseEstimator,
-                           LinearClassifierMixin, _LearntSelectorMixin):
+                           LinearClassifierMixin):
     """Logistic Regression CV (aka logit, MaxEnt) classifier.
 
     This class implements logistic regression using liblinear, newton-cg, sag
@@ -1559,11 +1550,6 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator,
         check_classification_targets(y)
 
         class_weight = self.class_weight
-        if class_weight and not(isinstance(class_weight, dict) or
-                                class_weight in ['balanced', 'auto']):
-            # 'auto' is deprecated and will be removed in 0.19
-            raise ValueError("class_weight provided should be a "
-                             "dict or 'balanced'")
 
         # Encode for string labels
         label_encoder = LabelEncoder().fit(y)
@@ -1609,7 +1595,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator,
             iter_classes = classes
 
         # compute the class weights for the entire dataset y
-        if class_weight in ("auto", "balanced"):
+        if class_weight == "balanced":
             class_weight = compute_class_weight(class_weight,
                                                 np.arange(len(self.classes_)),
                                                 y)
@@ -1703,7 +1689,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator,
                     X, y, pos_class=encoded_label, Cs=[C_], solver=self.solver,
                     fit_intercept=self.fit_intercept, coef=coef_init,
                     max_iter=self.max_iter, tol=self.tol,
-                    penalty=self.penalty, copy=False,
+                    penalty=self.penalty,
                     class_weight=class_weight,
                     multi_class=self.multi_class,
                     verbose=max(0, self.verbose - 1),
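
With 'auto' removed, the reweighting preset is spelled 'balanced'; a minimal sketch on an imbalanced synthetic problem:

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    X, y = make_classification(weights=[0.9, 0.1], random_state=0)

    # Weights are n_samples / (n_classes * np.bincount(y)).
    clf = LogisticRegression(class_weight='balanced').fit(X, y)
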
diff --git a/sklearn/linear_model/perceptron.py b/sklearn/linear_model/perceptron.py
index 76f8c648c7..d597181765 100644
--- a/sklearn/linear_model/perceptron.py
+++ b/sklearn/linear_model/perceptron.py
@@ -2,10 +2,9 @@
 # License: BSD 3 clause
 
 from .stochastic_gradient import BaseSGDClassifier
-from ..feature_selection.from_model import _LearntSelectorMixin
 
 
-class Perceptron(BaseSGDClassifier, _LearntSelectorMixin):
+class Perceptron(BaseSGDClassifier):
     """Perceptron
 
     Read more in the :ref:`User Guide <perceptron>`.
diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 232ca90a77..f93ee0b4d7 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -13,9 +13,7 @@ from ..externals.joblib import Parallel, delayed
 from .base import LinearClassifierMixin, SparseCoefMixin
 from .base import make_dataset
 from ..base import BaseEstimator, RegressorMixin
-from ..feature_selection.from_model import _LearntSelectorMixin
-from ..utils import (check_array, check_random_state, check_X_y,
-                     deprecated)
+from ..utils import check_array, check_random_state, check_X_y
 from ..utils.extmath import safe_sparse_dot
 from ..utils.multiclass import _check_partial_fit_first_call
 from ..utils.validation import check_is_fitted
@@ -497,7 +495,7 @@ class BaseSGDClassifier(six.with_metaclass(ABCMeta, BaseSGD,
         -------
         self : returns an instance of self.
         """
-        if self.class_weight in ['balanced', 'auto']:
+        if self.class_weight == 'balanced':
             raise ValueError("class_weight '{0}' is not supported for "
                              "partial_fit. In order to use 'balanced' weights,"
                              " use compute_class_weight('{0}', classes, y). "
@@ -545,7 +543,7 @@ class BaseSGDClassifier(six.with_metaclass(ABCMeta, BaseSGD,
                          sample_weight=sample_weight)
 
 
-class SGDClassifier(BaseSGDClassifier, _LearntSelectorMixin):
+class SGDClassifier(BaseSGDClassifier):
     """Linear classifiers (SVM, logistic regression, a.o.) with SGD training.
 
     This estimator implements regularized linear models with stochastic
@@ -972,21 +970,6 @@ class BaseSGDRegressor(BaseSGD, RegressorMixin):
                          intercept_init=intercept_init,
                          sample_weight=sample_weight)
 
-    @deprecated(" and will be removed in 0.19.")
-    def decision_function(self, X):
-        """Predict using the linear model
-
-        Parameters
-        ----------
-        X : {array-like, sparse matrix}, shape (n_samples, n_features)
-
-        Returns
-        -------
-        array, shape (n_samples,)
-           Predicted target values per element in X.
-        """
-        return self._decision_function(X)
-
     def _decision_function(self, X):
         """Predict using the linear model
 
@@ -1093,7 +1076,7 @@ class BaseSGDRegressor(BaseSGD, RegressorMixin):
             self.intercept_ = np.atleast_1d(self.intercept_)
 
 
-class SGDRegressor(BaseSGDRegressor, _LearntSelectorMixin):
+class SGDRegressor(BaseSGDRegressor):
     """Linear model fitted by minimizing a regularized empirical loss with SGD
 
     SGD stands for Stochastic Gradient Descent: the gradient of the loss is
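
The ``partial_fit`` error message above points callers at ``compute_class_weight``; a minimal sketch of precomputing per-sample weights for incremental training:

    import numpy as np
    from sklearn.linear_model import SGDClassifier
    from sklearn.utils import compute_class_weight

    X = np.array([[0.0], [0.1], [1.0], [1.1], [1.2]])
    y = np.array([0, 0, 1, 1, 1])
    classes = np.unique(y)

    # 'balanced' weights per class, mapped onto each sample.
    cw = compute_class_weight('balanced', classes, y)
    sample_weight = cw[np.searchsorted(classes, y)]

    clf = SGDClassifier()
    clf.partial_fit(X, y, classes=classes, sample_weight=sample_weight)
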
diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py
index 25cbf35032..fbd559695e 100644
--- a/sklearn/linear_model/tests/test_least_angle.py
+++ b/sklearn/linear_model/tests/test_least_angle.py
@@ -370,8 +370,6 @@ def test_multitarget():
     for estimator in (linear_model.LassoLars(), linear_model.Lars()):
         estimator.fit(X, Y)
         Y_pred = estimator.predict(X)
-        Y_dec = assert_warns(DeprecationWarning, estimator.decision_function, X)
-        assert_array_almost_equal(Y_pred, Y_dec)
         alphas, active, coef, path = (estimator.alphas_, estimator.active_,
                                       estimator.coef_, estimator.coef_path_)
         for k in range(n_targets):
diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py
index 7fcee53651..ec2be517bf 100644
--- a/sklearn/linear_model/tests/test_logistic.py
+++ b/sklearn/linear_model/tests/test_logistic.py
@@ -10,7 +10,6 @@ from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_warns
-from sklearn.utils.testing import assert_warns_message
 from sklearn.utils.testing import raises
 from sklearn.utils.testing import ignore_warnings
 from sklearn.utils.testing import assert_raise_message
@@ -733,16 +732,6 @@ def test_logistic_regression_class_weights():
         assert_array_almost_equal(clf1.coef_, clf2.coef_, decimal=6)
 
 
-def test_multinomial_logistic_regression_with_classweight_auto():
-    X, y = iris.data, iris.target
-    model = LogisticRegression(multi_class='multinomial',
-                               class_weight='auto', solver='lbfgs')
-    # 'auto' is deprecated and will be removed in 0.19
-    assert_warns_message(DeprecationWarning,
-                         "class_weight='auto' heuristic is deprecated",
-                         model.fit, X, y)
-
-
 def test_logistic_regression_convergence_warnings():
     # Test that warnings are raised if model does not converge
 
diff --git a/sklearn/metrics/base.py b/sklearn/metrics/base.py
index 0ad96c1afd..b8bbab3093 100644
--- a/sklearn/metrics/base.py
+++ b/sklearn/metrics/base.py
@@ -19,15 +19,6 @@ import numpy as np
 from ..utils import check_array, check_consistent_length
 from ..utils.multiclass import type_of_target
 
-from ..exceptions import UndefinedMetricWarning as _UndefinedMetricWarning
-from ..utils import deprecated
-
-
-@deprecated("UndefinedMetricWarning has been moved into the sklearn.exceptions"
-            " module. It will not be available here from version 0.19")
-class UndefinedMetricWarning(_UndefinedMetricWarning):
-    pass
-
 
 def _average_binary_score(binary_metric, y_true, y_score, average,
                           sample_weight=None):
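
The warning class now lives only in ``sklearn.exceptions``; a minimal sketch of the surviving import and a typical trigger:

    import warnings
    from sklearn.exceptions import UndefinedMetricWarning
    from sklearn.metrics import precision_score

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # Precision is undefined here: no positive predictions at all.
        precision_score([0, 1], [0, 0])
    assert any(issubclass(w.category, UndefinedMetricWarning)
               for w in caught)
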
diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py
index e4af5b2921..0450c39533 100644
--- a/sklearn/metrics/regression.py
+++ b/sklearn/metrics/regression.py
@@ -29,7 +29,6 @@ from ..utils.validation import check_array, check_consistent_length
 from ..utils.validation import column_or_1d
 from ..externals.six import string_types
 
-import warnings
 
 __ALL__ = [
     "mean_absolute_error",
@@ -436,9 +435,8 @@ def explained_variance_score(y_true, y_pred,
     return np.average(output_scores, weights=avg_weights)
 
 
-def r2_score(y_true, y_pred,
-             sample_weight=None,
-             multioutput=None):
+def r2_score(y_true, y_pred, sample_weight=None,
+             multioutput="uniform_average"):
     """R^2 (coefficient of determination) regression score function.
 
     Best possible score is 1.0 and it can be negative (because the
@@ -464,9 +462,7 @@ def r2_score(y_true, y_pred,
 
         Defines aggregating of multiple output scores.
         Array-like value defines weights used to average scores.
-        Default value corresponds to 'variance_weighted', this behaviour is
-        deprecated since version 0.17 and will be changed to 'uniform_average'
-        starting from 0.19.
+        Default is "uniform_average".
 
         'raw_values' :
             Returns a full set of scores in case of multioutput input.
@@ -478,6 +474,9 @@ def r2_score(y_true, y_pred,
             Scores of all outputs are averaged, weighted by the variances
             of each individual output.
 
+        .. versionchanged:: 0.19
+            Default value of multioutput is 'uniform_average'.
+
     Returns
     -------
     z : float or ndarray of floats
@@ -543,13 +542,6 @@ def r2_score(y_true, y_pred,
     # arbitrary set to zero to avoid -inf scores, having a constant
     # y_true is not interesting for scoring a regression anyway
     output_scores[nonzero_numerator & ~nonzero_denominator] = 0.
-    if multioutput is None and y_true.shape[1] != 1:
-        warnings.warn("Default 'multioutput' behavior now corresponds to "
-                      "'variance_weighted' value which is deprecated since "
-                      "0.17, it will be changed to 'uniform_average' "
-                      "starting from 0.19.",
-                      DeprecationWarning)
-        multioutput = 'variance_weighted'
     if isinstance(multioutput, string_types):
         if multioutput == 'raw_values':
             # return scores individually
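
The new default can be made explicit by callers; a minimal sketch comparing the two aggregation modes on a two-output toy case:

    from sklearn.metrics import r2_score

    y_true = [[0.5, 1], [-1, 1], [7, -6]]
    y_pred = [[0, 2], [-1, 2], [8, -5]]

    # 0.19 default: unweighted mean of the per-output scores.
    r2_score(y_true, y_pred)  # == multioutput='uniform_average'

    # The old implicit behaviour is now opt-in:
    r2_score(y_true, y_pred, multioutput='variance_weighted')
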
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index e1d744ceab..566ec8c996 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -804,7 +804,7 @@ class GridSearchCV(BaseSearchCV):
     ...                             # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
     GridSearchCV(cv=None, error_score=...,
            estimator=SVC(C=1.0, cache_size=..., class_weight=..., coef0=...,
-                         decision_function_shape=None, degree=..., gamma=...,
+                         decision_function_shape='ovr', degree=..., gamma=...,
                          kernel='rbf', max_iter=-1, probability=False,
                          random_state=None, shrinking=True, tol=...,
                          verbose=False),
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index d8fa137d70..3b47eff30a 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -10,7 +10,6 @@ estimator, as a chain of transforms and estimators.
 # License: BSD
 
 from collections import defaultdict
-from warnings import warn
 from abc import ABCMeta, abstractmethod
 
 import numpy as np
@@ -470,10 +469,6 @@ class Pipeline(_BasePipeline):
         return self._inverse_transform
 
     def _inverse_transform(self, X):
-        if hasattr(X, 'ndim') and X.ndim == 1:
-            warn("From version 0.19, a 1d X will not be reshaped in"
-                 " pipeline.inverse_transform any more.", FutureWarning)
-            X = X[None, :]
         Xt = X
         for name, transform in self.steps[::-1]:
             if transform is not None:
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 5e896a8ab1..ee160a1a8c 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -17,7 +17,6 @@ from scipy import sparse
 from ..base import BaseEstimator, TransformerMixin
 from ..externals import six
 from ..utils import check_array
-from ..utils import deprecated
 from ..utils.extmath import row_norms
 from ..utils.extmath import _incremental_mean_and_var
 from ..utils.fixes import bincount
@@ -51,13 +50,6 @@ __all__ = [
     'minmax_scale',
 ]
 
-DEPRECATION_MSG_1D = (
-    "Passing 1d arrays as data is deprecated in 0.17 and will "
-    "raise ValueError in 0.19. Reshape your data either using "
-    "X.reshape(-1, 1) if your data has a single feature or "
-    "X.reshape(1, -1) if it contains a single sample."
-)
-
 
 def _handle_zeros_in_scale(scale, copy=True):
     ''' Makes sure that whenever scale is zero, we handle it correctly.
@@ -226,19 +218,19 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
         Per feature minimum seen in the data
 
         .. versionadded:: 0.17
-           *data_min_* instead of deprecated *data_min*.
+           *data_min_*
 
     data_max_ : ndarray, shape (n_features,)
         Per feature maximum seen in the data
 
         .. versionadded:: 0.17
-           *data_max_* instead of deprecated *data_max*.
+           *data_max_*
 
     data_range_ : ndarray, shape (n_features,)
         Per feature range ``(data_max_ - data_min_)`` seen in the data
 
         .. versionadded:: 0.17
-           *data_range_* instead of deprecated *data_range*.
+           *data_range_*
 
     See also
     --------
@@ -249,18 +241,6 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
         self.feature_range = feature_range
         self.copy = copy
 
-    @property
-    @deprecated("Attribute data_range will be removed in "
-                "0.19. Use ``data_range_`` instead")
-    def data_range(self):
-        return self.data_range_
-
-    @property
-    @deprecated("Attribute data_min will be removed in "
-                "0.19. Use ``data_min_`` instead")
-    def data_min(self):
-        return self.data_min_
-
     def _reset(self):
         """Reset internal data-dependent state of the scaler, if necessary.
 
@@ -314,12 +294,9 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
             raise TypeError("MinMaxScaler does no support sparse input. "
                             "You may consider to use MaxAbsScaler instead.")
 
-        X = check_array(X, copy=self.copy, ensure_2d=False, warn_on_dtype=True,
+        X = check_array(X, copy=self.copy, warn_on_dtype=True,
                         estimator=self, dtype=FLOAT_DTYPES)
 
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
-
         data_min = np.min(X, axis=0)
         data_max = np.max(X, axis=0)
 
@@ -351,9 +328,7 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
         """
         check_is_fitted(self, 'scale_')
 
-        X = check_array(X, copy=self.copy, ensure_2d=False, dtype=FLOAT_DTYPES)
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
+        X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES)
 
         X *= self.scale_
         X += self.min_
@@ -369,9 +344,7 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
         """
         check_is_fitted(self, 'scale_')
 
-        X = check_array(X, copy=self.copy, ensure_2d=False, dtype=FLOAT_DTYPES)
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
+        X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES)
 
         X -= self.min_
         X /= self.scale_
@@ -419,13 +392,7 @@ def minmax_scale(X, feature_range=(0, 1), axis=0, copy=True):
     MinMaxScaler: Performs scaling to a given range using the ``Transformer`` API
         (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`).
     """  # noqa
-    # To allow retro-compatibility, we handle here the case of 1D-input
-    # From 0.17, 1D-input are deprecated in scaler objects
-    # Although, we want to allow the users to keep calling this function
-    # with 1D-input.
-
-    # Cast input to array, as we need to check ndim. Prior to 0.17, that was
-    # done inside the scaler object fit_transform.
+    # Unlike the scaler object, this function allows 1d input.
     # If copy is required, it will be done inside the scaler object.
     X = check_array(X, copy=False, ensure_2d=False, warn_on_dtype=True,
                     dtype=FLOAT_DTYPES)
@@ -497,7 +464,7 @@ class StandardScaler(BaseEstimator, TransformerMixin):
         Per feature relative scaling of the data.
 
         .. versionadded:: 0.17
-           *scale_* is recommended instead of deprecated *std_*.
+           *scale_*
 
     mean_ : array of floats with shape [n_features]
         The mean value for each feature in the training set.
@@ -523,12 +490,6 @@ class StandardScaler(BaseEstimator, TransformerMixin):
         self.with_std = with_std
         self.copy = copy
 
-    @property
-    @deprecated("Attribute ``std_`` will be removed in 0.19. "
-                "Use ``scale_`` instead")
-    def std_(self):
-        return self.scale_
-
     def _reset(self):
         """Reset internal data-dependent state of the scaler, if necessary.
 
@@ -579,11 +540,7 @@ class StandardScaler(BaseEstimator, TransformerMixin):
         y : Passthrough for ``Pipeline`` compatibility.
         """
         X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
-                        ensure_2d=False, warn_on_dtype=True,
-                        estimator=self, dtype=FLOAT_DTYPES)
-
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
+                        warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES)
 
         # Even in the case of `with_mean=False`, we update the mean anyway
         # This is needed for the incremental computation of the var
@@ -641,13 +598,9 @@ class StandardScaler(BaseEstimator, TransformerMixin):
         check_is_fitted(self, 'scale_')
 
         copy = copy if copy is not None else self.copy
-        X = check_array(X, accept_sparse='csr', copy=copy,
-                        ensure_2d=False, warn_on_dtype=True,
+        X = check_array(X, accept_sparse='csr', copy=copy, warn_on_dtype=True,
                         estimator=self, dtype=FLOAT_DTYPES)
 
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
-
         if sparse.issparse(X):
             if self.with_mean:
                 raise ValueError(
@@ -779,10 +732,7 @@ class MaxAbsScaler(BaseEstimator, TransformerMixin):
         y : Passthrough for ``Pipeline`` compatibility.
         """
         X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
-                        ensure_2d=False, estimator=self, dtype=FLOAT_DTYPES)
-
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
+                        estimator=self, dtype=FLOAT_DTYPES)
 
         if sparse.issparse(X):
             mins, maxs = min_max_axis(X, axis=0)
@@ -812,10 +762,7 @@ class MaxAbsScaler(BaseEstimator, TransformerMixin):
         """
         check_is_fitted(self, 'scale_')
         X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
-                        ensure_2d=False, estimator=self, dtype=FLOAT_DTYPES)
-
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
+                        estimator=self, dtype=FLOAT_DTYPES)
 
         if sparse.issparse(X):
             inplace_column_scale(X, 1.0 / self.scale_)
@@ -833,9 +780,7 @@ class MaxAbsScaler(BaseEstimator, TransformerMixin):
         """
         check_is_fitted(self, 'scale_')
         X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
-                        ensure_2d=False, estimator=self, dtype=FLOAT_DTYPES)
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
+                        estimator=self, dtype=FLOAT_DTYPES)
 
         if sparse.issparse(X):
             inplace_column_scale(X, self.scale_)
@@ -868,13 +813,8 @@ def maxabs_scale(X, axis=0, copy=True):
     MaxAbsScaler: Performs scaling to the [-1, 1] range using the ``Transformer`` API
         (e.g. as part of a preprocessing :class:`sklearn.pipeline.Pipeline`).
     """  # noqa
-    # To allow retro-compatibility, we handle here the case of 1D-input
-    # From 0.17, 1D-input are deprecated in scaler objects
-    # Although, we want to allow the users to keep calling this function
-    # with 1D-input.
+    # Unlike the scaler object, this function allows 1d input.
 
-    # Cast input to array, as we need to check ndim. Prior to 0.17, that was
-    # done inside the scaler object fit_transform.
     # If copy is required, it will be done inside the scaler object.
     X = check_array(X, accept_sparse=('csr', 'csc'), copy=False,
                     ensure_2d=False, dtype=FLOAT_DTYPES)
@@ -980,10 +920,7 @@ class RobustScaler(BaseEstimator, TransformerMixin):
     def _check_array(self, X, copy):
         """Makes sure centering is not enabled for sparse matrices."""
         X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
-                        ensure_2d=False, estimator=self, dtype=FLOAT_DTYPES)
-
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
+                        estimator=self, dtype=FLOAT_DTYPES)
 
         if sparse.issparse(X):
             if self.with_centering:
@@ -1004,8 +941,6 @@ class RobustScaler(BaseEstimator, TransformerMixin):
         if sparse.issparse(X):
             raise TypeError("RobustScaler cannot be fitted on sparse inputs")
         X = self._check_array(X, self.copy)
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
         if self.with_centering:
             self.center_ = np.median(X, axis=0)
 
@@ -1033,8 +968,6 @@ class RobustScaler(BaseEstimator, TransformerMixin):
         if self.with_scaling:
             check_is_fitted(self, 'scale_')
         X = self._check_array(X, self.copy)
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
 
         if sparse.issparse(X):
             if self.with_scaling:
@@ -1059,8 +992,6 @@ class RobustScaler(BaseEstimator, TransformerMixin):
         if self.with_scaling:
             check_is_fitted(self, 'scale_')
         X = self._check_array(X, self.copy)
-        if X.ndim == 1:
-            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
 
         if sparse.issparse(X):
             if self.with_scaling:
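
After the removal, scaler objects reject 1d input while the stateless helper functions still accept it; a minimal sketch:

    import numpy as np
    from sklearn.preprocessing import StandardScaler, scale

    x = np.array([1.0, 2.0, 3.0])

    # The function API keeps accepting 1d input...
    scale(x)

    # ...but estimator objects now require 2d: reshape a single feature.
    StandardScaler().fit_transform(x.reshape(-1, 1))
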
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index d76e008972..7a51049b60 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -27,7 +27,6 @@ from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_false
 from sklearn.utils.testing import assert_warns_message
 from sklearn.utils.testing import assert_no_warnings
-from sklearn.utils.testing import ignore_warnings
 from sklearn.utils.testing import assert_allclose
 from sklearn.utils.testing import skip_if_32bit
 
@@ -790,12 +789,12 @@ def test_scale_sparse_with_mean_raise_exception():
 
 def test_scale_input_finiteness_validation():
     # Check if non-finite inputs raise ValueError
-    X = [np.nan, 5, 6, 7, 8]
+    X = [[np.nan, 5, 6, 7, 8]]
     assert_raises_regex(ValueError,
                         "Input contains NaN, infinity or a value too large",
                         scale, X)
 
-    X = [np.inf, 5, 6, 7, 8]
+    X = [[np.inf, 5, 6, 7, 8]]
     assert_raises_regex(ValueError,
                         "Input contains NaN, infinity or a value too large",
                         scale, X)
@@ -1019,22 +1018,6 @@ def test_maxabs_scaler_transform_one_row_csr():
     assert_array_almost_equal(X.toarray(), X_scaled_back.toarray())
 
 
-def test_deprecation_minmax_scaler():
-    rng = np.random.RandomState(0)
-    X = rng.random_sample((5, 4))
-    scaler = MinMaxScaler().fit(X)
-
-    depr_message = ("Attribute data_range will be removed in "
-                    "0.19. Use ``data_range_`` instead")
-    assert_warns_message(DeprecationWarning, depr_message, getattr, scaler,
-                         "data_range")
-
-    depr_message = ("Attribute data_min will be removed in "
-                    "0.19. Use ``data_min_`` instead")
-    assert_warns_message(DeprecationWarning, depr_message, getattr, scaler,
-                         "data_min")
-
-
 def test_warning_scaling_integers():
     # Check warning when scaling integer data
     X = np.array([[1, 2, 0],
@@ -1443,17 +1426,6 @@ def test_fit_transform():
         assert_array_equal(X_transformed, X_transformed2)
 
 
-def test_deprecation_standard_scaler():
-    rng = np.random.RandomState(0)
-    X = rng.random_sample((5, 4))
-    scaler = StandardScaler().fit(X)
-    depr_message = ("Function std_ is deprecated; Attribute ``std_`` will be "
-                    "removed in 0.19. Use ``scale_`` instead")
-    std_ = assert_warns_message(DeprecationWarning, depr_message, getattr,
-                                scaler, "std_")
-    assert_array_equal(std_, scaler.scale_)
-
-
 def test_add_dummy_feature():
     X = [[1, 0], [0, 1], [0, 1]]
     X = add_dummy_feature(X)
diff --git a/sklearn/qda.py b/sklearn/qda.py
deleted file mode 100644
index 604d6a919d..0000000000
--- a/sklearn/qda.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import warnings
-from .discriminant_analysis import QuadraticDiscriminantAnalysis as _QDA
-
-warnings.warn("qda.QDA has been moved to "
-              "discriminant_analysis.QuadraticDiscriminantAnalysis "
-              "in 0.17 and will be removed in 0.19.", DeprecationWarning)
-
-
-class QDA(_QDA):
-    """
-    Alias for
-    :class:`sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis`.
-
-    .. deprecated:: 0.17
-        This class will be removed in 0.19.
-        Use
-        :class:`sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis`
-        instead.
-    """
-    pass
diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
index b00130127f..3e416b0821 100644
--- a/sklearn/svm/base.py
+++ b/sklearn/svm/base.py
@@ -12,12 +12,11 @@ from ..preprocessing import LabelEncoder
 from ..utils.multiclass import _ovr_decision_function
 from ..utils import check_array, check_consistent_length, check_random_state
 from ..utils import column_or_1d, check_X_y
-from ..utils import compute_class_weight, deprecated
+from ..utils import compute_class_weight
 from ..utils.extmath import safe_sparse_dot
 from ..utils.validation import check_is_fitted
 from ..utils.multiclass import check_classification_targets
 from ..externals import six
-from ..exceptions import ChangedBehaviorWarning
 from ..exceptions import ConvergenceWarning
 from ..exceptions import NotFittedError
 
@@ -368,24 +367,6 @@ class BaseLibSVM(six.with_metaclass(ABCMeta, BaseEstimator)):
             X = np.asarray(kernel, dtype=np.float64, order='C')
         return X
 
-    @deprecated(" and will be removed in 0.19")
-    def decision_function(self, X):
-        """Distance of the samples X to the separating hyperplane.
-
-        Parameters
-        ----------
-        X : array-like, shape (n_samples, n_features)
-            For kernel="precomputed", the expected shape of X is
-            [n_samples_test, n_samples_train].
-
-        Returns
-        -------
-        X : array-like, shape (n_samples, n_class * (n_class-1) / 2)
-            Returns the decision function of the sample for each class
-            in the model.
-        """
-        return self._decision_function(X)
-
     def _decision_function(self, X):
         """Distance of the samples X to the separating hyperplane.
 
@@ -545,11 +526,6 @@ class BaseSVC(six.with_metaclass(ABCMeta, BaseLibSVM, ClassifierMixin)):
             n_classes)
         """
         dec = self._decision_function(X)
-        if self.decision_function_shape is None and len(self.classes_) > 2:
-            warnings.warn("The decision_function_shape default value will "
-                          "change from 'ovo' to 'ovr' in 0.19. This will change "
-                          "the shape of the decision function returned by "
-                          "SVC.", ChangedBehaviorWarning)
         if self.decision_function_shape == 'ovr' and len(self.classes_) > 2:
             return _ovr_decision_function(dec < 0, -dec, len(self.classes_))
         return dec
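
With the ChangedBehaviorWarning path removed, the shape of the decision
function is governed solely by decision_function_shape. A minimal
sketch with four classes, where the two shapes actually differ (the toy
data is illustrative):

    >>> import numpy as np
    >>> from sklearn.svm import SVC
    >>> X = np.array([[0, 0], [0, 1], [2, 0], [2, 1],
    ...               [4, 0], [4, 1], [6, 0], [6, 1]])
    >>> y = np.array([0, 0, 1, 1, 2, 2, 3, 3])
    >>> SVC(decision_function_shape='ovr').fit(X, y).decision_function(X).shape
    (8, 4)
    >>> SVC(decision_function_shape='ovo').fit(X, y).decision_function(X).shape
    (8, 6)
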
diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index 1d269a02c9..7e920011d0 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -5,14 +5,13 @@ from .base import _fit_liblinear, BaseSVC, BaseLibSVM
 from ..base import BaseEstimator, RegressorMixin
 from ..linear_model.base import LinearClassifierMixin, SparseCoefMixin, \
     LinearModel
-from ..feature_selection.from_model import _LearntSelectorMixin
 from ..utils import check_X_y
 from ..utils.validation import _num_samples
 from ..utils.multiclass import check_classification_targets
 
 
 class LinearSVC(BaseEstimator, LinearClassifierMixin,
-                _LearntSelectorMixin, SparseCoefMixin):
+                SparseCoefMixin):
     """Linear Support Vector Classification.
 
     Similar to SVC with parameter kernel='linear', but implemented in terms of
@@ -50,13 +49,13 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin,
     multi_class : string, 'ovr' or 'crammer_singer' (default='ovr')
         Determines the multi-class strategy if `y` contains more than
         two classes.
-        ``"ovr"`` trains n_classes one-vs-rest classifiers, while ``"crammer_singer"``
-        optimizes a joint objective over all classes.
+        ``"ovr"`` trains n_classes one-vs-rest classifiers, while
+        ``"crammer_singer"`` optimizes a joint objective over all classes.
         While `crammer_singer` is interesting from a theoretical perspective
         as it is consistent, it is seldom used in practice as it rarely leads
         to better accuracy and is more expensive to compute.
-        If ``"crammer_singer"`` is chosen, the options loss, penalty and dual will
-        be ignored.
+        If ``"crammer_singer"`` is chosen, the options loss, penalty and dual
+        will be ignored.
 
     fit_intercept : boolean, optional (default=True)
         Whether to calculate the intercept for this model. If set
@@ -454,14 +453,14 @@ class SVC(BaseSVC):
     max_iter : int, optional (default=-1)
         Hard limit on iterations within solver, or -1 for no limit.
 
-    decision_function_shape : 'ovo', 'ovr' or None, default=None
+    decision_function_shape : 'ovo', 'ovr', default='ovr'
         Whether to return a one-vs-rest ('ovr') decision function of shape
         (n_samples, n_classes) as all other classifiers, or the original
         one-vs-one ('ovo') decision function of libsvm which has shape
         (n_samples, n_classes * (n_classes - 1) / 2).
-        The default of None will currently behave as 'ovo' for backward
-        compatibility and raise a deprecation warning, but will change 'ovr'
-        in 0.19.
+
+        .. versionchanged:: 0.19
+            decision_function_shape is 'ovr' by default.
 
         .. versionadded:: 0.17
            *decision_function_shape='ovr'* is recommended.
@@ -510,7 +509,7 @@ class SVC(BaseSVC):
     >>> clf = SVC()
     >>> clf.fit(X, y) #doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-        decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
+        decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
         max_iter=-1, probability=False, random_state=None, shrinking=True,
         tol=0.001, verbose=False)
     >>> print(clf.predict([[-0.8, -1]]))
@@ -531,7 +530,7 @@ class SVC(BaseSVC):
     def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='auto',
                  coef0=0.0, shrinking=True, probability=False,
                  tol=1e-3, cache_size=200, class_weight=None,
-                 verbose=False, max_iter=-1, decision_function_shape=None,
+                 verbose=False, max_iter=-1, decision_function_shape='ovr',
                  random_state=None):
 
         super(SVC, self).__init__(
@@ -595,8 +594,8 @@ class NuSVC(BaseSVC):
     class_weight : {dict, 'balanced'}, optional
         Set the parameter C of class i to class_weight[i]*C for
         SVC. If not given, all classes are supposed to have
-        weight one. The "balanced" mode uses the values of y to automatically adjust
-        weights inversely proportional to class frequencies as
+        weight one. The "balanced" mode uses the values of y to automatically
+        adjust weights inversely proportional to class frequencies as
         ``n_samples / (n_classes * np.bincount(y))``
 
     verbose : bool, default: False
@@ -607,14 +606,14 @@ class NuSVC(BaseSVC):
     max_iter : int, optional (default=-1)
         Hard limit on iterations within solver, or -1 for no limit.
 
-    decision_function_shape : 'ovo', 'ovr' or None, default=None
+    decision_function_shape : 'ovo', 'ovr', default='ovr'
         Whether to return a one-vs-rest ('ovr') decision function of shape
         (n_samples, n_classes) as all other classifiers, or the original
         one-vs-one ('ovo') decision function of libsvm which has shape
         (n_samples, n_classes * (n_classes - 1) / 2).
-        The default of None will currently behave as 'ovo' for backward
-        compatibility and raise a deprecation warning, but will change 'ovr'
-        in 0.19.
+
+        .. versionchanged:: 0.19
+            decision_function_shape is 'ovr' by default.
 
         .. versionadded:: 0.17
            *decision_function_shape='ovr'* is recommended.
@@ -663,7 +662,7 @@ class NuSVC(BaseSVC):
     >>> clf = NuSVC()
     >>> clf.fit(X, y) #doctest: +NORMALIZE_WHITESPACE
     NuSVC(cache_size=200, class_weight=None, coef0=0.0,
-          decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
+          decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
           max_iter=-1, nu=0.5, probability=False, random_state=None,
           shrinking=True, tol=0.001, verbose=False)
     >>> print(clf.predict([[-0.8, -1]]))
@@ -679,10 +678,10 @@ class NuSVC(BaseSVC):
         liblinear.
     """
 
-    def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma='auto',
-                 coef0=0.0, shrinking=True, probability=False,
-                 tol=1e-3, cache_size=200, class_weight=None, verbose=False,
-                 max_iter=-1, decision_function_shape=None, random_state=None):
+    def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma='auto', coef0=0.0,
+                 shrinking=True, probability=False, tol=1e-3, cache_size=200,
+                 class_weight=None, verbose=False, max_iter=-1,
+                 decision_function_shape='ovr', random_state=None):
 
         super(NuSVC, self).__init__(
             impl='nu_svc', kernel=kernel, degree=degree, gamma=gamma,
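
Both constructors now agree on the new default; a one-line check:

    >>> from sklearn.svm import SVC, NuSVC
    >>> SVC().decision_function_shape, NuSVC().decision_function_shape
    ('ovr', 'ovr')
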
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 88c4b04dfc..ce122a4fcf 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -20,7 +20,6 @@ from sklearn.utils.testing import assert_greater, assert_in, assert_less
 from sklearn.utils.testing import assert_raises_regexp, assert_warns
 from sklearn.utils.testing import assert_warns_message, assert_raise_message
 from sklearn.utils.testing import ignore_warnings, assert_raises
-from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.exceptions import NotFittedError
 from sklearn.multiclass import OneVsRestClassifier
@@ -87,17 +86,6 @@ def test_libsvm_iris():
     assert_array_equal(pred, pred2)
 
 
-@ignore_warnings
-def test_single_sample_1d():
-    # Test whether SVCs work on a single sample given as a 1-d array
-
-    clf = svm.SVC().fit(X, Y)
-    clf.predict(X[0])
-
-    clf = svm.LinearSVC(random_state=0).fit(X, Y)
-    clf.predict(X[0])
-
-
 def test_precomputed():
     # SVC with a precomputed kernel.
     # We test it with a toy dataset and with iris.
@@ -382,13 +370,6 @@ def test_decision_function_shape():
     dec = clf.decision_function(X_train)
     assert_equal(dec.shape, (len(X_train), 10))
 
-    # check deprecation warning
-    clf = svm.SVC(kernel='linear', C=0.1).fit(X_train, y_train)
-    msg = "change the shape of the decision function"
-    dec = assert_warns_message(ChangedBehaviorWarning, msg,
-                               clf.decision_function, X_train)
-    assert_equal(dec.shape, (len(X_train), 10))
-
 
 def test_svr_predict():
     # Test SVR's decision_function
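
The deleted test_single_sample_1d reflects the stricter input
validation introduced elsewhere in this patch: a single sample must now
be passed as a 2-d array. A minimal sketch of the new contract (toy
data illustrative):

    >>> import numpy as np
    >>> from sklearn.svm import SVC
    >>> X = np.array([[-1, -1], [1, 1]])
    >>> clf = SVC().fit(X, [0, 1])
    >>> clf.predict(X[0].reshape(1, -1))  # bare X[0] now raises ValueError
    array([0])
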
diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py
index d1049fa6de..a7a878a731 100644
--- a/sklearn/tests/test_discriminant_analysis.py
+++ b/sklearn/tests/test_discriminant_analysis.py
@@ -1,4 +1,3 @@
-import sys
 import numpy as np
 
 from sklearn.utils.testing import assert_array_equal
@@ -11,7 +10,6 @@ from sklearn.utils.testing import assert_raise_message
 from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import ignore_warnings
-from sklearn.utils.testing import SkipTest
 
 from sklearn.datasets import make_blobs
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
@@ -19,16 +17,6 @@ from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
 from sklearn.discriminant_analysis import _cov
 
 
-# import reload
-version = sys.version_info
-if version[0] == 3:
-    # Python 3+ import for reload. Builtin in Python2
-    if version[1] == 3:
-        reload = None
-    else:
-        from importlib import reload
-
-
 # Data is just 6 separable points in the plane
 X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype='f')
 y = np.array([1, 1, 1, 2, 2, 2])
@@ -317,31 +305,6 @@ def test_qda_regularization():
     assert_array_equal(y_pred5, y5)
 
 
-def test_deprecated_lda_qda_deprecation():
-    if reload is None:
-        raise SkipTest("Can't reload module on Python3.3")
-
-    def import_lda_module():
-        import sklearn.lda
-        # ensure that we trigger DeprecationWarning even if the sklearn.lda
-        # was loaded previously by another test.
-        reload(sklearn.lda)
-        return sklearn.lda
-
-    lda = assert_warns(DeprecationWarning, import_lda_module)
-    assert isinstance(lda.LDA(), LinearDiscriminantAnalysis)
-
-    def import_qda_module():
-        import sklearn.qda
-        # ensure that we trigger DeprecationWarning even if the sklearn.qda
-        # was loaded previously by another test.
-        reload(sklearn.qda)
-        return sklearn.qda
-
-    qda = assert_warns(DeprecationWarning, import_qda_module)
-    assert isinstance(qda.QDA(), QuadraticDiscriminantAnalysis)
-
-
 def test_covariance():
     x, y = make_blobs(n_samples=100, n_features=5,
                       centers=1, random_state=42)
diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py
index 5d4140b5d7..b62e78e87c 100644
--- a/sklearn/tests/test_multiclass.py
+++ b/sklearn/tests/test_multiclass.py
@@ -334,7 +334,6 @@ def test_ovr_multilabel_predict_proba():
         # Decision function only estimator.
         decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
         assert_false(hasattr(decision_only, 'predict_proba'))
-        assert_true(hasattr(decision_only, 'decision_function'))
 
         # Estimator with predict_proba disabled, depending on parameters.
         decision_only = OneVsRestClassifier(svm.SVC(probability=False))
@@ -370,7 +369,6 @@ def test_ovr_single_label_predict_proba():
     # Decision function only estimator.
     decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
     assert_false(hasattr(decision_only, 'predict_proba'))
-    assert_true(hasattr(decision_only, 'decision_function'))
 
     Y_pred = clf.predict(X_test)
     Y_proba = clf.predict_proba(X_test)
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 5f1f62cdce..fb105abd78 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -13,7 +13,6 @@ from sklearn.utils.testing import assert_false
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_array_almost_equal
-from sklearn.utils.testing import assert_warns_message
 from sklearn.utils.testing import assert_dict_equal
 
 from sklearn.base import clone, BaseEstimator
@@ -700,14 +699,6 @@ def test_classes_property():
     assert_array_equal(clf.classes_, np.unique(y))
 
 
-def test_X1d_inverse_transform():
-    transformer = Transf()
-    pipeline = make_pipeline(transformer)
-    X = np.ones(10)
-    msg = "1d X will not be reshaped in pipeline.inverse_transform"
-    assert_warns_message(FutureWarning, msg, pipeline.inverse_transform, X)
-
-
 def test_set_feature_union_steps():
     mult2 = Mult(2)
     mult2.get_feature_names = lambda: ['x2']
diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py
index c3e8e795b3..ff662e9af4 100644
--- a/sklearn/tree/tests/test_tree.py
+++ b/sklearn/tree/tests/test_tree.py
@@ -28,7 +28,6 @@ from sklearn.utils.testing import assert_greater_equal
 from sklearn.utils.testing import assert_less
 from sklearn.utils.testing import assert_less_equal
 from sklearn.utils.testing import assert_true
-from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import raises
 from sklearn.utils.testing import ignore_warnings
 
@@ -382,11 +381,6 @@ def test_importances():
         assert_equal(importances.shape[0], 10, "Failed with {0}".format(name))
         assert_equal(n_important, 3, "Failed with {0}".format(name))
 
-        X_new = assert_warns(
-            DeprecationWarning, clf.transform, X, threshold="mean")
-        assert_less(0, X_new.shape[1], "Failed with {0}".format(name))
-        assert_less(X_new.shape[1], X.shape[1], "Failed with {0}".format(name))
-
     # Check on iris that importances are the same for all builders
     clf = DecisionTreeClassifier(random_state=0)
     clf.fit(iris.data, iris.target)
@@ -529,7 +523,8 @@ def test_error():
                       X, y)
         assert_raises(ValueError, TreeEstimator(max_depth=-1).fit, X, y)
         assert_raises(ValueError, TreeEstimator(max_features=42).fit, X, y)
-        assert_raises(ValueError, TreeEstimator(min_impurity_split=-1.0).fit, X, y)
+        assert_raises(ValueError, TreeEstimator(min_impurity_split=-1.0).fit,
+                      X, y)
 
         # Wrong dimensions
         est = TreeEstimator()
@@ -602,7 +597,6 @@ def test_min_samples_split():
                        "Failed with {0}".format(name))
 
 
-
 def test_min_samples_leaf():
     # Test if leaves contain more than leaf_count training examples
     X = np.asfortranarray(iris.data.astype(tree._tree.DTYPE))
@@ -867,7 +861,6 @@ def test_pickle():
                          "pickling with {1}".format(attribute, name))
 
 
-
 def test_multioutput():
     # Check estimators on multi-output problems.
     X = [[-2, -1],
@@ -1287,18 +1280,19 @@ def check_sparse_input(tree, dataset, max_depth=None):
 
 
 def test_sparse_input():
-    for tree, dataset in product(SPARSE_TREES,
-                                 ("clf_small", "toy", "digits", "multilabel",
-                                  "sparse-pos", "sparse-neg", "sparse-mix",
-                                  "zeros")):
+    for tree_type, dataset in product(SPARSE_TREES, ("clf_small", "toy",
+                                                     "digits", "multilabel",
+                                                     "sparse-pos",
+                                                     "sparse-neg",
+                                                     "sparse-mix", "zeros")):
         max_depth = 3 if dataset == "digits" else None
-        yield (check_sparse_input, tree, dataset, max_depth)
+        yield (check_sparse_input, tree_type, dataset, max_depth)
 
     # Due to numerical instability of MSE and too strict test, we limit the
     # maximal depth
-    for tree, dataset in product(REG_TREES, ["boston", "reg_small"]):
-        if tree in SPARSE_TREES:
-            yield (check_sparse_input, tree, dataset, 2)
+    for tree_type, dataset in product(SPARSE_TREES, ["boston", "reg_small"]):
+        if tree_type in REG_TREES:
+            yield (check_sparse_input, tree_type, dataset, 2)
 
 
 def check_sparse_parameters(tree, dataset):
@@ -1346,10 +1340,10 @@ def check_sparse_parameters(tree, dataset):
 
 
 def test_sparse_parameters():
-    for tree, dataset in product(SPARSE_TREES,
-                                 ["sparse-pos", "sparse-neg", "sparse-mix",
-                                  "zeros"]):
-        yield (check_sparse_parameters, tree, dataset)
+    for tree_type, dataset in product(SPARSE_TREES, ["sparse-pos",
+                                                     "sparse-neg",
+                                                     "sparse-mix", "zeros"]):
+        yield (check_sparse_parameters, tree_type, dataset)
 
 
 def check_sparse_criterion(tree, dataset):
@@ -1373,10 +1367,10 @@ def check_sparse_criterion(tree, dataset):
 
 
 def test_sparse_criterion():
-    for tree, dataset in product(SPARSE_TREES,
-                                 ["sparse-pos", "sparse-neg", "sparse-mix",
-                                  "zeros"]):
-        yield (check_sparse_criterion, tree, dataset)
+    for tree_type, dataset in product(SPARSE_TREES, ["sparse-pos",
+                                                     "sparse-neg",
+                                                     "sparse-mix", "zeros"]):
+        yield (check_sparse_criterion, tree_type, dataset)
 
 
 def check_explicit_sparse_zeros(tree, max_depth=3,
@@ -1449,8 +1443,8 @@ def check_explicit_sparse_zeros(tree, max_depth=3,
 
 
 def test_explicit_sparse_zeros():
-    for tree in SPARSE_TREES:
-        yield (check_explicit_sparse_zeros, tree)
+    for tree_type in SPARSE_TREES:
+        yield (check_explicit_sparse_zeros, tree_type)
 
 
 @ignore_warnings
diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py
index c3567e864c..a4b6a2aac9 100644
--- a/sklearn/tree/tree.py
+++ b/sklearn/tree/tree.py
@@ -29,7 +29,6 @@ from ..base import BaseEstimator
 from ..base import ClassifierMixin
 from ..base import RegressorMixin
 from ..externals import six
-from ..feature_selection.from_model import _LearntSelectorMixin
 from ..utils import check_array
 from ..utils import check_random_state
 from ..utils import compute_sample_weight
@@ -71,8 +70,7 @@ SPARSE_SPLITTERS = {"best": _splitter.BestSparseSplitter,
 # =============================================================================
 
 
-class BaseDecisionTree(six.with_metaclass(ABCMeta, BaseEstimator,
-                                          _LearntSelectorMixin)):
+class BaseDecisionTree(six.with_metaclass(ABCMeta, BaseEstimator)):
     """Base class for decision trees.
 
     Warning: This class should not be used directly.
@@ -739,7 +737,6 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin):
             X_idx_sorted=X_idx_sorted)
         return self
 
-
     def predict_proba(self, X, check_input=True):
         """Predict class probabilities of the input samples X.
 
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index ac16ef9ad6..ede3bb45ce 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -13,17 +13,10 @@ from .validation import (as_float_array,
                          check_random_state, column_or_1d, check_array,
                          check_consistent_length, check_X_y, indexable,
                          check_symmetric)
-from .deprecation import deprecated
 from .class_weight import compute_class_weight, compute_sample_weight
 from ..externals.joblib import cpu_count
-from ..exceptions import ConvergenceWarning as _ConvergenceWarning
 from ..exceptions import DataConversionWarning
-
-
-@deprecated("ConvergenceWarning has been moved into the sklearn.exceptions "
-            "module. It will not be available here from version 0.19")
-class ConvergenceWarning(_ConvergenceWarning):
-    pass
+from .deprecation import deprecated
 
 
 __all__ = ["murmurhash3_32", "as_float_array",
@@ -32,7 +25,7 @@ __all__ = ["murmurhash3_32", "as_float_array",
            "compute_class_weight", "compute_sample_weight",
            "column_or_1d", "safe_indexing",
            "check_consistent_length", "check_X_y", 'indexable',
-           "check_symmetric", "indices_to_mask"]
+           "check_symmetric", "indices_to_mask", "deprecated"]
 
 
 def safe_mask(X, mask):
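
With the deprecated alias gone, the warning class is importable only
from sklearn.exceptions; a one-line migration sketch:

    >>> # was: from sklearn.utils import ConvergenceWarning
    >>> from sklearn.exceptions import ConvergenceWarning
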
diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py
index 5b778423f1..119e5eabe9 100644
--- a/sklearn/utils/class_weight.py
+++ b/sklearn/utils/class_weight.py
@@ -2,7 +2,6 @@
 #          Manoj Kumar
 # License: BSD 3 clause
 
-import warnings
 import numpy as np
 from ..externals import six
 from ..utils.fixes import in1d
@@ -48,25 +47,16 @@ def compute_class_weight(class_weight, classes, y):
     if class_weight is None or len(class_weight) == 0:
         # uniform class weights
         weight = np.ones(classes.shape[0], dtype=np.float64, order='C')
-    elif class_weight in ['auto', 'balanced']:
+    elif class_weight == 'balanced':
         # Find the weight of each class as present in y.
         le = LabelEncoder()
         y_ind = le.fit_transform(y)
         if not all(np.in1d(classes, le.classes_)):
             raise ValueError("classes should have valid labels that are in y")
 
-        # inversely proportional to the number of samples in the class
-        if class_weight == 'auto':
-            recip_freq = 1. / bincount(y_ind)
-            weight = recip_freq[le.transform(classes)] / np.mean(recip_freq)
-            warnings.warn("The class_weight='auto' heuristic is deprecated in"
-                          " 0.17 in favor of a new heuristic "
-                          "class_weight='balanced'. 'auto' will be removed in"
-                          " 0.19", DeprecationWarning)
-        else:
-            recip_freq = len(y) / (len(le.classes_) *
-                                   bincount(y_ind).astype(np.float64))
-            weight = recip_freq[le.transform(classes)]
+        recip_freq = len(y) / (len(le.classes_) *
+                               bincount(y_ind).astype(np.float64))
+        weight = recip_freq[le.transform(classes)]
     else:
         # user-defined dictionary
         weight = np.ones(classes.shape[0], dtype=np.float64, order='C')
@@ -107,8 +97,8 @@ def compute_sample_weight(class_weight, y, indices=None):
         Array of indices to be used in a subsample. Can be of length less than
         n_samples in the case of a subsample, or equal to n_samples in the
         case of a bootstrap subsample with repeated indices. If None, the
-        sample weight will be calculated over the full sample. Only "auto" is
-        supported for class_weight if this is provided.
+        sample weight will be calculated over the full sample. Only "balanced"
+        is supported for class_weight if this is provided.
 
     Returns
     -------
@@ -122,7 +112,7 @@ def compute_sample_weight(class_weight, y, indices=None):
     n_outputs = y.shape[1]
 
     if isinstance(class_weight, six.string_types):
-        if class_weight not in ['balanced', 'auto']:
+        if class_weight not in ['balanced']:
             raise ValueError('The only valid preset for class_weight is '
                              '"balanced". Given "%s".' % class_weight)
     elif (indices is not None and
@@ -145,7 +135,7 @@ def compute_sample_weight(class_weight, y, indices=None):
         classes_full = np.unique(y_full)
         classes_missing = None
 
-        if class_weight in ['balanced', 'auto'] or n_outputs == 1:
+        if class_weight == 'balanced' or n_outputs == 1:
             class_weight_k = class_weight
         else:
             class_weight_k = class_weight[k]
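
The surviving 'balanced' heuristic weighs classes by
n_samples / (n_classes * np.bincount(y_ind)); a worked sketch with
illustrative labels:

    >>> import numpy as np
    >>> from sklearn.utils.class_weight import compute_class_weight
    >>> y = np.array([0, 0, 0, 1])
    >>> compute_class_weight('balanced', np.unique(y), y)  # 4/(2*3), 4/(2*1)
    array([ 0.66666667,  2.        ])
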
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 1af836a2a1..db5569af28 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -42,7 +42,6 @@ from sklearn.random_projection import BaseRandomProjection
 from sklearn.feature_selection import SelectKBest
 from sklearn.svm.base import BaseLibSVM
 from sklearn.pipeline import make_pipeline
-from sklearn.decomposition import NMF, ProjectedGradientNMF
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.exceptions import DataConversionWarning
 from sklearn.exceptions import SkipTestWarning
@@ -67,16 +66,6 @@ MULTI_OUTPUT = ['CCA', 'DecisionTreeRegressor', 'ElasticNet',
                 'RANSACRegressor', 'RadiusNeighborsRegressor',
                 'RandomForestRegressor', 'Ridge', 'RidgeCV']
 
-# Estimators with deprecated transform methods. Should be removed in 0.19 when
-# _LearntSelectorMixin is removed.
-DEPRECATED_TRANSFORM = [
-    "RandomForestClassifier", "RandomForestRegressor", "ExtraTreesClassifier",
-    "ExtraTreesRegressor", "DecisionTreeClassifier",
-    "DecisionTreeRegressor", "ExtraTreeClassifier", "ExtraTreeRegressor",
-    "LinearSVC", "SGDClassifier", "SGDRegressor", "Perceptron",
-    "LogisticRegression", "LogisticRegressionCV",
-    "GradientBoostingClassifier", "GradientBoostingRegressor"]
-
 
 def _yield_non_meta_checks(name, Estimator):
     yield check_estimators_dtypes
@@ -221,9 +210,8 @@ def _yield_all_checks(name, Estimator):
         for check in _yield_regressor_checks(name, Estimator):
             yield check
     if issubclass(Estimator, TransformerMixin):
-        if name not in DEPRECATED_TRANSFORM:
-            for check in _yield_transformer_checks(name, Estimator):
-                yield check
+        for check in _yield_transformer_checks(name, Estimator):
+            yield check
     if issubclass(Estimator, ClusterMixin):
         for check in _yield_clustering_checks(name, Estimator):
             yield check
@@ -329,10 +317,6 @@ def set_testing_parameters(estimator):
         # which is more features than we have in most cases.
         estimator.set_params(k=1)
 
-    if isinstance(estimator, NMF):
-        if not isinstance(estimator, ProjectedGradientNMF):
-            estimator.set_params(solver='cd')
-
 
 class NotAnArray(object):
     " An object that is convertable to an array"
@@ -424,8 +408,7 @@ def check_dtype_object(name, Estimator):
     if hasattr(estimator, "predict"):
         estimator.predict(X)
 
-    if (hasattr(estimator, "transform") and
-            name not in DEPRECATED_TRANSFORM):
+    if hasattr(estimator, "transform"):
         estimator.transform(X)
 
     try:
@@ -725,10 +708,7 @@ def check_pipeline_consistency(name, Estimator):
     estimator.fit(X, y)
     pipeline.fit(X, y)
 
-    if name in DEPRECATED_TRANSFORM:
-        funcs = ["score"]
-    else:
-        funcs = ["score", "fit_transform"]
+    funcs = ["score", "fit_transform"]
 
     for func_name in funcs:
         func = getattr(estimator, func_name, None)
@@ -751,11 +731,7 @@ def check_fit_score_takes_y(name, Estimator):
     set_testing_parameters(estimator)
     set_random_state(estimator)
 
-    if name in DEPRECATED_TRANSFORM:
-        funcs = ["fit", "score", "partial_fit", "fit_predict"]
-    else:
-        funcs = [
-            "fit", "score", "partial_fit", "fit_predict", "fit_transform"]
+    funcs = ["fit", "score", "partial_fit", "fit_predict", "fit_transform"]
     for func_name in funcs:
         func = getattr(estimator, func_name, None)
         if func is not None:
@@ -777,11 +753,7 @@ def check_estimators_dtypes(name, Estimator):
     y = X_train_int_64[:, 0]
     y = multioutput_estimator_convert_y_2d(name, y)
 
-    if name in DEPRECATED_TRANSFORM:
-        methods = ["predict", "decision_function", "predict_proba"]
-    else:
-        methods = [
-            "predict", "transform", "decision_function", "predict_proba"]
+    methods = ["predict", "transform", "decision_function", "predict_proba"]
 
     for X_train in [X_train_32, X_train_64, X_train_int_64, X_train_int_32]:
         estimator = Estimator()
@@ -869,8 +841,7 @@ def check_estimators_nan_inf(name, Estimator):
                     raise AssertionError(error_string_predict, Estimator)
 
             # transform
-            if (hasattr(estimator, "transform") and
-                    name not in DEPRECATED_TRANSFORM):
+            if hasattr(estimator, "transform"):
                 try:
                     estimator.transform(X_train)
                 except ValueError as e:
@@ -888,11 +859,8 @@ def check_estimators_nan_inf(name, Estimator):
 @ignore_warnings
 def check_estimators_pickle(name, Estimator):
     """Test that we can pickle all estimators"""
-    if name in DEPRECATED_TRANSFORM:
-        check_methods = ["predict", "decision_function", "predict_proba"]
-    else:
-        check_methods = ["predict", "transform", "decision_function",
-                         "predict_proba"]
+    check_methods = ["predict", "transform", "decision_function",
+                     "predict_proba"]
 
     X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                       random_state=0, n_features=2, cluster_std=0.1)
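
With the DEPRECATED_TRANSFORM special cases removed, every
TransformerMixin subclass goes through the full transformer check suite
again; a minimal sketch of the public entry point (LinearSVC is just an
example estimator):

    >>> from sklearn.utils.estimator_checks import check_estimator
    >>> from sklearn.svm import LinearSVC
    >>> check_estimator(LinearSVC)  # raises AssertionError on a failed check
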
diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py
index ce5465ad1f..9a62b3c6a9 100644
--- a/sklearn/utils/testing.py
+++ b/sklearn/utils/testing.py
@@ -15,8 +15,6 @@ import inspect
 import pkgutil
 import warnings
 import sys
-import re
-import platform
 import struct
 
 import scipy as sp
@@ -636,28 +634,6 @@ def skip_if_32bit(func):
     return run_test
 
 
-def if_not_mac_os(versions=('10.7', '10.8', '10.9'),
-                  message='Multi-process bug in Mac OS X >= 10.7 '
-                          '(see issue #636)'):
-    """Test decorator that skips test if OS is Mac OS X and its
-    major version is one of ``versions``.
-    """
-    warnings.warn("if_not_mac_os is deprecated in 0.17 and will be removed"
-                  " in 0.19: use the safer and more generic"
-                  " if_safe_multiprocessing_with_blas instead",
-                  DeprecationWarning)
-    mac_version, _, _ = platform.mac_ver()
-    skip = '.'.join(mac_version.split('.')[:2]) in versions
-
-    def decorator(func):
-        if skip:
-            @wraps(func)
-            def func(*args, **kwargs):
-                raise SkipTest(message)
-        return func
-    return decorator
-
-
 def if_safe_multiprocessing_with_blas(func):
     """Decorator for tests involving both BLAS calls and multiprocessing.
 
diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py
index 1bf6377a48..a073eeafcf 100644
--- a/sklearn/utils/tests/test_class_weight.py
+++ b/sklearn/utils/tests/test_class_weight.py
@@ -12,17 +12,12 @@ from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raise_message
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_equal
-from sklearn.utils.testing import assert_warns
 
 
 def test_compute_class_weight():
     # Test (and demo) compute_class_weight.
     y = np.asarray([2, 2, 2, 3, 3, 4])
     classes = np.unique(y)
-    cw = assert_warns(DeprecationWarning,
-                      compute_class_weight, "auto", classes, y)
-    assert_almost_equal(cw.sum(), classes.shape)
-    assert_true(cw[0] < cw[1] < cw[2])
 
     cw = compute_class_weight("balanced", classes, y)
     # total effect of samples is preserved
@@ -35,11 +30,9 @@ def test_compute_class_weight_not_present():
     # Raise error when y does not contain all class labels
     classes = np.arange(4)
     y = np.asarray([0, 0, 0, 1, 1, 2])
-    assert_raises(ValueError, compute_class_weight, "auto", classes, y)
     assert_raises(ValueError, compute_class_weight, "balanced", classes, y)
     # Raise error when y has items not in classes
     classes = np.arange(2)
-    assert_raises(ValueError, compute_class_weight, "auto", classes, y)
     assert_raises(ValueError, compute_class_weight, "balanced", classes, y)
     assert_raises(ValueError, compute_class_weight, {0: 1., 1: 2.}, classes, y)
 
@@ -94,16 +87,11 @@ def test_compute_class_weight_invariance():
     assert_array_almost_equal(logreg.coef_, logreg0.coef_)
 
 
-def test_compute_class_weight_auto_negative():
+def test_compute_class_weight_balanced_negative():
     # Test compute_class_weight when labels are negative
     # Test with balanced class labels.
     classes = np.array([-2, -1, 0])
     y = np.asarray([-1, -1, 0, 0, -2, -2])
-    cw = assert_warns(DeprecationWarning, compute_class_weight, "auto",
-                      classes, y)
-    assert_almost_equal(cw.sum(), classes.shape)
-    assert_equal(len(cw), len(classes))
-    assert_array_almost_equal(cw, np.array([1., 1., 1.]))
 
     cw = compute_class_weight("balanced", classes, y)
     assert_equal(len(cw), len(classes))
@@ -111,11 +99,6 @@ def test_compute_class_weight_auto_negative():
 
     # Test with unbalanced class labels.
     y = np.asarray([-1, 0, 0, -2, -2, -2])
-    cw = assert_warns(DeprecationWarning, compute_class_weight, "auto",
-                      classes, y)
-    assert_almost_equal(cw.sum(), classes.shape)
-    assert_equal(len(cw), len(classes))
-    assert_array_almost_equal(cw, np.array([0.545, 1.636, 0.818]), decimal=3)
 
     cw = compute_class_weight("balanced", classes, y)
     assert_equal(len(cw), len(classes))
@@ -124,15 +107,10 @@ def test_compute_class_weight_auto_negative():
     assert_array_almost_equal(cw, [2. / 3, 2., 1.])
 
 
-def test_compute_class_weight_auto_unordered():
+def test_compute_class_weight_balanced_unordered():
     # Test compute_class_weight when classes are unordered
     classes = np.array([1, 0, 3])
     y = np.asarray([1, 0, 0, 3, 3, 3])
-    cw = assert_warns(DeprecationWarning, compute_class_weight, "auto",
-                      classes, y)
-    assert_almost_equal(cw.sum(), classes.shape)
-    assert_equal(len(cw), len(classes))
-    assert_array_almost_equal(cw, np.array([1.636, 0.818, 0.545]), decimal=3)
 
     cw = compute_class_weight("balanced", classes, y)
     class_counts = np.bincount(y)[classes]
@@ -144,9 +122,6 @@ def test_compute_sample_weight():
     # Test (and demo) compute_sample_weight.
     # Test with balanced classes
     y = np.asarray([1, 1, 1, 2, 2, 2])
-    sample_weight = assert_warns(DeprecationWarning,
-                                 compute_sample_weight, "auto", y)
-    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
     sample_weight = compute_sample_weight("balanced", y)
     assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
 
@@ -156,20 +131,14 @@ def test_compute_sample_weight():
 
     # Test with column vector of balanced classes
     y = np.asarray([[1], [1], [1], [2], [2], [2]])
-    sample_weight = assert_warns(DeprecationWarning,
-                                 compute_sample_weight, "auto", y)
-    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
     sample_weight = compute_sample_weight("balanced", y)
     assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
 
     # Test with unbalanced classes
     y = np.asarray([1, 1, 1, 2, 2, 2, 3])
-    sample_weight = assert_warns(DeprecationWarning,
-                                 compute_sample_weight, "auto", y)
-    expected_auto = np.asarray([.6, .6, .6, .6, .6, .6, 1.8])
-    assert_array_almost_equal(sample_weight, expected_auto)
     sample_weight = compute_sample_weight("balanced", y)
-    expected_balanced = np.array([0.7777, 0.7777, 0.7777, 0.7777, 0.7777, 0.7777, 2.3333])
+    expected_balanced = np.array([0.7777, 0.7777, 0.7777, 0.7777, 0.7777,
+                                  0.7777, 2.3333])
     assert_array_almost_equal(sample_weight, expected_balanced, decimal=4)
 
     # Test with `None` weights
@@ -178,9 +147,6 @@ def test_compute_sample_weight():
 
     # Test with multi-output of balanced classes
     y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
-    sample_weight = assert_warns(DeprecationWarning,
-                                 compute_sample_weight, "auto", y)
-    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
     sample_weight = compute_sample_weight("balanced", y)
     assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
 
@@ -191,9 +157,6 @@ def test_compute_sample_weight():
 
     # Test with multi-output of unbalanced classes
     y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [3, -1]])
-    sample_weight = assert_warns(DeprecationWarning,
-                                 compute_sample_weight, "auto", y)
-    assert_array_almost_equal(sample_weight, expected_auto ** 2)
     sample_weight = compute_sample_weight("balanced", y)
     assert_array_almost_equal(sample_weight, expected_balanced ** 2, decimal=3)
 
@@ -202,60 +165,38 @@ def test_compute_sample_weight_with_subsample():
     # Test compute_sample_weight with subsamples specified.
     # Test with balanced classes and all samples present
     y = np.asarray([1, 1, 1, 2, 2, 2])
-    sample_weight = assert_warns(DeprecationWarning,
-                                 compute_sample_weight, "auto", y)
-    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
     sample_weight = compute_sample_weight("balanced", y, range(6))
     assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
 
     # Test with column vector of balanced classes and all samples present
     y = np.asarray([[1], [1], [1], [2], [2], [2]])
-    sample_weight = assert_warns(DeprecationWarning,
-                                 compute_sample_weight, "auto", y)
-    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
     sample_weight = compute_sample_weight("balanced", y, range(6))
     assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
 
     # Test with a subsample
     y = np.asarray([1, 1, 1, 2, 2, 2])
-    sample_weight = assert_warns(DeprecationWarning,
-                                 compute_sample_weight, "auto", y, range(4))
-    assert_array_almost_equal(sample_weight, [.5, .5, .5, 1.5, 1.5, 1.5])
     sample_weight = compute_sample_weight("balanced", y, range(4))
     assert_array_almost_equal(sample_weight, [2. / 3, 2. / 3,
                                               2. / 3, 2., 2., 2.])
 
     # Test with a bootstrap subsample
     y = np.asarray([1, 1, 1, 2, 2, 2])
-    sample_weight = assert_warns(DeprecationWarning, compute_sample_weight,
-                                 "auto", y, [0, 1, 1, 2, 2, 3])
-    expected_auto = np.asarray([1 / 3., 1 / 3., 1 / 3., 5 / 3., 5 / 3., 5 / 3.])
-    assert_array_almost_equal(sample_weight, expected_auto)
     sample_weight = compute_sample_weight("balanced", y, [0, 1, 1, 2, 2, 3])
     expected_balanced = np.asarray([0.6, 0.6, 0.6, 3., 3., 3.])
     assert_array_almost_equal(sample_weight, expected_balanced)
 
     # Test with a bootstrap subsample for multi-output
     y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
-    sample_weight = assert_warns(DeprecationWarning, compute_sample_weight,
-                                 "auto", y, [0, 1, 1, 2, 2, 3])
-    assert_array_almost_equal(sample_weight, expected_auto ** 2)
     sample_weight = compute_sample_weight("balanced", y, [0, 1, 1, 2, 2, 3])
     assert_array_almost_equal(sample_weight, expected_balanced ** 2)
 
     # Test with a missing class
     y = np.asarray([1, 1, 1, 2, 2, 2, 3])
-    sample_weight = assert_warns(DeprecationWarning, compute_sample_weight,
-                                 "auto", y, range(6))
-    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
     sample_weight = compute_sample_weight("balanced", y, range(6))
     assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
 
     # Test with a missing class for multi-output
     y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [2, 2]])
-    sample_weight = assert_warns(DeprecationWarning, compute_sample_weight,
-                                 "auto", y, range(6))
-    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
     sample_weight = compute_sample_weight("balanced", y, range(6))
     assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
 
@@ -270,7 +211,7 @@ def test_compute_sample_weight_errors():
     assert_raises(ValueError, compute_sample_weight, "ni", y_)
     assert_raises(ValueError, compute_sample_weight, "ni", y_, range(4))
 
-    # Not "auto" for subsample
+    # Not "balanced" for subsample
     assert_raises(ValueError,
                   compute_sample_weight, {1: 2, 2: 1}, y, range(4))
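
For the subsample path, which now accepts only 'balanced', a worked
sketch using the expected values from
test_compute_sample_weight_with_subsample above (the first four samples
are counted, the resulting weights apply to all six):

    >>> import numpy as np
    >>> from sklearn.utils.class_weight import compute_sample_weight
    >>> y = np.array([1, 1, 1, 2, 2, 2])
    >>> np.round(compute_sample_weight('balanced', y, range(4)), 3)
    array([ 0.667,  0.667,  0.667,  2.   ,  2.   ,  2.   ])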
 
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 9c57ab4e96..752af7e03f 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -122,9 +122,6 @@ def test_check_array():
     X_csr = sp.csr_matrix(X)
     assert_raises(TypeError, check_array, X_csr)
     # ensure_2d
-    assert_warns(DeprecationWarning, check_array, [0, 1, 2])
-    X_array = check_array([0, 1, 2])
-    assert_equal(X_array.ndim, 2)
     X_array = check_array([0, 1, 2], ensure_2d=False)
     assert_equal(X_array.ndim, 1)
     # don't allow ndim > 3
@@ -338,12 +335,6 @@ def test_check_array_min_samples_and_features_messages():
     msg = "Singleton array array(42) cannot be considered a valid collection."
     assert_raise_message(TypeError, msg, check_array, 42, ensure_2d=False)
 
-    # But this works if the input data is forced to look like a 2 array with
-    # one sample and one feature:
-    X_checked = assert_warns(DeprecationWarning, check_array, [42],
-                             ensure_2d=True)
-    assert_array_equal(np.array([[42]]), X_checked)
-
     # Simulate a model that would need at least 2 samples to be well defined
     X = np.ones((1, 10))
     y = np.ones(1)
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index deb98eef85..1a4b493e02 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -16,34 +16,16 @@ import scipy.sparse as sp
 
 from ..externals import six
 from ..utils.fixes import signature
-from .deprecation import deprecated
-from ..exceptions import DataConversionWarning as _DataConversionWarning
-from ..exceptions import NonBLASDotWarning as _NonBLASDotWarning
-from ..exceptions import NotFittedError as _NotFittedError
+from ..exceptions import NonBLASDotWarning
+from ..exceptions import NotFittedError
+from ..exceptions import DataConversionWarning
 
 
-@deprecated("DataConversionWarning has been moved into the sklearn.exceptions"
-            " module. It will not be available here from version 0.19")
-class DataConversionWarning(_DataConversionWarning):
-    pass
-
-
-@deprecated("NonBLASDotWarning has been moved into the sklearn.exceptions"
-            " module. It will not be available here from version 0.19")
-class NonBLASDotWarning(_NonBLASDotWarning):
-    pass
-
-
-@deprecated("NotFittedError has been moved into the sklearn.exceptions module."
-            " It will not be available here from version 0.19")
-class NotFittedError(_NotFittedError):
-    pass
-
 FLOAT_DTYPES = (np.float64, np.float32, np.float16)
 
 # Silenced by default to reduce verbosity. Turn on at runtime for
 # performance profiling.
-warnings.simplefilter('ignore', _NonBLASDotWarning)
+warnings.simplefilter('ignore', NonBLASDotWarning)
 
 
 def _assert_all_finite(X):
@@ -311,7 +293,7 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None,
         Whether to raise an error on np.inf and np.nan in X.
 
     ensure_2d : boolean (default=True)
-        Whether to make X at least 2d.
+        Whether to raise a ValueError if X is not 2d.
 
     allow_nd : boolean (default=False)
         Whether to allow X.ndim > 2.
@@ -383,16 +365,10 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None,
 
         if ensure_2d:
             if array.ndim == 1:
-                if ensure_min_samples >= 2:
-                    raise ValueError("%s expects at least 2 samples provided "
-                                     "in a 2 dimensional array-like input"
-                                     % estimator_name)
-                warnings.warn(
-                    "Passing 1d arrays as data is deprecated in 0.17 and will "
-                    "raise ValueError in 0.19. Reshape your data either using "
+                raise ValueError(
+                    "Got X with X.ndim=1. Reshape your data either using "
                     "X.reshape(-1, 1) if your data has a single feature or "
-                    "X.reshape(1, -1) if it contains a single sample.",
-                    DeprecationWarning)
+                    "X.reshape(1, -1) if it contains a single sample.")
             array = np.atleast_2d(array)
             # To ensure that array flags are maintained
             array = np.array(array, dtype=dtype, order=order, copy=copy)
@@ -426,7 +402,7 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None,
     if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:
         msg = ("Data with input dtype %s was converted to %s%s."
                % (dtype_orig, array.dtype, context))
-        warnings.warn(msg, _DataConversionWarning)
+        warnings.warn(msg, DataConversionWarning)
     return array
 
 
@@ -556,7 +532,7 @@ def column_or_1d(y, warn=False):
             warnings.warn("A column-vector y was passed when a 1d array was"
                           " expected. Please change the shape of y to "
                           "(n_samples, ), for example using ravel().",
-                          _DataConversionWarning, stacklevel=2)
+                          DataConversionWarning, stacklevel=2)
         return np.ravel(y)
 
     raise ValueError("bad input shape {0}".format(shape))
@@ -686,8 +662,7 @@ def check_is_fitted(estimator, attributes, msg=None, all_or_any=all):
         attributes = [attributes]
 
     if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
-        # FIXME NotFittedError_ --> NotFittedError in 0.19
-        raise _NotFittedError(msg % {'name': type(estimator).__name__})
+        raise NotFittedError(msg % {'name': type(estimator).__name__})
 
 
 def check_non_negative(X, whom):
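
After this change check_array rejects 1-d input outright instead of
reshaping behind a DeprecationWarning; a minimal sketch of the new
contract:

    >>> import numpy as np
    >>> from sklearn.utils.validation import check_array
    >>> check_array(np.array([0, 1, 2]), ensure_2d=False).ndim
    1
    >>> check_array(np.array([[0], [1], [2]])).shape  # 1-d here would raise
    (3, 1)
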
-- 
GitLab