diff --git a/.travis.yml b/.travis.yml
index eaa843f94799e77d25f6f47c44ac3d7b0721c28c..472b79b34d0b2ca5d6b2619f3f6cb1d120c4812a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -32,22 +32,22 @@ matrix:
             - libatlas-dev
     # This environment tests the oldest supported anaconda env
     - env: DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false"
-           NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.4"
+           NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.5"
            COVERAGE=true
-    # This environment tests the newest supported Anaconda release (4.3.1)
+    # This environment tests the newest supported Anaconda release (4.4.0)
     # It also runs tests requiring Pandas.
-    - env: DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
-           NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
+    - env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true"
+           NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1"
            CYTHON_VERSION="0.25.2" COVERAGE=true
     # This environment uses pytest to run the tests. It uses the newest
-    # supported Anaconda release (4.3.1). It also runs tests requiring Pandas.
-    # - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6"
-    #        INSTALL_MKL="true" NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1"
-    #        PANDAS_VERSION="0.19.2" CYTHON_VERSION="0.25.2"
+    # supported Anaconda release (4.4.0). It also runs tests requiring Pandas.
+    # - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6.1"
+    #        INSTALL_MKL="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0"
+    #        PANDAS_VERSION="0.20.1" CYTHON_VERSION="0.25.2"
     # flake8 linting on diff wrt common ancestor with upstream/master
     - env: RUN_FLAKE8="true" SKIP_TESTS="true"
            DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
-           NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4"
+           NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.23.5"
     # This environment tests scikit-learn against numpy and scipy master
     # installed from their CI wheels in a virtualenv with the Python
     # interpreter provided by travis.
diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py
index 1e54b570c9d8a68d01e945570f321e2df82ef5cb..a1e0358e392a0b1513f4181a6ac937f87674920c 100644
--- a/benchmarks/bench_plot_nmf.py
+++ b/benchmarks/bench_plot_nmf.py
@@ -24,7 +24,7 @@ from sklearn.decomposition.nmf import _beta_divergence
 from sklearn.decomposition.nmf import INTEGER_TYPES, _check_init
 from sklearn.externals.joblib import Memory
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.utils.extmath import fast_dot, safe_sparse_dot, squared_norm
+from sklearn.utils.extmath import safe_sparse_dot, squared_norm
 from sklearn.utils import check_array
 from sklearn.utils.validation import check_is_fitted, check_non_negative
 
@@ -99,7 +99,7 @@ def _nls_subproblem(X, W, H, tol, max_iter, alpha=0., l1_ratio=0.,
     http://www.csie.ntu.edu.tw/~cjlin/nmf/
     """
     WtX = safe_sparse_dot(W.T, X)
-    WtW = fast_dot(W.T, W)
+    WtW = np.dot(W.T, W)
 
     # values justified in the paper (alpha is renamed gamma)
     gamma = 1
diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst
index 3abba5dd84c7ce29c298121a3f95010d0243ee68..692e7ca1f99a73a9edb563d16d8943a56fe53cd8 100644
--- a/doc/developers/performance.rst
+++ b/doc/developers/performance.rst
@@ -84,38 +84,6 @@ C/C++ generated files are embedded in distributed stable packages. The goal is
 to make it possible to install scikit-learn stable version
 on any machine with Python, Numpy, Scipy and C/C++ compiler.
 
-Fast matrix multiplications
-===========================
-
-Matrix multiplications (matrix-matrix and matrix-vector) are usually handled
-using the NumPy function ``np.dot``, but in versions of NumPy before 1.7.2
-this function is suboptimal when the inputs are not both in the C (row-major)
-layout; in that case, the inputs may be implicitly copied to obtain the right
-layout. This obviously consumes memory and takes time.
-
-The function ``fast_dot`` in ``sklearn.utils.extmath`` offers a fast
-replacement for ``np.dot`` that prevents copies from being made in some cases.
-In all other cases, it dispatches to ``np.dot`` and when the NumPy version is
-new enough, it is in fact an alias for that function, making it a drop-in
-replacement. Example usage of ``fast_dot``::
-
-  >>> import numpy as np
-  >>> from sklearn.utils.extmath import fast_dot
-  >>> X = np.random.random_sample([2, 10])
-  >>> np.allclose(np.dot(X, X.T), fast_dot(X, X.T))
-  True
-
-This function operates optimally on 2-dimensional arrays, both of the same
-dtype, which should be either single or double precision float. If these
-requirements aren't met or the BLAS package is not available, the call is
-silently dispatched to ``numpy.dot``. If you want to be sure when the original
-``numpy.dot`` has been invoked in a situation where it is suboptimal, you can
-activate the related warning::
-
-  >>> import warnings
-  >>> from sklearn.exceptions import NonBLASDotWarning
-  >>> warnings.simplefilter('always', NonBLASDotWarning) # doctest: +SKIP
-
 .. _profiling-python-code:
 
 Profiling Python code
@@ -425,4 +393,3 @@ A sample algorithmic trick: warm restarts for cross validation
 
 TODO: demonstrate the warm restart tricks for cross validation of linear
 regression with Coordinate Descent.
-
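The section removed above described ``fast_dot`` as a workaround for pre-1.7.2 ``np.dot``;
on the NumPy versions this changeset requires (>= 1.8.2), ``fast_dot`` already reduces to
``np.dot``, so plain ``np.dot`` is used everywhere instead. A minimal check in the style of
the removed example (illustrative only, not part of the patch)::

  >>> import numpy as np
  >>> X = np.random.random_sample([2, 10])
  >>> np.allclose(np.dot(X, X.T), np.dot(np.asfortranarray(X), X.T))
  True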
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 8d7521f3c29e8e07bdc0a65cd7bf0f6ab828dbc2..38febcaa86f666a56e7c73d3994a484425a20041 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -389,6 +389,37 @@ API changes summary
      has been renamed to ``n_components`` and will be removed in version 0.21.
      :issue:`8922` by :user:`Attractadore`
 
+   - SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions
+     for scikit-learn. The following backported functions in ``sklearn.utils``
+     have been removed or deprecated accordingly.
+     :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai <naoyak>`
+
+     Removed in 0.19:
+
+     - ``utils.fixes.argpartition``
+     - ``utils.fixes.array_equal``
+     - ``utils.fixes.astype``
+     - ``utils.fixes.bincount``
+     - ``utils.fixes.expit``
+     - ``utils.fixes.frombuffer_empty``
+     - ``utils.fixes.in1d``
+     - ``utils.fixes.norm``
+     - ``utils.fixes.rankdata``
+     - ``utils.fixes.safe_copy``
+
+     Deprecated in 0.19, to be removed in 0.21:
+     
+     - ``utils.arpack.eigs``
+     - ``utils.arpack.eigsh``
+     - ``utils.arpack.svds``
+     - ``utils.extmath.fast_dot``
+     - ``utils.extmath.logsumexp``
+     - ``utils.extmath.norm``
+     - ``utils.extmath.pinvh``
+     - ``utils.random.choice``
+     - ``utils.sparsetools.connected_components``
+     - ``utils.stats.rankdata``
+
 
 .. _changes_0_18_1:
 
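For reference, the removed and deprecated backports listed above map onto plain NumPy calls
on the now-required versions; the call sites in this changeset follow the pattern sketched
here (illustrative only)::

  >>> import numpy as np
  >>> np.bincount([0, 0, 2], minlength=4)         # replaces utils.fixes.bincount
  array([2, 0, 1, 0])
  >>> X = np.arange(6).reshape(2, 3)
  >>> np.dot(X, X.T).shape                        # replaces utils.extmath.fast_dot
  (2, 2)
  >>> X.astype(np.float64, copy=False).dtype      # replaces utils.fixes.astype
  dtype('float64')
  >>> rng = np.random.RandomState(0)
  >>> rng.choice(5, size=2, replace=False).shape  # replaces utils.random.choice
  (2,)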
diff --git a/sklearn/cluster/_k_means.pyx b/sklearn/cluster/_k_means.pyx
index a8f40ad9d349ec236f3cb1ca968208e72ef2cc5e..cdaa31fcb78ef86d18a00fbd973ec176e37ced1d 100644
--- a/sklearn/cluster/_k_means.pyx
+++ b/sklearn/cluster/_k_means.pyx
@@ -16,7 +16,6 @@ cimport cython
 from cython cimport floating
 
 from sklearn.utils.sparsefuncs_fast import assign_rows_csr
-from sklearn.utils.fixes import bincount
 
 ctypedef np.float64_t DOUBLE
 ctypedef np.int32_t INT
@@ -307,7 +306,7 @@ def _centers_dense(np.ndarray[floating, ndim=2] X,
     else:
         centers = np.zeros((n_clusters, n_features), dtype=np.float64)
 
-    n_samples_in_cluster = bincount(labels, minlength=n_clusters)
+    n_samples_in_cluster = np.bincount(labels, minlength=n_clusters)
     empty_clusters = np.where(n_samples_in_cluster == 0)[0]
     # maybe also relocate small clusters?
 
@@ -367,7 +366,7 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
     cdef np.ndarray[floating, ndim=2, mode="c"] centers
     cdef np.ndarray[np.npy_intp, ndim=1] far_from_centers
     cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] n_samples_in_cluster = \
-        bincount(labels, minlength=n_clusters)
+        np.bincount(labels, minlength=n_clusters)
     cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] empty_clusters = \
         np.where(n_samples_in_cluster == 0)[0]
     cdef int n_empty_clusters = empty_clusters.shape[0]
diff --git a/sklearn/cluster/_k_means_elkan.pyx b/sklearn/cluster/_k_means_elkan.pyx
index f662402feb85044e0cc54cf5a5d196c822b318e1..804ebabc5450a68ebbc8f8ac2612f664019c4fff 100644
--- a/sklearn/cluster/_k_means_elkan.pyx
+++ b/sklearn/cluster/_k_means_elkan.pyx
@@ -16,7 +16,6 @@ from libc.math cimport sqrt
 
 from ..metrics import euclidean_distances
 from ._k_means import _centers_dense
-from ..utils.fixes import partition
 
 
 cdef floating euclidian_dist(floating* a, floating* b, int n_features) nogil:
@@ -169,7 +168,7 @@ def k_means_elkan(np.ndarray[floating, ndim=2, mode='c'] X_, int n_clusters,
             print("start iteration")
 
         cd =  np.asarray(center_half_distances)
-        distance_next_center = partition(cd, kth=1, axis=0)[1]
+        distance_next_center = np.partition(cd, kth=1, axis=0)[1]
 
         if verbose:
             print("done sorting")
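In the Elkan k-means update above, ``np.partition`` takes over from the ``partition`` backport:
with ``kth=1`` along ``axis=0``, row 1 of the result holds the second-smallest entry of each
column, which in this code is the half-distance from each center to its nearest other center
(the diagonal zeros being the per-column minima). A small illustration with a made-up matrix::

  >>> import numpy as np
  >>> cd = np.array([[0, 2, 5],
  ...                [2, 0, 3],
  ...                [5, 3, 0]])
  >>> np.partition(cd, kth=1, axis=0)[1]
  array([2, 2, 3])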
diff --git a/sklearn/cluster/dbscan_.py b/sklearn/cluster/dbscan_.py
index 2f3374027d7c23c4b4122adb0f4c6136541c1d92..6c7bba5af9f8c9385370413bc4872c1d481cd727 100644
--- a/sklearn/cluster/dbscan_.py
+++ b/sklearn/cluster/dbscan_.py
@@ -14,7 +14,6 @@ from scipy import sparse
 
 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_array, check_consistent_length
-from ..utils.fixes import astype
 from ..neighbors import NearestNeighbors
 
 from ._dbscan_inner import dbscan_inner
@@ -123,7 +122,7 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
         neighborhoods = np.empty(X.shape[0], dtype=object)
         X.sum_duplicates()  # XXX: modifies X's internals in-place
         X_mask = X.data <= eps
-        masked_indices = astype(X.indices, np.intp, copy=False)[X_mask]
+        masked_indices = X.indices.astype(np.intp, copy=False)[X_mask]
         masked_indptr = np.concatenate(([0], np.cumsum(X_mask)))[X.indptr[1:]]
 
         # insert the diagonal: a point is its own neighbor, but 0 distance
diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py
index 680edc2672a71e581671d693e8a773ed7de8a2f0..d1b9f264fe759e9387a14dbe394019620dfec36f 100644
--- a/sklearn/cluster/k_means_.py
+++ b/sklearn/cluster/k_means_.py
@@ -22,14 +22,12 @@ from ..metrics.pairwise import pairwise_distances_argmin_min
 from ..utils.extmath import row_norms, squared_norm, stable_cumsum
 from ..utils.sparsefuncs_fast import assign_rows_csr
 from ..utils.sparsefuncs import mean_variance_axis
-from ..utils.fixes import astype
 from ..utils import check_array
 from ..utils import check_random_state
 from ..utils import as_float_array
 from ..utils import gen_batches
 from ..utils.validation import check_is_fitted
 from ..utils.validation import FLOAT_DTYPES
-from ..utils.random import choice
 from ..externals.joblib import Parallel
 from ..externals.joblib import delayed
 from ..externals.six import string_types
@@ -1062,16 +1060,15 @@ def _mini_batch_step(X, x_squared_norms, centers, counts,
         n_reassigns = to_reassign.sum()
         if n_reassigns:
             # Pick new clusters amongst observations with uniform probability
-            new_centers = choice(X.shape[0], replace=False, size=n_reassigns,
-                                 random_state=random_state)
+            new_centers = random_state.choice(X.shape[0], replace=False,
+                                              size=n_reassigns)
             if verbose:
                 print("[MiniBatchKMeans] Reassigning %i cluster centers."
                       % n_reassigns)
 
             if sp.issparse(X) and not sp.issparse(centers):
-                assign_rows_csr(X,
-                                astype(new_centers, np.intp),
-                                astype(np.where(to_reassign)[0], np.intp),
+                assign_rows_csr(X, new_centers.astype(np.intp),
+                                np.where(to_reassign)[0].astype(np.intp),
                                 centers)
             else:
                 centers[to_reassign] = X[new_centers]
diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py
index d56845637fc48b1983366f3a5b204f2d9bf96efa..7646459da3936b2535931d50741aff70a381c71c 100644
--- a/sklearn/cross_validation.py
+++ b/sklearn/cross_validation.py
@@ -26,12 +26,10 @@ from .utils import indexable, check_random_state, safe_indexing
 from .utils.validation import (_is_arraylike, _num_samples,
                                column_or_1d)
 from .utils.multiclass import type_of_target
-from .utils.random import choice
 from .externals.joblib import Parallel, delayed, logger
 from .externals.six import with_metaclass
 from .externals.six.moves import zip
 from .metrics.scorer import check_scoring
-from .utils.fixes import bincount
 from .gaussian_process.kernels import Kernel as GPKernel
 from .exceptions import FitFailedWarning
 
@@ -541,7 +539,7 @@ class StratifiedKFold(_BaseKFold):
         y = np.asarray(y)
         n_samples = y.shape[0]
         unique_labels, y_inversed = np.unique(y, return_inverse=True)
-        label_counts = bincount(y_inversed)
+        label_counts = np.bincount(y_inversed)
         min_labels = np.min(label_counts)
         if np.all(self.n_folds > label_counts):
             raise ValueError("All the n_labels for individual classes"
@@ -990,7 +988,7 @@ def _approximate_mode(class_counts, n_draws, rng):
             # if we need to add more, we add them all and
             # go to the next value
             add_now = min(len(inds), need_to_add)
-            inds = choice(inds, size=add_now, replace=False, random_state=rng)
+            inds = rng.choice(inds, size=add_now, replace=False)
             floored[inds] += 1
             need_to_add -= add_now
             if need_to_add == 0:
@@ -1072,7 +1070,7 @@ class StratifiedShuffleSplit(BaseShuffleSplit):
         self.classes, self.y_indices = np.unique(y, return_inverse=True)
         n_cls = self.classes.shape[0]
 
-        if np.min(bincount(self.y_indices)) < 2:
+        if np.min(np.bincount(self.y_indices)) < 2:
             raise ValueError("The least populated class in y has only 1"
                              " member, which is too few. The minimum"
                              " number of labels for any class cannot"
@@ -1089,7 +1087,7 @@ class StratifiedShuffleSplit(BaseShuffleSplit):
 
     def _iter_indices(self):
         rng = check_random_state(self.random_state)
-        cls_count = bincount(self.y_indices)
+        cls_count = np.bincount(self.y_indices)
 
         for n in range(self.n_iter):
             # if there are ties in the class-counts, we want
diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py
index 7a4543aa2068a4f99fc1864ad7d34b86f95bfc2c..3ba9dfd4878689a150a85918ea96ff73a71efccc 100644
--- a/sklearn/datasets/samples_generator.py
+++ b/sklearn/datasets/samples_generator.py
@@ -15,7 +15,6 @@ import scipy.sparse as sp
 from ..preprocessing import MultiLabelBinarizer
 from ..utils import check_array, check_random_state
 from ..utils import shuffle as util_shuffle
-from ..utils.fixes import astype
 from ..utils.random import sample_without_replacement
 from ..externals import six
 map = six.moves.map
@@ -28,9 +27,9 @@ def _generate_hypercube(samples, dimensions, rng):
     if dimensions > 30:
         return np.hstack([_generate_hypercube(samples, dimensions - 30, rng),
                           _generate_hypercube(samples, 30, rng)])
-    out = astype(sample_without_replacement(2 ** dimensions, samples,
-                                            random_state=rng),
-                 dtype='>u4', copy=False)
+    out = sample_without_replacement(2 ** dimensions, samples,
+                                     random_state=rng).astype(dtype='>u4',
+                                                              copy=False)
     out = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:]
     return out
 
diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py
index 52e81da086f627d985a15e5a34dbb1eeae6ad666..a567e2091e1abf6f13d662b8e85e9fadb46bb372 100644
--- a/sklearn/datasets/svmlight_format.py
+++ b/sklearn/datasets/svmlight_format.py
@@ -28,7 +28,6 @@ from ..externals import six
 from ..externals.six import u, b
 from ..externals.six.moves import range, zip
 from ..utils import check_array
-from ..utils.fixes import frombuffer_empty
 
 
 def load_svmlight_file(f, n_features=None, dtype=np.float64,
@@ -162,11 +161,11 @@ def _open_and_load(f, dtype, multilabel, zero_based, query_id):
 
     # convert from array.array, give data the right dtype
     if not multilabel:
-        labels = frombuffer_empty(labels, np.float64)
-    data = frombuffer_empty(data, actual_dtype)
-    indices = frombuffer_empty(ind, np.intc)
+        labels = np.frombuffer(labels, np.float64)
+    data = np.frombuffer(data, actual_dtype)
+    indices = np.frombuffer(ind, np.intc)
     indptr = np.frombuffer(indptr, dtype=np.intc)   # never empty
-    query = frombuffer_empty(query, np.int64)
+    query = np.frombuffer(query, np.int64)
 
     data = np.asarray(data, dtype=dtype)    # no-op for float{32,64}
     return data, indices, indptr, labels, query
diff --git a/sklearn/decomposition/base.py b/sklearn/decomposition/base.py
index 26d6b3ea7283b06dd5647df1f4ac1ff45495c74d..fd6ab3bb7c9fd9712c5d966d618e49b5b4308f01 100644
--- a/sklearn/decomposition/base.py
+++ b/sklearn/decomposition/base.py
@@ -13,7 +13,6 @@ from scipy import linalg
 
 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_array
-from ..utils.extmath import fast_dot
 from ..utils.validation import check_is_fitted
 from ..externals import six
 from abc import ABCMeta, abstractmethod
@@ -130,7 +129,7 @@ class _BasePCA(six.with_metaclass(ABCMeta, BaseEstimator, TransformerMixin)):
         X = check_array(X)
         if self.mean_ is not None:
             X = X - self.mean_
-        X_transformed = fast_dot(X, self.components_.T)
+        X_transformed = np.dot(X, self.components_.T)
         if self.whiten:
             X_transformed /= np.sqrt(self.explained_variance_)
         return X_transformed
@@ -156,7 +155,7 @@ class _BasePCA(six.with_metaclass(ABCMeta, BaseEstimator, TransformerMixin)):
         exact inverse operation, which includes reversing whitening.
         """
         if self.whiten:
-            return fast_dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
-                            self.components_) + self.mean_
+            return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
+                          self.components_) + self.mean_
         else:
-            return fast_dot(X, self.components_) + self.mean_
+            return np.dot(X, self.components_) + self.mean_
diff --git a/sklearn/decomposition/factor_analysis.py b/sklearn/decomposition/factor_analysis.py
index 3326ac197b3af9362b538914a94cb22d0c7ba02f..4440ee90bd84a1b778741131444e893af9441602 100644
--- a/sklearn/decomposition/factor_analysis.py
+++ b/sklearn/decomposition/factor_analysis.py
@@ -28,7 +28,7 @@ from scipy import linalg
 from ..base import BaseEstimator, TransformerMixin
 from ..externals.six.moves import xrange
 from ..utils import check_array, check_random_state
-from ..utils.extmath import fast_logdet, fast_dot, randomized_svd, squared_norm
+from ..utils.extmath import fast_logdet, randomized_svd, squared_norm
 from ..utils.validation import check_is_fitted
 from ..exceptions import ConvergenceWarning
 
@@ -256,8 +256,8 @@ class FactorAnalysis(BaseEstimator, TransformerMixin):
 
         Wpsi = self.components_ / self.noise_variance_
         cov_z = linalg.inv(Ih + np.dot(Wpsi, self.components_.T))
-        tmp = fast_dot(X_transformed, Wpsi.T)
-        X_transformed = fast_dot(tmp, cov_z)
+        tmp = np.dot(X_transformed, Wpsi.T)
+        X_transformed = np.dot(tmp, cov_z)
 
         return X_transformed
 
diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py
index 3cca0b7d6e89ce9b374f2d090554f1e52128bdfd..55f4c38cbe4c8f311193c821a60ba08fb0fc5a9d 100644
--- a/sklearn/decomposition/fastica_.py
+++ b/sklearn/decomposition/fastica_.py
@@ -16,7 +16,6 @@ from ..base import BaseEstimator, TransformerMixin
 from ..externals import six
 from ..externals.six import moves
 from ..utils import check_array, as_float_array, check_random_state
-from ..utils.extmath import fast_dot
 from ..utils.validation import check_is_fitted
 from ..utils.validation import FLOAT_DTYPES
 
@@ -74,7 +73,7 @@ def _ica_def(X, tol, g, fun_args, max_iter, w_init):
         w /= np.sqrt((w ** 2).sum())
 
         for i in moves.xrange(max_iter):
-            gwtx, g_wtx = g(fast_dot(w.T, X), fun_args)
+            gwtx, g_wtx = g(np.dot(w.T, X), fun_args)
 
             w1 = (X * gwtx).mean(axis=1) - g_wtx.mean() * w
 
@@ -103,12 +102,12 @@ def _ica_par(X, tol, g, fun_args, max_iter, w_init):
     del w_init
     p_ = float(X.shape[1])
     for ii in moves.xrange(max_iter):
-        gwtx, g_wtx = g(fast_dot(W, X), fun_args)
-        W1 = _sym_decorrelation(fast_dot(gwtx, X.T) / p_
+        gwtx, g_wtx = g(np.dot(W, X), fun_args)
+        W1 = _sym_decorrelation(np.dot(gwtx, X.T) / p_
                                 - g_wtx[:, np.newaxis] * W)
         del gwtx, g_wtx
         # builtin max, abs are faster than numpy counter parts.
-        lim = max(abs(abs(np.diag(fast_dot(W1, W.T))) - 1))
+        lim = max(abs(abs(np.diag(np.dot(W1, W.T))) - 1))
         W = W1
         if lim < tol:
             break
@@ -345,7 +344,7 @@ def fastica(X, n_components=None, algorithm="parallel", whiten=True,
 
     if whiten:
         if compute_sources:
-            S = fast_dot(fast_dot(W, K), X).T
+            S = np.dot(np.dot(W, K), X).T
         else:
             S = None
         if return_X_mean:
@@ -361,7 +360,7 @@ def fastica(X, n_components=None, algorithm="parallel", whiten=True,
 
     else:
         if compute_sources:
-            S = fast_dot(W, X).T
+            S = np.dot(W, X).T
         else:
             S = None
         if return_X_mean:
@@ -551,7 +550,7 @@ class FastICA(BaseEstimator, TransformerMixin):
         if self.whiten:
             X -= self.mean_
 
-        return fast_dot(X, self.components_.T)
+        return np.dot(X, self.components_.T)
 
     def inverse_transform(self, X, copy=True):
         """Transform the sources back to the mixed data (apply mixing matrix).
@@ -571,7 +570,7 @@ class FastICA(BaseEstimator, TransformerMixin):
         check_is_fitted(self, 'mixing_')
 
         X = check_array(X, copy=(copy and self.whiten), dtype=FLOAT_DTYPES)
-        X = fast_dot(X, self.mixing_.T)
+        X = np.dot(X, self.mixing_.T)
         if self.whiten:
             X += self.mean_
 
diff --git a/sklearn/decomposition/nmf.py b/sklearn/decomposition/nmf.py
index 7623723125e969b9fe628066ad45ca8cdd20e926..72a52f802accba32da8b6a081937137dbfaf01e2 100644
--- a/sklearn/decomposition/nmf.py
+++ b/sklearn/decomposition/nmf.py
@@ -20,7 +20,7 @@ import scipy.sparse as sp
 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_random_state, check_array
 from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
-from ..utils.extmath import fast_dot, safe_min
+from ..utils.extmath import safe_min
 from ..utils.validation import check_is_fitted, check_non_negative
 from ..exceptions import ConvergenceWarning
 from .cdnmf_fast import _update_cdnmf_fast
@@ -109,7 +109,7 @@ def _beta_divergence(X, W, H, beta, square_root=False):
         WH_data = _special_sparse_dot(W, H, X).data
         X_data = X.data
     else:
-        WH = fast_dot(W, H)
+        WH = np.dot(W, H)
         WH_data = WH.ravel()
         X_data = X.ravel()
 
@@ -142,7 +142,7 @@ def _beta_divergence(X, W, H, beta, square_root=False):
             # np.sum(np.dot(W, H) ** beta)
             sum_WH_beta = 0
             for i in range(X.shape[1]):
-                sum_WH_beta += np.sum(fast_dot(W, H[:, i]) ** beta)
+                sum_WH_beta += np.sum(np.dot(W, H[:, i]) ** beta)
 
         else:
             sum_WH_beta = np.sum(WH ** beta)
@@ -166,7 +166,7 @@ def _special_sparse_dot(W, H, X):
         WH = sp.coo_matrix((dot_vals, (ii, jj)), shape=X.shape)
         return WH.tocsr()
     else:
-        return fast_dot(W, H)
+        return np.dot(W, H)
 
 
 def _compute_regularization(alpha, l1_ratio, regularization):
@@ -379,7 +379,7 @@ def _update_coordinate_descent(X, W, Ht, l1_reg, l2_reg, shuffle,
     """
     n_components = Ht.shape[1]
 
-    HHt = fast_dot(Ht.T, Ht)
+    HHt = np.dot(Ht.T, Ht)
     XHt = safe_sparse_dot(X, Ht)
 
     # L2 regularization corresponds to increase of the diagonal of HHt
@@ -521,8 +521,8 @@ def _multiplicative_update_w(X, W, H, beta_loss, l1_reg_W, l2_reg_W, gamma,
 
         # Denominator
         if HHt is None:
-            HHt = fast_dot(H, H.T)
-        denominator = fast_dot(W, HHt)
+            HHt = np.dot(H, H.T)
+        denominator = np.dot(W, HHt)
 
     else:
         # Numerator
@@ -566,14 +566,14 @@ def _multiplicative_update_w(X, W, H, beta_loss, l1_reg_W, l2_reg_W, gamma,
                 # (compute row by row, avoiding the dense matrix WH)
                 WHHt = np.empty(W.shape)
                 for i in range(X.shape[0]):
-                    WHi = fast_dot(W[i, :], H)
+                    WHi = np.dot(W[i, :], H)
                     if beta_loss - 1 < 0:
                         WHi[WHi == 0] = EPSILON
                     WHi **= beta_loss - 1
-                    WHHt[i, :] = fast_dot(WHi, H.T)
+                    WHHt[i, :] = np.dot(WHi, H.T)
             else:
                 WH **= beta_loss - 1
-                WHHt = fast_dot(WH, H.T)
+                WHHt = np.dot(WH, H.T)
             denominator = WHHt
 
     # Add L1 and L2 regularization
@@ -597,7 +597,7 @@ def _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma):
     """update H in Multiplicative Update NMF"""
     if beta_loss == 2:
         numerator = safe_sparse_dot(W.T, X)
-        denominator = fast_dot(fast_dot(W.T, W), H)
+        denominator = np.dot(np.dot(W.T, W), H)
 
     else:
         # Numerator
@@ -641,14 +641,14 @@ def _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma):
                 # (compute column by column, avoiding the dense matrix WH)
                 WtWH = np.empty(H.shape)
                 for i in range(X.shape[1]):
-                    WHi = fast_dot(W, H[:, i])
+                    WHi = np.dot(W, H[:, i])
                     if beta_loss - 1 < 0:
                         WHi[WHi == 0] = EPSILON
                     WHi **= beta_loss - 1
-                    WtWH[:, i] = fast_dot(W.T, WHi)
+                    WtWH[:, i] = np.dot(W.T, WHi)
             else:
                 WH **= beta_loss - 1
-                WtWH = fast_dot(W.T, WH)
+                WtWH = np.dot(W.T, WH)
             denominator = WtWH
 
     # Add L1 and L2 regularization
diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 852a0c42cf7cc12a03bbdb2a250181f65e518551..89b9f67ee437c09e7de5df2f0319c0bc864283cf 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -25,7 +25,7 @@ from ..base import BaseEstimator, TransformerMixin
 from ..utils import deprecated
 from ..utils import check_random_state, as_float_array
 from ..utils import check_array
-from ..utils.extmath import fast_dot, fast_logdet, randomized_svd, svd_flip
+from ..utils.extmath import fast_logdet, randomized_svd, svd_flip
 from ..utils.extmath import stable_cumsum
 from ..utils.validation import check_is_fitted
 
@@ -749,7 +749,7 @@ class RandomizedPCA(BaseEstimator, TransformerMixin):
         if self.mean_ is not None:
             X = X - self.mean_
 
-        X = fast_dot(X, self.components_.T)
+        X = np.dot(X, self.components_.T)
         return X
 
     def fit_transform(self, X, y=None):
@@ -768,7 +768,7 @@ class RandomizedPCA(BaseEstimator, TransformerMixin):
         """
         X = check_array(X)
         X = self._fit(X)
-        return fast_dot(X, self.components_.T)
+        return np.dot(X, self.components_.T)
 
     def inverse_transform(self, X, y=None):
         """Transform data back to its original space.
@@ -792,7 +792,7 @@ class RandomizedPCA(BaseEstimator, TransformerMixin):
         """
         check_is_fitted(self, 'mean_')
 
-        X_original = fast_dot(X, self.components_)
+        X_original = np.dot(X, self.components_)
         if self.mean_ is not None:
             X_original = X_original + self.mean_
         return X_original
diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py
index 6254c147d45a532b9a969783a5709f989a2139a9..3ce53b550cb0e0b90864840c8e3709ca869ca2f3 100644
--- a/sklearn/decomposition/tests/test_nmf.py
+++ b/sklearn/decomposition/tests/test_nmf.py
@@ -16,7 +16,7 @@ from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_less
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import ignore_warnings
-from sklearn.utils.extmath import squared_norm, fast_dot
+from sklearn.utils.extmath import squared_norm
 from sklearn.base import clone
 from sklearn.exceptions import ConvergenceWarning
 
@@ -241,7 +241,7 @@ def _beta_divergence_dense(X, W, H, beta):
         H = np.array([[H]])
         X = np.array([[X]])
 
-    WH = fast_dot(W, H)
+    WH = np.dot(W, H)
 
     if beta == 2:
         return squared_norm(X - WH) / 2
diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py
index bc7b1a7945c816caf3308dfa1f17c5211d9204d1..a646e9e6ba0a53c68fd70f7f5a2fc715fec49f80 100644
--- a/sklearn/discriminant_analysis.py
+++ b/sklearn/discriminant_analysis.py
@@ -23,7 +23,6 @@ from .covariance import ledoit_wolf, empirical_covariance, shrunk_covariance
 from .utils.multiclass import unique_labels
 from .utils import check_array, check_X_y
 from .utils.validation import check_is_fitted
-from .utils.fixes import bincount
 from .utils.multiclass import check_classification_targets
 from .preprocessing import StandardScaler
 
@@ -337,8 +336,7 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin,
         self.explained_variance_ratio_ = np.sort(evals / np.sum(evals)
                                                  )[::-1][:self._max_components]
         evecs = evecs[:, np.argsort(evals)[::-1]]  # sort eigenvectors
-        # evecs /= np.linalg.norm(evecs, axis=0)  # doesn't work with numpy 1.6
-        evecs /= np.apply_along_axis(np.linalg.norm, 0, evecs)
+        evecs /= np.linalg.norm(evecs, axis=0)
 
         self.scalings_ = evecs
         self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)
@@ -431,7 +429,7 @@ class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin,
 
         if self.priors is None:  # estimate priors from sample
             _, y_t = np.unique(y, return_inverse=True)  # non-negative ints
-            self.priors_ = bincount(y_t) / float(len(y))
+            self.priors_ = np.bincount(y_t) / float(len(y))
         else:
             self.priors_ = np.asarray(self.priors)
 
@@ -642,7 +640,7 @@ class QuadraticDiscriminantAnalysis(BaseEstimator, ClassifierMixin):
         if n_classes < 2:
             raise ValueError('y has less than 2 classes')
         if self.priors is None:
-            self.priors_ = bincount(y) / float(n_samples)
+            self.priors_ = np.bincount(y) / float(n_samples)
         else:
             self.priors_ = self.priors
 
diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py
index 711d089d057d957a34795381551dbddbb7702dd4..243d8984393152c183a7db5cc1de08be4aad355f 100644
--- a/sklearn/ensemble/bagging.py
+++ b/sklearn/ensemble/bagging.py
@@ -21,7 +21,6 @@ from ..utils import check_random_state, check_X_y, check_array, column_or_1d
 from ..utils.random import sample_without_replacement
 from ..utils.validation import has_fit_parameter, check_is_fitted
 from ..utils import indices_to_mask, check_consistent_length
-from ..utils.fixes import bincount
 from ..utils.metaestimators import if_delegate_has_method
 from ..utils.multiclass import check_classification_targets
 
@@ -104,7 +103,7 @@ def _parallel_build_estimators(n_estimators, ensemble, X, y, sample_weight,
                 curr_sample_weight = sample_weight.copy()
 
             if bootstrap:
-                sample_counts = bincount(indices, minlength=n_samples)
+                sample_counts = np.bincount(indices, minlength=n_samples)
                 curr_sample_weight *= sample_counts
             else:
                 not_indices_mask = ~indices_to_mask(indices, n_samples)
diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py
index 340df896736467d761d75cc6a5a54e43228695f1..60732bf83a446a2b03b4aaa80ba092605ae965dd 100644
--- a/sklearn/ensemble/forest.py
+++ b/sklearn/ensemble/forest.py
@@ -61,7 +61,7 @@ from ..tree._tree import DTYPE, DOUBLE
 from ..utils import check_random_state, check_array, compute_sample_weight
 from ..exceptions import DataConversionWarning, NotFittedError
 from .base import BaseEnsemble, _partition_estimators
-from ..utils.fixes import bincount, parallel_helper
+from ..utils.fixes import parallel_helper
 from ..utils.multiclass import check_classification_targets
 from ..utils.validation import check_is_fitted
 
@@ -85,7 +85,7 @@ def _generate_sample_indices(random_state, n_samples):
 def _generate_unsampled_indices(random_state, n_samples):
     """Private function used to forest._set_oob_score function."""
     sample_indices = _generate_sample_indices(random_state, n_samples)
-    sample_counts = bincount(sample_indices, minlength=n_samples)
+    sample_counts = np.bincount(sample_indices, minlength=n_samples)
     unsampled_mask = sample_counts == 0
     indices_range = np.arange(n_samples)
     unsampled_indices = indices_range[unsampled_mask]
@@ -107,7 +107,7 @@ def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,
             curr_sample_weight = sample_weight.copy()
 
         indices = _generate_sample_indices(tree.random_state, n_samples)
-        sample_counts = bincount(indices, minlength=n_samples)
+        sample_counts = np.bincount(indices, minlength=n_samples)
         curr_sample_weight *= sample_counts
 
         if class_weight == 'subsample':
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 8c16ccf78ffa6e42da742b3d6fb481c9e525b12a..779cf2d3b543cd72c5cdde6a730bfb8e402543e3 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -56,7 +56,6 @@ from ..utils import check_array
 from ..utils import check_X_y
 from ..utils import column_or_1d
 from ..utils import check_consistent_length
-from ..utils.fixes import bincount
 from ..utils import deprecated
 from ..utils.stats import _weighted_percentile
 from ..utils.validation import check_is_fitted
@@ -139,7 +138,7 @@ class PriorProbabilityEstimator(object):
     def fit(self, X, y, sample_weight=None):
         if sample_weight is None:
             sample_weight = np.ones_like(y, dtype=np.float64)
-        class_counts = bincount(y, weights=sample_weight)
+        class_counts = np.bincount(y, weights=sample_weight)
         self.priors = class_counts / class_counts.sum()
 
     def predict(self, X):
diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py
index 63f81e5b5550d5ba79f79879c1290e09a916d225..b964ed768d85e2e1570e39c44f9e415d6b859457 100644
--- a/sklearn/ensemble/tests/test_forest.py
+++ b/sklearn/ensemble/tests/test_forest.py
@@ -41,7 +41,6 @@ from sklearn.ensemble import RandomForestRegressor
 from sklearn.ensemble import RandomTreesEmbedding
 from sklearn.model_selection import GridSearchCV
 from sklearn.svm import LinearSVC
-from sklearn.utils.fixes import bincount
 from sklearn.utils.validation import check_random_state
 
 from sklearn.tree.tree import SPARSE_SPLITTERS
@@ -255,7 +254,7 @@ def test_importances_asymptotic():
         n_samples = len(samples)
         entropy = 0.
 
-        for count in bincount(samples):
+        for count in np.bincount(samples):
             p = 1. * count / n_samples
             if p > 0:
                 entropy -= p * np.log2(p)
@@ -735,7 +734,7 @@ def check_min_samples_leaf(name):
     est = ForestEstimator(min_samples_leaf=5, n_estimators=1, random_state=0)
     est.fit(X, y)
     out = est.estimators_[0].tree_.apply(X)
-    node_counts = bincount(out)
+    node_counts = np.bincount(out)
     # drop inner nodes
     leaf_count = node_counts[node_counts != 0]
     assert_greater(np.min(leaf_count), 4,
@@ -777,7 +776,7 @@ def check_min_weight_fraction_leaf(name):
 
         est.fit(X, y, sample_weight=weights)
         out = est.estimators_[0].tree_.apply(X)
-        node_weights = bincount(out, weights=weights)
+        node_weights = np.bincount(out, weights=weights)
         # drop inner nodes
         leaf_weights = node_weights[node_weights != 0]
         assert_greater_equal(
diff --git a/sklearn/feature_extraction/_hashing.pyx b/sklearn/feature_extraction/_hashing.pyx
index 39c2b10378132246d29cf10d19c16d8b46809797..e0c1d1bdaece52b9b01d22d3ee45c70774f139f3 100644
--- a/sklearn/feature_extraction/_hashing.pyx
+++ b/sklearn/feature_extraction/_hashing.pyx
@@ -76,8 +76,5 @@ def transform(raw_X, Py_ssize_t n_features, dtype):
         array.resize_smart(indptr, len(indptr) + 1)
         indptr[len(indptr) - 1] = size
 
-    if len(indices):
-        indices_a = np.frombuffer(indices, dtype=np.int32)
-    else:       # workaround for NumPy < 1.7.0
-        indices_a = np.empty(0, dtype=np.int32)
+    indices_a = np.frombuffer(indices, dtype=np.int32)
     return (indices_a, np.frombuffer(indptr, dtype=np.int32), values[:size])
diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py
index 66390d7a2c96349d1437bdfaca9cc67e9454330c..53804ed83ac455c11c8696905d57b1f40bcb8656 100644
--- a/sklearn/feature_extraction/dict_vectorizer.py
+++ b/sklearn/feature_extraction/dict_vectorizer.py
@@ -13,7 +13,6 @@ from ..base import BaseEstimator, TransformerMixin
 from ..externals import six
 from ..externals.six.moves import xrange
 from ..utils import check_array, tosequence
-from ..utils.fixes import frombuffer_empty
 
 
 def _tosequence(X):
@@ -183,7 +182,7 @@ class DictVectorizer(BaseEstimator, TransformerMixin):
         if len(indptr) == 1:
             raise ValueError("Sample sequence X is empty.")
 
-        indices = frombuffer_empty(indices, dtype=np.intc)
+        indices = np.frombuffer(indices, dtype=np.intc)
         indptr = np.frombuffer(indptr, dtype=np.intc)
         shape = (len(indptr) - 1, len(vocab))
 
diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py
index 708424cb3f84367fbc8c9178b8734fcc722e7f8e..37e1a7e3465e9f138a1ee54141db056506db9649 100644
--- a/sklearn/feature_extraction/image.py
+++ b/sklearn/feature_extraction/image.py
@@ -16,7 +16,6 @@ from scipy import sparse
 from numpy.lib.stride_tricks import as_strided
 
 from ..utils import check_array, check_random_state
-from ..utils.fixes import astype
 from ..base import BaseEstimator
 
 __all__ = ['PatchExtractor',
@@ -108,7 +107,7 @@ def _to_graph(n_x, n_y, n_z, mask=None, img=None,
         n_voxels = diag.size
     else:
         if mask is not None:
-            mask = astype(mask, dtype=np.bool, copy=False)
+            mask = mask.astype(dtype=np.bool, copy=False)
             mask = np.asarray(mask, dtype=np.bool)
             edges = _mask_edges_weights(mask, edges)
             n_voxels = np.sum(mask)
diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py
index 341486abd3b1c87f01ecb2ff60bf959d3f79e5a1..de6674646c981d1042d9c83c28041521a9b271b8 100644
--- a/sklearn/feature_extraction/tests/test_text.py
+++ b/sklearn/feature_extraction/tests/test_text.py
@@ -24,7 +24,6 @@ import numpy as np
 from numpy.testing import assert_array_almost_equal
 from numpy.testing import assert_array_equal
 from numpy.testing import assert_raises
-from sklearn.utils.random import choice
 from sklearn.utils.testing import (assert_equal, assert_false, assert_true,
                                    assert_not_equal, assert_almost_equal,
                                    assert_in, assert_less, assert_greater,
@@ -863,8 +862,7 @@ def test_countvectorizer_vocab_sets_when_pickling():
     vocab_words = np.array(['beer', 'burger', 'celeri', 'coke', 'pizza',
                             'salad', 'sparkling', 'tomato', 'water'])
     for x in range(0, 100):
-        vocab_set = set(choice(vocab_words, size=5, replace=False,
-                               random_state=rng))
+        vocab_set = set(rng.choice(vocab_words, size=5, replace=False))
         cv = CountVectorizer(vocabulary=vocab_set)
         unpickled_cv = pickle.loads(pickle.dumps(cv))
         cv.fit(ALL_FOOD_DOCS)
@@ -878,7 +876,7 @@ def test_countvectorizer_vocab_dicts_when_pickling():
                             'salad', 'sparkling', 'tomato', 'water'])
     for x in range(0, 100):
         vocab_dict = dict()
-        words = choice(vocab_words, size=5, replace=False, random_state=rng)
+        words = rng.choice(vocab_words, size=5, replace=False)
         for y in range(0, 5):
             vocab_dict[words[y]] = y
         cv = CountVectorizer(vocabulary=vocab_dict)
diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 539e88973bcc001954e0d2f70483cc2997044e70..3cf76187350f66b785e880f3d74b485f7d486172 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -29,7 +29,6 @@ from ..externals.six.moves import xrange
 from ..preprocessing import normalize
 from .hashing import FeatureHasher
 from .stop_words import ENGLISH_STOP_WORDS
-from ..utils.fixes import frombuffer_empty, bincount
 from ..utils.validation import check_is_fitted
 
 __all__ = ['CountVectorizer',
@@ -503,7 +502,7 @@ class HashingVectorizer(BaseEstimator, VectorizerMixin):
 def _document_frequency(X):
     """Count the number of non-zero values for each feature in sparse X."""
     if sp.isspmatrix_csr(X):
-        return bincount(X.indices, minlength=X.shape[1])
+        return np.bincount(X.indices, minlength=X.shape[1])
     else:
         return np.diff(sp.csc_matrix(X, copy=False).indptr)
 
@@ -783,7 +782,7 @@ class CountVectorizer(BaseEstimator, VectorizerMixin):
 
         j_indices = np.asarray(j_indices, dtype=np.intc)
         indptr = np.frombuffer(indptr, dtype=np.intc)
-        values = frombuffer_empty(values, dtype=np.intc)
+        values = np.frombuffer(values, dtype=np.intc)
 
         X = sp.csr_matrix((values, j_indices, indptr),
                           shape=(len(indptr) - 1, len(vocabulary)),
diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py
index dada33e9a75cc4e4ee38037d92d9a2359d2124ec..ab6ce1ddb25456e75762321dc0bcd394047dff3a 100644
--- a/sklearn/feature_selection/from_model.py
+++ b/sklearn/feature_selection/from_model.py
@@ -8,7 +8,6 @@ from ..base import BaseEstimator, clone, MetaEstimatorMixin
 from ..externals import six
 
 from ..exceptions import NotFittedError
-from ..utils.fixes import norm
 from ..utils.metaestimators import if_delegate_has_method
 
 
@@ -21,7 +20,8 @@ def _get_feature_importances(estimator, norm_order=1):
             importances = np.abs(estimator.coef_)
 
         else:
-            importances = norm(estimator.coef_, axis=0, ord=norm_order)
+            importances = np.linalg.norm(estimator.coef_, axis=0,
+                                         ord=norm_order)
 
     elif importances is None:
         raise ValueError(
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index 6ef0d824b587cad960c14d8c801361e48c6d3834..6ac6b8630b1395adf68a961474a53a52b7017582 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -17,7 +17,6 @@ from sklearn.svm import LinearSVC
 from sklearn.feature_selection import SelectFromModel
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.linear_model import PassiveAggressiveClassifier
-from sklearn.utils.fixes import norm
 
 iris = datasets.load_iris()
 data, y = iris.data, iris.target
@@ -101,7 +100,7 @@ def test_feature_importances_2d_coef():
 
             # Manually check that the norm is correctly performed
             est.fit(X, y)
-            importances = norm(est.coef_, axis=0, ord=order)
+            importances = np.linalg.norm(est.coef_, axis=0, ord=order)
             feature_mask = importances > func(importances)
             assert_array_equal(X_new, X[:, feature_mask])
 
diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py
index 910c23508592a39a7956eadcaded617d4549d39c..245fc95ff69b2459f5fff3cf0f1f2663f03e32ca 100644
--- a/sklearn/isotonic.py
+++ b/sklearn/isotonic.py
@@ -9,7 +9,6 @@ from scipy.stats import spearmanr
 from .base import BaseEstimator, TransformerMixin, RegressorMixin
 from .utils import as_float_array, check_array, check_consistent_length
 from .utils import deprecated
-from .utils.fixes import astype
 from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique
 import warnings
 import math
@@ -291,7 +290,7 @@ class IsotonicRegression(BaseEstimator, TransformerMixin, RegressorMixin):
             sample_weight = np.ones(len(y))
 
         order = np.lexsort((y, X))
-        X, y, sample_weight = [astype(array[order], np.float64, copy=False)
+        X, y, sample_weight = [array[order].astype(np.float64, copy=False)
                                for array in [X, y, sample_weight]]
         unique_X, unique_y, unique_sample_weight = _make_unique(
             X, y, sample_weight)
diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py
index 59d55cad3eb7f706f67d2c5838d09953f8a547f0..0cfe4c3cad031f9ff49eb8b80e9dee3c83a9226b 100644
--- a/sklearn/learning_curve.py
+++ b/sklearn/learning_curve.py
@@ -14,7 +14,6 @@ from .externals.joblib import Parallel, delayed
 from .cross_validation import _safe_split, _score, _fit_and_score
 from .metrics.scorer import check_scoring
 from .utils import indexable
-from .utils.fixes import astype
 
 
 warnings.warn("This module was deprecated in version 0.18 in favor of the "
@@ -214,7 +213,7 @@ def _translate_train_sizes(train_sizes, n_max_training_samples):
                              "must be within (0, 1], but is within [%f, %f]."
                              % (n_min_required_samples,
                                 n_max_required_samples))
-        train_sizes_abs = astype(train_sizes_abs * n_max_training_samples,
-                                 dtype=np.int, copy=False)
+        train_sizes_abs = (train_sizes_abs * n_max_training_samples).astype(
+            dtype=np.int, copy=False)
         train_sizes_abs = np.clip(train_sizes_abs, 1,
                                   n_max_training_samples)
diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index b3c61408470cc35c515415dfa51e97e758b3c153..85f2b8ef7df0797dfbdcb05704196c06893e0fb4 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -20,7 +20,6 @@ from ..utils.validation import check_is_fitted
 from ..externals import six
 
 from .sgd_fast import plain_sgd, average_sgd
-from ..utils.fixes import astype
 from ..utils import compute_class_weight
 from ..utils import deprecated
 from .sgd_fast import Hinge
@@ -864,7 +863,7 @@ class BaseSGDRegressor(BaseSGD, RegressorMixin):
                      n_iter, sample_weight,
                      coef_init, intercept_init):
         X, y = check_X_y(X, y, "csr", copy=False, order='C', dtype=np.float64)
-        y = astype(y, np.float64, copy=False)
+        y = y.astype(np.float64, copy=False)
 
         n_samples, n_features = X.shape
 
diff --git a/sklearn/linear_model/theil_sen.py b/sklearn/linear_model/theil_sen.py
index b51f7d6dd3c32cf457068decabb7d0a0af80979b..544f79f9df0541d3cf05aa44a75cb9119df27910 100644
--- a/sklearn/linear_model/theil_sen.py
+++ b/sklearn/linear_model/theil_sen.py
@@ -21,7 +21,6 @@ from .base import LinearModel
 from ..base import RegressorMixin
 from ..utils import check_random_state
 from ..utils import check_X_y, _get_n_jobs
-from ..utils.random import choice
 from ..externals.joblib import Parallel, delayed
 from ..externals.six.moves import xrange as range
 from ..exceptions import ConvergenceWarning
@@ -365,10 +364,8 @@ class TheilSenRegressor(LinearModel, RegressorMixin):
         if np.rint(binom(n_samples, n_subsamples)) <= self.max_subpopulation:
             indices = list(combinations(range(n_samples), n_subsamples))
         else:
-            indices = [choice(n_samples,
-                              size=n_subsamples,
-                              replace=False,
-                              random_state=random_state)
+            indices = [random_state.choice(n_samples, size=n_subsamples,
+                                           replace=False)
                        for _ in range(self.n_subpopulation_)]
 
         n_jobs = _get_n_jobs(self.n_jobs)
diff --git a/sklearn/manifold/t_sne.py b/sklearn/manifold/t_sne.py
index 30b9ca16e88c2f7611902e706791f84eaad68337..0fc30e1aaa166d1cb336ea8cebb48f9c9b5e0140 100644
--- a/sklearn/manifold/t_sne.py
+++ b/sklearn/manifold/t_sne.py
@@ -17,12 +17,10 @@ from ..neighbors import BallTree
 from ..base import BaseEstimator
 from ..utils import check_array
 from ..utils import check_random_state
-from ..utils.extmath import _ravel
 from ..decomposition import PCA
 from ..metrics.pairwise import pairwise_distances
 from . import _utils
 from . import _barnes_hut_tsne
-from ..utils.fixes import astype
 from ..externals.six import string_types
 from ..utils import deprecated
 
@@ -53,7 +51,7 @@ def _joint_probabilities(distances, desired_perplexity, verbose):
     """
     # Compute conditional probabilities such that they approximately match
     # the desired perplexity
-    distances = astype(distances, np.float32, copy=False)
+    distances = distances.astype(np.float32, copy=False)
     conditional_P = _utils._binary_search_perplexity(
         distances, None, desired_perplexity, verbose)
     P = conditional_P + conditional_P.T
@@ -90,8 +88,8 @@ def _joint_probabilities_nn(distances, neighbors, desired_perplexity, verbose):
     """
     # Compute conditional probabilities such that they approximately match
     # the desired perplexity
-    distances = astype(distances, np.float32, copy=False)
-    neighbors = astype(neighbors, np.int64, copy=False)
+    distances = distances.astype(np.float32, copy=False)
+    neighbors = neighbors.astype(np.int64, copy=False)
     conditional_P = _utils._binary_search_perplexity(
         distances, neighbors, desired_perplexity, verbose)
     m = "All probabilities should be finite"
@@ -158,7 +156,8 @@ def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components,
     grad = np.ndarray((n_samples, n_components))
     PQd = squareform((P - Q) * n)
     for i in range(skip_num_points, n_samples):
-        np.dot(_ravel(PQd[i]), X_embedded[i] - X_embedded, out=grad[i])
+        np.dot(np.ravel(PQd[i], order='K'), X_embedded[i] - X_embedded,
+               out=grad[i])
     grad = grad.ravel()
     c = 2.0 * (degrees_of_freedom + 1.0) / degrees_of_freedom
     grad *= c
@@ -277,9 +276,9 @@ def _kl_divergence_bh(params, P, neighbors, degrees_of_freedom, n_samples,
         Unraveled gradient of the Kullback-Leibler divergence with respect to
         the embedding.
     """
-    params = astype(params, np.float32, copy=False)
+    params = params.astype(np.float32, copy=False)
     X_embedded = params.reshape(n_samples, n_components)
-    neighbors = astype(neighbors, np.int64, copy=False)
+    neighbors = neighbors.astype(np.int64, copy=False)
     if len(P.shape) == 1:
         sP = squareform(P).astype(np.float32)
     else:
diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index bcf9c48ab30ba90b04ac691036d384e461453eae..a01314589869b372b4fb43a38d86f82643fbd2a7 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -38,7 +38,6 @@ from ..utils.multiclass import unique_labels
 from ..utils.multiclass import type_of_target
 from ..utils.validation import _num_samples
 from ..utils.sparsefuncs import count_nonzero
-from ..utils.fixes import bincount
 from ..exceptions import UndefinedMetricWarning
 
 
@@ -1088,16 +1087,16 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None,
             tp_bins_weights = None
 
         if len(tp_bins):
-            tp_sum = bincount(tp_bins, weights=tp_bins_weights,
-                              minlength=len(labels))
+            tp_sum = np.bincount(tp_bins, weights=tp_bins_weights,
+                                 minlength=len(labels))
         else:
             # Pathological case
             true_sum = pred_sum = tp_sum = np.zeros(len(labels))
         if len(y_pred):
-            pred_sum = bincount(y_pred, weights=sample_weight,
-                                minlength=len(labels))
+            pred_sum = np.bincount(y_pred, weights=sample_weight,
+                                   minlength=len(labels))
         if len(y_true):
-            true_sum = bincount(y_true, weights=sample_weight,
-                                minlength=len(labels))
+            true_sum = np.bincount(y_true, weights=sample_weight,
+                                   minlength=len(labels))
 
         # Retain only selected labels
diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py
index 6c984ec9e39d7f10db522a9d7467c530114e99dc..9115b93abefbab8b58879a9e749482b9d18ab778 100644
--- a/sklearn/metrics/cluster/supervised.py
+++ b/sklearn/metrics/cluster/supervised.py
@@ -22,7 +22,6 @@ from scipy.misc import comb
 from scipy import sparse as sp
 
 from .expected_mutual_info_fast import expected_mutual_information
-from ...utils.fixes import bincount
 from ...utils.validation import check_array
 
 
@@ -862,7 +861,7 @@ def entropy(labels):
     if len(labels) == 0:
         return 1.0
     label_idx = np.unique(labels, return_inverse=True)[1]
-    pi = bincount(label_idx).astype(np.float64)
+    pi = np.bincount(label_idx).astype(np.float64)
     pi = pi[pi > 0]
     pi_sum = np.sum(pi)
     # log(a / b) should be calculated as log(a) - log(b) for
diff --git a/sklearn/metrics/cluster/unsupervised.py b/sklearn/metrics/cluster/unsupervised.py
index 3be683ae08d9810b6a1ac2251087a4b76dc52a58..adb141c3120f055756b5dba8eca5e4cd7b17bdec 100644
--- a/sklearn/metrics/cluster/unsupervised.py
+++ b/sklearn/metrics/cluster/unsupervised.py
@@ -9,7 +9,6 @@ import numpy as np
 
 from ...utils import check_random_state
 from ...utils import check_X_y
-from ...utils.fixes import bincount
 from ..pairwise import pairwise_distances
 from ...preprocessing import LabelEncoder
 
@@ -169,7 +168,7 @@ def silhouette_samples(X, labels, metric='euclidean', **kwds):
 
     distances = pairwise_distances(X, metric=metric, **kwds)
     unique_labels = le.classes_
-    n_samples_per_label = bincount(labels, minlength=len(unique_labels))
+    n_samples_per_label = np.bincount(labels, minlength=len(unique_labels))
 
     # For sample i, store the mean distance of the cluster to which
     # it belongs in intra_clust_dists[i]
diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 57142e1c4b8bc46bd721d8c8625fe9687e4180cc..0e273f7026efe50b7388819ebae5b3295464c3ce 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -29,8 +29,6 @@ from ..utils import check_consistent_length
 from ..utils import column_or_1d, check_array
 from ..utils.multiclass import type_of_target
 from ..utils.extmath import stable_cumsum
-from ..utils.fixes import bincount
-from ..utils.fixes import array_equal
 from ..utils.sparsefuncs import count_nonzero
 from ..exceptions import UndefinedMetricWarning
 
@@ -306,11 +304,11 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None):
     # ensure binary classification if pos_label is not specified
     classes = np.unique(y_true)
     if (pos_label is None and
-        not (array_equal(classes, [0, 1]) or
-             array_equal(classes, [-1, 1]) or
-             array_equal(classes, [0]) or
-             array_equal(classes, [-1]) or
-             array_equal(classes, [1]))):
+        not (np.array_equal(classes, [0, 1]) or
+             np.array_equal(classes, [-1, 1]) or
+             np.array_equal(classes, [0]) or
+             np.array_equal(classes, [-1]) or
+             np.array_equal(classes, [1]))):
         raise ValueError("Data is not binary and pos_label is not specified")
     elif pos_label is None:
         pos_label = 1.
@@ -742,10 +740,10 @@ def label_ranking_loss(y_true, y_score, sample_weight=None):
         # Sort and bin the label scores
         unique_scores, unique_inverse = np.unique(y_score[i],
                                                   return_inverse=True)
-        true_at_reversed_rank = bincount(
+        true_at_reversed_rank = np.bincount(
             unique_inverse[y_true.indices[start:stop]],
             minlength=len(unique_scores))
-        all_at_reversed_rank = bincount(unique_inverse,
+        all_at_reversed_rank = np.bincount(unique_inverse,
                                         minlength=len(unique_scores))
         false_at_reversed_rank = all_at_reversed_rank - true_at_reversed_rank
 
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 2e6e24051507dc848a317ee584344c764f05da5c..4163240e24b68543ee63424e15eb44114013af6f 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -691,14 +691,8 @@ def test_classification_report_multiclass_with_unicode_label():
 
 avg / total       0.51      0.53      0.47        75
 """
-    if np_version[:3] < (1, 7, 0):
-        expected_message = ("NumPy < 1.7.0 does not implement"
-                            " searchsorted on unicode data correctly.")
-        assert_raise_message(RuntimeError, expected_message,
-                             classification_report, y_true, y_pred)
-    else:
-        report = classification_report(y_true, y_pred)
-        assert_equal(report, expected_report)
+    report = classification_report(y_true, y_pred)
+    assert_equal(report, expected_report)
 
 
 def test_classification_report_multiclass_with_long_string_label():
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 369125f339559a67d82488bbeda141fa70a7ad8c..d51487cd1cb4e6f3954ec9f0faf4b7790107f7e8 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -29,9 +29,7 @@ from ..utils.validation import check_array
 from ..utils.multiclass import type_of_target
 from ..externals.six import with_metaclass
 from ..externals.six.moves import zip
-from ..utils.fixes import bincount
 from ..utils.fixes import signature
-from ..utils.random import choice
 from ..base import _pprint
 
 __all__ = ['BaseCrossValidator',
@@ -577,7 +575,7 @@ class StratifiedKFold(_BaseKFold):
         y = np.asarray(y)
         n_samples = y.shape[0]
         unique_y, y_inversed = np.unique(y, return_inverse=True)
-        y_counts = bincount(y_inversed)
+        y_counts = np.bincount(y_inversed)
         min_groups = np.min(y_counts)
         if np.all(self.n_splits > y_counts):
             raise ValueError("All the n_groups for individual classes"
@@ -1377,6 +1375,7 @@ def _approximate_mode(class_counts, n_draws, rng):
     ...                   n_draws=2, rng=42)
     array([1, 1, 0, 0])
     """
+    rng = check_random_state(rng)
     # this computes a bad approximation to the mode of the
     # multivariate hypergeometric given by class_counts and n_draws
     continuous = n_draws * class_counts / class_counts.sum()
@@ -1397,7 +1396,7 @@ def _approximate_mode(class_counts, n_draws, rng):
             # if we need to add more, we add them all and
             # go to the next value
             add_now = min(len(inds), need_to_add)
-            inds = choice(inds, size=add_now, replace=False, random_state=rng)
+            inds = rng.choice(inds, size=add_now, replace=False)
             floored[inds] += 1
             need_to_add -= add_now
             if need_to_add == 0:
@@ -1476,7 +1475,7 @@ class StratifiedShuffleSplit(BaseShuffleSplit):
         classes, y_indices = np.unique(y, return_inverse=True)
         n_classes = classes.shape[0]
 
-        class_counts = bincount(y_indices)
+        class_counts = np.bincount(y_indices)
         if np.min(class_counts) < 2:
             raise ValueError("The least populated class in y has only 1"
                              " member, which is too few. The minimum"
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index e105f0d0b122f00f77597d9f5cf78c8ccdde9063..db830619567d8e5e7c889b2357faa8d76503b8af 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -21,7 +21,6 @@ import scipy.sparse as sp
 
 from ..base import is_classifier, clone
 from ..utils import indexable, check_random_state, safe_indexing
-from ..utils.fixes import astype
 from ..utils.validation import _is_arraylike, _num_samples
 from ..utils.metaestimators import _safe_split
 from ..externals.joblib import Parallel, delayed, logger
@@ -855,8 +854,8 @@ def _translate_train_sizes(train_sizes, n_max_training_samples):
                              "must be within (0, 1], but is within [%f, %f]."
                              % (n_min_required_samples,
                                 n_max_required_samples))
-        train_sizes_abs = astype(train_sizes_abs * n_max_training_samples,
-                                 dtype=np.int, copy=False)
+        train_sizes_abs = (train_sizes_abs * n_max_training_samples).astype(
+                             dtype=np.int, copy=False)
         train_sizes_abs = np.clip(train_sizes_abs, 1,
                                   n_max_training_samples)
     else:
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 3f804d414b75010d1bb7013c07d55e70bc1490f2..1d6cf50ec1c33e65a5f0b40a97565c5a3e161896 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -11,7 +11,6 @@ import sys
 import numpy as np
 import scipy.sparse as sp
 
-from sklearn.utils.fixes import in1d
 from sklearn.utils.fixes import sp_version
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_not_equal
@@ -1015,7 +1014,7 @@ def test_grid_search_correct_score_results():
         expected_keys = (("mean_test_score", "rank_test_score") +
                          tuple("split%d_test_score" % cv_i
                                for cv_i in range(n_splits)))
-        assert_true(all(in1d(expected_keys, result_keys)))
+        assert_true(all(np.in1d(expected_keys, result_keys)))
 
         cv = StratifiedKFold(n_splits=n_splits)
         n_splits = grid_search.n_splits_
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index c351d1169f4b22001fbde3e10cf2ec0615ae1259..5c81dc7041124a92eb427559f383852669b7dedb 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -29,7 +29,6 @@ from .preprocessing import label_binarize
 from .utils import check_X_y, check_array, check_consistent_length
 from .utils.extmath import safe_sparse_dot
 from .utils.multiclass import _check_partial_fit_first_call
-from .utils.fixes import in1d
 from .utils.validation import check_is_fitted
 from .externals import six
 
@@ -387,7 +386,7 @@ class GaussianNB(BaseNB):
         classes = self.classes_
 
         unique_y = np.unique(y)
-        unique_y_in_classes = in1d(unique_y, classes)
+        unique_y_in_classes = np.in1d(unique_y, classes)
 
         if not np.all(unique_y_in_classes):
             raise ValueError("The target label(s) %s in y do not exist in the "
diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py
index 0cf8bc04ae2301db0ee6df3c5552d55d3ea92701..e963c2744c416a17ea436f6090bf6d2dfa1f23fc 100644
--- a/sklearn/neighbors/base.py
+++ b/sklearn/neighbors/base.py
@@ -18,7 +18,6 @@ from ..base import BaseEstimator
 from ..metrics import pairwise_distances
 from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS
 from ..utils import check_X_y, check_array, _get_n_jobs, gen_even_slices
-from ..utils.fixes import argpartition
 from ..utils.multiclass import check_classification_targets
 from ..externals import six
 from ..externals.joblib import Parallel, delayed
@@ -356,7 +355,7 @@ class KNeighborsMixin(object):
                     X, self._fit_X, self.effective_metric_, n_jobs=n_jobs,
                     **self.effective_metric_params_)
 
-            neigh_ind = argpartition(dist, n_neighbors - 1, axis=1)
+            neigh_ind = np.argpartition(dist, n_neighbors - 1, axis=1)
             neigh_ind = neigh_ind[:, :n_neighbors]
             # argpartition doesn't guarantee sorted order, so we sort again
             neigh_ind = neigh_ind[
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index e9821797e83a20a673ad0618aa15a63b57989fcf..46937c77bee46bc743af9d804f8cd82f5ed8434d 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -19,7 +19,6 @@ from ..externals import six
 from ..utils import check_array
 from ..utils.extmath import row_norms
 from ..utils.extmath import _incremental_mean_and_var
-from ..utils.fixes import bincount
 from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
                                       inplace_csr_row_normalize_l2)
 from ..utils.sparsefuncs import (inplace_column_scale,
@@ -1149,7 +1148,7 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
         combinations = self._combinations(self.n_input_features_, self.degree,
                                           self.interaction_only,
                                           self.include_bias)
-        return np.vstack(bincount(c, minlength=self.n_input_features_)
+        return np.vstack(np.bincount(c, minlength=self.n_input_features_)
                          for c in combinations)
 
     def get_feature_names(self, input_features=None):
diff --git a/sklearn/preprocessing/imputation.py b/sklearn/preprocessing/imputation.py
index e414e98f424df98206c0cc4fd5cb91cd2d4ef7cb..12d5425fbf604b39ae2d26b70ca91378dca1a70f 100644
--- a/sklearn/preprocessing/imputation.py
+++ b/sklearn/preprocessing/imputation.py
@@ -10,7 +10,6 @@ from scipy import stats
 
 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_array
-from ..utils.fixes import astype
 from ..utils.sparsefuncs import _get_median
 from ..utils.validation import check_is_fitted
 from ..utils.validation import FLOAT_DTYPES
@@ -226,7 +225,7 @@ class Imputer(BaseEstimator, TransformerMixin):
                                     X.indptr[1:-1])
 
             # astype necessary for bug in numpy.hsplit before v1.9
-            columns = [col[astype(mask, bool, copy=False)]
+            columns = [col[mask.astype(bool, copy=False)]
                        for col, mask in zip(columns_all, mask_valids)]
 
             # Median
@@ -357,8 +356,8 @@ class Imputer(BaseEstimator, TransformerMixin):
             indexes = np.repeat(np.arange(len(X.indptr) - 1, dtype=np.int),
                                 np.diff(X.indptr))[mask]
 
-            X.data[mask] = astype(valid_statistics[indexes], X.dtype,
-                                  copy=False)
+            X.data[mask] = valid_statistics[indexes].astype(X.dtype,
+                                                            copy=False)
         else:
             if sparse.issparse(X):
                 X = X.toarray()
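
The `astype` shim dropped above only existed because `ndarray.astype` gained its `copy` keyword in NumPy 1.7; with the raised minimum NumPy version the method can be called directly. A quick sanity check (not part of the patch; it mirrors the behaviour the deleted `test_astype_copy_memory` covered):

import numpy as np

a = np.ones(3, dtype=np.int32)
assert a.astype(np.int32, copy=False) is a                     # no copy when dtype already matches
assert a.astype(np.float64, copy=False).dtype == np.float64    # conversion still produces a new array
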
diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py
index f2f7d9afad34735e3b7b72841e91c74fda81cd62..e8ea17f413a59e82f6ef2506aace74af19009e5f 100644
--- a/sklearn/preprocessing/label.py
+++ b/sklearn/preprocessing/label.py
@@ -15,10 +15,7 @@ import scipy.sparse as sp
 
 from ..base import BaseEstimator, TransformerMixin
 
-from ..utils.fixes import np_version
 from ..utils.fixes import sparse_min_max
-from ..utils.fixes import astype
-from ..utils.fixes import in1d
 from ..utils import column_or_1d
 from ..utils.validation import check_array
 from ..utils.validation import check_is_fitted
@@ -39,20 +36,6 @@ __all__ = [
 ]
 
 
-def _check_numpy_unicode_bug(labels):
-    """Check that user is not subject to an old numpy bug
-
-    Fixed in master before 1.7.0:
-
-      https://github.com/numpy/numpy/pull/243
-
-    """
-    if np_version[:3] < (1, 7, 0) and labels.dtype.kind == 'U':
-        raise RuntimeError("NumPy < 1.7.0 does not implement searchsorted"
-                           " on unicode data correctly. Please upgrade"
-                           " NumPy to use LabelEncoder with unicode inputs.")
-
-
 class LabelEncoder(BaseEstimator, TransformerMixin):
     """Encode labels with value between 0 and n_classes-1.
 
@@ -110,7 +93,6 @@ class LabelEncoder(BaseEstimator, TransformerMixin):
         self : returns an instance of self.
         """
         y = column_or_1d(y, warn=True)
-        _check_numpy_unicode_bug(y)
         self.classes_ = np.unique(y)
         return self
 
@@ -127,7 +109,6 @@ class LabelEncoder(BaseEstimator, TransformerMixin):
         y : array-like of shape [n_samples]
         """
         y = column_or_1d(y, warn=True)
-        _check_numpy_unicode_bug(y)
         self.classes_, y = np.unique(y, return_inverse=True)
         return y
 
@@ -147,7 +128,6 @@ class LabelEncoder(BaseEstimator, TransformerMixin):
         y = column_or_1d(y, warn=True)
 
         classes = np.unique(y)
-        _check_numpy_unicode_bug(classes)
         if len(np.intersect1d(classes, self.classes_)) < len(classes):
             diff = np.setdiff1d(classes, self.classes_)
             raise ValueError("y contains new labels: %s" % str(diff))
@@ -520,7 +500,7 @@ def label_binarize(y, classes, neg_label=0, pos_label=1, sparse_output=False):
         y = column_or_1d(y)
 
         # pick out the known labels from y
-        y_in_classes = in1d(y, classes)
+        y_in_classes = np.in1d(y, classes)
         y_seen = y[y_in_classes]
         indices = np.searchsorted(sorted_class, y_seen)
         indptr = np.hstack((0, np.cumsum(y_in_classes)))
@@ -541,7 +521,7 @@ def label_binarize(y, classes, neg_label=0, pos_label=1, sparse_output=False):
 
     if not sparse_output:
         Y = Y.toarray()
-        Y = astype(Y, int, copy=False)
+        Y = Y.astype(int, copy=False)
 
         if neg_label != 0:
             Y[Y == 0] = neg_label
@@ -549,7 +529,7 @@ def label_binarize(y, classes, neg_label=0, pos_label=1, sparse_output=False):
         if pos_switch:
             Y[Y == pos_label] = 0
     else:
-        Y.data = astype(Y.data, int, copy=False)
+        Y.data = Y.data.astype(int, copy=False)
 
     # preserve label ordering
     if np.any(classes != sorted_class):
diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py
index 353f0023fc686e9aa0b65d19082cca79048a4c87..5b7637c0c3ee3fa8dc01d18d5a2aaa7f185393fe 100644
--- a/sklearn/utils/class_weight.py
+++ b/sklearn/utils/class_weight.py
@@ -4,9 +4,6 @@
 
 import numpy as np
 from ..externals import six
-from ..utils.fixes import in1d
-
-from .fixes import bincount
 
 
 def compute_class_weight(class_weight, classes, y):
@@ -55,7 +52,7 @@ def compute_class_weight(class_weight, classes, y):
             raise ValueError("classes should have valid labels that are in y")
 
         recip_freq = len(y) / (len(le.classes_) *
-                               bincount(y_ind).astype(np.float64))
+                               np.bincount(y_ind).astype(np.float64))
         weight = recip_freq[le.transform(classes)]
     else:
         # user-defined dictionary
@@ -170,7 +167,7 @@ def compute_sample_weight(class_weight, y, indices=None):
 
         if classes_missing:
             # Make missing classes' weight zero
-            weight_k[in1d(y_full, list(classes_missing))] = 0.
+            weight_k[np.in1d(y_full, list(classes_missing))] = 0.
 
         expanded_class_weight.append(weight_k)
 
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index 18a3db760b0babc1a5d54733b0309bc37decac35..c8365ae9ab1662b2aa2c661292475f303a65c784 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -12,7 +12,6 @@ Extended math utilities.
 # License: BSD 3 clause
 
 from __future__ import division
-from functools import partial
 import warnings
 
 import numpy as np
@@ -26,7 +25,6 @@ from ._logistic_sigmoid import _log_logistic_sigmoid
 from ..externals.six.moves import xrange
 from .sparsefuncs_fast import csr_row_norms
 from .validation import check_array
-from ..exceptions import NonBLASDotWarning
 
 
 @deprecated("sklearn.utils.extmath.norm was deprecated in version 0.19"
@@ -40,20 +38,13 @@ def norm(x):
     return linalg.norm(x)
 
 
-# Newer NumPy has a ravel that needs less copying.
-if np_version < (1, 7, 1):
-    _ravel = np.ravel
-else:
-    _ravel = partial(np.ravel, order='K')
-
-
 def squared_norm(x):
     """Squared Euclidean or Frobenius norm of x.
 
     Returns the Euclidean norm when x is a vector, the Frobenius norm when x
     is a matrix (2-d array). Faster than norm(x) ** 2.
     """
-    x = _ravel(x)
+    x = np.ravel(x, order='K')
     if np.issubdtype(x.dtype, np.integer):
         warnings.warn('Array type is integer, np.dot may overflow. '
                       'Data should be float type to avoid this issue',
@@ -103,68 +94,10 @@ def _impose_f_order(X):
         return check_array(X, copy=False, order='F'), False
 
 
-def _fast_dot(A, B):
-    if B.shape[0] != A.shape[A.ndim - 1]:  # check adopted from '_dotblas.c'
-        raise ValueError
-
-    if A.dtype != B.dtype or any(x.dtype not in (np.float32, np.float64)
-                                 for x in [A, B]):
-        warnings.warn('Falling back to np.dot. '
-                      'Data must be of same type of either '
-                      '32 or 64 bit float for the BLAS function, gemm, to be '
-                      'used for an efficient dot operation. ',
-                      NonBLASDotWarning)
-        raise ValueError
-
-    if min(A.shape) == 1 or min(B.shape) == 1 or A.ndim != 2 or B.ndim != 2:
-        raise ValueError
-
-    # scipy 0.9 compliant API
-    dot = linalg.get_blas_funcs(['gemm'], (A, B))[0]
-    A, trans_a = _impose_f_order(A)
-    B, trans_b = _impose_f_order(B)
-    return dot(alpha=1.0, a=A, b=B, trans_a=trans_a, trans_b=trans_b)
-
-
-def _have_blas_gemm():
-    try:
-        linalg.get_blas_funcs(['gemm'])
-        return True
-    except (AttributeError, ValueError):
-        warnings.warn('Could not import BLAS, falling back to np.dot')
-        return False
-
-
-# Only use fast_dot for older NumPy; newer ones have tackled the speed issue.
-if np_version < (1, 7, 2) and _have_blas_gemm():
-    def fast_dot(A, B):
-        """Compute fast dot products directly calling BLAS.
-
-        This function calls BLAS directly while warranting Fortran contiguity.
-        This helps avoiding extra copies `np.dot` would have created.
-        For details see section `Linear Algebra on large Arrays`:
-        http://wiki.scipy.org/PerformanceTips
-
-        Parameters
-        ----------
-        A, B: instance of np.ndarray
-            Input arrays. Arrays are supposed to be of the same dtype and to
-            have exactly 2 dimensions. Currently only floats are supported.
-            In case these requirements aren't met np.dot(A, B) is returned
-            instead. To activate the related warning issued in this case
-            execute the following lines of code:
-
-            >> import warnings
-            >> from sklearn.exceptions import NonBLASDotWarning
-            >> warnings.simplefilter('always', NonBLASDotWarning)
-        """
-        try:
-            return _fast_dot(A, B)
-        except ValueError:
-            # Maltyped or malformed data.
-            return np.dot(A, B)
-else:
-    fast_dot = np.dot
+@deprecated("sklearn.utils.extmath.fast_dot was deprecated in version 0.19"
+            "and will be removed in 0.21. Use the equivalent np.dot instead.")
+def fast_dot(a, b, out=None):
+    return np.dot(a, b, out)
 
 
 def density(w, **kwargs):
@@ -191,7 +124,7 @@ def safe_sparse_dot(a, b, dense_output=False):
             ret = ret.toarray()
         return ret
     else:
-        return fast_dot(a, b)
+        return np.dot(a, b)
 
 
 def randomized_range_finder(A, size, n_iter,
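
With `fast_dot` reduced above to a deprecated alias for `np.dot`, callers can switch to NumPy directly; the wrapper only remains to emit a deprecation warning until 0.21. A hedged illustration (not part of the patch):

import numpy as np

rng = np.random.RandomState(0)
A, B = rng.rand(3, 4), rng.rand(4, 2)
C = np.dot(A, B)            # drop-in replacement for the removed fast_dot(A, B)
assert C.shape == (3, 2)
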
diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py
index a9a20c61b34282cf3c9eec6f8f3dbb85d4f64dc1..bfa5c917b0030fce591ce392ac374055bdc4a54a 100644
--- a/sklearn/utils/fixes.py
+++ b/sklearn/utils/fixes.py
@@ -43,17 +43,7 @@ np_version = _parse_version(np.__version__)
 sp_version = _parse_version(scipy.__version__)
 
 
-# little danse to see if np.copy has an 'order' keyword argument
-# Supported since numpy 1.7.0
-if 'order' in signature(np.copy).parameters:
-    def safe_copy(X):
-        # Copy, but keep the order
-        return np.copy(X, order='K')
-else:
-    # Before an 'order' argument was introduced, numpy wouldn't muck with
-    # the ordering
-    safe_copy = np.copy
-
+# Remove when minimum required NumPy >= 1.10
 try:
     if (not np.allclose(np.divide(.4, 1, casting="unsafe"),
                         np.divide(.4, 1, casting="unsafe", dtype=np.float64))
@@ -82,18 +72,6 @@ except TypeError:
         return out
 
 
-try:
-    np.array(5).astype(float, copy=False)
-except TypeError:
-    # Compat where astype accepted no copy argument (numpy < 1.7.0)
-    def astype(array, dtype, copy=True):
-        if not copy and array.dtype == dtype:
-            return array
-        return array.astype(dtype)
-else:
-    astype = np.ndarray.astype
-
-
 try:
     with warnings.catch_warnings(record=True):
         # Don't raise the numpy deprecation warnings that appear in
@@ -106,11 +84,7 @@ except (TypeError, AttributeError):
 
     def _minor_reduce(X, ufunc):
         major_index = np.flatnonzero(np.diff(X.indptr))
-        if X.data.size == 0 and major_index.size == 0:
-            # Numpy < 1.8.0 don't handle empty arrays in reduceat
-            value = np.zeros_like(X.data)
-        else:
-            value = ufunc.reduceat(X.data, X.indptr[major_index])
+        value = ufunc.reduceat(X.data, X.indptr[major_index])
         return major_index, value
 
     def _min_or_max_axis(X, axis, min_or_max):
@@ -164,79 +138,6 @@ else:
                 X.max(axis=axis).toarray().ravel())
 
 
-try:
-    from numpy import argpartition
-except ImportError:
-    # numpy.argpartition was introduced in v 1.8.0
-    def argpartition(a, kth, axis=-1, kind='introselect', order=None):
-        return np.argsort(a, axis=axis, order=order)
-
-try:
-    from numpy import partition
-except ImportError:
-    warnings.warn('Using `sort` instead of partition.'
-                  'Upgrade numpy to 1.8 for better performace on large number'
-                  'of clusters')
-    def partition(a, kth, axis=-1, kind='introselect', order=None):
-        return np.sort(a, axis=axis, order=order)
-
-
-if np_version < (1, 7):
-    # Prior to 1.7.0, np.frombuffer wouldn't work for empty first arg.
-    def frombuffer_empty(buf, dtype):
-        if len(buf) == 0:
-            return np.empty(0, dtype=dtype)
-        else:
-            return np.frombuffer(buf, dtype=dtype)
-else:
-    frombuffer_empty = np.frombuffer
-
-
-if np_version < (1, 8):
-    def in1d(ar1, ar2, assume_unique=False, invert=False):
-        # Backport of numpy function in1d 1.8.1 to support numpy 1.6.2
-        # Ravel both arrays, behavior for the first array could be different
-        ar1 = np.asarray(ar1).ravel()
-        ar2 = np.asarray(ar2).ravel()
-
-        # This code is significantly faster when the condition is satisfied.
-        if len(ar2) < 10 * len(ar1) ** 0.145:
-            if invert:
-                mask = np.ones(len(ar1), dtype=np.bool)
-                for a in ar2:
-                    mask &= (ar1 != a)
-            else:
-                mask = np.zeros(len(ar1), dtype=np.bool)
-                for a in ar2:
-                    mask |= (ar1 == a)
-            return mask
-
-        # Otherwise use sorting
-        if not assume_unique:
-            ar1, rev_idx = np.unique(ar1, return_inverse=True)
-            ar2 = np.unique(ar2)
-
-        ar = np.concatenate((ar1, ar2))
-        # We need this to be a stable sort, so always use 'mergesort'
-        # here. The values from the first array should always come before
-        # the values from the second array.
-        order = ar.argsort(kind='mergesort')
-        sar = ar[order]
-        if invert:
-            bool_ar = (sar[1:] != sar[:-1])
-        else:
-            bool_ar = (sar[1:] == sar[:-1])
-        flag = np.concatenate((bool_ar, [invert]))
-        indx = order.argsort(kind='mergesort')[:len(ar1)]
-
-        if assume_unique:
-            return flag[indx]
-        else:
-            return flag[indx][rev_idx]
-else:
-    from numpy import in1d
-
-
 if sp_version < (0, 15):
     # Backport fix for scikit-learn/scikit-learn#2986 / scipy/scipy#4142
     from ._scipy_sparse_lsqr_backport import lsqr as sparse_lsqr
@@ -249,22 +150,6 @@ def parallel_helper(obj, methodname, *args, **kwargs):
     return getattr(obj, methodname)(*args, **kwargs)
 
 
-if np_version < (1, 6, 2):
-    # Allow bincount to accept empty arrays
-    # https://github.com/numpy/numpy/commit/40f0844846a9d7665616b142407a3d74cb65a040
-    def bincount(x, weights=None, minlength=None):
-        if len(x) > 0:
-            return np.bincount(x, weights, minlength)
-        else:
-            if minlength is None:
-                minlength = 0
-            minlength = np.asscalar(np.asarray(minlength, dtype=np.intp))
-            return np.zeros(minlength, dtype=np.intp)
-
-else:
-    from numpy import bincount
-
-
 if 'exist_ok' in signature(os.makedirs).parameters:
     makedirs = os.makedirs
 else:
@@ -287,20 +172,6 @@ else:
                 raise
 
 
-if np_version < (1, 8, 1):
-    def array_equal(a1, a2):
-        # copy-paste from numpy 1.8.1
-        try:
-            a1, a2 = np.asarray(a1), np.asarray(a2)
-        except:
-            return False
-        if a1.shape != a2.shape:
-            return False
-        return bool(np.asarray(a1 == a2).all())
-else:
-    from numpy import array_equal
-
-
 if np_version < (1, 12):
     class MaskedArray(np.ma.MaskedArray):
         # Before numpy 1.12, np.ma.MaskedArray object is not picklable
@@ -317,33 +188,3 @@ if np_version < (1, 12):
                                  self._fill_value)
 else:
     from numpy.ma import MaskedArray    # noqa
-
-if 'axis' not in signature(np.linalg.norm).parameters:
-
-    def norm(X, ord=None, axis=None):
-        """
-        Handles the axis parameter for the norm function
-        in old versions of numpy (useless for numpy >= 1.8).
-        """
-
-        if axis is None or X.ndim == 1:
-            result = np.linalg.norm(X, ord=ord)
-            return result
-
-        if axis not in (0, 1):
-            raise NotImplementedError("""
-            The fix that adds axis parameter to the old numpy
-            norm only works for 1D or 2D arrays.
-            """)
-
-        if axis == 0:
-            X = X.T
-
-        result = np.zeros(X.shape[0])
-        for i in range(len(result)):
-            result[i] = np.linalg.norm(X[i], ord=ord)
-
-        return result
-
-else:
-    norm = np.linalg.norm
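
The shims deleted from `fixes.py` above (`bincount`, `in1d`, `array_equal`, `argpartition`, `norm` with `axis`, ...) all worked around behaviour missing from old NumPy releases; on the versions this branch now requires, the plain NumPy functions behave as the shims did. A short check of the cases the backports guarded (not part of the patch; assumes NumPy >= 1.8.1):

import numpy as np

# empty input plus minlength used to need the bincount backport
assert np.bincount(np.array([], dtype=np.intp), minlength=3).tolist() == [0, 0, 0]
assert np.in1d([1, 2, 3], [2, 4]).tolist() == [False, True, False]
assert np.array_equal(np.array([0, 1]), [0, 1])
assert np.linalg.norm(np.array([[3.0, 4.0], [0.0, 0.0]]), axis=1).tolist() == [5.0, 0.0]
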
diff --git a/sklearn/utils/linear_assignment_.py b/sklearn/utils/linear_assignment_.py
index 5282c84e211307add3f127fe418aa89957fcdfbe..54a7d99fa369f67da2df5415a89729cffa0717db 100644
--- a/sklearn/utils/linear_assignment_.py
+++ b/sklearn/utils/linear_assignment_.py
@@ -15,8 +15,6 @@ Hungarian algorithm (also known as Munkres algorithm).
 
 import numpy as np
 
-from .fixes import astype
-
 
 def linear_assignment(X):
     """Solve the linear assignment problem using the Hungarian algorithm.
@@ -193,7 +191,7 @@ def _step4(state):
     # We convert to int as numpy operations are faster on int
     C = (state.C == 0).astype(np.int)
     covered_C = C * state.row_uncovered[:, np.newaxis]
-    covered_C *= astype(state.col_uncovered, dtype=np.int, copy=False)
+    covered_C *= state.col_uncovered.astype(dtype=np.int, copy=False)
     n = state.C.shape[0]
     m = state.C.shape[1]
     while True:
@@ -215,7 +213,7 @@ def _step4(state):
                 state.row_uncovered[row] = False
                 state.col_uncovered[col] = True
                 covered_C[:, col] = C[:, col] * (
-                    astype(state.row_uncovered, dtype=np.int, copy=False))
+                    state.row_uncovered.astype(dtype=np.int, copy=False))
                 covered_C[row] = 0
 
 
diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 2d3c80510db0dfcba244bdbeb03660ca567a3d6f..34560225b1e8b093bef40d374469265723c6ce9f 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -20,8 +20,6 @@ import numpy as np
 
 from ..externals.six import string_types
 from .validation import check_array
-from ..utils.fixes import bincount
-from ..utils.fixes import array_equal
 
 
 def _unique_multiclass(y):
@@ -299,7 +298,7 @@ def _check_partial_fit_first_call(clf, classes=None):
 
     elif classes is not None:
         if getattr(clf, 'classes_', None) is not None:
-            if not array_equal(clf.classes_, unique_labels(classes)):
+            if not np.array_equal(clf.classes_, unique_labels(classes)):
                 raise ValueError(
                     "`classes=%r` is not the same as on last call "
                     "to partial_fit, was: %r" % (classes, clf.classes_))
@@ -360,7 +359,7 @@ def class_distribution(y, sample_weight=None):
 
             classes_k, y_k = np.unique(y.data[y.indptr[k]:y.indptr[k + 1]],
                                        return_inverse=True)
-            class_prior_k = bincount(y_k, weights=nz_samp_weight)
+            class_prior_k = np.bincount(y_k, weights=nz_samp_weight)
 
             # An explicit zero was found, combine its weight with the weight
             # of the implicit zeros
@@ -382,7 +381,7 @@ def class_distribution(y, sample_weight=None):
             classes_k, y_k = np.unique(y[:, k], return_inverse=True)
             classes.append(classes_k)
             n_classes.append(classes_k.shape[0])
-            class_prior_k = bincount(y_k, weights=sample_weight)
+            class_prior_k = np.bincount(y_k, weights=sample_weight)
             class_prior.append(class_prior_k / class_prior_k.sum())
 
     return (classes, n_classes, class_prior)
diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py
index 5805f9be2c8fa51c52205a2f8893fd5e5cdbd024..73ac0a9a572a32c38c446219b5d27747c77330c0 100644
--- a/sklearn/utils/random.py
+++ b/sklearn/utils/random.py
@@ -4,18 +4,20 @@
 from __future__ import division
 import numpy as np
 import scipy.sparse as sp
-import operator
 import array
 
 from sklearn.utils import check_random_state
-from sklearn.utils.fixes import astype
 from ._random import sample_without_replacement
+from .deprecation import deprecated
 
 __all__ = ['sample_without_replacement', 'choice']
 
 
 # This is a backport of np.random.choice from numpy 1.7
 # The function can be removed when we bump the requirements to >=1.7
+@deprecated("sklearn.utils.random.choice was deprecated in version 0.19"
+            "and will be removed in 0.21. Use np.random.choice or"
+            "np.random.RandomState.choice instead.")
 def choice(a, size=None, replace=True, p=None, random_state=None):
     """
     choice(a, size=None, replace=True, p=None)
@@ -104,102 +106,11 @@ def choice(a, size=None, replace=True, p=None, random_state=None):
     dtype='|S11')
 
     """
-    random_state = check_random_state(random_state)
-
-    # Format and Verify input
-    a = np.array(a, copy=False)
-    if a.ndim == 0:
-        try:
-            # __index__ must return an integer by python rules.
-            pop_size = operator.index(a.item())
-        except TypeError:
-            raise ValueError("a must be 1-dimensional or an integer")
-        if pop_size <= 0:
-            raise ValueError("a must be greater than 0")
-    elif a.ndim != 1:
-        raise ValueError("a must be 1-dimensional")
+    if random_state is not None:
+        random_state = check_random_state(random_state)
+        return random_state.choice(a, size, replace, p)
     else:
-        pop_size = a.shape[0]
-        if pop_size is 0:
-            raise ValueError("a must be non-empty")
-
-    if p is not None:
-        p = np.array(p, dtype=np.double, ndmin=1, copy=False)
-        if p.ndim != 1:
-            raise ValueError("p must be 1-dimensional")
-        if p.size != pop_size:
-            raise ValueError("a and p must have same size")
-        if np.any(p < 0):
-            raise ValueError("probabilities are not non-negative")
-        if not np.allclose(p.sum(), 1):
-            raise ValueError("probabilities do not sum to 1")
-
-    shape = size
-    if shape is not None:
-        size = np.prod(shape, dtype=np.intp)
-    else:
-        size = 1
-
-    # Actual sampling
-    if replace:
-        if p is not None:
-            cdf = p.cumsum()
-            cdf /= cdf[-1]
-            uniform_samples = random_state.random_sample(shape)
-            idx = cdf.searchsorted(uniform_samples, side='right')
-            # searchsorted returns a scalar
-            idx = np.array(idx, copy=False)
-        else:
-            idx = random_state.randint(0, pop_size, size=shape)
-    else:
-        if size > pop_size:
-            raise ValueError("Cannot take a larger sample than "
-                             "population when 'replace=False'")
-
-        if p is not None:
-            if np.sum(p > 0) < size:
-                raise ValueError("Fewer non-zero entries in p than size")
-            n_uniq = 0
-            p = p.copy()
-            found = np.zeros(shape, dtype=np.int)
-            flat_found = found.ravel()
-            while n_uniq < size:
-                x = random_state.rand(size - n_uniq)
-                if n_uniq > 0:
-                    p[flat_found[0:n_uniq]] = 0
-                cdf = np.cumsum(p)
-                cdf /= cdf[-1]
-                new = cdf.searchsorted(x, side='right')
-                _, unique_indices = np.unique(new, return_index=True)
-                unique_indices.sort()
-                new = new.take(unique_indices)
-                flat_found[n_uniq:n_uniq + new.size] = new
-                n_uniq += new.size
-            idx = found
-        else:
-            idx = random_state.permutation(pop_size)[:size]
-            if shape is not None:
-                idx.shape = shape
-
-    if shape is None and isinstance(idx, np.ndarray):
-        # In most cases a scalar will have been made an array
-        idx = idx.item(0)
-
-    # Use samples as indices for a if a is array-like
-    if a.ndim == 0:
-        return idx
-
-    if shape is not None and idx.ndim == 0:
-        # If size == () then the user requested a 0-d array as opposed to
-        # a scalar object when size is None. However a[idx] is always a
-        # scalar and not an array. So this makes sure the result is an
-        # array, taking into account that np.array(item) may not work
-        # for object arrays.
-        res = np.empty((), dtype=a.dtype)
-        res[()] = a[idx]
-        return res
-
-    return a[idx]
+        return np.random.choice(a, size, replace, p)
 
 
 def random_choice_csc(n_samples, classes, class_probability=None,
@@ -238,7 +149,7 @@ def random_choice_csc(n_samples, classes, class_probability=None,
         if classes[j].dtype.kind != 'i':
             raise ValueError("class dtype %s is not supported" %
                              classes[j].dtype)
-        classes[j] = astype(classes[j], np.int64, copy=False)
+        classes[j] = classes[j].astype(np.int64, copy=False)
 
         # use uniform distribution if no class_probability is given
         if class_probability is None:
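
`sklearn.utils.random.choice` above keeps its signature but now simply defers to NumPy, so external users can migrate as the new deprecation message suggests. A small usage sketch (not part of the patch):

import numpy as np

# seeded, weighted sampling without the sklearn wrapper
rng = np.random.RandomState(0)
rng.choice(4, size=2, replace=False, p=[0.1, 0.2, 0.3, 0.4])

# unseeded equivalent of choice(..., random_state=None)
np.random.choice(['a', 'b', 'c'], size=2)
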
diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py
index 8515ff2593f31fdb43a8d3055c4906b078e6c8dd..9b081ec45f4212a1b204223f5b03753b372e8b54 100644
--- a/sklearn/utils/sparsefuncs.py
+++ b/sklearn/utils/sparsefuncs.py
@@ -6,7 +6,7 @@
 import scipy.sparse as sp
 import numpy as np
 
-from .fixes import sparse_min_max, bincount
+from .fixes import sparse_min_max
 from .sparsefuncs_fast import (
     csr_mean_variance_axis0 as _csr_mean_var_axis0,
     csc_mean_variance_axis0 as _csc_mean_var_axis0,
@@ -401,10 +401,10 @@ def count_nonzero(X, axis=None, sample_weight=None):
         return out * sample_weight
     elif axis == 0:
         if sample_weight is None:
-            return bincount(X.indices, minlength=X.shape[1])
+            return np.bincount(X.indices, minlength=X.shape[1])
         else:
             weights = np.repeat(sample_weight, np.diff(X.indptr))
-            return bincount(X.indices, minlength=X.shape[1],
+            return np.bincount(X.indices, minlength=X.shape[1],
                             weights=weights)
     else:
         raise ValueError('Unsupported axis: {0}'.format(axis))
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
index 40dcfbbe137ff718fb0060b10de6c296328bdf87..2b720310b21d52124b42c26932d138b2a1deaedb 100644
--- a/sklearn/utils/tests/test_extmath.py
+++ b/sklearn/utils/tests/test_extmath.py
@@ -32,7 +32,6 @@ from sklearn.utils.extmath import row_norms
 from sklearn.utils.extmath import weighted_mode
 from sklearn.utils.extmath import cartesian
 from sklearn.utils.extmath import log_logistic
-from sklearn.utils.extmath import fast_dot, _fast_dot
 from sklearn.utils.extmath import svd_flip
 from sklearn.utils.extmath import _incremental_mean_and_var
 from sklearn.utils.extmath import _deterministic_vector_sign_flip
@@ -426,84 +425,6 @@ def test_logistic_sigmoid():
     assert_array_almost_equal(log_logistic(extreme_x), [-100, 0])
 
 
-def test_fast_dot():
-    # Check fast dot blas wrapper function
-    if fast_dot is np.dot:
-        return
-
-    rng = np.random.RandomState(42)
-    A = rng.random_sample([2, 10])
-    B = rng.random_sample([2, 10])
-
-    try:
-        linalg.get_blas_funcs(['gemm'])[0]
-        has_blas = True
-    except (AttributeError, ValueError):
-        has_blas = False
-
-    if has_blas:
-        # Test _fast_dot for invalid input.
-
-        # Maltyped data.
-        for dt1, dt2 in [['f8', 'f4'], ['i4', 'i4']]:
-            assert_raises(ValueError, _fast_dot, A.astype(dt1),
-                          B.astype(dt2).T)
-
-        # Malformed data.
-
-        # ndim == 0
-        E = np.empty(0)
-        assert_raises(ValueError, _fast_dot, E, E)
-
-        # ndim == 1
-        assert_raises(ValueError, _fast_dot, A, A[0])
-
-        # ndim > 2
-        assert_raises(ValueError, _fast_dot, A.T, np.array([A, A]))
-
-        # min(shape) == 1
-        assert_raises(ValueError, _fast_dot, A, A[0, :][None, :])
-
-        # test for matrix mismatch error
-        assert_raises(ValueError, _fast_dot, A, A)
-
-    # Test cov-like use case + dtypes.
-    for dtype in ['f8', 'f4']:
-        A = A.astype(dtype)
-        B = B.astype(dtype)
-
-        #  col < row
-        C = np.dot(A.T, A)
-        C_ = fast_dot(A.T, A)
-        assert_almost_equal(C, C_, decimal=5)
-
-        C = np.dot(A.T, B)
-        C_ = fast_dot(A.T, B)
-        assert_almost_equal(C, C_, decimal=5)
-
-        C = np.dot(A, B.T)
-        C_ = fast_dot(A, B.T)
-        assert_almost_equal(C, C_, decimal=5)
-
-    # Test square matrix * rectangular use case.
-    A = rng.random_sample([2, 2])
-    for dtype in ['f8', 'f4']:
-        A = A.astype(dtype)
-        B = B.astype(dtype)
-
-        C = np.dot(A, B)
-        C_ = fast_dot(A, B)
-        assert_almost_equal(C, C_, decimal=5)
-
-        C = np.dot(A.T, B)
-        C_ = fast_dot(A.T, B)
-        assert_almost_equal(C, C_, decimal=5)
-
-    if has_blas:
-        for x in [np.array([[d] * 10] * 2) for d in [np.inf, np.nan]]:
-            assert_raises(ValueError, _fast_dot, x, x.T)
-
-
 def test_incremental_variance_update_formulas():
     # Test Youngs and Cramer incremental variance formulas.
     # Doggie data from http://www.mathsisfun.com/data/standard-deviation.html
diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py
index de48d8a33691c4ea9dbe35a2266b2fba1b961fdd..7bdcfc2fc13dfb005306f813c1a5b3a972131a57 100644
--- a/sklearn/utils/tests/test_fixes.py
+++ b/sklearn/utils/tests/test_fixes.py
@@ -4,46 +4,18 @@
 # License: BSD 3 clause
 
 import pickle
-import numpy as np
-import math
 
 from sklearn.utils.testing import assert_equal
-from sklearn.utils.testing import assert_false
-from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_array_equal
 
 from sklearn.utils.fixes import divide
-from sklearn.utils.fixes import astype
 from sklearn.utils.fixes import MaskedArray
-from sklearn.utils.fixes import norm
 
 
 def test_divide():
     assert_equal(divide(.6, 1), .600000000000)
 
 
-def test_astype_copy_memory():
-    a_int32 = np.ones(3, np.int32)
-
-    # Check that dtype conversion works
-    b_float32 = astype(a_int32, dtype=np.float32, copy=False)
-    assert_equal(b_float32.dtype, np.float32)
-
-    # Changing dtype forces a copy even if copy=False
-    assert_false(np.may_share_memory(b_float32, a_int32))
-
-    # Check that copy can be skipped if requested dtype match
-    c_int32 = astype(a_int32, dtype=np.int32, copy=False)
-    assert_true(c_int32 is a_int32)
-
-    # Check that copy can be forced, and is the case by default:
-    d_int32 = astype(a_int32, dtype=np.int32, copy=True)
-    assert_false(np.may_share_memory(d_int32, a_int32))
-
-    e_int32 = astype(a_int32, dtype=np.int32)
-    assert_false(np.may_share_memory(e_int32, a_int32))
-
-
 def test_masked_array_obj_dtype_pickleable():
     marr = MaskedArray([1, None, 'a'], dtype=object)
 
@@ -52,26 +24,3 @@ def test_masked_array_obj_dtype_pickleable():
         marr_pickled = pickle.loads(pickle.dumps(marr))
         assert_array_equal(marr.data, marr_pickled.data)
         assert_array_equal(marr.mask, marr_pickled.mask)
-
-
-def test_norm():
-    X = np.array([[-2, 4, 5],
-                  [1, 3, -4],
-                  [0, 0, 8],
-                  [0, 0, 0]]).astype(float)
-
-    # Test various axis and order
-    assert_equal(math.sqrt(135), norm(X))
-    assert_array_equal(
-        np.array([math.sqrt(5), math.sqrt(25), math.sqrt(105)]),
-        norm(X, axis=0)
-    )
-    assert_array_equal(np.array([3, 7, 17]), norm(X, axis=0, ord=1))
-    assert_array_equal(np.array([2, 4, 8]), norm(X, axis=0, ord=np.inf))
-    assert_array_equal(np.array([0, 0, 0]), norm(X, axis=0, ord=-np.inf))
-    assert_array_equal(np.array([11, 8, 8, 0]), norm(X, axis=1, ord=1))
-
-    # Test shapes
-    assert_equal((), norm(X).shape)
-    assert_equal((3,), norm(X, axis=0).shape)
-    assert_equal((4,), norm(X, axis=1).shape)