From 6a2d8d5bbc261309d59ac30bf650e982f1477dd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Wed, 2 Nov 2016 12:57:04 +0100
Subject: [PATCH] [MRG+2] BUILD Rewrite setup.py files to handle cython
 dependencies (#7719)

* Rewriting of cythonization in setup.py

By using Cython.Build.cythonize and switching between .c and .pyx files
as appropriate, cython dependencies are correctly taken into account.

* Use cythonize once on the root config rather than in each subpackage

* Fix for Windows

* Remove caching from Travis

Cython dependencies are handled by Cython.Build.cythonize based on
file timestamps, so .c and .so files will always be rebuilt from
scratch on each build in Travis.

* Specify .pyx in setup.py files for cython generated extensions

More natural this way. Tweak the extensions to build from the .c and
.cpp files for a release.

* COSMIT Remove commented out code

* Check cython version is greater than 0.23

* COSMIT better names for functions

* flake8 fix (imported module not at top of file)

* Install cython 0.23 for Python 2.6

now that the cython >= 0.23 requirement is enforced in setup.py

* Use module constant for minimum required cython version

* Fix Travis install.sh

No easy way to put comments inside a multi-line command
---
 .travis.yml                         |  15 +--
 build_tools/cythonize.py            | 198 ----------------------------
 build_tools/travis/install.sh       |  19 ++-
 build_tools/travis/test_script.sh   |   5 +-
 setup.py                            |  37 +-----
 sklearn/__check_build/setup.py      |   2 +-
 sklearn/_build_utils/__init__.py    |  51 ++++++-
 sklearn/cluster/setup.py            |  26 ++--
 sklearn/datasets/setup.py           |   2 +-
 sklearn/decomposition/setup.py      |   4 +-
 sklearn/ensemble/setup.py           |   2 +-
 sklearn/feature_extraction/setup.py |   2 +-
 sklearn/linear_model/setup.py       |   6 +-
 sklearn/manifold/setup.py           |   4 +-
 sklearn/metrics/cluster/setup.py    |   2 +-
 sklearn/metrics/setup.py            |   2 +-
 sklearn/neighbors/setup.py          |   8 +-
 sklearn/setup.py                    |  15 ++-
 sklearn/svm/setup.py                |  10 +-
 sklearn/tree/setup.py               |   8 +-
 sklearn/utils/setup.py              |  26 ++--
 sklearn/utils/sparsetools/setup.py  |  12 +-
 22 files changed, 128 insertions(+), 328 deletions(-)
 delete mode 100755 build_tools/cythonize.py

diff --git a/.travis.yml b/.travis.yml
index 1f517d188c..5677901f66 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,13 +6,7 @@ language: python
 # Pre-install packages for the ubuntu distribution
 cache:
   apt: true
-  # We use three different cache directory
-  # to work around a Travis bug with multi-platform cache
   directories:
-  - $HOME/sklearn_build_ubuntu
-  - $HOME/sklearn_build_oldest
-  - $HOME/sklearn_build_latest
-  - $HOME/sklearn_build_numpy_dev
   - $HOME/.cache/pip
   - $HOME/download
 addons:
@@ -33,21 +27,19 @@ env:
     # This environment tests that scikit-learn can be built against
     # versions of numpy, scipy with ATLAS that comes with Ubuntu Precise 12.04
     - DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.4"
-      CACHED_BUILD_DIR="$HOME/sklearn_build_ubuntu" COVERAGE=true
+      COVERAGE=true
     # This environment tests the oldest supported anaconda env
     - DISTRIB="conda" PYTHON_VERSION="2.6" INSTALL_MKL="false"
-      NUMPY_VERSION="1.6.2" SCIPY_VERSION="0.11.0" CYTHON_VERSION="0.21"
-      CACHED_BUILD_DIR="$HOME/sklearn_build_oldest"
+      NUMPY_VERSION="1.6.2" SCIPY_VERSION="0.11.0" CYTHON_VERSION="0.23"
     # This environment tests the newest supported anaconda env
     # It also runs tests requiring Pandas.
     - DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
       NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" PANDAS_VERSION="0.18.0"
-      CYTHON_VERSION="0.23.4" CACHED_BUILD_DIR="$HOME/sklearn_build_latest"
+      CYTHON_VERSION="0.23.4"
     # flake8 linting on diff wrt common ancestor with upstream/master
     - RUN_FLAKE8="true" SKIP_TESTS="true"
       DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
       NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4"
-      CACHED_BUILD_DIR="$HOME/dummy"
 
 
 matrix:
@@ -66,7 +58,6 @@ matrix:
     # the before_install step with and addons/apt/packages declaration.
     -  python: 3.5
        env: DISTRIB="scipy-dev-wheels"
-            CACHED_BUILD_DIR="$HOME/sklearn_build_numpy_dev"
        sudo: True
        before_install: sudo apt-get install -yqq libatlas3gf-base libatlas-dev
 
diff --git a/build_tools/cythonize.py b/build_tools/cythonize.py
deleted file mode 100755
index b01da58231..0000000000
--- a/build_tools/cythonize.py
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/usr/bin/env python
-""" cythonize
-
-Cythonize pyx files into C files as needed.
-
-Usage: cythonize [root_dir]
-
-Default [root_dir] is 'sklearn'.
-
-Checks pyx files to see if they have been changed relative to their
-corresponding C files.  If they have, then runs cython on these files to
-recreate the C files.
-
-The script detects changes in the pyx/pxd files using checksums
-[or hashes] stored in a database file
-
-Simple script to invoke Cython on all .pyx
-files; while waiting for a proper build system. Uses file hashes to
-figure out if rebuild is needed.
-
-It is called by ./setup.py sdist so that sdist package can be installed without
-cython
-
-Originally written by Dag Sverre Seljebotn, and adapted from statsmodel 0.6.1
-(Modified BSD 3-clause)
-
-We copied it for scikit-learn.
-
-Note: this script does not check any of the dependent C libraries; it only
-operates on the Cython .pyx files or their corresponding Cython header (.pxd)
-files.
-"""
-# Author: Arthur Mensch <arthur.mensch@inria.fr>
-# Author: Raghav R V <rvraghav93@gmail.com>
-#
-# License: BSD 3 clause
-
-from __future__ import division, print_function, absolute_import
-
-import os
-import re
-import sys
-import hashlib
-import subprocess
-
-HASH_FILE = 'cythonize.dat'
-DEFAULT_ROOT = 'sklearn'
-
-# WindowsError is not defined on unix systems
-try:
-    WindowsError
-except NameError:
-    WindowsError = None
-
-
-def cythonize(cython_file, gen_file):
-    try:
-        from Cython.Compiler.Version import version as cython_version
-        from distutils.version import LooseVersion
-        if LooseVersion(cython_version) < LooseVersion('0.21'):
-            raise Exception('Building scikit-learn requires Cython >= 0.21')
-
-    except ImportError:
-        pass
-
-    flags = ['--fast-fail']
-    if gen_file.endswith('.cpp'):
-        flags += ['--cplus']
-
-    try:
-        try:
-            rc = subprocess.call(['cython'] +
-                                 flags + ["-o", gen_file, cython_file])
-            if rc != 0:
-                raise Exception('Cythonizing %s failed' % cython_file)
-        except OSError:
-            # There are ways of installing Cython that don't result in a cython
-            # executable on the path, see scipy issue gh-2397.
-            rc = subprocess.call([sys.executable, '-c',
-                                  'import sys; from Cython.Compiler.Main '
-                                  'import setuptools_main as main;'
-                                  ' sys.exit(main())'] + flags +
-                                 ["-o", gen_file, cython_file])
-            if rc != 0:
-                raise Exception('Cythonizing %s failed' % cython_file)
-    except OSError:
-        raise OSError('Cython needs to be installed')
-
-
-def load_hashes(filename):
-    """Load the hashes dict from the hashfile"""
-    # { filename : (sha1 of header if available or 'NA',
-    #               sha1 of input,
-    #               sha1 of output) }
-
-    hashes = {}
-    try:
-        with open(filename, 'r') as cython_hash_file:
-            for hash_record in cython_hash_file:
-                (filename, header_hash,
-                 cython_hash, gen_file_hash) = hash_record.split()
-                hashes[filename] = (header_hash, cython_hash, gen_file_hash)
-    except (KeyError, ValueError, AttributeError, IOError):
-        hashes = {}
-    return hashes
-
-
-def save_hashes(hashes, filename):
-    """Save the hashes dict to the hashfile"""
-    with open(filename, 'w') as cython_hash_file:
-        for key, value in hashes.items():
-            cython_hash_file.write("%s %s %s %s\n"
-                                   % (key, value[0], value[1], value[2]))
-
-
-def sha1_of_file(filename):
-    h = hashlib.sha1()
-    with open(filename, "rb") as f:
-        h.update(f.read())
-    return h.hexdigest()
-
-
-def clean_path(path):
-    """Clean the path"""
-    path = path.replace(os.sep, '/')
-    if path.startswith('./'):
-        path = path[2:]
-    return path
-
-
-def get_hash_tuple(header_path, cython_path, gen_file_path):
-    """Get the hashes from the given files"""
-
-    header_hash = (sha1_of_file(header_path)
-                   if os.path.exists(header_path) else 'NA')
-    from_hash = sha1_of_file(cython_path)
-    to_hash = (sha1_of_file(gen_file_path)
-               if os.path.exists(gen_file_path) else 'NA')
-
-    return header_hash, from_hash, to_hash
-
-
-def cythonize_if_unchanged(path, cython_file, gen_file, hashes):
-    full_cython_path = os.path.join(path, cython_file)
-    full_header_path = full_cython_path.replace('.pyx', '.pxd')
-    full_gen_file_path = os.path.join(path, gen_file)
-
-    current_hash = get_hash_tuple(full_header_path, full_cython_path,
-                                  full_gen_file_path)
-
-    if current_hash == hashes.get(clean_path(full_cython_path)):
-        print('%s has not changed' % full_cython_path)
-        return
-
-    print('Processing %s' % full_cython_path)
-    cythonize(full_cython_path, full_gen_file_path)
-
-    # changed target file, recompute hash
-    current_hash = get_hash_tuple(full_header_path, full_cython_path,
-                                  full_gen_file_path)
-
-    # Update the hashes dict with the new hash
-    hashes[clean_path(full_cython_path)] = current_hash
-
-
-def check_and_cythonize(root_dir):
-    print(root_dir)
-    hashes = load_hashes(HASH_FILE)
-
-    for cur_dir, dirs, files in os.walk(root_dir):
-        for filename in files:
-            if filename.endswith('.pyx'):
-                gen_file_ext = '.c'
-                # Cython files with libcpp imports should be compiled to cpp
-                with open(os.path.join(cur_dir, filename), 'rb') as f:
-                    data = f.read()
-                    m = re.search(b"libcpp", data, re.I | re.M)
-                    if m:
-                        gen_file_ext = ".cpp"
-                cython_file = filename
-                gen_file = filename.replace('.pyx', gen_file_ext)
-                cythonize_if_unchanged(cur_dir, cython_file, gen_file, hashes)
-
-                # Save hashes once per module. This prevents cythonizing prev.
-                # files again when debugging broken code in a single file
-                save_hashes(hashes, HASH_FILE)
-
-
-def main(root_dir=DEFAULT_ROOT):
-    check_and_cythonize(root_dir)
-
-
-if __name__ == '__main__':
-    try:
-        root_dir_arg = sys.argv[1]
-    except IndexError:
-        root_dir_arg = DEFAULT_ROOT
-    main(root_dir_arg)
diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh
index bfd39bc58a..def59e35f1 100755
--- a/build_tools/travis/install.sh
+++ b/build_tools/travis/install.sh
@@ -53,17 +53,23 @@ if [[ "$DISTRIB" == "conda" ]]; then
     if [[ "$INSTALL_MKL" == "true" ]]; then
         conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
             numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION numpy scipy \
-            cython=$CYTHON_VERSION libgfortran mkl flake8 \
+            libgfortran mkl flake8 \
             ${PANDAS_VERSION+pandas=$PANDAS_VERSION}
             
     else
         conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
-            numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION cython=$CYTHON_VERSION \
+            numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
             libgfortran nomkl \
             ${PANDAS_VERSION+pandas=$PANDAS_VERSION}
     fi
     source activate testenv
 
+    # Temporary work around for Python 2.6 because cython >= 0.23 is
+    # required for building scikit-learn but python 2.6 and cython
+    # 0.23 are not compatible in conda. Remove the next line and
+    # install cython via conda when Python 2.6 support is removed.
+    pip install cython==$CYTHON_VERSION
+
     # Install nose-timer via pip
     pip install nose-timer
 
@@ -101,15 +107,6 @@ fi
 if [[ "$SKIP_TESTS" == "true" ]]; then
     echo "No need to build scikit-learn when not running the tests"
 else
-    if [ ! -d "$CACHED_BUILD_DIR" ]; then
-        mkdir -p $CACHED_BUILD_DIR
-    fi
-
-    rsync -av --exclude '.git/' --exclude='testvenv/' \
-          $TRAVIS_BUILD_DIR $CACHED_BUILD_DIR
-
-    cd $CACHED_BUILD_DIR/scikit-learn
-
     # Build scikit-learn in the install.sh script to collapse the verbose
     # build output in the travis output when it succeeds.
     python --version
diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh
index d40fd2e917..3e9bb99781 100755
--- a/build_tools/travis/test_script.sh
+++ b/build_tools/travis/test_script.sh
@@ -39,11 +39,8 @@ run_tests() {
         nosetests -s --with-timer --timer-top-n 20 sklearn
     fi
 
-    # Is directory still empty ?
-    ls -ltra
-
     # Test doc
-    cd $CACHED_BUILD_DIR/scikit-learn
+    cd $OLDPWD
     make test-doc test-sphinxext
 }
 
diff --git a/setup.py b/setup.py
index 1a50d3b618..1a10d70f3b 100755
--- a/setup.py
+++ b/setup.py
@@ -84,9 +84,6 @@ class CleanCommand(Clean):
         cwd = os.path.abspath(os.path.dirname(__file__))
         remove_c_files = not os.path.exists(os.path.join(cwd, 'PKG-INFO'))
         if remove_c_files:
-            cython_hash_file = os.path.join(cwd, 'cythonize.dat')
-            if os.path.exists(cython_hash_file):
-                os.unlink(cython_hash_file)
             print('Will remove generated .c files')
         if os.path.exists('build'):
             shutil.rmtree('build')
@@ -181,18 +178,6 @@ def get_numpy_status():
     return numpy_status
 
 
-def generate_cython():
-    cwd = os.path.abspath(os.path.dirname(__file__))
-    print("Cythonizing sources")
-    p = subprocess.call([sys.executable, os.path.join(cwd,
-                                                      'build_tools',
-                                                      'cythonize.py'),
-                         'sklearn'],
-                        cwd=cwd)
-    if p != 0:
-        raise RuntimeError("Running cythonize failed!")
-
-
 def setup_package():
     metadata = dict(name=DISTNAME,
                     maintainer=MAINTAINER,
@@ -230,7 +215,7 @@ def setup_package():
                                                     'egg_info',
                                                     '--version',
                                                     'clean'))):
-        # For these actions, NumPy is not required, nor Cythonization
+        # For these actions, NumPy is not required
         #
         # They are required to succeed without Numpy for example when
         # pip is used to install Scikit-learn when Numpy is not yet present in
@@ -278,26 +263,6 @@ def setup_package():
 
         metadata['configuration'] = configuration
 
-        if len(sys.argv) >= 2 and sys.argv[1] not in 'config':
-            # Cythonize if needed
-
-            print('Generating cython files')
-            cwd = os.path.abspath(os.path.dirname(__file__))
-            if not os.path.exists(os.path.join(cwd, 'PKG-INFO')):
-                # Generate Cython sources, unless building from source release
-                generate_cython()
-
-            # Clean left-over .so file
-            for dirpath, dirnames, filenames in os.walk(
-                    os.path.join(cwd, 'sklearn')):
-                for filename in filenames:
-                    extension = os.path.splitext(filename)[1]
-                    if extension in (".so", ".pyd", ".dll"):
-                        pyx_file = str.replace(filename, extension, '.pyx')
-                        print(pyx_file)
-                        if not os.path.exists(os.path.join(dirpath, pyx_file)):
-                            os.unlink(os.path.join(dirpath, filename))
-
     setup(**metadata)
 
 
diff --git a/sklearn/__check_build/setup.py b/sklearn/__check_build/setup.py
index c9a76db2b5..b8c30d9c83 100644
--- a/sklearn/__check_build/setup.py
+++ b/sklearn/__check_build/setup.py
@@ -8,7 +8,7 @@ def configuration(parent_package='', top_path=None):
     from numpy.distutils.misc_util import Configuration
     config = Configuration('__check_build', parent_package, top_path)
     config.add_extension('_check_build',
-                         sources=['_check_build.c'],
+                         sources=['_check_build.pyx'],
                          include_dirs=[numpy.get_include()])
 
     return config
diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py
index 85049f7492..0ed4968a1a 100644
--- a/sklearn/_build_utils/__init__.py
+++ b/sklearn/_build_utils/__init__.py
@@ -6,10 +6,15 @@ Utilities useful during the build.
 
 from __future__ import division, print_function, absolute_import
 
-DEFAULT_ROOT = 'sklearn'
+import os
+
+from distutils.version import LooseVersion
 
 from numpy.distutils.system_info import get_info
 
+DEFAULT_ROOT = 'sklearn'
+CYTHON_MIN_VERSION = '0.23'
+
 
 def get_blas_info():
     def atlas_not_found(blas_info_):
@@ -33,3 +38,47 @@ def get_blas_info():
         cblas_libs = blas_info.pop('libraries', [])
 
     return cblas_libs, blas_info
+
+
+def build_from_c_and_cpp_files(extensions):
+    """Modify the extensions to build from the .c and .cpp files.
+
+    This is useful for releases, this way cython is not required to
+    run python setup.py install.
+    """
+    for extension in extensions:
+        sources = []
+        for sfile in extension.sources:
+            path, ext = os.path.splitext(sfile)
+            if ext in ('.pyx', '.py'):
+                if extension.language == 'c++':
+                    ext = '.cpp'
+                else:
+                    ext = '.c'
+                sfile = path + ext
+            sources.append(sfile)
+        extension.sources = sources
+
+
+def maybe_cythonize_extensions(top_path, config):
+    """Tweaks for building extensions between release and development mode."""
+    is_release = os.path.exists(os.path.join(top_path, 'PKG-INFO'))
+
+    if is_release:
+        build_from_c_and_cpp_files(config.ext_modules)
+    else:
+        message = ('Please install cython with a version >= {0} in order '
+                   'to build a scikit-learn development version.').format(
+                       CYTHON_MIN_VERSION)
+        try:
+            import Cython
+            if LooseVersion(Cython.__version__) < CYTHON_MIN_VERSION:
+                message += ' Your version of Cython was {0}.'.format(
+                    Cython.__version__)
+                raise ValueError(message)
+            from Cython.Build import cythonize
+        except ImportError as exc:
+            exc.args += (message,)
+            raise
+
+        config.ext_modules = cythonize(config.ext_modules)
diff --git a/sklearn/cluster/setup.py b/sklearn/cluster/setup.py
index 672983c7aa..99c4dcd617 100644
--- a/sklearn/cluster/setup.py
+++ b/sklearn/cluster/setup.py
@@ -20,30 +20,30 @@ def configuration(parent_package='', top_path=None):
 
     config = Configuration('cluster', parent_package, top_path)
     config.add_extension('_dbscan_inner',
-                         sources=['_dbscan_inner.cpp'],
+                         sources=['_dbscan_inner.pyx'],
                          include_dirs=[numpy.get_include()],
                          language="c++")
 
     config.add_extension('_hierarchical',
-                         sources=['_hierarchical.cpp'],
+                         sources=['_hierarchical.pyx'],
                          language="c++",
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
     config.add_extension('_k_means_elkan',
-                         sources=['_k_means_elkan.c'],
+                         sources=['_k_means_elkan.pyx'],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
-    config.add_extension(
-        '_k_means',
-        libraries=cblas_libs,
-        sources=['_k_means.c'],
-        include_dirs=[join('..', 'src', 'cblas'),
-                      numpy.get_include(),
-                      blas_info.pop('include_dirs', [])],
-        extra_compile_args=blas_info.pop('extra_compile_args', []),
-        **blas_info
-    )
+    config.add_extension('_k_means',
+                         libraries=cblas_libs,
+                         sources=['_k_means.pyx'],
+                         include_dirs=[join('..', 'src', 'cblas'),
+                                       numpy.get_include(),
+                                       blas_info.pop('include_dirs', [])],
+                         extra_compile_args=blas_info.pop(
+                             'extra_compile_args', []),
+                         **blas_info
+                         )
 
     config.add_subpackage('tests')
 
diff --git a/sklearn/datasets/setup.py b/sklearn/datasets/setup.py
index 78327e8b3f..a1def76c1b 100644
--- a/sklearn/datasets/setup.py
+++ b/sklearn/datasets/setup.py
@@ -11,7 +11,7 @@ def configuration(parent_package='', top_path=None):
     config.add_data_dir('images')
     config.add_data_dir(os.path.join('tests', 'data'))
     config.add_extension('_svmlight_format',
-                         sources=['_svmlight_format.c'],
+                         sources=['_svmlight_format.pyx'],
                          include_dirs=[numpy.get_include()])
     config.add_subpackage('tests')
     return config
diff --git a/sklearn/decomposition/setup.py b/sklearn/decomposition/setup.py
index ffa523d2fe..dc57808ddc 100644
--- a/sklearn/decomposition/setup.py
+++ b/sklearn/decomposition/setup.py
@@ -11,12 +11,12 @@ def configuration(parent_package="", top_path=None):
         libraries.append('m')
 
     config.add_extension("_online_lda",
-                         sources=["_online_lda.c"],
+                         sources=["_online_lda.pyx"],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
     config.add_extension('cdnmf_fast',
-                         sources=['cdnmf_fast.c'],
+                         sources=['cdnmf_fast.pyx'],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
diff --git a/sklearn/ensemble/setup.py b/sklearn/ensemble/setup.py
index 59c01f81f6..34fb63b906 100644
--- a/sklearn/ensemble/setup.py
+++ b/sklearn/ensemble/setup.py
@@ -5,7 +5,7 @@ from numpy.distutils.misc_util import Configuration
 def configuration(parent_package="", top_path=None):
     config = Configuration("ensemble", parent_package, top_path)
     config.add_extension("_gradient_boosting",
-                         sources=["_gradient_boosting.c"],
+                         sources=["_gradient_boosting.pyx"],
                          include_dirs=[numpy.get_include()])
 
     config.add_subpackage("tests")
diff --git a/sklearn/feature_extraction/setup.py b/sklearn/feature_extraction/setup.py
index 075cac470b..7b71dfdcc8 100644
--- a/sklearn/feature_extraction/setup.py
+++ b/sklearn/feature_extraction/setup.py
@@ -11,7 +11,7 @@ def configuration(parent_package='', top_path=None):
         libraries.append('m')
 
     config.add_extension('_hashing',
-                         sources=['_hashing.c'],
+                         sources=['_hashing.pyx'],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
     config.add_subpackage("tests")
diff --git a/sklearn/linear_model/setup.py b/sklearn/linear_model/setup.py
index 4f8ab23cac..9c3822b8e7 100644
--- a/sklearn/linear_model/setup.py
+++ b/sklearn/linear_model/setup.py
@@ -16,7 +16,7 @@ def configuration(parent_package='', top_path=None):
     if os.name == 'posix':
         cblas_libs.append('m')
 
-    config.add_extension('cd_fast', sources=['cd_fast.c'],
+    config.add_extension('cd_fast', sources=['cd_fast.pyx'],
                          libraries=cblas_libs,
                          include_dirs=[join('..', 'src', 'cblas'),
                                        numpy.get_include(),
@@ -25,7 +25,7 @@ def configuration(parent_package='', top_path=None):
                                                           []), **blas_info)
 
     config.add_extension('sgd_fast',
-                         sources=['sgd_fast.c'],
+                         sources=['sgd_fast.pyx'],
                          include_dirs=[join('..', 'src', 'cblas'),
                                        numpy.get_include(),
                                        blas_info.pop('include_dirs', [])],
@@ -35,7 +35,7 @@ def configuration(parent_package='', top_path=None):
                          **blas_info)
 
     config.add_extension('sag_fast',
-                         sources=['sag_fast.c'],
+                         sources=['sag_fast.pyx'],
                          include_dirs=numpy.get_include())
 
     # add other directories
diff --git a/sklearn/manifold/setup.py b/sklearn/manifold/setup.py
index d1b6ebf9e0..a2562cd3c0 100644
--- a/sklearn/manifold/setup.py
+++ b/sklearn/manifold/setup.py
@@ -12,7 +12,7 @@ def configuration(parent_package="", top_path=None):
     if os.name == 'posix':
         libraries.append('m')
     config.add_extension("_utils",
-                         sources=["_utils.c"],
+                         sources=["_utils.pyx"],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries,
                          extra_compile_args=["-O3"])
@@ -21,7 +21,7 @@ def configuration(parent_package="", top_path=None):
     eca.append("-O4")
     config.add_extension("_barnes_hut_tsne",
                          libraries=cblas_libs,
-                         sources=["_barnes_hut_tsne.c"],
+                         sources=["_barnes_hut_tsne.pyx"],
                          include_dirs=[join('..', 'src', 'cblas'),
                                        numpy.get_include(),
                                        blas_info.pop('include_dirs', [])],
diff --git a/sklearn/metrics/cluster/setup.py b/sklearn/metrics/cluster/setup.py
index 22debe88fe..910cc829a1 100644
--- a/sklearn/metrics/cluster/setup.py
+++ b/sklearn/metrics/cluster/setup.py
@@ -10,7 +10,7 @@ def configuration(parent_package="", top_path=None):
     if os.name == 'posix':
         libraries.append('m')
     config.add_extension("expected_mutual_info_fast",
-                         sources=["expected_mutual_info_fast.c"],
+                         sources=["expected_mutual_info_fast.pyx"],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
diff --git a/sklearn/metrics/setup.py b/sklearn/metrics/setup.py
index 7e2f4e6ae4..946016e3df 100644
--- a/sklearn/metrics/setup.py
+++ b/sklearn/metrics/setup.py
@@ -15,7 +15,7 @@ def configuration(parent_package="", top_path=None):
         cblas_libs.append('m')
 
     config.add_extension("pairwise_fast",
-                         sources=["pairwise_fast.c"],
+                         sources=["pairwise_fast.pyx"],
                          include_dirs=[os.path.join('..', 'src', 'cblas'),
                                        numpy.get_include(),
                                        blas_info.pop('include_dirs', [])],
diff --git a/sklearn/neighbors/setup.py b/sklearn/neighbors/setup.py
index 575b4fce66..1180b8c365 100644
--- a/sklearn/neighbors/setup.py
+++ b/sklearn/neighbors/setup.py
@@ -11,24 +11,24 @@ def configuration(parent_package='', top_path=None):
         libraries.append('m')
 
     config.add_extension('ball_tree',
-                         sources=['ball_tree.c'],
+                         sources=['ball_tree.pyx'],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
     config.add_extension('kd_tree',
-                         sources=['kd_tree.c'],
+                         sources=['kd_tree.pyx'],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
     config.add_extension('dist_metrics',
-                         sources=['dist_metrics.c'],
+                         sources=['dist_metrics.pyx'],
                          include_dirs=[numpy.get_include(),
                                        os.path.join(numpy.get_include(),
                                                     'numpy')],
                          libraries=libraries)
 
     config.add_extension('typedefs',
-                         sources=['typedefs.c'],
+                         sources=['typedefs.pyx'],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
diff --git a/sklearn/setup.py b/sklearn/setup.py
index 5166785159..8adbbd9d49 100644
--- a/sklearn/setup.py
+++ b/sklearn/setup.py
@@ -2,6 +2,8 @@ import os
 from os.path import join
 import warnings
 
+from sklearn._build_utils import maybe_cythonize_extensions
+
 
 def configuration(parent_package='', top_path=None):
     from numpy.distutils.misc_util import Configuration
@@ -55,12 +57,11 @@ def configuration(parent_package='', top_path=None):
     config.add_subpackage('svm')
 
     # add cython extension module for isotonic regression
-    config.add_extension(
-        '_isotonic',
-        sources=['_isotonic.c'],
-        include_dirs=[numpy.get_include()],
-        libraries=libraries,
-    )
+    config.add_extension('_isotonic',
+                         sources=['_isotonic.pyx'],
+                         include_dirs=[numpy.get_include()],
+                         libraries=libraries,
+                         )
 
     # some libs needs cblas, fortran-compiled BLAS will not be sufficient
     blas_info = get_info('blas_opt', 0)
@@ -78,6 +79,8 @@ def configuration(parent_package='', top_path=None):
     # add the test directory
     config.add_subpackage('tests')
 
+    maybe_cythonize_extensions(top_path, config)
+
     return config
 
 if __name__ == '__main__':
diff --git a/sklearn/svm/setup.py b/sklearn/svm/setup.py
index 711e868b11..399b1a841e 100644
--- a/sklearn/svm/setup.py
+++ b/sklearn/svm/setup.py
@@ -24,7 +24,7 @@ def configuration(parent_package='', top_path=None):
                        extra_link_args=['-lstdc++'],
                        )
 
-    libsvm_sources = ['libsvm.c']
+    libsvm_sources = ['libsvm.pyx']
     libsvm_depends = [join('src', 'libsvm', 'libsvm_helper.c'),
                       join('src', 'libsvm', 'libsvm_template.cpp'),
                       join('src', 'libsvm', 'svm.cpp'),
@@ -38,12 +38,12 @@ def configuration(parent_package='', top_path=None):
                          depends=libsvm_depends,
                          )
 
-    ### liblinear module
+    # liblinear module
     cblas_libs, blas_info = get_blas_info()
     if os.name == 'posix':
         cblas_libs.append('m')
 
-    liblinear_sources = ['liblinear.c',
+    liblinear_sources = ['liblinear.pyx',
                          join('src', 'liblinear', '*.cpp')]
 
     liblinear_depends = [join('src', 'liblinear', '*.h'),
@@ -61,10 +61,10 @@ def configuration(parent_package='', top_path=None):
                          # extra_compile_args=['-O0 -fno-inline'],
                          ** blas_info)
 
-    ## end liblinear module
+    # end liblinear module
 
     # this should go *after* libsvm-skl
-    libsvm_sparse_sources = ['libsvm_sparse.c']
+    libsvm_sparse_sources = ['libsvm_sparse.pyx']
     config.add_extension('libsvm_sparse', libraries=['libsvm-skl'],
                          sources=libsvm_sparse_sources,
                          include_dirs=[numpy.get_include(),
diff --git a/sklearn/tree/setup.py b/sklearn/tree/setup.py
index 94f61ef825..079ae9d869 100644
--- a/sklearn/tree/setup.py
+++ b/sklearn/tree/setup.py
@@ -10,22 +10,22 @@ def configuration(parent_package="", top_path=None):
     if os.name == 'posix':
         libraries.append('m')
     config.add_extension("_tree",
-                         sources=["_tree.c"],
+                         sources=["_tree.pyx"],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries,
                          extra_compile_args=["-O3"])
     config.add_extension("_splitter",
-                         sources=["_splitter.c"],
+                         sources=["_splitter.pyx"],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries,
                          extra_compile_args=["-O3"])
     config.add_extension("_criterion",
-                         sources=["_criterion.c"],
+                         sources=["_criterion.pyx"],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries,
                          extra_compile_args=["-O3"])
     config.add_extension("_utils",
-                         sources=["_utils.c"],
+                         sources=["_utils.pyx"],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries,
                          extra_compile_args=["-O3"])
diff --git a/sklearn/utils/setup.py b/sklearn/utils/setup.py
index ad1e3bef81..9590692b0d 100644
--- a/sklearn/utils/setup.py
+++ b/sklearn/utils/setup.py
@@ -22,11 +22,11 @@ def configuration(parent_package='', top_path=None):
         libraries.append('m')
         cblas_libs.append('m')
 
-    config.add_extension('sparsefuncs_fast', sources=['sparsefuncs_fast.c'],
+    config.add_extension('sparsefuncs_fast', sources=['sparsefuncs_fast.pyx'],
                          libraries=libraries)
 
     config.add_extension('arrayfuncs',
-                         sources=['arrayfuncs.c'],
+                         sources=['arrayfuncs.pyx'],
                          depends=[join('src', 'cholesky_delete.h')],
                          libraries=cblas_libs,
                          include_dirs=cblas_includes,
@@ -34,43 +34,43 @@ def configuration(parent_package='', top_path=None):
                          **blas_info
                          )
 
-    config.add_extension(
-        'murmurhash',
-        sources=['murmurhash.c', join('src', 'MurmurHash3.cpp')],
-        include_dirs=['src'])
+    config.add_extension('murmurhash',
+                         sources=['murmurhash.pyx', join(
+                             'src', 'MurmurHash3.cpp')],
+                         include_dirs=['src'])
 
     config.add_extension('lgamma',
-                         sources=['lgamma.c', join('src', 'gamma.c')],
+                         sources=['lgamma.pyx', join('src', 'gamma.c')],
                          include_dirs=['src'],
                          libraries=libraries)
 
     config.add_extension('graph_shortest_path',
-                         sources=['graph_shortest_path.c'],
+                         sources=['graph_shortest_path.pyx'],
                          include_dirs=[numpy.get_include()])
 
     config.add_extension('fast_dict',
-                         sources=['fast_dict.cpp'],
+                         sources=['fast_dict.pyx'],
                          language="c++",
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
     config.add_extension('seq_dataset',
-                         sources=['seq_dataset.c'],
+                         sources=['seq_dataset.pyx'],
                          include_dirs=[numpy.get_include()])
 
     config.add_extension('weight_vector',
-                         sources=['weight_vector.c'],
+                         sources=['weight_vector.pyx'],
                          include_dirs=cblas_includes,
                          libraries=cblas_libs,
                          **blas_info)
 
     config.add_extension("_random",
-                         sources=["_random.c"],
+                         sources=["_random.pyx"],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
     config.add_extension("_logistic_sigmoid",
-                         sources=["_logistic_sigmoid.c"],
+                         sources=["_logistic_sigmoid.pyx"],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
diff --git a/sklearn/utils/sparsetools/setup.py b/sklearn/utils/sparsetools/setup.py
index 9a6082341f..7b5bc33edd 100644
--- a/sklearn/utils/sparsetools/setup.py
+++ b/sklearn/utils/sparsetools/setup.py
@@ -7,15 +7,11 @@ def configuration(parent_package='', top_path=None):
     config = Configuration('sparsetools', parent_package, top_path)
 
     config.add_extension('_traversal',
-                         sources=['_traversal.c'],
-                         include_dirs=[numpy.get_include()],
-                         #libraries=libraries
-                         )
+                         sources=['_traversal.pyx'],
+                         include_dirs=[numpy.get_include()])
     config.add_extension('_graph_tools',
-                         sources=['_graph_tools.c'],
-                         include_dirs=[numpy.get_include()],
-                         #libraries=libraries
-                         )
+                         sources=['_graph_tools.pyx'],
+                         include_dirs=[numpy.get_include()])
 
     config.add_subpackage('tests')
 
-- 
GitLab