diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst
index 18c66b8975cbc956885a1c794319b709cdcc5a4f..96ea729f8163e1ffff310e663e054b59153ee53d 100644
--- a/doc/datasets/index.rst
+++ b/doc/datasets/index.rst
@@ -3,7 +3,7 @@
 
     >>> import numpy as np
     >>> import os
-    >>> from scikits.learn import datasets
+    >>> from sklearn import datasets
     >>> datasets.mldata.urllib2 = mock_urllib2
 
 .. _datasets:
@@ -12,9 +12,9 @@
 Dataset loading utilities
 =========================
 
-.. currentmodule:: scikits.learn.datasets
+.. currentmodule:: sklearn.datasets
 
-The ``scikits.learn.datasets`` package embeds some small toy datasets
+The ``sklearn.datasets`` package embeds some small toy datasets
 as introduced in the "Getting Started" section.
 
 To evaluate the impact of the scale of the dataset (``n_samples`` and
@@ -108,7 +108,7 @@ Scipy sparse CSR matrices are used for ``X`` and numpy arrays are used for ``y``
 
 You may load a dataset like this::
 
-  >>> from scikits.learn.datasets import load_svmlight_file
+  >>> from sklearn.datasets import load_svmlight_file
   >>> X_train, y_train = load_svmlight_file("/path/to/train_dataset.txt")
   ...                                                         # doctest: +SKIP
 
diff --git a/doc/datasets/labeled_faces.rst b/doc/datasets/labeled_faces.rst
index 89673b4cd20e24aa5cc3bcec93f23344ec8cf647..7f86da507ca9297d54ef131c0e5aafbc028e3171 100644
--- a/doc/datasets/labeled_faces.rst
+++ b/doc/datasets/labeled_faces.rst
@@ -39,7 +39,7 @@ less than 200ms by using a memmaped version memoized on the disk in the
 The first loader is used for the Face Identification task: a multi-class
 classification task (hence supervised learning)::
 
-  >>> from scikits.learn.datasets import fetch_lfw_people
+  >>> from sklearn.datasets import fetch_lfw_people
   >>> lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
 
   >>> for name in lfw_people.target_names:
@@ -74,7 +74,7 @@ array::
 The second loader is typically used for the face verification task: each sample
 is a pair of two picture belonging or not to the same person::
 
-  >>> from scikits.learn.datasets import fetch_lfw_pairs
+  >>> from sklearn.datasets import fetch_lfw_pairs
   >>> lfw_pairs_train = fetch_lfw_pairs(subset='train')
 
   >>> list(lfw_pairs_train.target_names)
diff --git a/doc/datasets/labeled_faces_fixture.py b/doc/datasets/labeled_faces_fixture.py
index ac15044f1caa7a50587428a1722758d68daae858..0d13c8ddd80a903f720cc685886ecae647126f3b 100644
--- a/doc/datasets/labeled_faces_fixture.py
+++ b/doc/datasets/labeled_faces_fixture.py
@@ -6,7 +6,7 @@ and cached in the past.
 from os.path import exists
 from os.path import join
 from nose import SkipTest
-from scikits.learn.datasets import get_data_home
+from sklearn.datasets import get_data_home
 
 
 def setup_module(module):
diff --git a/doc/datasets/mldata.rst b/doc/datasets/mldata.rst
index 12d824b2a2a1df0f2a64a6ffcdbbb0876d63b790..65b10b3013787650bdadeff0569d4aeb1cc13d4e 100644
--- a/doc/datasets/mldata.rst
+++ b/doc/datasets/mldata.rst
@@ -4,12 +4,12 @@ Downloading datasets from the mldata.org repository
 `mldata.org <http://mldata.org>`_ is a public repository for machine learning
 data, supported by the `PASCAL network <http://www.pascal-network.org>`_ .
 
-The ``scikits.learn.datasets`` package is able to directly download data
+The ``sklearn.datasets`` package is able to directly download data
 sets from the repository using the function ``fetch_mldata(dataname)``.
 
 For example, to download the MNIST digit recognition database::
 
-  >>> from scikits.learn.datasets import fetch_mldata
+  >>> from sklearn.datasets import fetch_mldata
   >>> mnist = fetch_mldata('MNIST original', data_home=custom_data_home)
 
 The MNIST database contains a total of 70000 examples of handwritten digits
@@ -36,7 +36,7 @@ datasets:
 
 * The data arrays in `mldata.org <http://mldata.org>`_ are most often
   shaped as ``(n_features, n_samples)``. This is the opposite of the
-  ``scikits.learn`` convention, so ``fetch_mldata`` transposes the matrix
+  ``scikit-learn`` convention, so ``fetch_mldata`` transposes the matrix
   by default. The ``transpose_data`` keyword controls this behavior::
 
     >>> iris = fetch_mldata('iris', data_home=custom_data_home)
diff --git a/doc/datasets/mldata_fixture.py b/doc/datasets/mldata_fixture.py
index 192daa4af5598fccda35801e15e990c6b62a84bb..2267288c38fb54542211d1b5c038548650e1c9f9 100644
--- a/doc/datasets/mldata_fixture.py
+++ b/doc/datasets/mldata_fixture.py
@@ -5,8 +5,8 @@ Mock urllib2 access to mldata.org
 
 from os import makedirs
 from os.path import join
-from scikits.learn import datasets
-from scikits.learn.utils.testing import mock_urllib2
+from sklearn import datasets
+from sklearn.utils.testing import mock_urllib2
 import tempfile
 import scipy as sp
 import shutil
diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst
index 13fd2f1955b800d1d38daf023afd7cf2f0910264..c4fd379e2111ac07ae2cba8011bc9b9206f99134 100644
--- a/doc/datasets/twenty_newsgroups.rst
+++ b/doc/datasets/twenty_newsgroups.rst
@@ -13,21 +13,21 @@ provides a version where the data is already vectorized.
 
 This is not the case for this loader. Instead, it returns the list of
 the raw text files that can be fed to  text feature extractors such as
-:class:`scikits.learn.feature_extraction.text.Vectorizer` with custom
+:class:`sklearn.feature_extraction.text.Vectorizer` with custom
 parameters so as to extract feature vectors.
 
 
 Usage
 -----
 
-The ``scikits.learn.datasets.fetch_20newsgroups`` function is a data
+The ``sklearn.datasets.fetch_20newsgroups`` function is a data
 fetching / caching functions that downloads the data archive from
 the original `20 newsgroups website`_, extracts the archive contents
 in the ``~/scikit_learn_data/20news_home`` folder and calls the
-``scikits.learn.datasets.load_file`` on either the training or
+``sklearn.datasets.load_file`` on either the training or
 testing set folder, or both of them::
 
-  >>> from scikits.learn.datasets import fetch_20newsgroups
+  >>> from sklearn.datasets import fetch_20newsgroups
   >>> newsgroups_train = fetch_20newsgroups(subset='train')
 
   >>> from pprint import pprint
@@ -81,11 +81,11 @@ list of the categories to load to the ``fetch_20newsgroups`` function::
 In order to feed predictive or clustering models with the text data,
 one first need to turn the text into vectors of numerical values suitable
 for statistical analysis. This can be achieved with the utilities of the
-``scikits.learn.feature_extraction.text`` as demonstrated in the following
+``sklearn.feature_extraction.text`` as demonstrated in the following
 example that extract `TF-IDF`_ vectors of unigram tokens::
 
 
-  >>> from scikits.learn.feature_extraction.text import Vectorizer
+  >>> from sklearn.feature_extraction.text import Vectorizer
   >>> documents = [open(f).read() for f in newsgroups_train.filenames]
   >>> vectorizer = Vectorizer()
   >>> vectors = vectorizer.fit_transform(documents)
diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst
index 250e0e82df0ac6c9b7a5ee44782d723a3d061340..d962348e6d9cbf9628b2306292574d611c2a9c8a 100644
--- a/doc/developers/performance.rst
+++ b/doc/developers/performance.rst
@@ -26,7 +26,7 @@ code for the scikit-learn project.
 Python, Cython or C/C++?
 ========================
 
-.. currentmodule:: scikits.learn
+.. currentmodule:: sklearn
 
 In general, the scikit-learn project emphasizes the **readability** of
 the source code to make it easy for the project users to dive into the
@@ -89,9 +89,9 @@ Suppose we want to profile the Non Negative Matrix Factorization module
 of the scikit. Let us setup a new IPython session and load the digits
 dataset and as in the :ref:`example_decomposition_plot_nmf.py` example::
 
-  In [1]: from scikits.learn.decomposition import NMF
+  In [1]: from sklearn.decomposition import NMF
 
-  In [2]: from scikits.learn.datasets import load_digits
+  In [2]: from sklearn.datasets import load_digits
 
   In [3]: X = load_digits().data
 
@@ -188,16 +188,16 @@ Towards the end of the file, define the ``%lprun`` magic::
 
 Now restart IPython and let us use this new toy::
 
-  In [1]: from scikits.learn.datasets import load_digits
+  In [1]: from sklearn.datasets import load_digits
 
-  In [2]: from scikits.learn.decomposition.nmf import _nls_subproblem, NMF
+  In [2]: from sklearn.decomposition.nmf import _nls_subproblem, NMF
 
   In [3]: X = load_digits().data
 
   In [4]: %lprun -f _nls_subproblem NMF(n_components=16, tol=1e-2).fit(X)
   Timer unit: 1e-06 s
 
-  File: scikits/learn/decomposition/nmf.py
+  File: sklearn/decomposition/nmf.py
   Function: _nls_subproblem at line 137
   Total time: 1.73153 s
 
diff --git a/examples/cluster/README.txt b/examples/cluster/README.txt
index 767b917d2bb6351233d081149aca51a77d34c23d..1b38bab9cbb285283045e84098104988c5e70fb3 100644
--- a/examples/cluster/README.txt
+++ b/examples/cluster/README.txt
@@ -3,5 +3,5 @@
 Clustering
 ----------
 
-Examples concerning the `scikits.learn.cluster` package.
+Examples concerning the `sklearn.cluster` package.
 
diff --git a/examples/covariance/README.txt b/examples/covariance/README.txt
index 5160f8bb618cc2eaa3b5b5958af634c1b804ae0e..0767f1031d8b04effd2d82fd5e69970390830e03 100644
--- a/examples/covariance/README.txt
+++ b/examples/covariance/README.txt
@@ -1,4 +1,4 @@
 Covariance estimation
 ---------------------
 
-Examples concerning the `scikits.learn.covariance` package.
+Examples concerning the `sklearn.covariance` package.
diff --git a/examples/decomposition/README.txt b/examples/decomposition/README.txt
index c2bd41efe01f2409121a27a543a5495f9e28ba82..b5f710c810f77c5d35a60264d7ba13a24d590fb5 100644
--- a/examples/decomposition/README.txt
+++ b/examples/decomposition/README.txt
@@ -3,5 +3,5 @@
 Decomposition 
 -------------
 
-Examples concerning the `scikits.learn.decomposition` package.
+Examples concerning the `sklearn.decomposition` package.
 
diff --git a/examples/gaussian_process/README.txt b/examples/gaussian_process/README.txt
index c749e7a7e9dc903bfa877626658159608af80355..216660e8acfe39a37cfeba9e07fa7bc3aeea1be8 100644
--- a/examples/gaussian_process/README.txt
+++ b/examples/gaussian_process/README.txt
@@ -3,5 +3,5 @@
 Gaussian Process for Machine Learning
 -------------------------------------
 
-Examples concerning the `scikits.learn.gaussian_process` package.
+Examples concerning the `sklearn.gaussian_process` package.
 
diff --git a/examples/gaussian_process/gp_diabetes_dataset.py b/examples/gaussian_process/gp_diabetes_dataset.py
index f3d1e46cdf04107aefa8e9226fa0490b5b8291d2..fbf0b791b6c033ba4fd436eed7ffae12ffbb8ffd 100644
--- a/examples/gaussian_process/gp_diabetes_dataset.py
+++ b/examples/gaussian_process/gp_diabetes_dataset.py
@@ -27,7 +27,7 @@ from sklearn import datasets
 from sklearn.gaussian_process import GaussianProcess
 from sklearn.cross_val import cross_val_score, KFold
 
-# Load the dataset from scikits' data sets
+# Load the dataset from scikit-learn's data sets
 diabetes = datasets.load_diabetes()
 X, y = diabetes.data, diabetes.target
 
diff --git a/examples/linear_model/README.txt b/examples/linear_model/README.txt
index 77439a5aa15eae3d76c3ab5e499c3d84267941f2..d70d3bed9d2bf0f09bfe33b52f10293b7438a40f 100644
--- a/examples/linear_model/README.txt
+++ b/examples/linear_model/README.txt
@@ -2,4 +2,4 @@
 Generalized Linear Models
 -------------------------
 
-Examples concerning the `scikits.learn.linear_model` package.
+Examples concerning the `sklearn.linear_model` package.
diff --git a/examples/manifold/README.txt b/examples/manifold/README.txt
index 42a5dfcca72fe26ddcbf4c0feede5c892248fb22..13a9ed3bbe9ca576f49e1eb8de2a3b16f6c87ed2 100644
--- a/examples/manifold/README.txt
+++ b/examples/manifold/README.txt
@@ -3,5 +3,5 @@
 Manifold learning
 -----------------------
 
-Examples concerning the `scikits.learn.manifold` package.
+Examples concerning the `sklearn.manifold` package.
 
diff --git a/examples/manifold/plot_lle_digits.py.prof b/examples/manifold/plot_lle_digits.py.prof
deleted file mode 100644
index 2d30366716f99aa15baa8284df16ef3f2900d2b0..0000000000000000000000000000000000000000
Binary files a/examples/manifold/plot_lle_digits.py.prof and /dev/null differ
diff --git a/examples/mixture/README.txt b/examples/mixture/README.txt
index bbf508e59d26d8987f7a301cd91e60b7e79d6cc9..1cc9671e401501926275ded9c8eb58149f21e767 100644
--- a/examples/mixture/README.txt
+++ b/examples/mixture/README.txt
@@ -2,4 +2,4 @@
 Gaussian Mixture Models
 -----------------------
 
-Examples concerning the `scikits.learn.mixture` package.
+Examples concerning the `sklearn.mixture` package.
diff --git a/examples/svm/README.txt b/examples/svm/README.txt
index 9c83e641b5b682049eba33af979d7e89d08a0e44..f9f3b57afc4566e1e73fdcfa4faa1dfde7af37f6 100644
--- a/examples/svm/README.txt
+++ b/examples/svm/README.txt
@@ -3,5 +3,5 @@
 Support Vector Machines
 -----------------------
 
-Examples concerning the `scikits.learn.svm` package.
+Examples concerning the `sklearn.svm` package.
 
diff --git a/setup.py b/setup.py
index af79845f6e57ba0c8db64ea4cffe9b0ecd32666c..1197d029ac5a330fa797bc7246c7a359fe3ab99c 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@ import sys
 import os
 import shutil
 
-DISTNAME = 'sklearn'
+DISTNAME = 'scikit-learn'
 DESCRIPTION = 'A set of python modules for machine learning and data mining'
 LONG_DESCRIPTION = open('README.rst').read()
 MAINTAINER = 'Fabian Pedregosa'