diff --git a/doc/conf.py b/doc/conf.py
index 72ebc1be58a9d49a56cc22b17e757356f665787e..79a5de7745e26a10ec081075c2c29cb4f8aab902 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -22,7 +22,7 @@ sys.path.insert(0, os.path.abspath('sphinxext'))
 
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'numpydoc', 'sphinx.ext.pngmath']
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'numpydoc', 'sphinx.ext.pngmath']
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -37,17 +37,17 @@ source_suffix = '.rst'
 master_doc = 'index'
 
 # General information about the project.
-project = u'scikit-learn'
-copyright = u'2010, Scikit-Learn Developers'
+project = u'scikits.learn'
+copyright = u'2010, scikits.learn developers'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-version = '0.1'
+version = '0.2-beta'
 # The full version, including alpha/beta/rc tags.
-release = '0.1'
+release = '0.2-beta'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -192,3 +192,4 @@ latex_documents = [
 
 # If false, no module index is generated.
 #latex_use_modindex = True
+
diff --git a/doc/contribute.rst b/doc/contribute.rst
index 71e4d185f46b2531e3315ce655ce370e815a558d..0d36485369ddef885626e9c00ef50c0a380e4541 100644
--- a/doc/contribute.rst
+++ b/doc/contribute.rst
@@ -19,8 +19,7 @@ you just made some modifications that you'd like to share with the
 world. The way to proceed is the following:
 
 1. Create a patch file. The command::
-
-  svn diff > patch.diff
+
+    svn diff > patch.diff
 
 will create a file "patch.diff" containing the changes you made to
 the code base.
diff --git a/doc/modules/em.rst b/doc/modules/em.rst
index db9a4658d7210d6b002ba7afaeda11de17b37a40..586651abe23217257c6be826a78b2fa657b8f9c3 100644
--- a/doc/modules/em.rst
+++ b/doc/modules/em.rst
@@ -56,7 +56,7 @@ plot it. The following example shows how to create a 2-dimensional Gaussian model
 with 3 components, sample it and plot its confidence ellipsoids with
 matplotlib:
 
-.. literalinclude::  ../../../scikits/learn/em/examples/basic_example1.py
+.. literalinclude::  ../../scikits/learn/em/examples/basic_example1.py
 
 
 which plots this figure:
@@ -84,7 +84,7 @@ iterations of EM; once the EM has finished the computation, the GM instance of
 GMM contains the computed parameters.
 
 
-.. literalinclude::  ../../../scikits/learn/em/examples/basic_example2.py
+.. literalinclude::  ../../scikits/learn/em/examples/basic_example2.py
 
 
 The GMM class does all the hard work for learning: it can compute the sufficient
@@ -111,7 +111,7 @@ clusters, and prints which number of clusters is the most likely from the BIC:
 
 
 
-.. literalinclude::  ../../../scikits/learn/em/examples/basic_example3.py
+.. literalinclude::  ../../scikits/learn/em/examples/basic_example3.py
 
 
 which plots this figure:
diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst
index 8b47f61b18c37ce556482ea4c98bdb98994bdb70..73253c1ed5140bbb603e47cac1a443361dbbc563 100644
--- a/doc/modules/feature_selection.rst
+++ b/doc/modules/feature_selection.rst
@@ -13,7 +13,7 @@ univariate test statistic. Although it can be seen as a preprocessing step
 to an estimator, `scikits.learn` exposes an object to wrap an existing
 estimator with feature selection and expose a new estimator:
 
-.. autofunction:: scikits.learn.feature_select.univ_selection.UnivSelection
+.. autofunction:: scikits.learn.feature_selection.univ_selection.UnivSelection
 
 
 
@@ -28,30 +28,30 @@ Feature scoring functions
 For classification
 .......................
 
-.. autofunction:: scikits.learn.feature_select.univ_selection.f_classif
+.. autofunction:: scikits.learn.feature_selection.univ_selection.f_classif
 
 For regression
 .................
 
-.. autofunction:: scikits.learn.feature_select.univ_selection.f_regression
+.. autofunction:: scikits.learn.feature_selection.univ_selection.f_regression
 
 Feature selection functions
 ----------------------------
 
-.. autofunction:: scikits.learn.feature_select.univ_selection.select_k_best
+.. autofunction:: scikits.learn.feature_selection.univ_selection.select_k_best
 
-.. autofunction:: scikits.learn.feature_select.univ_selection.select_percentile
+.. autofunction:: scikits.learn.feature_selection.univ_selection.select_percentile
 
-.. autofunction:: scikits.learn.feature_select.univ_selection.select_fpr
+.. autofunction:: scikits.learn.feature_selection.univ_selection.select_fpr
 
-.. autofunction:: scikits.learn.feature_select.univ_selection.select_fdr
+.. autofunction:: scikits.learn.feature_selection.univ_selection.select_fdr
 
-.. autofunction:: scikits.learn.feature_select.univ_selection.select_fwe
+.. autofunction:: scikits.learn.feature_selection.univ_selection.select_fwe
 
 
 Examples
 ----------
 
-.. literalinclude:: ../../examples/feature_select.py
+.. literalinclude:: ../../examples/feature_selection.py
 
 
diff --git a/doc/modules/glm.rst b/doc/modules/glm.rst
index bb22508b0993665d35ed2fb2508ab60fb737dc3d..0945fa5a8e087fb25b9b5b4e72e04f6bf2b371f1 100644
--- a/doc/modules/glm.rst
+++ b/doc/modules/glm.rst
@@ -14,9 +14,9 @@ Parameter W is estimated by least squares.
 
 .. what happens if there are duplicate rows ?
 
-Linear regression is done via instances of :class:`LinearRegression`.
+Linear regression is done via instances of the following class:
 
-.. autoclass:: scikits.learn.glm.LinearRegression
+.. autoclass:: scikits.learn.glm.regression.LinearRegression
     :members:
 
 >>> from scikits.learn import glm
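+
+A minimal usage sketch continuing from the import above (the data are
+illustrative and outputs are elided; the fully qualified class path
+follows the ``autoclass`` entry, and ``fit`` and ``coef_`` are
+documented above):
+
+>>> clf = glm.regression.LinearRegression()
+>>> clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
+>>> clf.coef_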
diff --git a/doc/modules/manifold.rst b/doc/modules/manifold.rst
deleted file mode 100644
index e93b9fc0a2dcf032c361f0e1b2a6a5140fdd80ee..0000000000000000000000000000000000000000
--- a/doc/modules/manifold.rst
+++ /dev/null
@@ -1,45 +0,0 @@
-=================
-Manifold Learning
-=================
-
-
-.. automodule:: scikits.learn.manifold
-
-Compression
-===========
-
-.. automodule:: scikits.learn.manifold.compression
-   :members:
-
-barycenters
------------
-
-.. automodule:: scikits.learn.manifold.compression.barycenters
-   :members:
-
-
-k-Nearest Neighbor
-==================
-
-the k-nearest neighbors algorithm (k-NN) is a method for classifying
-objects based on closest training examples in the feature space. k-NN
-is a type of instance-based learning, or lazy learning where the
-function is only approximated locally and all computation is deferred
-until classification. The k-nearest neighbor algorithm is amongst the
-simplest of all machine learning algorithms: an object is classified
-by a majority vote of its neighbors, with the object being assigned to
-the class most common amongst its k nearest neighbors (k is a positive
-integer, typically small). If k = 1, then the object is simply
-assigned to the class of its nearest neighbor.
-
-
-
-.. autoclass:: scikits.learn.manifold.regression.neighbors.Neighbors
-   :members:
-
-
-Tools
-=====
-
-.. automodule:: scikits.learn.manifold.compression.tools
-   :members:
diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst
index ad5c316f02dfe85f18388e922a221e21e86e52a1..e15c6146bda4655c7ef72bec1117269e5b2e614c 100644
--- a/doc/modules/svm.rst
+++ b/doc/modules/svm.rst
@@ -2,81 +2,75 @@
 Support Vector Machines
 =======================
 
-Support vector machines (SVMs) are a set of supervised learning
+**Support vector machines (SVMs)** are a set of supervised learning
 methods used for classification and regression. In simple words, given
 a set of training examples, with each sample marked as belonging to
 one of two categories, an SVM training algorithm builds a model
 that predicts whether a new example falls into one category or the
 other.
 
+More formally, a support vector machine constructs a hyperplane or set
+of hyperplanes in a high or infinite dimensional space, which can be
+used for classification, regression or other tasks. Intuitively, a
+good separation is achieved by the hyperplane that has the largest
+distance to the nearest training data points of any class (so-called
+functional margin), since in general the larger the margin the lower
+the generalization error of the classifier.
 
+SVMs belong to a family of generalized linear classifiers. They can
+also be considered a special case of Tikhonov regularization. A
+special property is that they simultaneously minimize the empirical
+classification error and maximize the geometric margin; hence they are
+also known as maximum margin classifiers.
+
+A comparison of SVMs to other classifiers has been made by Meyer,
+Leisch and Hornik (see the references below).
 
 Classification
 ==============
-
-Classification is implemented in class SVC. There are two variants of the algorithm, C-SVC and Nu-SVC.
-
+In the case of support vector machines, a data point is viewed as a
+p-dimensional vector (a list of p numbers), and we want to know
+whether we can separate such points with a (p-1)-dimensional
+hyperplane. There are many hyperplanes that might classify the
+data. One reasonable choice as the best hyperplane is the one that
+represents the largest separation, or margin, between the two
+classes. So we choose the hyperplane so that the distance from it to
+the nearest data point on each side is maximized. If such a hyperplane
+exists, it is known as the maximum-margin hyperplane and the linear
+classifier it defines is known as a maximum margin classifier.
+
+Classification of a dataset is implemented in class SVC.
 
 .. autoclass:: scikits.learn.svm.SVC
    :members:
 
+This class implements two classification algorithms based on Support
+Vector Machines, C-SVC and Nu-SVC, which can be selected through the
+keyword argument ``impl`` of the constructor. A brief description of
+both algorithms is given in the section Mathematical formulation below
+(you don't have to understand it to use the classifier); for details,
+please consult the references. A minimal usage sketch follows.
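+
+In this sketch the data are illustrative and outputs are elided;
+``predict`` is assumed to complement ``fit``, as used in
+``examples/plot_svm.py``:
+
+>>> from scikits.learn import svm
+>>> clf = svm.SVC(kernel='linear')
+>>> clf.fit([[0., 0.], [1., 1.]], [0., 1.])
+>>> clf.predict([[2., 2.]])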
 
-C-support vector classification (C-SVC)
----------------------------------------
-Given training vectors :math:`x_i \in \mathbb{R}^n , i=1, ..., l` in two classes, and a vector :math:`y \in \mathbb{R}^l` such that :math:`y_i \in {1, -1}`, C-SVC solves the following primal problem:
-
-.. math::    \min_{w, b, \xi} {1 \over 2} w^T w + C \sum_{i=1}^l \xi_i
-
-              \textrm{subject to}\ y_i (w^T \phi(x_i) + b) \geq 1 - \xi_i
-
-              \xi_i >= 0, i=1, .., l
-
-Here training vectors :math:`x_i` are mapped into a higher (maybe infinite) dimensional space by the function :math:`phi`. The decision function is
-
-.. math::    sgn(\sum_{i=0}^l y_i \alpha_i K(x_i, x) + b)
-
-
-Nu-Support Vector Classification
---------------------------------
-The nu-Support Vector Classification uses a new parameter :math:`\nu`
-which controls the number of support vectors and trainign errors. The
-parameter :math:`nu \in (0, 1]` is an upper bound on the fraction of
-training errors and a lower bound of the fraction of support vectors.
-
-Given training vectors :math:`x_i \in \mathbb{R}^n , i=1, ..., l` in two classes, and a vector :math:`y \in \mathbb{R}^l` such that :math:`y_i \in {1, -1}`, C-SVC solves the following primal problem:
 
-.. math::    \min_{w, b, \xi} {1 \over 2} w^T w - \nu \rho + {1 \over 2} \sum_{i=1}^l \xi_i
-
-              \textrm{subject to}\ y_i (w^T \phi(x_i) + b) \geq \rho - \xi_i
-
-              \xi_i \geq 0, i=1, .., l, \rho \geq 0
-
-The decision function is:
-
-.. math::    sgn(\sum_{i=1}^l y_i \alpha_i K(x_i, x) + b
-
-Implementation
---------------
-
-Both problems are implemented in class scikits.learn.svm.SVC . This class follows the pattern of an estimator. See section Parameters for more details about available parameters.
-
-Examples
---------
-.. literalinclude:: ../../examples/plot_svm.py
 
+Regression
+==========
+Given a set of data points :math:`\{(x_1, z_1), ..., (x_l, z_l)\}`,
+such that :math:`x_i \in \mathbb{R}^n` is an input and :math:`z_i \in
+\mathbb{R}` is a target output, support vector regression estimates a
+function of the inputs that approximates these targets. The standard
+(epsilon-SVR) primal form, following the libsvm formulation, is:
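+
+.. math:: \min_{w, b, \xi, \xi^*} {1 \over 2} w^T w + C \sum_{i=1}^l \xi_i + C \sum_{i=1}^l \xi_i^*
+
+          \textrm{subject to}\ w^T \phi(x_i) + b - z_i \leq \epsilon + \xi_i
+
+          z_i - w^T \phi(x_i) - b \leq \epsilon + \xi_i^*
+
+          \xi_i, \xi_i^* \geq 0, i=1, ..., l
+
+Regression is implemented in class SVR: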
 
+.. autoclass:: scikits.learn.svm.SVR
+   :members:
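+
+A minimal usage sketch (the data are illustrative and outputs are
+elided; the interface is assumed to mirror ``SVC``):
+
+>>> from scikits.learn import svm
+>>> clf = svm.SVR(kernel='linear')
+>>> clf.fit([[0., 0.], [1., 1.], [2., 2.]], [0., 1., 2.])
+>>> clf.predict([[3., 3.]])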
 
 Distribution estimation
 =======================
-One-class
+One-class SVM was proposed by Scholkopf et al. (2001) for estimating
+the support of a high-dimensional distribution. Given training vectors
+:math:`x_i \in \mathbb{R}^n, i=1, ..., l` without any class
+information, the primal form is:
 
-Regression
-==========
+.. math::    \min_{w, \xi, \rho} {1 \over 2} w^T w - \rho + {1 \over \nu l} \sum_{i=1}^l \xi_i
 
+             \textrm{subject to}\ w^T \phi(x_i) \geq \rho - \xi_i
 
-
-epsilon-support vector regression (epsilon-SVR), and ν-support vector regression
-(ν-SVR)
+             \xi_i \geq 0, i=1,...,l
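+
+The corresponding decision function, following the libsvm
+formulation, is:
+
+.. math::    \textrm{sgn}(\sum_{i=1}^l \alpha_i K(x_i, x) - \rho)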
 
 
 Parameters
@@ -118,6 +112,60 @@ Coefficient for support vectors
 
 TODO: include image
 
+
+
+Examples
+--------
+.. literalinclude:: ../../examples/plot_svm.py
+
+This creates the plot:
+
+.. image:: svm_data/example_plot.png
+
+
+Mathematical formulation
+========================
+
+
+C-support vector classification (C-SVC)
+---------------------------------------
+Given training vectors :math:`x_i \in \mathbb{R}^n , i=1, ..., l` in
+two classes, and a vector :math:`y \in \mathbb{R}^l` such that
+:math:`y_i \in \{1, -1\}`, C-SVC solves the following primal problem:
+
+.. math:: \min_{w, b, \xi} {1 \over 2} w^T w + C \sum_{i=1}^l \xi_i
+
+          \textrm{subject to}\ y_i (w^T \phi(x_i) + b) \geq 1 - \xi_i
+
+          \xi_i \geq 0, i=1, ..., l
+
+Here training vectors :math:`x_i` are mapped into a higher (maybe
+infinite) dimensional space by the function :math:`\phi`. The decision
+function is
+
+.. math::    \textrm{sgn}(\sum_{i=1}^l y_i \alpha_i K(x_i, x) + b)
+
+
+Nu-Support Vector Classification
+--------------------------------
+The nu-Support Vector Classification uses a new parameter :math:`\nu`
+which controls the number of support vectors and training errors. The
+parameter :math:`\nu \in (0, 1]` is an upper bound on the fraction of
+training errors and a lower bound of the fraction of support vectors.
+
+Given training vectors :math:`x_i \in \mathbb{R}^n , i=1, ..., l` in
+two classes, and a vector :math:`y \in \mathbb{R}^l` such that
+:math:`y_i \in \{1, -1\}`, Nu-SVC solves the following primal problem:
+
+.. math:: \min_{w, b, \xi, \rho} {1 \over 2} w^T w - \nu \rho + {1 \over l} \sum_{i=1}^l \xi_i
+
+          \textrm{subject to}\ y_i (w^T \phi(x_i) + b) \geq \rho - \xi_i
+
+          \xi_i \geq 0, i=1, ..., l, \rho \geq 0
+
+The decision function is:
+
+.. math::    \textrm{sgn}(\sum_{i=1}^l y_i \alpha_i K(x_i, x) + b)
+
+
 Low-level implementation
 ========================
 
@@ -125,3 +173,9 @@ Internally, we use libsvm[1] to handle all computations. Libsvm is bound
 to Python through wrappers written in C and Cython.
 
 .. [1] http://www.csie.ntu.edu.tw/~cjlin/libsvm/
+
+
+References
+==========
+
+http://en.wikipedia.org/wiki/Support_vector_machine
diff --git a/doc/modules/svm_data/example_plot.png b/doc/modules/svm_data/example_plot.png
new file mode 100644
index 0000000000000000000000000000000000000000..99a87d6a83b284dc87e325cc82349c1c05b206f4
Binary files /dev/null and b/doc/modules/svm_data/example_plot.png differ
diff --git a/examples/feature_select.py b/examples/feature_selection.py
similarity index 100%
rename from examples/feature_select.py
rename to examples/feature_selection.py
diff --git a/examples/plot_svm.py b/examples/plot_svm.py
index c0c8c5861966ea4b895272e1940dcfec2b6fbb9b..f4f2decd8b4e6057995c8b2563ed27ac8a89afb3 100644
--- a/examples/plot_svm.py
+++ b/examples/plot_svm.py
@@ -16,7 +16,7 @@ h=.05 # step size in the mesh
 
 # we create an instance of SVM and fit our data. We do not scale our
 # data since we want to plot the support vectors
-clf = svm.SVC(kernel='linear', scale=False)
+clf = svm.SVC(kernel='linear')
 clf.fit(X, Y)
 
 # Plot the decision boundary. For that, we will assign a color to each
diff --git a/scikits/learn/glm/regression.py b/scikits/learn/glm/regression.py
index 35b30a909c6c82fbb2e524d2aca2adcec1cedc8d..7412cf2cc46bb04df785b408509c07a1579ca64d 100644
--- a/scikits/learn/glm/regression.py
+++ b/scikits/learn/glm/regression.py
@@ -19,8 +19,8 @@ class LinearRegression(object):
     ----------
     This class takes no parameters
 
-    Members
-    -------
+    Attributes
+    ----------
     coef_ : array
         Estimated coefficients for the linear regression problem.
 
diff --git a/scikits/learn/svm.py b/scikits/learn/svm.py
index c3b19b12f113aad2e0c0ccb22deb890e314f69c6..7df4143c8e38c1a24d1978f4eaa065913aaa637f 100644
--- a/scikits/learn/svm.py
+++ b/scikits/learn/svm.py
@@ -38,7 +38,8 @@ class BaseSVM(object):
 
     def fit(self, X, y):
         """
-        should empty arrays created be order='C' ?
+        Fit the model using vectors X, y.
+
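+        Parameters
+        ----------
+        X : array-like, shape = [N, D]
+            Training vectors.
+        y : array-like, shape = [N]
+            Target values.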
         """
         X = np.asanyarray(X, dtype=np.float, order='C')
         y = np.asanyarray(y, dtype=np.float, order='C')
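+        # asanyarray with order='C' makes the data C-contiguous, as
+        # required by the underlying libsvm wrappers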
@@ -111,7 +112,7 @@ class SVC(BaseSVM):
     """
     Support Vector Classification
 
-    Implementats C-SVC, nu-SVC
+    Implements C-SVC, nu-SVC
 
     Parameters
     ----------
@@ -126,7 +127,7 @@ class SVC(BaseSVM):
         formulations of the SVM optimization problem.
         Can be one of 'c_svc', 'nu_svc'. By default 'c_svc' will be chosen.
 
-    nu: float, optional
+    nu : float, optional
         An upper bound on the fraction of training errors and a lower
         bound of the fraction of support vectors. Should be in the
         interval (0, 1].
@@ -142,17 +143,16 @@ class SVC(BaseSVM):
         degree of the kernel function;
         significant only for the polynomial, RBF and sigmoid kernels
 
-    Members
-    -------
-    support_ : array-like, shape = [nSV, D]
-        estimated support vectors.
-        where nSV is the number of support vectors, D is the dimension
-        of the underlying space.
 
-    coef_ : array
+    Attributes
+    ----------
+    support : array-like, shape = [nSV, nfeatures]
+        support vectors
+
+    coef : array
         coefficients of the support vectors in the decision function.
 
-    rho_ : array
+    rho : array
         constants in decision function
 
 
@@ -168,15 +168,17 @@ class SVC(BaseSVM):
 
     See also
     --------
-    http://scikit-learn.sourceforge.net/doc/modules/svm.html
+    SVR
 
-    http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf
+    References
+    ----------
+    - http://scikit-learn.sourceforge.net/doc/modules/svm.html
+    - http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf
     """
     def __init__(self, impl='c_svc', kernel='rbf', degree=3,
                  gamma=0.0, coef0=0.0, cache_size=100.0, eps=1e-3,
                  C=1.0, nr_weight=0, nu=0.5, p=0.1, shrinking=1,
                  probability=0):
-
         BaseSVM.__init__(self, impl, kernel, degree, gamma, coef0,
                          cache_size, eps, C, nr_weight, nu, p,
                          shrinking, probability)    
@@ -192,7 +194,11 @@ class SVR(BaseSVM):
         Training vector
     Y : array, shape = [N]
         Target vector relative to X
-    
+
+
+    See also
+    --------
+    SVC
     """
     def __init__(self, svm='epsilon_svr', kernel='rbf', degree=3,
                  gamma=0.0, coef0=0.0, cache_size=100.0, eps=1e-3,