diff --git a/doc/conf.py b/doc/conf.py index 72ebc1be58a9d49a56cc22b17e757356f665787e..79a5de7745e26a10ec081075c2c29cb4f8aab902 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -22,7 +22,7 @@ sys.path.insert(0, os.path.abspath('sphinxext')) # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'numpydoc', 'sphinx.ext.pngmath'] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'numpydoc', 'sphinx.ext.pngmath'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -37,17 +37,17 @@ source_suffix = '.rst' master_doc = 'index' # General information about the project. -project = u'scikit-learn' -copyright = u'2010, Scikit-Learn Developers' +project = u'scikits.learn' +copyright = u'2010, scikits.learn developers' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '0.1' +version = '0.2-beta' # The full version, including alpha/beta/rc tags. -release = '0.1' +release = '0.2-beta' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -192,3 +192,4 @@ latex_documents = [ # If false, no module index is generated. #latex_use_modindex = True + diff --git a/doc/contribute.rst b/doc/contribute.rst index 71e4d185f46b2531e3315ce655ce370e815a558d..0d36485369ddef885626e9c00ef50c0a380e4541 100644 --- a/doc/contribute.rst +++ b/doc/contribute.rst @@ -19,8 +19,7 @@ you just made some modifications that you'd like to share with the world. The way to proceed is the following: 1. Create a patch file. The command:: - - svn diff > patch.diff + svn diff > patch.diff will create a file "patch.diff" with the changes you made to the code base. diff --git a/doc/modules/em.rst b/doc/modules/em.rst index db9a4658d7210d6b002ba7afaeda11de17b37a40..586651abe23217257c6be826a78b2fa657b8f9c3 100644 --- a/doc/modules/em.rst +++ b/doc/modules/em.rst @@ -56,7 +56,7 @@ plot it. The following example shows how to create a 2-dimensional Gaussian model with 3 components, sample it and plot its confidence ellipsoids with matplotlib: -.. literalinclude:: ../../../scikits/learn/em/examples/basic_example1.py +.. literalinclude:: ../../scikits/learn/em/examples/basic_example1.py which plots this figure: @@ -84,7 +84,7 @@ iterations of EM; once the EM has finished the computation, the GM instance of GMM contains the computed parameters. -.. literalinclude:: ../../../scikits/learn/em/examples/basic_example2.py +.. literalinclude:: ../../scikits/learn/em/examples/basic_example2.py The GMM class does all the hard work for learning: it can compute the sufficient @@ -111,7 +111,7 @@ clusters, and prints which number of clusters is the most likely from the BIC: -.. literalinclude:: ../../../scikits/learn/em/examples/basic_example3.py +.. literalinclude:: ../../scikits/learn/em/examples/basic_example3.py which plots this figure:
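As a reviewer's aside, the workflow those three examples cover can be summarized in one sketch. The ``GM``, ``GMM`` and ``EM`` names are the ones used in the em documentation above, but every signature below (``fromvalues``, ``sample``, ``train``, the ``'kmean'`` initialization flag, the covariance layout) is an assumption based on the referenced example files, not verified API::

    import numpy as np
    from scikits.learn.em import GM, GMM, EM   # names used in the docs above

    # hypothetical parameters of a 2-dimensional, 3-component mixture
    weights = np.array([0.3, 0.4, 0.3])
    means = np.array([[0., 0.], [3., 3.], [0., 4.]])
    covariances = np.array([np.eye(2)] * 3)

    # build a known model and sample it (constructor and layout assumed)
    gm = GM.fromvalues(weights, means, covariances)
    data = gm.sample(1000)

    # learn a fresh model from the samples: k-means init, then EM iterations
    lgm = GM(2, 3)            # empty model: 2 dimensions, 3 components
    gmm = GMM(lgm, 'kmean')   # wrap the model with the learning machinery
    EM().train(data, gmm)     # after training, lgm holds the learned parameters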
diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst index 8b47f61b18c37ce556482ea4c98bdb98994bdb70..73253c1ed5140bbb603e47cac1a443361dbbc563 100644 --- a/doc/modules/feature_selection.rst +++ b/doc/modules/feature_selection.rst @@ -13,7 +13,7 @@ univariate test statistic. Although it can be seen as a preprocessing step to an estimator, `scikit.learn` exposes an object that wraps an existing estimator with feature selection and exposes a new estimator: -.. autofunction:: scikits.learn.feature_select.univ_selection.UnivSelection +.. autofunction:: scikits.learn.feature_selection.univ_selection.UnivSelection @@ -28,30 +28,30 @@ Feature scoring functions For classification ....................... -.. autofunction:: scikits.learn.feature_select.univ_selection.f_classif +.. autofunction:: scikits.learn.feature_selection.univ_selection.f_classif For regression ................. -.. autofunction:: scikits.learn.feature_select.univ_selection.f_regression +.. autofunction:: scikits.learn.feature_selection.univ_selection.f_regression Feature selection functions ---------------------------- -.. autofunction:: scikits.learn.feature_select.univ_selection.select_k_best +.. autofunction:: scikits.learn.feature_selection.univ_selection.select_k_best -.. autofunction:: scikits.learn.feature_select.univ_selection.select_percentile +.. autofunction:: scikits.learn.feature_selection.univ_selection.select_percentile -.. autofunction:: scikits.learn.feature_select.univ_selection.select_fpr +.. autofunction:: scikits.learn.feature_selection.univ_selection.select_fpr -.. autofunction:: scikits.learn.feature_select.univ_selection.select_fdr +.. autofunction:: scikits.learn.feature_selection.univ_selection.select_fdr -.. autofunction:: scikits.learn.feature_select.univ_selection.select_fwe +.. autofunction:: scikits.learn.feature_selection.univ_selection.select_fwe Examples ---------- -.. literalinclude:: ../../examples/feature_select.py +.. literalinclude:: ../../examples/feature_selection.py diff --git a/doc/modules/glm.rst b/doc/modules/glm.rst index bb22508b0993665d35ed2fb2508ab60fb737dc3d..0945fa5a8e087fb25b9b5b4e72e04f6bf2b371f1 100644 --- a/doc/modules/glm.rst +++ b/doc/modules/glm.rst @@ -14,9 +14,9 @@ Parameter W is estimated by least squares. .. what happens if there are duplicate rows ? -Linear regression is done via instances of :class:`LinearRegression`. +Linear regression is done via instances of the following class: -.. autoclass:: scikits.learn.glm.LinearRegression +.. autoclass:: scikits.learn.glm.regression.LinearRegression :members: >>> from scikits.learn import glm
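Since the autoclass target moves, a minimal usage sketch may help reviewers check the new location. The ``coef_`` attribute is documented in ``scikits/learn/glm/regression.py`` later in this patch; the ``fit(X, y)`` call assumes the class follows the usual estimator pattern::

    import numpy as np
    from scikits.learn import glm

    # three collinear points; the fitted coefficients should be ~[0.5, 0.5]
    X = np.array([[0., 0.], [1., 1.], [2., 2.]])
    y = np.array([0., 1., 2.])

    clf = glm.LinearRegression()
    clf.fit(X, y)     # assumed estimator-style API
    print clf.coef_   # estimated coefficients (documented attribute)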
diff --git a/doc/modules/manifold.rst b/doc/modules/manifold.rst deleted file mode 100644 index e93b9fc0a2dcf032c361f0e1b2a6a5140fdd80ee..0000000000000000000000000000000000000000 --- a/doc/modules/manifold.rst +++ /dev/null @@ -1,45 +0,0 @@ -================= -Manifold Learning -================= - - -.. automodule:: scikits.learn.manifold - -Compression -=========== - -.. automodule:: scikits.learn.manifold.compression - :members: - -barycenters ------------ - -.. automodule:: scikits.learn.manifold.compression.barycenters - :members: - - -k-Nearest Neighbor -================== - -the k-nearest neighbors algorithm (k-NN) is a method for classifying -objects based on closest training examples in the feature space. k-NN -is a type of instance-based learning, or lazy learning where the -function is only approximated locally and all computation is deferred -until classification. The k-nearest neighbor algorithm is amongst the -simplest of all machine learning algorithms: an object is classified -by a majority vote of its neighbors, with the object being assigned to -the class most common amongst its k nearest neighbors (k is a positive -integer, typically small). If k = 1, then the object is simply -assigned to the class of its nearest neighbor. - - - -.. autoclass:: scikits.learn.manifold.regression.neighbors.Neighbors - :members: - - -Tools -===== - -.. automodule:: scikits.learn.manifold.compression.tools - :members: diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index ad5c316f02dfe85f18388e922a221e21e86e52a1..e15c6146bda4655c7ef72bec1117269e5b2e614c 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -2,81 +2,75 @@ Support Vector Machines ======================= -Support vector machines (SVMs) are a set of supervised learning +**Support vector machines (SVMs)** are a set of supervised learning methods used for classification and regression. In simple words, given a set of training examples, with each sample marked as belonging to one of multiple categories, an SVM training algorithm builds a model that predicts whether a new example falls into one category or the other. +More formally, a support vector machine constructs a hyperplane or set +of hyperplanes in a high or infinite dimensional space, which can be +used for classification, regression or other tasks. Intuitively, a +good separation is achieved by the hyperplane that has the largest +distance to the nearest training datapoints of any class (so-called +functional margin), since in general the larger the margin the lower +the generalization error of the classifier. +SVMs belong to a family of generalized linear classifiers. They can +also be considered a special case of Tikhonov regularization. A +special property is that they simultaneously minimize the empirical +classification error and maximize the geometric margin; hence they are +also known as maximum margin classifiers. + +A comparison of the SVM to other classifiers has been made by Meyer, +Leisch and Hornik (2003). Classification ============== - -Classification is implemented in class SVC. There are two variants of the algorithm, C-SVC and Nu-SVC. - +In the case of support vector machines, a data point is viewed as a +p-dimensional vector (a list of p numbers), and we want to know +whether we can separate such points with a (p-1)-dimensional +hyperplane. There are many hyperplanes that might classify the +data. One reasonable choice as the best hyperplane is the one that +represents the largest separation, or margin, between the two +classes. So we choose the hyperplane so that the distance from it to +the nearest data point on each side is maximized. If such a hyperplane +exists, it is known as the maximum-margin hyperplane and the linear +classifier it defines is known as a maximum margin classifier. + +Classification of a dataset is implemented in class SVC. .. autoclass:: scikits.learn.svm.SVC :members: +This class implements two Support Vector Machine classification +algorithms, C-SVC and Nu-SVC. These can be selected with the keyword +impl of the constructor, as in the sketch below. The following is a +brief description of the two algorithms (you do not have to understand +it to use the classifier). For detailed information, please consult +the references.
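To make the ``impl`` keyword concrete, here is a minimal sketch; the ``impl``, ``C`` and ``nu`` parameters are taken from the ``SVC`` constructor in ``scikits/learn/svm.py`` further down in this patch, and the toy data is made up::

    import numpy as np
    from scikits.learn import svm

    X = np.array([[0., 0.], [1., 1.]])    # toy training vectors
    Y = np.array([0., 1.])                # class labels

    clf = svm.SVC(impl='c_svc', C=1.0)    # C-SVC, the default formulation
    clf.fit(X, Y)

    clf = svm.SVC(impl='nu_svc', nu=0.5)  # Nu-SVC: nu bounds the fractions of
    clf.fit(X, Y)                         # training errors and support vectors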
-C-support vector classification (C-SVC) ---------------------------------------- -Given training vectors :math:`x_i \in \mathbb{R}^n , i=1, ..., l` in two classes, and a vector :math:`y \in \mathbb{R}^l` such that :math:`y_i \in {1, -1}`, C-SVC solves the following primal problem: - -.. math:: \min_{w, b, \xi} {1 \over 2} w^T w + C \sum_{i=1}^l \xi_i - - \textrm{subject to}\ y_i (w^T \phi(x_i) + b) \geq 1 - \xi_i - - \xi_i >= 0, i=1, .., l - -Here training vectors :math:`x_i` are mapped into a higher (maybe infinite) dimensional space by the function :math:`phi`. The decision function is - -.. math:: sgn(\sum_{i=0}^l y_i \alpha_i K(x_i, x) + b) - - -Nu-Support Vector Classification -------------------------------- -The nu-Support Vector Classification uses a new parameter :math:`\nu` -which controls the number of support vectors and trainign errors. The -parameter :math:`nu \in (0, 1]` is an upper bound on the fraction of -training errors and a lower bound of the fraction of support vectors. - -Given training vectors :math:`x_i \in \mathbb{R}^n , i=1, ..., l` in two classes, and a vector :math:`y \in \mathbb{R}^l` such that :math:`y_i \in {1, -1}`, C-SVC solves the following primal problem: -.. math:: \min_{w, b, \xi} {1 \over 2} w^T w - \nu \rho + {1 \over 2} \sum_{i=1}^l \xi_i - - \textrm{subject to}\ y_i (w^T \phi(x_i) + b) \geq \rho - \xi_i - - \xi_i \geq 0, i=1, .., l, \rho \geq 0 - -The decision function is: - -.. math:: sgn(\sum_{i=1}^l y_i \alpha_i K(x_i, x) + b - -Implementation -------------- - -Both problems are implemented in class scikits.learn.svm.SVC . This class follows the pattern of an estimator. See section Parameters for more details about available parameters. - -Examples -------- -.. literalinclude:: ../../examples/plot_svm.py +Regression +========== +Given a set of data points, :math:`{(x_1, z_1), ..., (x_l, z_l)}`, such that :math:`x_i \in \mathbb{R}^n` is an input and :math:`z_i \in \mathbb{R}` is a target output, the standard form of support vector regression is implemented in class SVR: +.. autoclass:: scikits.learn.svm.SVR + :members: Distribution estimation ======================= -One-class +One-class SVM was proposed by Schölkopf et al. (2001) for estimating +the support of a high-dimensional distribution. Given training vectors +:math:`x_i \in \mathbb{R}^n, i=1, .., l` without any class +information, the primal form is: -Regression -========== +.. math:: \min_{w, \xi, \rho} {1 \over 2} w^T w - \rho + {1 \over \nu l} \sum_{i=1}^l \xi_i + \textrm{subject to}\ w^T \phi(x_i) \geq \rho - \xi_i - -epsilon-support vector regression (epsilon-SVR), and ν-support vector regression -(ν-SVR) + \xi_i \geq 0, i=1,...,l Parameters @@ -118,6 +112,60 @@ Coefficient for support vectors TODO: include image + + +Examples +-------- +.. literalinclude:: ../../examples/plot_svm.py + +This creates the plot: + +.. image:: svm_data/example_plot.png + + +Mathematical formulation +======================== + + +C-support vector classification (C-SVC) +--------------------------------------- +Given training vectors :math:`x_i \in \mathbb{R}^n , i=1, ..., l` in +two classes, and a vector :math:`y \in \mathbb{R}^l` such that +:math:`y_i \in {1, -1}`, C-SVC solves the following primal problem: + +.. math:: \min_{w, b, \xi} {1 \over 2} w^T w + C \sum_{i=1}^l \xi_i +.. math:: \textrm{subject to}\ y_i (w^T \phi(x_i) + b) \geq 1 - \xi_i +.. math:: \xi_i \geq 0, i=1, ..., l + +Here training vectors :math:`x_i` are mapped into a higher (maybe +infinite) dimensional space by the function :math:`\phi`. The decision +function is + +.. math:: sgn(\sum_{i=1}^l y_i \alpha_i K(x_i, x) + b) + + +Nu-Support Vector Classification +-------------------------------- +The nu-Support Vector Classification uses a new parameter :math:`\nu` +which controls the number of support vectors and training errors. The +parameter :math:`\nu \in (0, 1]` is an upper bound on the fraction of +training errors and a lower bound of the fraction of support vectors. + +Given training vectors :math:`x_i \in \mathbb{R}^n , i=1, ..., l` in +two classes, and a vector :math:`y \in \mathbb{R}^l` such that +:math:`y_i \in {1, -1}`, Nu-SVC solves the following primal problem: + +.. math:: \min_{w, b, \xi, \rho} {1 \over 2} w^T w - \nu \rho + {1 \over l} \sum_{i=1}^l \xi_i + + \textrm{subject to}\ y_i (w^T \phi(x_i) + b) \geq \rho - \xi_i + + \xi_i \geq 0, i=1, ..., l, \rho \geq 0 + +The decision function is: + +.. math:: sgn(\sum_{i=1}^l y_i \alpha_i K(x_i, x) + b) + + Low-level implementation ======================== @@ -125,3 +173,9 @@ Internally, we use libsvm[1] to handle all computations. Libsvm is bound through some wrappers written in C and Cython. .. [1] http://www.csie.ntu.edu.tw/~cjlin/libsvm/ + + +References +========== + +http://en.wikipedia.org/wiki/Support_vector_machine
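A short regression counterpart to the classification sketch above; the constructor parameters come from the ``SVR`` class in ``scikits/learn/svm.py`` below, while ``predict`` is an assumption that ``SVR`` follows the same estimator pattern as ``SVC``::

    import numpy as np
    from scikits.learn import svm

    X = np.array([[0., 0.], [1., 1.], [2., 2.]])   # training vectors
    z = np.array([0.1, 1.1, 1.9])                  # real-valued targets

    clf = svm.SVR(svm='epsilon_svr', kernel='rbf')
    clf.fit(X, z)
    print clf.predict([[1.5, 1.5]])   # assumed, following the estimator API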
diff --git a/doc/modules/svm_data/example_plot.png b/doc/modules/svm_data/example_plot.png new file mode 100644 index 0000000000000000000000000000000000000000..99a87d6a83b284dc87e325cc82349c1c05b206f4 Binary files /dev/null and b/doc/modules/svm_data/example_plot.png differ diff --git a/examples/feature_select.py b/examples/feature_selection.py similarity index 100% rename from examples/feature_select.py rename to examples/feature_selection.py diff --git a/examples/plot_svm.py b/examples/plot_svm.py index c0c8c5861966ea4b895272e1940dcfec2b6fbb9b..f4f2decd8b4e6057995c8b2563ed27ac8a89afb3 100644 --- a/examples/plot_svm.py +++ b/examples/plot_svm.py @@ -16,7 +16,7 @@ h=.05 # step size in the mesh # we create an instance of SVM and fit our data. We do not scale our # data since we want to plot the support vectors -clf = svm.SVC(kernel='linear', scale=False) +clf = svm.SVC(kernel='linear') clf.fit(X, Y) # Plot the decision boundary. For that, we will assign a color to each diff --git a/scikits/learn/glm/regression.py b/scikits/learn/glm/regression.py index 35b30a909c6c82fbb2e524d2aca2adcec1cedc8d..7412cf2cc46bb04df785b408509c07a1579ca64d 100644 --- a/scikits/learn/glm/regression.py +++ b/scikits/learn/glm/regression.py @@ -19,8 +19,8 @@ class LinearRegression(object): ---------- This class takes no parameters - Members - ------- + Attributes + ---------- coef_ : array Estimated coefficients for the linear regression problem. diff --git a/scikits/learn/svm.py b/scikits/learn/svm.py index c3b19b12f113aad2e0c0ccb22deb890e314f69c6..7df4143c8e38c1a24d1978f4eaa065913aaa637f 100644 --- a/scikits/learn/svm.py +++ b/scikits/learn/svm.py @@ -38,7 +38,8 @@ class BaseSVM(object): def fit(self, X, y): """ - should empty arrays created be order='C' ? + Fit the model with vectors X, Y. + """ X = np.asanyarray(X, dtype=np.float, order='C') y = np.asanyarray(y, dtype=np.float, order='C') @@ -111,7 +112,7 @@ class SVC(BaseSVM): """ Support Vector Classification - Implementats C-SVC, nu-SVC + Implements C-SVC, nu-SVC Parameters ---------- @@ -126,7 +127,7 @@ formulations of the SVM optimization problem. Can be one of 'c_svc', 'nu_svc'. By default 'c_svc' will be chosen. - nu: float, optional + nu : float, optional An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Should be in the interval (0, 1]. @@ -142,17 +143,16 @@ degree of kernel function is significant only in POLY, RBF, SIGMOID - Members - ------- - support_ : array-like, shape = [nSV, D] - estimated support vectors. - where nSV is the number of support vectors, D is the dimension - of the underlying space. - coef_ : array + Attributes + ---------- + support : array-like, shape = [nSV, nfeatures] + support vectors + + coef : array coefficient of the support vector in the decision function. - rho_ : array + rho : array constants in decision function
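The renamed attributes can be read back after fitting. A minimal sketch, using only names documented in the docstring above (whether they are populated exactly as shown is not verified here)::

    import numpy as np
    from scikits.learn import svm

    X = np.array([[0., 0.], [1., 1.]])
    Y = np.array([0., 1.])

    clf = svm.SVC(kernel='linear')
    clf.fit(X, Y)

    print clf.support   # support vectors, shape [nSV, nfeatures]
    print clf.coef      # coefficients of the support vectors in the decision function
    print clf.rho       # constants in the decision function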
@@ -168,15 +168,17 @@ class SVC(BaseSVM): See also -------- -http://scikit-learn.sourceforge.net/doc/modules/svm.html +SVR -http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf + References + ---------- + - http://scikit-learn.sourceforge.net/doc/modules/svm.html + - http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf """ def __init__(self, impl='c_svc', kernel='rbf', degree=3, gamma=0.0, coef0=0.0, cache_size=100.0, eps=1e-3, C=1.0, nr_weight=0, nu=0.5, p=0.1, shrinking=1, probability=0): - BaseSVM.__init__(self, impl, kernel, degree, gamma, coef0, cache_size, eps, C, nr_weight, nu, p, shrinking, probability) @@ -192,7 +194,11 @@ class SVR(BaseSVM): Training vector Y : array, shape = [N] Target vector relative to X - + + + See also + -------- + SVC """ def __init__(self, svm='epsilon_svr', kernel='rbf', degree=3, gamma=0.0, coef0=0.0, cache_size=100.0, eps=1e-3,