diff --git a/scikits/learn/linear_model/sparse/logistic.py b/scikits/learn/linear_model/sparse/logistic.py
index dbccc24ad1ecdbe3c7bfcb7872c5d6baede4e25f..73e76c76c3aabaa7be853cd8e661d3f3ee1a825d 100644
--- a/scikits/learn/linear_model/sparse/logistic.py
+++ b/scikits/learn/linear_model/sparse/logistic.py
@@ -6,7 +6,6 @@ designed to handle efficiently data in sparse matrix format.
 """
 
 import numpy as np
-from scipy import sparse
 
 from ...base import ClassifierMixin
 from ...svm.sparse.base import SparseBaseLibLinear
@@ -71,17 +70,18 @@ class LogisticRegression(SparseBaseLibLinear, ClassifierMixin):
             dual=dual, loss='lr', eps=eps, C=C,
             fit_intercept=fit_intercept)
 
-    def predict_proba(self, T):
+    def predict_proba(self, X):
         """
         Probability estimates.
 
         The returned estimates for all classes are ordered by the
         label of classes.
         """
-        T = sparse.csr_matrix(T)
-        T.data = np.asanyarray(T.data, dtype=np.float64, order='C')
-        probas = csr_predict_prob(T.shape[1], T.data, T.indices,
-                                  T.indptr, self.raw_coef_,
+        import scipy.sparse
+        X = scipy.sparse.csr_matrix(X)
+        X.data = np.asanyarray(X.data, dtype=np.float64, order='C')
+        probas = csr_predict_prob(X.shape[1], X.data, X.indices,
+                                  X.indptr, self.raw_coef_,
                                   self._get_solver_type(),
                                   self.eps, self.C,
                                   self.class_weight_label,
diff --git a/scikits/learn/svm/__init__.py b/scikits/learn/svm/__init__.py
index 9f121c3ad7230580e95f42003627559db5245a2d..a2ac5f3fefcd838dc23c13560630fa285f49cdae 100644
--- a/scikits/learn/svm/__init__.py
+++ b/scikits/learn/svm/__init__.py
@@ -1,8 +1,13 @@
 """
-Module that implements Support Vector Machnine related algorithms.
+Support Vector Machine algorithms.
 
 See http://scikit-learn.sourceforge.net/modules/svm.html for complete
 documentation.
+
+Author: Fabian Pedregosa <fabian.pedregosa@inria.fr> with help from
+        the scikit-learn community. LibSVM and LibLinear are copyright
+        of their respective owners.
+License: New BSD, (C) INRIA 2010
 """
 
 from .libsvm import SVC, NuSVC, SVR, NuSVR, OneClassSVM
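The predict_proba change above only renames the argument and swaps the module-level scipy import for a local one; the calling convention stays the same. Below is a minimal usage sketch, not part of the patch, assuming the default LogisticRegression constructor and importing the class straight from the module touched by this diff:

import numpy as np
import scipy.sparse
from scikits.learn.linear_model.sparse.logistic import LogisticRegression

# Four samples with two features, stored as CSR. Any input accepted by
# scipy.sparse.csr_matrix() (including a dense array) would also work,
# since fit and predict_proba convert their input to CSR internally.
X = scipy.sparse.csr_matrix(np.array([[0., 0.], [1., 0.], [0., 1.], [1., 1.]]))
y = np.array([0, 0, 1, 1])

clf = LogisticRegression()
clf.fit(X, y)

# One row per sample, one column per class, ordered by class label.
probas = clf.predict_proba(X)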
""" + self._set_params(**params) + X = np.asanyarray(X, dtype=np.float64, order='C') y = np.asanyarray(y, dtype=np.float64, order='C') sample_weight = np.asanyarray(sample_weight, dtype=np.float64, @@ -160,7 +161,7 @@ class BaseLibSVM(BaseEstimator): Returns ------- - C : array, shape = [nsample] + C : array, shape = [n_samples] """ T = np.atleast_2d(np.asanyarray(T, dtype=np.float64, order='C')) kernel_type, T = self._get_kernel(T) @@ -338,23 +339,15 @@ class BaseLibLinear(BaseEstimator): def predict(self, X): """ - This function does classification or regression on an array of - test vectors X. - - For a classification model, the predicted class for each - sample in X is returned. For a regression model, the function - value of X calculated is returned. - - For a one-class model, +1 or -1 is returned. + Predict target values of X according to the fitted model. Parameters ---------- X : array-like, shape = [n_samples, n_features] - Returns ------- - C : array, shape = [nsample] + C : array, shape = [n_samples] """ X = np.asanyarray(X, dtype=np.float64, order='C') self._check_n_features(X) diff --git a/scikits/learn/svm/liblinear.py b/scikits/learn/svm/liblinear.py index 3beff32a4a85674d1d9ecacd2d2330537e34c7a6..e02be399d4f7ef7625ca358e16d6e1ba37c8c59d 100644 --- a/scikits/learn/svm/liblinear.py +++ b/scikits/learn/svm/liblinear.py @@ -4,7 +4,7 @@ from .base import BaseLibLinear class LinearSVC(BaseLibLinear, ClassifierMixin): - """Linear Support Vector Classification + """Linear Support Vector Classification. Similar to SVC with parameter kernel='linear', but uses internally liblinear rather than libsvm, so it has more flexibility in the diff --git a/scikits/learn/svm/libsvm.py b/scikits/learn/svm/libsvm.py index fe032492099ec9a75cb609b3f81f570ed0bd8aac..5a75fa88dcf1354ee2806ac6cb2d2d90021c41ec 100644 --- a/scikits/learn/svm/libsvm.py +++ b/scikits/learn/svm/libsvm.py @@ -4,9 +4,7 @@ from .base import BaseLibSVM class SVC(BaseLibSVM, ClassifierMixin): - """ - C-Support Vector Classification. - + """C-Support Vector Classification. Parameters ---------- @@ -94,9 +92,7 @@ class SVC(BaseLibSVM, ClassifierMixin): class NuSVC(BaseLibSVM, ClassifierMixin): - """ - Nu-Support Vector Classification. - + """Nu-Support Vector Classification. Parameters ---------- @@ -200,9 +196,7 @@ class NuSVC(BaseLibSVM, ClassifierMixin): class SVR(BaseLibSVM, RegressorMixin): - """ - Support Vector Regression. - + """Support Vector Regression. Parameters ---------- @@ -299,11 +293,11 @@ class SVR(BaseLibSVM, RegressorMixin): class NuSVR(BaseLibSVM, RegressorMixin): - """ - Nu Support Vector Regression. Similar to NuSVC, for regression, - uses a paramter nu to control the number of support - vectors. However, unlike NuSVC, where nu replaces with C, here nu - replaces with the parameter p of SVR. + """Nu Support Vector Regression. + + Similar to NuSVC, for regression, uses a paramter nu to control + the number of support vectors. However, unlike NuSVC, where nu + replaces with C, here nu replaces with the parameter p of SVR. Parameters ---------- @@ -398,7 +392,7 @@ class NuSVR(BaseLibSVM, RegressorMixin): class OneClassSVM(BaseLibSVM): - """Unsupervised outliers detection + """Unsupervised Outliers Detection. Estimate the support of a high-dimensional distribution. 
diff --git a/scikits/learn/svm/sparse/__init__.py b/scikits/learn/svm/sparse/__init__.py
index 200bfeb0404b3782ce762e8aca3420f5fc133f6e..639fade57fda0b573e720e853a5195f8dd6b8744 100644
--- a/scikits/learn/svm/sparse/__init__.py
+++ b/scikits/learn/svm/sparse/__init__.py
@@ -1,5 +1,21 @@
 """
-Sparse version of algorithms found in the svm module
+Support Vector Machine algorithms for sparse matrices.
+
+This module should have the same API as scikits.learn.svm, except that
+matrices are expected to be in some sparse format supported by
+scipy.sparse.
+
+See http://scikit-learn.sourceforge.net/modules/svm.html
+
+Notes
+-----
+Some fields, like dual_coef_, are not sparse matrices strictly speaking.
+However, they are converted to a sparse matrix for consistency and
+efficiency when multiplying with other sparse matrices.
+
+Author: Fabian Pedregosa <fabian.pedregosa@inria.fr> with help from
+        the scikit-learn community.
+License: New BSD, (C) INRIA 2010
 """
 
 from .libsvm import SVC, NuSVC, SVR, NuSVR, OneClassSVM
diff --git a/scikits/learn/svm/sparse/base.py b/scikits/learn/svm/sparse/base.py
index ba3a4019c2a3ffcc83584107432f4dde4cd6f7c4..7e5a1e20e5fd2a076946f1d7adbb9e3b668141a5 100644
--- a/scikits/learn/svm/sparse/base.py
+++ b/scikits/learn/svm/sparse/base.py
@@ -1,22 +1,4 @@
-"""
-Support Vector Machine algorithms for sparse matrices.
-
-Warning: this module is a work in progress. It is not tested and surely
-contains bugs.
-
-Notes
------
-
-Some fields, like dual_coef_ are not sparse matrices strictly speaking.
-However, they are converted to a sparse matrix for consistency and
-efficiency when multiplying to other sparse matrices.
-
-Author: Fabian Pedregosa <fabian.pedregosa@inria.fr>
-License: New BSD
-"""
-
 import numpy as np
-from scipy import sparse
 
 from ...base import ClassifierMixin
 from ..base import BaseLibSVM, BaseLibLinear, _get_class_weight
@@ -67,13 +49,46 @@ class SparseBaseLibSVM(BaseLibSVM):
 
         self.n_support = np.empty(0, dtype=np.int32, order='C')
 
-    def fit(self, X, y, class_weight={}, sample_weight=[]):
+    def fit(self, X, y, class_weight={}, sample_weight=[], **params):
         """
-        X is expected to be a sparse matrix. For maximum effiency, use a
-        sparse matrix in csr format (scipy.sparse.csr_matrix)
+        Fit the SVM model according to the given training data and
+        parameters.
+
+        Parameters
+        ----------
+        X : sparse matrix, shape = [n_samples, n_features]
+            Training vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        y : array-like, shape = [n_samples]
+            Target values (integers in classification, real numbers in
+            regression)
+
+        class_weight : dict | 'auto', optional
+            Weights associated with classes in the form
+            {class_label : weight}. If not given, all classes are
+            supposed to have weight one.
+
+            The 'auto' mode uses the values of y to automatically adjust
+            weights inversely proportional to class frequencies.
+
+        sample_weight : array-like, shape = [n_samples], optional
+            Weights applied to individual samples (1. for unweighted).
+
+        Returns
+        -------
+        self : object
+            Returns an instance of self.
+
+        Notes
+        -----
+        For maximum efficiency, use a sparse matrix in csr format
+        (scipy.sparse.csr_matrix)
         """
+        self._set_params(**params)
 
-        X = sparse.csr_matrix(X)
+        import scipy.sparse
+        X = scipy.sparse.csr_matrix(X)
         X.data = np.asanyarray(X.data, dtype=np.float64, order='C')
         y = np.asanyarray(y, dtype=np.float64, order='C')
         sample_weight = np.asanyarray(sample_weight, dtype=np.float64,
@@ -108,12 +123,12 @@ class SparseBaseLibSVM(BaseLibSVM):
         # this will fail if n_SV is zero. This is a limitation
         # in scipy.sparse, which does not permit empty matrices
-        self.support_vectors_ = sparse.csr_matrix((self._support_data,
+        self.support_vectors_ = scipy.sparse.csr_matrix((self._support_data,
                                                    self._support_indices,
                                                    self._support_indptr),
                                                   (n_SV, X.shape[1])
                                                   )
-        self.dual_coef_ = sparse.csr_matrix((self._dual_coef_data,
+        self.dual_coef_ = scipy.sparse.csr_matrix((self._dual_coef_data,
                                              dual_coef_indices,
                                              dual_coef_indptr),
                                             (n_class, n_SV)
                                             )
@@ -138,9 +153,10 @@ class SparseBaseLibSVM(BaseLibSVM):
 
         Returns
         -------
-        C : array, shape = [nsample]
+        C : array, shape = [n_samples]
         """
-        T = sparse.csr_matrix(T)
+        import scipy.sparse
+        T = scipy.sparse.csr_matrix(T)
         T.data = np.asanyarray(T.data, dtype=np.float64, order='C')
 
         kernel_type = self._kernel_types.index(self.kernel)
@@ -161,16 +177,25 @@ class SparseBaseLibLinear(BaseLibLinear):
 
     def fit(self, X, y, class_weight={}, **params):
         """
+        Fit the model using X, y as training data.
+
         Parameters
         ----------
-        X : array-like, shape = [n_samples, n_features]
+        X : sparse matrix, shape = [n_samples, n_features]
             Training vector, where n_samples in the number of samples and
             n_features is the number of features.
 
         y : array, shape = [n_samples]
             Target vector relative to X
+
+        Returns
+        -------
+        self : object
+            Returns an instance of self.
         """
         self._set_params(**params)
-        X = sparse.csr_matrix(X)
+
+        import scipy.sparse
+        X = scipy.sparse.csr_matrix(X)
         X.data = np.asanyarray(X.data, dtype=np.float64, order='C')
         y = np.asanyarray(y, dtype=np.int32, order='C')
@@ -186,7 +211,19 @@ class SparseBaseLibLinear(BaseLibLinear):
 
         return self
 
     def predict(self, X):
-        X = sparse.csr_matrix(X)
+        """
+        Predict target values of X according to the fitted model.
+
+        Parameters
+        ----------
+        X : sparse matrix, shape = [n_samples, n_features]
+
+        Returns
+        -------
+        C : array, shape = [n_samples]
+        """
+        import scipy.sparse
+        X = scipy.sparse.csr_matrix(X)
         self._check_n_features(X)
         X.data = np.asanyarray(X.data, dtype=np.float64, order='C')
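Since SparseBaseLibSVM.fit and predict now run their input through scipy.sparse.csr_matrix(), passing CSR data directly avoids an extra conversion, as the new Notes section says. A minimal usage sketch, not part of the patch, assuming the default constructor of the sparse SVC and the class_weight form documented in the docstring above:

import numpy as np
import scipy.sparse
from scikits.learn.svm.sparse import SVC

X = scipy.sparse.csr_matrix(np.array([[-1., -1.], [-2., -1.], [1., 1.], [2., 1.]]))
y = np.array([0, 0, 1, 1])

clf = SVC()
# class_weight maps {class_label: weight}; passing 'auto' instead would
# reweight classes by inverse frequency, as documented above.
clf.fit(X, y, class_weight={0: 1.0, 1: 2.0})

prediction = clf.predict(scipy.sparse.csr_matrix(np.array([[-0.5, -1.0]])))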
diff --git a/scikits/learn/svm/sparse/libsvm.py b/scikits/learn/svm/sparse/libsvm.py
index fb6b05c54bcd3db64ff39c0beca1aaba9deb69f5..d338a85452a2ba27e0af99dfc384b6f93b58719a 100644
--- a/scikits/learn/svm/sparse/libsvm.py
+++ b/scikits/learn/svm/sparse/libsvm.py
@@ -4,8 +4,12 @@ from .base import SparseBaseLibSVM
 
 
 class SVC(SparseBaseLibSVM, ClassifierMixin):
-    """SVC for sparse matrices (csr)
+    """SVC for sparse matrices (csr).
+
+    See :class:`scikits.learn.svm.SVC` for a complete list of parameters
 
+    Notes
+    -----
     For best results, this accepts a matrix in csr format
     (scipy.sparse.csr), but should be able to convert from any array-like
     object (including other sparse representations).
@@ -22,8 +26,12 @@ class SVC(SparseBaseLibSVM, ClassifierMixin):
 
 
 class NuSVC (SparseBaseLibSVM, ClassifierMixin):
-    """NuSVC for sparse matrices (csr)
+    """NuSVC for sparse matrices (csr).
+
+    See :class:`scikits.learn.svm.NuSVC` for a complete list of parameters
 
+    Notes
+    -----
     For best results, this accepts a matrix in csr format
     (scipy.sparse.csr), but should be able to convert from any array-like
     object (including other sparse representations).
@@ -44,6 +52,10 @@ class NuSVC (SparseBaseLibSVM, ClassifierMixin):
 class SVR (SparseBaseLibSVM, RegressorMixin):
     """SVR for sparse matrices (csr)
 
+    See :class:`scikits.learn.svm.SVR` for a complete list of parameters
+
+    Notes
+    -----
     For best results, this accepts a matrix in csr format
     (scipy.sparse.csr), but should be able to convert from any array-like
     object (including other sparse representations).
@@ -65,6 +77,10 @@ class SVR (SparseBaseLibSVM, RegressorMixin):
 class NuSVR (SparseBaseLibSVM, RegressorMixin):
     """NuSVR for sparse matrices (csr)
 
+    See :class:`scikits.learn.svm.NuSVR` for a complete list of parameters
+
+    Notes
+    -----
     For best results, this accepts a matrix in csr format
     (scipy.sparse.csr), but should be able to convert from any array-like
     object (including other sparse representations).
@@ -83,6 +99,10 @@ class NuSVR (SparseBaseLibSVM, RegressorMixin):
 class OneClassSVM (SparseBaseLibSVM):
     """NuSVR for sparse matrices (csr)
 
+    See :class:`scikits.learn.svm.OneClassSVM` for a complete list of parameters
+
+    Notes
+    -----
     For best results, this accepts a matrix in csr format
     (scipy.sparse.csr), but should be able to convert from any array-like
     object (including other sparse representations).
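As the module docstring added to scikits/learn/svm/sparse/__init__.py points out, fitted attributes such as dual_coef_ (and support_vectors_) are stored as scipy.sparse matrices. A short sketch of what that means in practice, assuming the default constructor of the sparse SVR; the shapes follow the csr_matrix calls in sparse/base.py above:

import numpy as np
import scipy.sparse
from scikits.learn.svm.sparse import SVR

X = scipy.sparse.csr_matrix(np.array([[0., 0.], [1., 1.], [2., 2.], [3., 3.]]))
y = np.array([0.0, 0.5, 1.0, 1.5])

reg = SVR()
reg.fit(X, y)

# Both attributes are CSR matrices; toarray() gives dense copies.
support_vectors = reg.support_vectors_.toarray()   # shape (n_SV, n_features)
dual_coef = reg.dual_coef_.toarray()               # shape (n_class, n_SV)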