diff --git a/.mailmap b/.mailmap index a6d895ebdb5640628838a055bdbd1c38c0018d29..5b4dfe5a84feb60e8b07118df1a299c7941b84f1 100644 --- a/.mailmap +++ b/.mailmap @@ -13,4 +13,6 @@ Peter Prettenhofer <peter.prettenhofer@gmail.com> pprett <peter.prettenhofer@gma Anne-Laure Fouque <afouque@is208050.(none)> Anne-Laure FOUQUE <af216607@is206635.intra.cea.fr> Vincent Dubourg <vincent.dubourg@gmail.com> dubourg <vincent.dubourg@gmail.com> Vincent Dubourg <vincent.dubourg@gmail.com> dubourg <dubourg@PTlami14.(none)> -Christian Osendorfer <osendorf@gmail.com> osdf <osendorf@gmail.com> \ No newline at end of file +Christian Osendorfer <osendorf@gmail.com> osdf <osendorf@gmail.com> +James Bergstra <james.bergstra@gmail.com> james.bergstra <james.bergstra@gmail.com> +Xinfan Meng <mxf3306@gmail.com> mxf <mxf@chomsky.localdomain> \ No newline at end of file diff --git a/AUTHORS.rst b/AUTHORS.rst index 3367a57abeaa533f2497f40577f57a4538668054..b7728db5b6049142095289767be281b8d6e03aa6 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -77,7 +77,8 @@ People * `Mathieu Blondel <http://mblondel.org/journal>`_ joined the project in September 2010 and has worked since on the sparse - matrix support, text feature extraction and general bug fixes. + matrix support, Ridge generalized crossval, text feature + extraction and general bug fixes. * `Peter Prettenhofer <http://sites.google.com/site/peterprettenhofer/>`_ joined the diff --git a/README.rst b/README.rst index a1349738143c1dd2675fd2a753d0fa76b4e5edc0..154472c45f9f99d380e11d3d964adeeed4c7fe9b 100644 --- a/README.rst +++ b/README.rst @@ -25,9 +25,7 @@ Dependencies ============ The required dependencies to build the software are python >= 2.5, -setuptools, NumPy >= 1.1, SciPy >= 0.6 (although having at least 0.7 -is highly recommended and required by some modules) and a working C++ -compiler. +setuptools, NumPy >= 1.2, SciPy >= 0.7 and a working C++ compiler. To run the tests you will also need nose >= 0.10. 
@@ -79,8 +77,8 @@ Bugs ---- Please submit bugs you might encounter, as well as patches and feature -requests to the tracker located at the address -https://sourceforge.net/apps/trac/scikit-learn/report +requests to the tracker located at github +https://github.com/scikit-learn/scikit-learn/issues Testing diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/benchmarks/bench_plot_balltree.py b/benchmarks/bench_plot_balltree.py index 032218506376a0a354c2cf9e36fe06b04cea6b08..0e9dd31531f78c680f470c0cb2050ff6010800d3 100644 --- a/benchmarks/bench_plot_balltree.py +++ b/benchmarks/bench_plot_balltree.py @@ -26,36 +26,37 @@ def compare_nbrs(nbrs1, nbrs2): elif(nbrs1.ndim == 1): return np.all(nbrs1 == nbrs2) -n_samples = 1000 -leaf_size = 1 # leaf size -k = 20 -BT_results = [] -KDT_results = [] - -for i in range(1, 10): - print 'Iteration %s' %i - n_features = i*100 - X = np.random.random([n_samples, n_features]) - - t0 = time() - BT = BallTree(X, leaf_size) - d, nbrs1 = BT.query(X, k) - delta = time() - t0 - BT_results.append(delta) - - t0 = time() - KDT = cKDTree(X, leaf_size) - d, nbrs2 = KDT.query(X, k) - delta = time() - t0 - KDT_results.append(delta) - - # this checks we get the correct result - assert compare_nbrs(nbrs1, nbrs2) - -xx = 100 * np.arange(1, 10) -pl.plot(xx, BT_results, label='scikits.learn (BallTree)') -pl.plot(xx, KDT_results, label='scipy (cKDTree)') -pl.xlabel('number of dimensions') -pl.ylabel('time (seconds)') -pl.legend() -pl.show() +if __name__ == '__main__': + n_samples = 1000 + leaf_size = 1 # leaf size + k = 20 + BT_results = [] + KDT_results = [] + + for i in range(1, 10): + print 'Iteration %s' %i + n_features = i*100 + X = np.random.random([n_samples, n_features]) + + t0 = time() + BT = BallTree(X, leaf_size) + d, nbrs1 = BT.query(X, k) + delta = time() - t0 + BT_results.append(delta) + + t0 = time() + KDT = cKDTree(X, leaf_size) + d, nbrs2 = KDT.query(X, k) + delta = time() - t0 + KDT_results.append(delta) + + # this checks we get the correct result + assert compare_nbrs(nbrs1, nbrs2) + + xx = 100 * np.arange(1, 10) + pl.plot(xx, BT_results, label='scikits.learn (BallTree)') + pl.plot(xx, KDT_results, label='scipy (cKDTree)') + pl.xlabel('number of dimensions') + pl.ylabel('time (seconds)') + pl.legend() + pl.show() diff --git a/doc/conf.py b/doc/conf.py index 9fbad66ae8fc7a76b13b4b5f737e679acac0ac8a..94f471e429db44c7cfdc29c8de1fe11ceb9bde04 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -53,16 +53,17 @@ master_doc = 'index' # General information about the project. project = u'scikits.learn' -copyright = u'2010, scikits.learn developers (BSD Lincense)' +copyright = u'2010, scikits.learn developers (BSD License)' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '0.6' +version = '0.7' # The full version, including alpha/beta/rc tags. -release = '0.6.0' +import scikits.learn as skl +release = skl.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -187,7 +188,7 @@ htmlhelp_basename = 'scikit-learndoc' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). 
latex_documents = [ - ('user_guide', 'user_guide.tex', u'scikits.learn user guide', + ('index', 'user_guide.tex', u'scikits.learn user guide', u'scikits.learn developers', 'manual'), ] diff --git a/doc/developers/index.rst b/doc/developers/index.rst index c84bcfc0f57f86bedf1f7c908af98c202eaa49a2..7e46cb8725b292d8b1def7fb297b821fb44dabc6 100644 --- a/doc/developers/index.rst +++ b/doc/developers/index.rst @@ -121,6 +121,25 @@ Developers web site More information can be found at the `developer's wiki <https://github.com/scikit-learn/scikit-learn/wiki>`_. + +Other ways to contribute +======================== + +Code is not the only way to contribute to this project. For instance, +documentation is also a very important part of the project and often +doesn't get as much attention as it deserves. If you find a typo in +the documentation, or have made improvements, don't hesitate to send +an email to the mailing list or open a GitHub pull request. Full +documentation can be found under the doc/ directory. + +It also helps us if you spread the word: reference the project from +your blog or articles, link to us from your website, or simply say "I +use it": + +.. raw:: html + <script type="text/javascript" src="http://www.ohloh.net/p/480792/widgets/project_users.js?style=rainbow"></script> + + .. _coding-guidelines: Coding guidelines diff --git a/doc/developers/neighbors.rst b/doc/developers/neighbors.rst new file mode 100644 index 0000000000000000000000000000000000000000..ae98b5b0be323565accaef51f112996c2dbc6284 --- /dev/null +++ b/doc/developers/neighbors.rst @@ -0,0 +1,69 @@ + +.. _notes_neighbors: + + +.. currentmodule:: scikits.learn.neighbors + +===================================== +scikits.learn.neighbors working notes +===================================== + +barycenter +========== + +Function :func:`barycenter` tries to find appropriate weights to +reconstruct the point x from a subset (y1, y2, ..., yn), where the +weights sum to one. + +This is just a simple case of Equality Constrained Least Squares +[#f1]_ with the constraint dot(np.ones(n), x) = 1. In particular, the Q +matrix from the QR decomposition of B is the Householder reflection of +np.ones(n). + + +Purpose +------- + +This method was added to ease some computations in the future manifold +module, namely in LLE. However, it is still to be shown that it is +useful and efficient in that context. + + +Performance +----------- + +The algorithm has to iterate over n_samples, which is the main +bottleneck. It would be great to vectorize this loop. Also, the rank +updates could probably be moved outside the loop. + +Also, the least squares solution could be computed more efficiently by a +QR factorization, since we probably don't care about a minimum norm +solution in the underdetermined case. + +The paper 'An Introduction to Locally Linear Embedding' by Saul & +Roweis solves the problem by the normal equations method over the +covariance matrix. However, it does not degrade gracefully when the +covariance is singular, requiring regularization to be added explicitly. + + +Stability +--------- + +Should be good as it uses SVD to solve the LS problem. TODO: explicit +bounds. + + +API +--- + +The API is convenient to use from NeighborsBarycenter and +kneighbors_graph, but might not be very easy to use directly due to +the fact that Y must be a 3-D array. + +It should be checked that it is usable in other contexts. + + +.. rubric:: Footnotes + +.. 
[#f1] Section 12.1.4 ('Equality Constrained Least Squares'), + 'Matrix Computations' by Golub & Van Loan diff --git a/doc/index.rst b/doc/index.rst index a4bc49fd6180b33eb5f4074cd133e68ee593fdc2..edd183d483ed15578ccfafc9d793d3d642b3d5b0 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -93,5 +93,6 @@ Development :maxdepth: 2 developers/index + developers/neighbors performance about diff --git a/doc/install.rst b/doc/install.rst index 4f111329e8eccc6a26851dbe1d6e9e80b526cc6f..6c8afde59f3d80da3585d38be300abf5e32ecccc 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -30,8 +30,8 @@ Installing an official release Installing from source ---------------------- -Installing from source requires you to have installed numpy, -setuptools, python development headers and a working C++ +Installing from source requires you to have installed numpy, +scipy, setuptools, python development headers and a working C++ compiler. Under debian-like systems you can get all this by executing with root privileges:: @@ -118,6 +118,13 @@ by typing the following command:: sudo port install py26-scikits-learn +NetBSD +------ + +scikits.learn is available via `pkgsrc-wip <http://pkgsrc-wip.sourceforge.net/>`_: + + http://pkgsrc.se/wip/py-scikits_learn + .. _install_bleeding_edge: Bleeding Edge diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index cbf70f1dd4709ea3221fd5a8a28f332986c1bd06..a15ed80042e7472fe025bd482e4670193319ed06 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -46,6 +46,7 @@ Generalized Linear Models linear_model.LinearRegression linear_model.Ridge + linear_model.RidgeCV linear_model.Lasso linear_model.LassoCV linear_model.ElasticNet @@ -105,14 +106,15 @@ Nearest Neighbors :toctree: generated/ :template: class.rst - neighbors.Neighbors - neighbors.NeighborsBarycenter + neighbors.NeighborsClassifier + neighbors.NeighborsRegressor ball_tree.BallTree .. autosummary:: :toctree: generated/ :template: function.rst + neighbors.kneighbors_graph ball_tree.knn_brute Gaussian Mixture Models @@ -150,6 +152,32 @@ Clustering cluster.AffinityPropagation +Metrics +======= + + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + metrics.euclidean_distances + metrics.unique_labels + metrics.confusion_matrix + metrics.roc_curve + metrics.auc + metrics.precision_score + metrics.recall_score + metrics.fbeta_score + metrics.f1_score + metrics.precision_recall_fscore_support + metrics.classification_report + metrics.precision_recall_curve + metrics.r2_score + metrics.zero_one_score + metrics.zero_one + metrics.mean_square_error + + Covariance Estimators ===================== @@ -179,6 +207,7 @@ Signal Decomposition pca.PCA pca.ProbabilisticPCA + pca.RandomizedPCA fastica.FastICA .. autosummary:: diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 58d6d19d08cdb1f2e337bbcf0b559345056665e9..9f91a818aba09d0c9515c82a08f3b7e91844c854 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -13,13 +13,24 @@ that, given train data, returns an array of integer labels corresponding to the different clusters. For the class, the labels over the training data can be found in the `labels_` attribute. -Here, we only explain the different algorithms. For usage examples, click -on the class name to read the reference documentation. +.. currentmodule:: scikits.learn.cluster + +One important thing to note is that the algorithms implemented in this module +take different kinds of matrix as input. 
On one hand, :class:`MeanShift` and +:class:`KMeans` take data matrices of shape [n_samples, n_features]. These can +be obtained from the classes in the `scikits.learn.feature_extraction` module. +On the other hand, :class:`AffinityPropagation` and :class:`SpectralClustering` +take similarity matrices of shape [n_samples, n_samples]. These can be +obtained from the functions in the `scikits.learn.metrics.pairwise` module. +In other words, :class:`MeanShift` and :class:`KMeans` work with points in a +vector space, whereas :class:`AffinityPropagation` and +:class:`SpectralClustering` can work with arbitrary objects, as long as a +similarity measure exists for such objects. + Affinity propagation ==================== -.. currentmodule:: scikits.learn.cluster :class:`AffinityPropagation` clusters data by diffusion in the similarity matrix. This algorithm automatically sets its number of clusters. It diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index 20736d3fcc6b905ebf9dd472f2402c33bc8b091c..54ddd4f245a86e7ee95be9ca8cbc4529e6b6db05 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -56,7 +56,7 @@ data as only one sample is removed from the learning set. >>> loo = LeaveOneOut(len(Y)) >>> print loo scikits.learn.cross_val.LeaveOneOut(n=4) - >>> for train, test in loo: print train,test + >>> for train, test in loo: print train, test [False True True True] [ True False False False] [ True False True True] [False True False False] [ True True False True] [False False True False] @@ -69,10 +69,22 @@ Thus, one can create the training/test sets using: >>> X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test] +If X or Y are `scipy.sparse` matrices, train and test need to be +arrays of integer indices. This can be achieved by setting the parameter +`indices` to True when creating the cross-validation procedure.
- - - + >>> import numpy as np + >>> from scikits.learn.cross_val import LeaveOneOut + >>> X = np.array([[0., 0.], [1., 1.], [-1., -1.], [2., 2.]]) + >>> Y = np.array([0, 1, 0, 1]) + >>> loo = LeaveOneOut(len(Y), indices=True) + >>> print loo + scikits.learn.cross_val.LeaveOneOut(n=4) + >>> for train, test in loo: print train, test + [1 2 3] [0] + [0 2 3] [1] + [0 1 3] [2] + [0 1 2] [3] Leave-P-Out - LPO @@ -162,13 +174,13 @@ Example of stratified 2-fold: >>> from scikits.learn.cross_val import StratifiedKFold >>> X = [[0., 0.], [1., 1.], [-1., -1.], [2., 2.], [3., 3.], [4., 4.], [0., 1.]] >>> Y = [0, 0, 0, 1, 1, 1, 0] ->>> loo = StratifiedKFold(Y, 2) ->>> print loo +>>> skf = StratifiedKFold(Y, 2) +>>> print skf scikits.learn.cross_val.StratifiedKFold(labels=[0 0 0 1 1 1 0], k=2) ->>> for train, test in loo: print train,test -[False False True False True True True] [ True True False True False False False] -[ True True False True False False False] [False False True False True True True] - +>>> for train, test in skf: print train, test +[False True False False True False True] [ True False True True False True False] +[ True False True True False True False] [False True False False True False True] + diff --git a/doc/modules/decompositions.rst b/doc/modules/decompositions.rst index a668c1c557c6435c10a0700da02a4ff6639ef58b..15975a9f589b7086c2718444c26fd03712d4f69b 100644 --- a/doc/modules/decompositions.rst +++ b/doc/modules/decompositions.rst @@ -1,21 +1,33 @@ -==================================================================== +================================================================= Decomposing signals in components (matrix factorization problems) -==================================================================== +================================================================= .. _PCA: Principal component analysis (PCA) -==================================== +================================== .. currentmodule:: scikits.learn.pca + +Exact PCA and probabilistic interpretation +------------------------------------------ + PCA is used to decompose a multivariate dataset in a set of successive orthogonal components that explain a maximum amount of the variance. In the scikit-learn, :class:`PCA` is implemented as a `transformer` object that learns n components in its `fit` method, and can be used on new data to project it on these components. +The optional `whiten=True` parameter makes it possible to +project the data onto the singular space while scaling each component +to unit variance. This is often useful if the models downstream make +strong assumptions on the isotropy of the signal: this is for example +the case for Support Vector Machines with the RBF kernel and the K-Means +clustering algorithm. However, in that case the inverse transform is no +longer exact since some information is lost while forward transforming. + In addition, the :class:`ProbabilisticPCA` object provides a probabilistic interpretation of the PCA that can give a likelihood of data based on the amount of variance it explains. As such it implements a @@ -33,10 +45,68 @@ features, projected on the 2 dimensions that explain most variance: * :ref:`example_plot_pca.py` + +Approximate PCA +--------------- + +Often we are interested in projecting the data onto a lower-dimensional +space that preserves most of the variance, by dropping the singular +vectors of components associated with lower singular values. + +For instance, for face recognition, if we work with 64x64 gray-level +pixel pictures, the dimensionality of the data is 4096, and it is slow +to train an RBF Support Vector Machine on such wide data. Furthermore, +we know that the intrinsic dimensionality of the data is much lower +than 4096, since all face pictures look alike. The samples lie on a +manifold of much lower dimension (say around 200 for instance). The PCA +algorithm can be used to linearly transform the data while both reducing +the dimensionality and preserving most of the explained variance at the +same time. + +The class :class:`RandomizedPCA` is very useful in that case: since we +are going to drop most of the singular vectors, it is much more efficient +to limit the computation to an approximated estimate of the singular +vectors that we will keep to actually perform the transform. + +:class:`RandomizedPCA` can hence be used as a drop-in replacement for +:class:`PCA`, with the minor exception that we need to give it the size of +the lower-dimensional space `n_components` as a mandatory input parameter. + +If we denote :math:`n_{max} = max(n_{samples}, n_{features})` and +:math:`n_{min} = min(n_{samples}, n_{features})`, the time complexity +of :class:`RandomizedPCA` is :math:`O(n_{max}^2 \cdot n_{components})` +instead of :math:`O(n_{max}^2 \cdot n_{min})` for the exact method +implemented in :class:`PCA`. + +The memory footprint of :class:`RandomizedPCA` is also proportional to +:math:`2 \cdot n_{max} \cdot n_{components}` instead of :math:`n_{max} +\cdot n_{min}` for the exact method. + +Furthermore, :class:`RandomizedPCA` is able to work with +`scipy.sparse` matrices as input, which makes it suitable for reducing +the dimensionality of features extracted from text documents, for +instance. + +Note: the implementation of `inverse_transform` in :class:`RandomizedPCA` +is not the exact inverse transform of `transform` even when +`whiten=False` (default). + + +.. topic:: Examples: + + * :ref:`example_applications_plot_face_recognition.py` + +.. topic:: References: + + * `"Finding structure with randomness: Stochastic algorithms for + constructing approximate matrix decompositions" + <http://arxiv.org/abs/0909.4061>`_ Halko, et al., 2009 + + .. _ICA: Independent component analysis (ICA) -===================================== +==================================== .. currentmodule:: scikits.learn.fastica diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 558c2f3c73e335c840fb7ebf7159c7a177f38a4f..811ede07ddbf7fe3eaab67630a5dc67ee1aba793 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -4,11 +4,9 @@ Grid Search .. currentmodule:: scikits.learn.grid_search -`scikits.learn.grid_search` is a package to optimize -the parameters of a model (e.g. Support Vector Classifier) -using cross-validation. -The computation can be run in parallel using the multiprocessing package. +Grid Search is used to optimize the parameters of a model +(e.g. Support Vector Classifier, Lasso, etc.) using cross-validation. The main class is :class:`GridSearchCV`. @@ -23,3 +21,7 @@ of Grid Search coupling parameters from a text documents feature extractor (n-gram count vectorizer and TF-IDF transformer) with a classifier (here a linear SVM trained with SGD with either elastic net or L2 penalty). +Notes +----- +Computations can be run in parallel if your OS supports it, by using +the keyword n_jobs=-1; see the function signature for more details.
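As a rough sketch of the parallel search described in this note (the parameter grid below is illustrative and not part of this patch; it assumes the 0.7-era ``GridSearchCV`` signature with its ``n_jobs`` keyword)::

    from scikits.learn import svm, datasets
    from scikits.learn.grid_search import GridSearchCV

    iris = datasets.load_iris()

    # An illustrative grid: two kernels and two values of C.
    parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}

    # n_jobs=-1 spawns one worker per CPU through multiprocessing;
    # the default n_jobs=1 keeps everything in a single process.
    clf = GridSearchCV(svm.SVC(), parameters, n_jobs=-1)
    clf.fit(iris.data, iris.target)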
diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 269dd2b19f41fd503a7dd505f27257cca2e0829d..6ea229cfc715341e2cd99f91fa1543fd37932e2b 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -17,8 +17,8 @@ value. Across the module, we designate the vector :math:`\beta = (\beta_1, ..., \beta_D)` as ``coef_`` and :math:`\beta_0` as ``intercept_``. - -.. TODO: reference to logistic regression. +To perform classification with generalized linear models, see +:ref:`Logistic regression`. .. _ordinary_least_squares: @@ -64,13 +64,12 @@ example, when data are collected without an experimental design. OLS Complexity ------------------- +-------------- This method computes the least squares solution using a singular value decomposition of X. If X is a matrix of size (n, p), this method has a cost of :math:`O(n p^2)`, assuming that :math:`n \geq p`. - Ridge Regression ================ @@ -106,6 +105,27 @@ Ridge Complexity This method has the same order of complexity as an :ref:`ordinary_least_squares`. +Generalized Cross-Validation +---------------------------- + +:class:`RidgeCV` implements ridge regression with built-in cross-validation of the alpha parameter. +The object works in the same way as GridSearchCV except that it defaults to Generalized Cross-Validation (GCV), an efficient form of leave-one-out cross-validation. + + >>> from scikits.learn import linear_model + >>> clf = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0]) + >>> clf.fit ([[0, 0], [0, 0], [1, 1]], [0, .1, 1]) + RidgeCV(alphas=[0.10000000000000001, 1.0, 10.0], loss_func=None, cv=None, + score_func=None, fit_intercept=True) + >>> clf.best_alpha + 0.10000000000000001 + +.. topic:: References + + * "Notes on Regularized Least Squares", Rifkin & Lippert (`technical report + <http://cbcl.mit.edu/projects/cbcl/publications/ps/MIT-CSAIL-TR-2007-025.pdf>`_, + `course slides + <http://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf>`_). + Lasso ===== @@ -427,6 +447,26 @@ where :math:`\alpha` is the precision of the noise. * Original Algorithm is detailed in the book *Bayesian learning for neural networks* by Radford M. Neal +Logistic regression +====================== + +If the task at hand is to choose which class a sample belongs to, given +a finite (hopefully small) set of choices, the learning problem is +classification, rather than regression. Linear models can be used for +such a decision, but it is best to use what is called a +`logistic regression <http://en.wikipedia.org/wiki/Logistic_regression>`__, +that doesn't try to minimize the sum of squared residuals, as in regression, +but rather a "hit or miss" cost. + +The :class:`LogisticRegression` class can be used to do L1 or L2 penalized +logistic regression, in order to obtain sparse predictive weights. + +.. topic:: Examples: + + * :ref:`example_logistic_l1_l2_coef.py` + + * :ref:`example_linear_model_plot_logistic_path.py` + Stochastic Gradient Descent - SGD ================================= diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst index e069ec15bfc4c33ae03be30e12035bfe24a8b5a1..dc9df8c36ac8f6db2cb7ba7f1f39bdcc6f294438 100644 --- a/doc/modules/neighbors.rst +++ b/doc/modules/neighbors.rst @@ -16,9 +16,22 @@ the decision boundary is very irregular. Classification ============== -The :class:`Neighbors` estimators implements the nearest-neighbors -classification method using a vote heuristic: the class most present in -the k nearest neighbors of a point is assigned to this point.
+The :class:`NeighborsClassifier` implements the nearest-neighbors +classification method using a vote heuristic: the class most present +in the k nearest neighbors of a point is assigned to this point. + +It is possible to use different nearest neighbor search algorithms by +using the keyword ``algorithm``. Possible values are ``'auto'``, +``'ball_tree'``, ``'brute'`` and ``'brute_inplace'``. ``'ball_tree'`` +will create an instance of :class:`BallTree` to conduct the search, +which is usually very efficient in low-dimensional spaces. In higher +dimensions, a brute-force approach is preferred, thus the parameters +``'brute'`` and ``'brute_inplace'`` can be used. Both conduct a +brute-force search, the difference being that ``'brute_inplace'`` does +not perform any precomputations, and thus is better suited for +low-memory settings. Finally, ``'auto'`` is a simple heuristic that +will guess the best approach based on the current dataset. + .. figure:: ../auto_examples/images/plot_neighbors.png :target: ../auto_examples/plot_neighbors.html @@ -31,12 +44,17 @@ the k nearest neighbors of a point is assigned to this point. * :ref:`example_plot_neighbors.py`: an example of classification using nearest neighbor. + Regression ========== -The :class:`NeighborsBarycenter` estimator implements a nearest-neighbors -regression method using barycenter weighting of the targets of the -k-neighbors. +The :class:`NeighborsRegressor` estimator implements a +nearest-neighbors regression method by weighting the targets of the +k-neighbors. Two different weighting strategies are implemented: +``barycenter`` and ``mean``. ``barycenter`` will apply the weights +that best reconstruct the point from its neighbors, while ``mean`` will +apply constant weights to each point. This plot shows the behavior of +both estimators for a simple regression task. .. figure:: ../auto_examples/images/plot_neighbors_regression.png :target: ../auto_examples/plot_neighbors_regression.html diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index c8731577356de5469c90c15905ab7109465fbbfb..01beffea5b5c024209531a10e0f38b560c94b8fa 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -42,20 +42,8 @@ The disadvantages of Support Vector Machines include: Classification ============== -Suppose some given data points each belonging to one of N classes, and -the goal is to decide which class a new data point will be in. This -problem is called classification, and can be solved with SVMs using -*Support Vector Classifiers*, SVC. The -classes that perform this task are :class:`SVC`, :class:`NuSVC` and -:class:`LinearSVC`. - -:class:`SVC` and :class:`NuSVC` are similar methods, but accept -slightly different sets of parameters and have different mathematical -formulations (see section :ref:`svm_mathematical_formulation`). On the -other hand, :class:`LinearSVC` is another implementation of SVC -optimized in the case of a linear kernel. Note that :class:`LinearSVC` -does not accept keyword 'kernel', as this is assumed to be linear. It -also lacks some of the members of SVC and NuSVC, like support\_. +:class:`SVC`, :class:`NuSVC` and :class:`LinearSVC` are classes +capable of performing multi-class classification on a dataset. .. figure:: ../auto_examples/svm/images/plot_iris.png @@ -63,14 +51,24 @@ also lacks some of the members of SVC and NuSVC, like support\_.
:align: center -As other classifiers, SVC and NuSVC have to be fitted with two arrays: -an array X of size [m_samples, n_features] holding the training -samples, and an array Y of size [n_samples] holding the target values -(class labels) for the training samples:: +:class:`SVC` and :class:`NuSVC` are similar methods, but accept +slightly different sets of parameters and have different mathematical +formulations (see section :ref:`svm_mathematical_formulation`). On the +other hand, :class:`LinearSVC` is another implementation of Support +Vector Classification for the case of a linear kernel. Note that +:class:`LinearSVC` does not accept keyword 'kernel', as this is +assumed to be linear. It also lacks some of the members of +:class:`SVC` and :class:`NuSVC`, like support\_. + +As other classifiers, :class:`SVC`, :class:`NuSVC` and +:class:`LinearSVC` take as input two arrays: an array X of size +[n_samples, n_features] holding the training samples, and an array Y +of integer values, size [n_samples], holding the class labels for the +training samples:: >>> from scikits.learn import svm - >>> X = [[0., 0.], [1., 1.]] + >>> X = [[0, 0], [1, 1]] >>> Y = [0, 1] >>> clf = svm.SVC() >>> clf.fit(X, Y) @@ -82,19 +80,60 @@ After being fitted, the model can then be used to predict new values:: >>> clf.predict([[2., 2.]]) array([ 1.]) -SVMs perform classification as a function of some subset of the -training data, called the support vectors. These vectors can be -accessed in member `support_`: +The SVM decision function depends on some subset of the training data, +called the support vectors. Some properties of these support vectors +can be found in members `support_vectors_`, `support_` and +`n_support_`:: + + >>> # get support vectors + >>> clf.support_vectors_ + array([[ 0., 0.], + [ 1., 1.]]) + >>> # get indices of support vectors + >>> clf.support_ # doctest: +ELLIPSIS + array([0, 1]...) + >>> # get number of support vectors for each class + >>> clf.n_support_ # doctest: +ELLIPSIS + array([1, 1]...) + - >>> clf.support_ - array([0, 1], dtype=int32) +Multi-class classification +-------------------------- -Member `n_support_` holds the number of support vectors for each class: +:class:`SVC` and :class:`NuSVC` implement the "one-against-one" +approach (Knerr et al., 1990) for multi-class classification. If +n_class is the number of classes, then n_class * (n_class - 1)/2 +classifiers are constructed and each one is trained on data from two +classes. - >>> clf.n_support_ - array([1, 1], dtype=int32) + + >>> X = [[0], [1], [2], [3]] + >>> Y = [0, 1, 2, 3] + >>> clf = svm.SVC() + >>> clf.fit(X, Y) + SVC(kernel='rbf', C=1.0, probability=False, degree=3, coef0=0.0, eps=0.001, + cache_size=100.0, shrinking=True, gamma=0.25) + >>> dec = clf.decision_function([[1]]) + >>> dec.shape[1] # 4 classes: 4*3/2 = 6 + 6 + + +On the other hand, :class:`LinearSVC` implements the "one-vs-the-rest" +multi-class strategy, thus training n_class models. If there are only +two classes, only one model is trained. + >>> lin_clf = svm.LinearSVC() + >>> lin_clf.fit(X, Y) + LinearSVC(loss='l2', C=1.0, intercept_scaling=1, fit_intercept=True, + eps=0.0001, penalty='l2', multi_class=False, dual=True) + >>> dec = lin_clf.decision_function([[1]]) + >>> dec.shape[1] + 4 + + +See :ref:`svm_mathematical_formulation` for a complete description of +the decision function. + ..
topic:: Examples: * :ref:`example_svm_plot_iris.py`, @@ -125,6 +164,16 @@ As with classification classes, the fit method will take as argument vectors X, y, only that in this case y is expected to have floating point values instead of integer values. + >>> from scikits.learn import svm + >>> X = [[0, 0], [2, 2]] + >>> y = [0.5, 2.5] + >>> clf = svm.SVR() + >>> clf.fit(X, y) + SVR(kernel='rbf', C=1.0, probability=False, degree=3, shrinking=True, + eps=0.001, p=0.1, cache_size=100.0, coef0=0.0, nu=0.5, gamma=0.5) + >>> clf.predict([[1, 1]]) + array([ 1.5]) + .. topic:: Examples: @@ -154,7 +203,7 @@ will only take as input an array X, as there are no class labels. .. topic:: Examples: * :ref:`example_svm_plot_oneclass.py` - + * :ref:`example_applications_plot_species_distribution_modeling.py` .. currentmodule:: scikits.learn.svm.sparse diff --git a/doc/support.rst b/doc/support.rst index 5ef09c946f7d0af5d4cd9d2ff69a44c83d2c217f..549d3a3cb21d70b497dd5417b3164d32dde94902 100644 --- a/doc/support.rst +++ b/doc/support.rst @@ -36,6 +36,7 @@ This documentation is relative to |release|. Documentation for other versions can be found here: * `Development version <http://scikit-learn.sf.net/dev/>`_ + * `0.7 <http://scikit-learn.sf.net/0.7/>`_ * `0.6 <http://scikit-learn.sf.net/0.6/>`_ * `0.5 <http://scikit-learn.sf.net/0.5/>`_ diff --git a/doc/themes/scikit-learn/layout.html b/doc/themes/scikit-learn/layout.html index eb915728e7fdee370c82c5b2fade3ae06be97728..96a8e79cef4326ff334a8344cfa460def3b1cd3f 100644 --- a/doc/themes/scikit-learn/layout.html +++ b/doc/themes/scikit-learn/layout.html @@ -77,7 +77,7 @@ {% else %} <h3>News</h3> - <p>scikits.learn 0.6 is available + <p>scikits.learn 0.7 is available for <a href="https://sourceforge.net/projects/scikit-learn/files/">download</a>. See <a href="{{pathto('whats_new')}}">what's new</a> and tips on <a href="{{pathto('install')}}">installing</a>.</p> diff --git a/doc/tutorial.rst b/doc/tutorial.rst index fcc32eadb41f5429742a66a4a509b02637a13a0f..bedb0aa5e901ef8d4b8071cc4c91e6e52141f910 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -23,7 +23,7 @@ We can separate learning problems in a few large categories: * **classification**: samples belong to two or more classes and we want to learn from already labeled data how to predict the class - of un-labeled data. An example of classification problem would + of unlabeled data. An example of classification problem would be the digit recognition example, in which the aim is to assign each input vector to one of a finite number of discrete categories. @@ -143,7 +143,7 @@ box and not worry about these: We call our estimator instance `clf` as it is a classifier. It now must be fitted to the model, that is, it must `learn` from the model. This is done by passing our training set to the ``fit`` method. As a training -set, let us use the all the images of our dataset appart from the last +set, let us use all the images of our dataset apart from the last one: >>> clf.fit(digits.data[:-1], digits.target[:-1]) @@ -169,3 +169,33 @@ resolution. Do you agree with the classifier? A complete example of this classification problem is available as an example that you can run and study: :ref:`example_plot_digits_classification.py`. + +Model persistence +----------------- + +It is possible to save a model in the scikit by using Python's built-in +persistence model, namely `pickle <http://docs.python.org/library/pickle.html>`_. 
+ +>>> from scikits.learn import svm +>>> from scikits.learn import datasets +>>> clf = svm.SVC() +>>> iris = datasets.load_iris() +>>> X, y = iris.data, iris.target +>>> clf.fit(X, y) +SVC(kernel='rbf', C=1.0, probability=False, degree=3, coef0=0.0, eps=0.001, + cache_size=100.0, shrinking=True, gamma=0.00666666666667) +>>> import pickle +>>> s = pickle.dumps(clf) +>>> clf2 = pickle.loads(s) +>>> clf2.predict(X[0]) +array([ 0.]) +>>> y[0] +0 + +In the specific case of the scikit, it may be more interesting to use +joblib's replacement of pickle, which is more efficient on big data, but +can only pickle to disk and not to a string: + +>>> from scikits.learn.externals import joblib +>>> joblib.dump(clf, 'filename.pkl') # doctest: +SKIP + diff --git a/doc/user_guide.rst b/doc/user_guide.rst index 96ddbcdc519f8554b8a1de5907feb97cc4f45253..59663102b5be142a553e5eafa0b27cae6c6efd90 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -10,9 +10,5 @@ User guide: contents .. include:: bigger_toc_css.rst -This is the html version of user guide. A PDF version for printing can -be found `here -<http://sourceforge.net/projects/scikit-learn/files/user_guide.pdf/download>`_. - .. include:: contents.rst diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 5413a8dbc03efb8abea533857831797a3aa1a815..d8de63bfc533709b9d308334842a441bcfd9c2c5 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -1,6 +1,97 @@ .. currentmodule:: scikits.learn + +.. _changes_0_7: + +0.7 +=== + +scikits.learn 0.7 was released in March 2011, roughly three months +after the 0.6 release. This release is marked by speed improvements +in existing algorithms, like the k-Nearest Neighbors and K-Means +algorithms, and by the inclusion of an efficient algorithm for +computing the Ridge Generalized Cross-Validation solution. Unlike the +preceding release, no new modules were added to this release. + +Changelog +--------- + + - Performance improvements for Gaussian Mixture Model sampling [Jan + Schlüter]. + + - Implementation of efficient leave-one-out cross-validated Ridge in + :class:`linear_model.RidgeCV` [`Mathieu Blondel`_] + + - Better handling of collinearity and early stopping in + :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian + Pedregosa`_]. + + - Fixes for liblinear ordering of labels and sign of coefficients + [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_]. + + - Performance improvements for Nearest Neighbors algorithm in + high-dimensional spaces [`Fabian Pedregosa`_]. + + - Performance improvements for :class:`cluster.KMeans` [`Gael + Varoquaux`_ and `James Bergstra`_]. + + - Sanity checks for SVM-based classes [`Mathieu Blondel`_]. + + - Refactoring of :class:`neighbors.NeighborsClassifier` and + :func:`neighbors.kneighbors_graph`: added different algorithms for + the k-Nearest Neighbor Search and implemented a more stable + algorithm for finding barycenter weights. Also added some + developer documentation for this module, see + :ref:`notes_neighbors` for more information [`Fabian Pedregosa`_]. + + - Documentation improvements: Added :class:`pca.RandomizedPCA` and + :class:`linear_model.LogisticRegression` to the class + reference. Also documented the kinds of matrices accepted by the + clustering algorithms, and other fixes [`Gael Varoquaux`_, `Fabian + Pedregosa`_, `Mathieu Blondel`_, `Olivier Grisel`_, Virgile Fritsch, + Emmanuelle Gouillart] + + - Bound decision_function in classes that make use of liblinear_, + in both dense and sparse variants, like :class:`svm.LinearSVC` or + :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_]. + + - Performance and API improvements to + :func:`metrics.euclidean_distances` and to + :class:`pca.RandomizedPCA` [`James Bergstra`_]. + + - Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche] + + - Allow input sequences of different lengths in :class:`hmm.GaussianHMM` + [`Ron Weiss`_]. + + - Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng] + + +People +------ + +People that made this release possible, preceded by number of commits: + + - 85 `Fabian Pedregosa`_ + - 67 `Mathieu Blondel`_ + - 20 `Alexandre Gramfort`_ + - 19 `James Bergstra`_ + - 14 Dan Yamins + - 13 `Olivier Grisel`_ + - 12 `Gael Varoquaux`_ + - 4 Edouard Duchesnay + - 4 `Ron Weiss`_ + - 2 Satrajit Ghosh + - 2 Vincent Dubourg + - 1 Emmanuelle Gouillart + - 1 Kamel Ibn Hassen Derouiche + - 1 Paolo Losi + - 1 VirgileFritsch + - 1 `Yaroslav Halchenko`_ + - 1 Xinfan Meng + + .. _changes_0_6: 0.6 @@ -106,8 +197,7 @@ People that made this release possible preceeded by number of commits: * 97 `Peter Prettenhofer <http://sites.google.com/site/peterprettenhofer/>`_ - * 68 `Alexandre Gramfort - <http://www-sop.inria.fr/members/Alexandre.Gramfort/index.fr.html>`_ + * 68 `Alexandre Gramfort <http://www-sop.inria.fr/members/Alexandre.Gramfort/>`_ * 59 `Mathieu Blondel <http://www.mblondel.org/journal/>`_ @@ -299,3 +389,15 @@ of commits): * 2 Vincent Michel * 1 Chris Filo Gorgolewski + +.. _Alexandre Gramfort: http://www-sop.inria.fr/members/Alexandre.Gramfort/ + +.. _Fabian Pedregosa: http://fseoane.net/blog/ + +.. _Mathieu Blondel: http://www.mblondel.org/journal/ + +.. _James Bergstra: http://www-etud.iro.umontreal.ca/~bergstrj/ + +.. _liblinear: http://www.csie.ntu.edu.tw/~cjlin/liblinear/ + +.. _Yaroslav Halchenko: http://www.onerussian.com/ diff --git a/examples/document_classification_20newsgroups.py b/examples/document_classification_20newsgroups.py index 8044a85e5bcec4d46a0e0d86775ca97d86ea8192..f4d72ec4389bcbbdf6df698d1a3964516789165a 100644 --- a/examples/document_classification_20newsgroups.py +++ b/examples/document_classification_20newsgroups.py @@ -41,6 +41,7 @@ import sys from scikits.learn.datasets import load_files from scikits.learn.feature_extraction.text.sparse import Vectorizer +from scikits.learn.linear_model import RidgeClassifier from scikits.learn.svm.sparse import LinearSVC from scikits.learn.linear_model.sparse import SGDClassifier from scikits.learn import metrics @@ -154,6 +155,11 @@ def benchmark(clf): print return score, train_time, test_time +for clf, name in ((RidgeClassifier(), "Ridge Classifier"),): + print 80*'=' + print name + results = benchmark(clf) + for penalty in ["l2", "l1"]: print 80*'=' print "%s penalty" % penalty.upper() diff --git a/examples/mixture/README.txt b/examples/mixture/README.txt index fb448977e111a8007ff91bf32bf137b13f123b93..0dfe861743bb57f856259ad532ee61bc8990cdea 100644 --- a/examples/mixture/README.txt +++ b/examples/mixture/README.txt @@ -2,4 +2,4 @@ Gaussian Mixture Models ------------------------- -Examples concerning the `scikits.learn.gmm` package. +Examples concerning the `scikits.learn.mixture` package.
diff --git a/examples/plot_covariance_estimation.py b/examples/plot_covariance_estimation.py index 2e0954957e64097f02ad10889dff273553256b1f..217397e22db08de851bc53f4926cb6ee0d6db0a7 100644 --- a/examples/plot_covariance_estimation.py +++ b/examples/plot_covariance_estimation.py @@ -40,7 +40,6 @@ negative_logliks = [-cov.fit(X_train, shrinkage=s).score(X_test) \ ############################################################################### # Plot results -pl.close('all') pl.loglog(shrinkages, negative_logliks) pl.xlabel('Shrinkage') pl.ylabel('Negative log-likelihood') diff --git a/examples/plot_digits_classification.py b/examples/plot_digits_classification.py index ec06d2c502f47348d72f768e186ee728d2516066..986790a3d3c28e041475fa0f228d2c7c2ec66a09 100644 --- a/examples/plot_digits_classification.py +++ b/examples/plot_digits_classification.py @@ -19,7 +19,7 @@ import pylab as pl from scikits.learn import datasets digits = datasets.load_digits() -# The data that we are interesting in is made of 8x8 images of digits, +# The data that we are interested in is made of 8x8 images of digits, # let's have a look at the first 3 images. We know which digit they # represent: it is given in the 'target' of the dataset. for index, (image, label) in enumerate(zip(digits.images, digits.target)[:4]): diff --git a/examples/plot_ica_vs_pca.py b/examples/plot_ica_vs_pca.py index 76509a3cfa71b7558b7d74c76c593218911d1492..4787cecda4f80ea843e6a1f2d4c39dda96d18bd6 100644 --- a/examples/plot_ica_vs_pca.py +++ b/examples/plot_ica_vs_pca.py @@ -77,7 +77,6 @@ def plot_samples(S, axis_list=None): pl.xlabel('$x$') pl.ylabel('$y$') -pl.close('all') pl.subplot(2, 2, 1) plot_samples(S / S.std()) pl.title('True Independant Sources') diff --git a/examples/plot_lda_qda.py b/examples/plot_lda_qda.py index 9e035bb3abf19a8625e23ddf9aaa2827fa3edec2..71c97ce646cfb51720be82fe0f4c5fc83d8caafe 100644 --- a/examples/plot_lda_qda.py +++ b/examples/plot_lda_qda.py @@ -1,9 +1,9 @@ """ -============================================================== -Linear Discriminant Analysis & Quadratic Discriminant Analysis -============================================================== +============================================================================== +Linear Discriminant Analysis & Quadratic Discriminant Analysis with confidence +============================================================================== -Plot the confidence ellipsoids of each class and decision boundary +Plot the decision boundary """ print __doc__ diff --git a/examples/plot_lda_vs_qda.py b/examples/plot_lda_vs_qda.py index b9dab313b6c343d5f79dd273b4d54298ae964b69..246684326c8be7f8e02741c552db2322e160337e 100644 --- a/examples/plot_lda_vs_qda.py +++ b/examples/plot_lda_vs_qda.py @@ -1,7 +1,7 @@ """ -============================================================== -Linear Discriminant Analysis & Quadratic Discriminant Analysis -============================================================== +==================================================================== +Linear and Quadratic Discriminant Analysis with confidence ellipsoid +==================================================================== Plot the confidence ellipsoids of each class and decision boundary """ diff --git a/examples/plot_neighbors.py b/examples/plot_neighbors.py index 4bdf51f44e778d217c7ac1b0d033b5450f786040..812a23f95a52ee49785b02c8a84ded165d9336ca 100644 --- a/examples/plot_neighbors.py +++ b/examples/plot_neighbors.py @@ -22,7 +22,7 @@ h = .02 # step size in the mesh # we create an instance of 
SVM and fit our data. We do not scale our # data since we want to plot the support vectors -clf = neighbors.Neighbors() +clf = neighbors.NeighborsClassifier() clf.fit(X, Y) # Plot the decision boundary. For that, we will assign a color to each diff --git a/examples/plot_neighbors_regression.py b/examples/plot_neighbors_regression.py index 8bb31ea9167b27afec931891a88b3d2abfb95023..e9bf38e283673752eb44b95c4d1b7330ef9aa650 100644 --- a/examples/plot_neighbors_regression.py +++ b/examples/plot_neighbors_regression.py @@ -5,18 +5,26 @@ k-Nearest Neighbors regression Demonstrate the resolution of a regression problem using a k-Nearest Neighbor and the interpolation of the -target using barycenter computation. +target using both barycenter and constant weights. """ print __doc__ +# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr> +# Fabian Pedregosa <fabian.pedregosa@inria.fr> +# +# License: BSD, (C) INRIA + + ############################################################################### # Generate sample data import numpy as np +import pylab as pl +from scikits.learn import neighbors np.random.seed(0) X = np.sort(5*np.random.rand(40, 1), axis=0) -T = np.linspace(0, 5, 500) +T = np.linspace(0, 5, 500)[:, np.newaxis] y = np.sin(X).ravel() # Add noise to targets @@ -25,20 +33,17 @@ y[::5] += 1*(0.5 - np.random.rand(8)) ############################################################################### # Fit regression model -from scikits.learn import neighbors +for i, mode in enumerate(('mean', 'barycenter')): + knn = neighbors.NeighborsRegressor(n_neighbors=4, mode=mode) + y_ = knn.fit(X, y).predict(T) -knn_barycenter = neighbors.NeighborsBarycenter(n_neighbors=5) -y_ = knn_barycenter.fit(X, y).predict(T) + pl.subplot(2, 1, 1 + i) + pl.scatter(X, y, c='k', label='data') + pl.plot(T, y_, c='g', label='prediction') + pl.axis('tight') + pl.legend() + pl.title('NeighborsRegressor with %s weights' % mode) -############################################################################### -# look at the results -import pylab as pl -pl.scatter(X, y, c='k', label='data') -pl.hold('on') -pl.plot(T, y_, c='g', label='k-NN prediction') -pl.xlabel('data') -pl.ylabel('target') -pl.legend() -pl.title('k-NN Regression') +pl.subplots_adjust(0.1, 0.04, 0.95, 0.94, 0.3, 0.28) pl.show() diff --git a/examples/plot_pca.py b/examples/plot_pca.py index 7e40b2c73674b39a98124736dc272794788788e2..db317f76dc8c2577fa14bf3ca4c51c7fc80c9518 100644 --- a/examples/plot_pca.py +++ b/examples/plot_pca.py @@ -1,6 +1,6 @@ """ ==================================== -PCA 2d projection of of Iris dataset +PCA 2D projection of Iris dataset ==================================== The Iris dataset represents 3 kinds of Iris flowers (Setosa, Versicolour diff --git a/examples/plot_permutation_test_for_classification.py b/examples/plot_permutation_test_for_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..d41eb0c3c78d24704a14ae59fc7bdfe95f2258eb --- /dev/null +++ b/examples/plot_permutation_test_for_classification.py @@ -0,0 +1,63 @@ +""" +================================================================= +Test with permutations the significance of a classification score +================================================================= + +In order to test whether a classification score is significant, one +technique consists in repeating the classification procedure after +randomizing (permuting) the labels.
The p-value is then given by the percentage of runs for +which the score obtained is greater than the classification score +obtained in the first place. + +""" + +# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr> +# License: BSD + +print __doc__ + +import numpy as np +import pylab as pl + +from scikits.learn.svm import SVC +from scikits.learn.cross_val import StratifiedKFold, permutation_test_score +from scikits.learn import datasets +from scikits.learn.metrics import zero_one_score + + +############################################################################## +# Loading a dataset +iris = datasets.load_iris() +X = iris.data +y = iris.target +n_classes = np.unique(y).size + +# Some noisy data, not correlated with the target +random = np.random.RandomState(seed=0) +E = random.normal(size=(len(X), 2200)) + +# Add noisy data to the informative features to make the task harder +X = np.c_[X, E] + +svm = SVC(kernel='linear') +cv = StratifiedKFold(y, 2) + +score, permutation_scores, pvalue = permutation_test_score(svm, X, y, + zero_one_score, cv=cv, + n_permutations=100, n_jobs=1) + +print "Classification score %s (pvalue : %s)" % (score, pvalue) + +############################################################################### +# View histogram of permutation scores +pl.hist(permutation_scores, label='Permutation scores') +ylim = pl.ylim() +pl.vlines(score, ylim[0], ylim[1], linestyle='--', + color='g', linewidth=3, label='Classification Score' + ' (pvalue %s)' % pvalue) +pl.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--', + color='k', linewidth=3, label='Luck') +pl.ylim(ylim) +pl.legend() +pl.xlabel('Score') +pl.show() diff --git a/examples/svm/plot_iris.py b/examples/svm/plot_iris.py index c250eed62cfb2dfe24d3c52455d52aff549376e8..1a7e65ed36da3a683b795fbf0fa8e2b4d7a75789 100644 --- a/examples/svm/plot_iris.py +++ b/examples/svm/plot_iris.py @@ -53,12 +53,11 @@ for i, clf in enumerate((svc, rbf_svc, nu_svc, lin_svc)): Z = Z.reshape(xx.shape) pl.set_cmap(pl.cm.Paired) pl.contourf(xx, yy, Z) - pl.axis('tight') + pl.axis('off') # Plot also the training points pl.scatter(X[:,0], X[:,1], c=Y) pl.title(titles[i]) -pl.axis('tight') pl.show() diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py index 05833eece9ec5efbe1cd46efc952fbe2af097d2e..fec1d76cab97fe86ac2ce6977261850e4cba1b18 100644 --- a/examples/svm/plot_separating_hyperplane.py +++ b/examples/svm/plot_separating_hyperplane.py @@ -40,8 +40,10 @@ pl.set_cmap(pl.cm.Paired) pl.plot(xx, yy, 'k-') pl.plot(xx, yy_down, 'k--') pl.plot(xx, yy_up, 'k--') + +pl.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], + s=80, facecolors='none') pl.scatter(X[:,0], X[:,1], c=Y) -pl.scatter(clf.support_vectors_[:,0], clf.support_vectors_[:,1], c='white') pl.axis('tight') pl.show() diff --git a/scikits/learn/__init__.py b/scikits/learn/__init__.py index 5eef62887d90d9d7687a40281fdaccb43a00f3c0..4638dbb41f42ec15db2a55c41b29f2a2e88e82bc 100644 --- a/scikits/learn/__init__.py +++ b/scikits/learn/__init__.py @@ -36,7 +36,10 @@ except: __all__ = ['cross_val', 'ball_tree', 'cluster', 'covariance', 'datasets', - 'gmm', 'linear_model', 'logistic', 'lda', 'metrics', 'svm', - 'features', 'clone', 'metrics', 'test', 'gaussian_process'] + 'fastica', 'feature_extraction', 'feature_selection', + 'gaussian_process', 'grid_search', 'hmm', 'lda', 'linear_model', + 'metrics', 'mixture', 'naive_bayes', 'neighbors', + 'pca', 'pipeline', 'preprocessing', 'qda', 'svm', 'test', + 'clone'] -__version__ = '0.6.0'
+__version__ = '0.7.1' diff --git a/scikits/learn/base.py b/scikits/learn/base.py index 77d6fbf71d5f5130117c340f3e4c6f08378d2c11..817398959a7792045222fe0bfcb9b602a859a282 100644 --- a/scikits/learn/base.py +++ b/scikits/learn/base.py @@ -252,6 +252,37 @@ class RegressorMixin(object): return r2_score(y, self.predict(X)) +################################################################################ +class TransformerMixin(object): + """ Mixin class for all transformers in scikits.learn + """ + + def fit_transform(self, X, y=None, **fit_params): + """Fit model to data and subsequently transform the data + + Sometimes, fit and transform can be implemented more efficiently jointly + than separately. In those cases, the estimator will typically override + the method. + + Parameters + ---------- + X : numpy array of shape [n_samples, n_features] + Training set. + + y : numpy array of shape [n_samples] + Target values. + + Returns + ------- + X_new : numpy array of shape [n_samples, n_features_new] + Transformed array. + """ + if y is None: + # fit method of arity 1 (unsupervised transformation) + return self.fit(X, **fit_params).transform(X) + else: + # fit method of arity 2 (supervised transformation) + return self.fit(X, y, **fit_params).transform(X) + ################################################################################ # XXX: Temporary solution to figure out if an estimator is a classifier diff --git a/scikits/learn/cluster/affinity_propagation_.py b/scikits/learn/cluster/affinity_propagation_.py index febebec6db54e0efb93ce571294c4c7d29b9d6e5..29210bd02a273a0fac7321975fc77d8a4ab5e719 100644 --- a/scikits/learn/cluster/affinity_propagation_.py +++ b/scikits/learn/cluster/affinity_propagation_.py @@ -51,7 +51,9 @@ def affinity_propagation(S, p=None, convit=30, max_iter=200, damping=0.5, """ if copy: # Copy the affinity matrix to avoid modifying it inplace - S = S.copy() + S = np.array(S, copy=True, dtype=np.float) + else: + S = np.asanyarray(S, dtype=np.float) n_points = S.shape[0] @@ -72,9 +74,8 @@ R = np.zeros((n_points, n_points)) # Initialize messages # Remove degeneracies - S += ( np.finfo(np.double).eps*S - + np.finfo(np.double).tiny*100 - )*random_state.randn(n_points, n_points) + S += (np.finfo(np.double).eps * S + np.finfo(np.double).tiny * 100) * \ + random_state.randn(n_points, n_points) # Execute parallel affinity propagation updates e = np.zeros((n_points, convit)) @@ -118,7 +119,7 @@ K = np.sum(E, axis=0) if it >= convit: - se = np.sum(e, axis=1); + se = np.sum(e, axis=1) unconverged = np.sum((se == convit) + (se == 0)) != n_points if (not unconverged and (K>0)) or (it==max_iter): if verbose: @@ -137,7 +138,7 @@ # Refine the final set of exemplars and clusters and return results for k in range(K): ii = np.where(c==k)[0] - j = np.argmax(np.sum(S[ii, ii], axis=0)) + j = np.argmax(np.sum(S[ii[:,np.newaxis], ii], axis=0)) I[k] = ii[j] c = np.argmax(S[:, I], axis=1) @@ -153,7 +154,8 @@ return cluster_centers_indices, labels -################################################################################ +############################################################################### + class AffinityPropagation(BaseEstimator): """Perform Affinity Propagation Clustering of data @@ -225,8 +227,8 @@ class
AffinityPropagation(BaseEstimator): """ self._set_params(**params) - self.cluster_centers_indices_, self.labels_ = affinity_propagation(S, p, - max_iter=self.max_iter, convit=self.convit, damping=self.damping, + self.cluster_centers_indices_, self.labels_ = affinity_propagation(S, + p, max_iter=self.max_iter, convit=self.convit, + damping=self.damping, copy=self.copy) return self - diff --git a/scikits/learn/cluster/k_means_.py b/scikits/learn/cluster/k_means_.py index 99e2a0dcd7ce19d6abaf8c5e077796d679db3f71..fc9baec2cf9b706b28e8b53ecbd7cea48c63b510 100644 --- a/scikits/learn/cluster/k_means_.py +++ b/scikits/learn/cluster/k_means_.py @@ -3,6 +3,7 @@ # Authors: Gael Varoquaux <gael.xaroquaux@normalesup.org> # Thomas Rueckstiess <ruecksti@in.tum.de> +# James Bergstra <james.bergstra@umontreal.ca> # License: BSD import warnings @@ -10,13 +11,13 @@ import warnings import numpy as np from ..base import BaseEstimator +from ..metrics.pairwise import euclidean_distances -################################################################################ + +############################################################################### # Initialisation heuristic -# kinit originaly from pybrain: -# http://github.com/pybrain/pybrain/raw/master/pybrain/auxiliary/kmeans.py -def k_init(X, k, n_samples_max=500): +def k_init(X, k, n_samples_max=500, rng=None): """Init k seeds according to kmeans++ Parameters @@ -42,38 +43,46 @@ def k_init(X, k, n_samples_max=500): Implementation from Yong Sun's website http://blogs.sun.com/yongsun/entry/k_means_and_k_means + + kinit originally from pybrain: + http://github.com/pybrain/pybrain/raw/master/pybrain/auxiliary/kmeans.py """ n_samples = X.shape[0] + if rng is None: + rng = np.random + if n_samples >= n_samples_max: - X = X[np.random.randint(n_samples, size=n_samples_max)] + X = X[rng.randint(n_samples, size=n_samples_max)] n_samples = n_samples_max - 'choose the 1st seed randomly, and store D(x)^2 in D[]' - centers = [X[np.random.randint(n_samples)]] - D = ((X - centers[0]) ** 2).sum(axis=-1) + distances = euclidean_distances(X, X, squared=True) + + # choose the 1st seed randomly, and store D(x)^2 in D[] + first_idx = rng.randint(n_samples) + centers = [X[first_idx]] + D = distances[first_idx] for _ in range(k - 1): - bestDsum = bestIdx = -1 + best_d_sum = best_idx = -1 for i in range(n_samples): - 'Dsum = sum_{x in X} min(D(x)^2,||x-xi||^2)' - Dsum = np.minimum(D, ((X - X[i]) ** 2).sum(axis=-1) - ).sum() + # d_sum = sum_{x in X} min(D(x)^2, ||x - xi||^2) + d_sum = np.minimum(D, distances[i]).sum() - if bestDsum < 0 or Dsum < bestDsum: - bestDsum, bestIdx = Dsum, i + if best_d_sum < 0 or d_sum < best_d_sum: + best_d_sum, best_idx = d_sum, i - centers.append(X[bestIdx]) - D = np.minimum(D, ((X - X[bestIdx]) ** 2).sum(axis=-1)) + centers.append(X[best_idx]) + D = np.minimum(D, distances[best_idx]) return np.array(centers) -################################################################################ +############################################################################### # K-means estimation by EM (expectation maximisation) def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0, - delta=1e-4): + tol=1e-4, rng=None, copy_x=True): """ K-means clustering algorithm. Parameters @@ -92,11 +101,11 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0, Maximum number of iterations of the k-means algorithm to run.
n_init: int, optional, default: 10
-        Number of time the k-means algorithm will be run with different centroid
-        seeds. The final results will be the best output of n_init consecutive
-        runs in terms of inertia.
+        Number of times the k-means algorithm will be run with different
+        centroid seeds. The final results will be the best output of
+        n_init consecutive runs in terms of inertia.
-    init: {'k-means++', 'random', or an ndarray}, optional
+    init: {'k-means++', 'random', or ndarray, or a callable}, optional
         Method for initialization, default to 'k-means++':
         'k-means++' : selects initial cluster centers for k-mean
@@ -109,12 +118,22 @@
         If an ndarray is passed, it should be of shape (k, p) and gives
         the initial centers.
-    delta: float, optional
+    tol: float, optional
         The relative increment in the results before declaring convergence.
     verbose: boolean, optional
         Verbosity mode
+    rng: numpy.RandomState, optional
+        The generator used to initialize the centers. Defaults to numpy.random.
+
+    copy_x: boolean, optional
+        When pre-computing distances it is more numerically accurate to center
+        the data first. If copy_x is True, then the original data is not
+        modified. If False, the original data is modified, and put back before
+        the function returns, but small numerical differences may be introduced
+        by subtracting and then adding the data mean.
+
     Returns
     -------
     centroid: ndarray
@@ -129,6 +148,8 @@
         The final value of the inertia criterion
     """
+    if rng is None:
+        rng = np.random
     n_samples = X.shape[0]
     vdata = np.mean(np.var(X, 0))
@@ -139,44 +160,59 @@
             warnings.warn('Explicit initial center position passed: '
                           'performing only one init in the k-means')
             n_init = 1
+    # subtract mean of X for more accurate distance computations
+    Xmean = X.mean(axis=0)
+    if copy_x:
+        X = X.copy()
+    X -= Xmean
     for it in range(n_init):
         # init
         if init == 'k-means++':
-            centers = k_init(X, k)
+            centers = k_init(X, k, rng=rng)
         elif init == 'random':
-            seeds = np.argsort(np.random.rand(n_samples))[:k]
+            seeds = np.argsort(rng.rand(n_samples))[:k]
             centers = X[seeds]
         elif hasattr(init, '__array__'):
             centers = np.asanyarray(init).copy()
+        elif callable(init):
+            centers = init(X, k, rng=rng)
         else:
             raise ValueError("the init parameter for the k-means should "
-                "be 'k-mean++' or 'random' or an ndarray, "
+                "be 'k-means++' or 'random' or an ndarray, "
                 "'%s' (type '%s') was passed." % (init, type(init)))
+        if verbose:
+            print 'Initialization complete'
         # iterations
+        x_squared_norms = X.copy()
+        x_squared_norms **= 2
+        x_squared_norms = x_squared_norms.sum(axis=1)
         for i in range(max_iter):
             centers_old = centers.copy()
-            labels, inertia = _e_step(X, centers)
+            labels, inertia = _e_step(X, centers,
+                                      x_squared_norms=x_squared_norms)
             centers = _m_step(X, labels, k)
             if verbose:
-                print 'Iteration %i, intertia %s' % (i, inertia)
+                print 'Iteration %i, inertia %s' % (i, inertia)
             if np.sum((centers_old - centers) ** 2) < tol * vdata:
                 if verbose:
                     print 'Converged to similar centers at iteration', i
                 break
         if inertia < best_inertia:
+            best_labels = labels.copy()
             best_centers = centers.copy()
-            best_labels = labels.copy()
             best_inertia = inertia
     else:
+        best_labels = labels
         best_centers = centers
-        best_labels = labels
         best_inertia = inertia
-    return best_centers, best_labels, best_inertia
+ if not copy_x: + X += Xmean + return best_centers + Xmean, best_labels, best_inertia -def _m_step(x, z ,k): +def _m_step(x, z, k): """ M step of the K-means EM algorithm Computation of cluster centers/means @@ -196,16 +232,22 @@ def _m_step(x, z ,k): The resulting centers """ dim = x.shape[1] - centers = np.repeat(np.reshape(x.mean(0), (1, dim)), k, 0) + centers = np.empty((k, dim)) + X_center = None for q in range(k): - if np.sum(z==q)==0: - pass + this_center_mask = (z == q) + if not np.any(this_center_mask): + # The centroid of empty clusters is set to the center of + # everything + if X_center is None: + X_center = x.mean(axis=0) + centers[q] = X_center else: - centers[q] = np.mean(x[z==q], axis=0) + centers[q] = np.mean(x[this_center_mask], axis=0) return centers -def _e_step(x, centers): +def _e_step(x, centers, precompute_distances=True, x_squared_norms=None): """E step of the K-means EM algorithm Computation of the input-to-cluster assignment @@ -226,21 +268,28 @@ def _e_step(x, centers): inertia: float The value of the inertia criterion with the assignment """ + n_samples = x.shape[0] - z = -np.ones(n_samples).astype(np.int) - mindist = np.infty * np.ones(n_samples) k = centers.shape[0] + + if precompute_distances: + distances = euclidean_distances(centers, x, x_squared_norms, + squared=True) + z = np.empty(n_samples, dtype=np.int) + z.fill(-1) + mindist = np.empty(n_samples) + mindist.fill(np.infty) for q in range(k): - dist = np.sum((x - centers[q]) ** 2, 1) - z[dist<mindist] = q + if precompute_distances: + dist = distances[q] + else: + dist = np.sum((x - centers[q]) ** 2, axis=1) + z[dist < mindist] = q mindist = np.minimum(dist, mindist) inertia = mindist.sum() return z, inertia - -################################################################################ - class KMeans(BaseEstimator): """ K-Means clustering @@ -258,15 +307,16 @@ class KMeans(BaseEstimator): interpreted as initial cluster to use instead. max_iter : int - Maximum number of iterations of the k-means algorithm for a single run. + Maximum number of iterations of the k-means algorithm for a + single run. n_init: int, optional, default: 10 - Number of time the k-means algorithm will be run with different centroid - seeds. The final results will be the best output of n_init consecutive - runs in terms of inertia. + Number of time the k-means algorithm will be run with different + centroid seeds. The final results will be the best output of + n_init consecutive runs in terms of inertia. init : {'k-means++', 'random', 'points', 'matrix'} - Method for initialization, defaults to 'k-means++': + Method for initialization, defaults to 'random': 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section @@ -281,6 +331,9 @@ class KMeans(BaseEstimator): 'matrix': interpret the k parameter as a k by M (or length k array for one-dimensional data) array of initial centroids. + tol: float, optional default: 1e-4 + Relative tolerance w.r.t. inertia to declare convergence + Methods ------- @@ -317,19 +370,24 @@ class KMeans(BaseEstimator): it can be useful to restart it several times. 
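    A minimal usage sketch (toy two-blob data; the exact centers and label
    assignment depend on the random initialization)::

        import numpy as np
        from scikits.learn.cluster import KMeans

        X = np.r_[np.random.randn(20, 2), 5. + np.random.randn(20, 2)]
        km = KMeans(k=2, init='k-means++').fit(X)
        print km.cluster_centers_
        print km.labels_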
""" - - def __init__(self, k=8, init='random', n_init=10, max_iter=300): + def __init__(self, k=8, init='random', n_init=10, max_iter=300, tol=1e-4, + verbose=0, rng=None, copy_x=True): self.k = k self.init = init self.max_iter = max_iter + self.tol = tol self.n_init = n_init + self.verbose = verbose + self.rng = rng + self.copy_x = copy_x def fit(self, X, **params): - """ Compute k-means""" + """Compute k-means""" X = np.asanyarray(X) self._set_params(**params) - self.cluster_centers_, self.labels_, self.inertia_ = k_means(X, - k=self.k, init=self.init, n_init=self.n_init, - max_iter=self.max_iter) + self.cluster_centers_, self.labels_, self.inertia_ = k_means( + X, k=self.k, init=self.init, n_init=self.n_init, + max_iter=self.max_iter, verbose=self.verbose, + tol=self.tol, rng=self.rng, copy_x=self.copy_x) return self diff --git a/scikits/learn/cluster/mean_shift_.py b/scikits/learn/cluster/mean_shift_.py index e7b1a39eaf4ce33a9bd5f6e26f4fbfbf4a48045d..07742ebcb98136bc351edd57c0ad037c8da3891d 100644 --- a/scikits/learn/cluster/mean_shift_.py +++ b/scikits/learn/cluster/mean_shift_.py @@ -9,7 +9,7 @@ from math import floor import numpy as np from ..base import BaseEstimator -from ..metrics.pairwise import euclidian_distances +from ..metrics.pairwise import euclidean_distances def estimate_bandwidth(X, quantile=0.3): @@ -23,7 +23,7 @@ def estimate_bandwidth(X, quantile=0.3): should be between [0, 1] 0.5 means that the median is all pairwise distances is used """ - distances = euclidian_distances(X, X) + distances = euclidean_distances(X, X) distances = np.triu(distances, 1) distances_sorted = np.sort(distances[distances > 0]) bandwidth = distances_sorted[floor(quantile * len(distances_sorted))] diff --git a/scikits/learn/cluster/spectral.py b/scikits/learn/cluster/spectral.py index 82ff5089e27b3376f4bc5235c55b8f19c2c3b3c0..60c548122ca87a46601fb4be65921c26801a1686 100644 --- a/scikits/learn/cluster/spectral.py +++ b/scikits/learn/cluster/spectral.py @@ -15,7 +15,7 @@ from .k_means_ import k_means def spectral_embedding(adjacency, k=8, mode=None): """ Spectral embedding: project the sample on the k first - eigen vectors of the graph laplacian. + eigen vectors of the normalized graph Laplacian. Parameters ----------- @@ -123,6 +123,9 @@ def spectral_clustering(adjacency, k=8, mode=None): ------ The graph should contain only one connect component, elsewhere the results make little sens. + + This algorithm solves the normalized cut for k=2: it is a + normalized spectral clustering. """ maps = spectral_embedding(adjacency, k=k, mode=mode) maps = maps[1:] @@ -172,9 +175,21 @@ class SpectralClustering(BaseEstimator): ----------- X: array-like or sparse matrix, shape: (p, p) The adjacency matrix of the graph to embed. + X is an adjacency matrix of a similarity graph: its + entries must be positive or zero. Zero means that + elements have nothing in common, whereas high values mean + that elements are strongly similar. Notes ------ + If you have an affinity matrix, such as a distance matrix, + for which 0 means identical elements, and high values means + very dissimilar elements, it can be transformed in a + similarity matrix that is well suited for the algorithm by + applying the gaussian (heat) kernel:: + + np.exp(- X**2/2. * delta**2) + If the pyamg package is installed, it is used. This greatly speeds up computation. 
""" diff --git a/scikits/learn/cluster/tests/test_affinity_propagation.py b/scikits/learn/cluster/tests/test_affinity_propagation.py index c412b679f571ae7c7509c73591ed66ae0000df1a..229afb47b853257203f57f5c53ddcb34d0fadd37 100644 --- a/scikits/learn/cluster/tests/test_affinity_propagation.py +++ b/scikits/learn/cluster/tests/test_affinity_propagation.py @@ -4,12 +4,13 @@ Testing for Clustering methods """ import numpy as np -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_array_equal from ..affinity_propagation_ import AffinityPropagation, \ affinity_propagation from .common import generate_clustered_data + n_clusters = 3 X = generate_clustered_data(n_clusters=n_clusters) @@ -21,7 +22,7 @@ def test_affinity_propagation(): """ # Compute similarities X_norms = np.sum(X*X, axis=1) - S = - X_norms[:,np.newaxis] - X_norms[np.newaxis,:] + 2 * np.dot(X, X.T) + S = - X_norms[:, np.newaxis] - X_norms[np.newaxis, :] + 2 * np.dot(X, X.T) p = 10*np.median(S) # Compute Affinity Propagation @@ -37,5 +38,8 @@ def test_affinity_propagation(): n_clusters_ = len(cluster_centers_indices) assert_equal(np.unique(labels).size, n_clusters_) - assert_equal(n_clusters, n_clusters_) + + # Test also with no copy + _, labels_no_copy = affinity_propagation(S, p, copy=False) + assert_array_equal(labels, labels_no_copy) diff --git a/scikits/learn/cluster/tests/test_k_means.py b/scikits/learn/cluster/tests/test_k_means.py index 8a4ff66f36c4fff0f1160bb38b3f95ce9ccf64c3..363280db36db198bfa1181290f70b246e099598a 100644 --- a/scikits/learn/cluster/tests/test_k_means.py +++ b/scikits/learn/cluster/tests/test_k_means.py @@ -1,7 +1,4 @@ -""" -Testing for K-means. - -""" +"""Testing for K-means""" import numpy as np from numpy.testing import assert_equal @@ -55,5 +52,3 @@ def test_k_means_fixed_array_init(): assert_equal(np.unique(labels[20:40]).size, 1) assert_equal(np.unique(labels[40:]).size, 1) - - diff --git a/scikits/learn/cross_val.py b/scikits/learn/cross_val.py index 3f6aa0af4db0cd31747be8db104d31700009af3b..2eb44a516a18df620cec651ec3e2d474e809b199 100644 --- a/scikits/learn/cross_val.py +++ b/scikits/learn/cross_val.py @@ -16,18 +16,22 @@ from .externals.joblib import Parallel, delayed class LeaveOneOut(object): """Leave-One-Out cross validation iterator - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets """ - def __init__(self, n): + def __init__(self, n, indices=False): """Leave-One-Out cross validation iterator - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets Parameters =========== n: int Total number of elements + indices: boolean, optional (default False) + Return train/test split with integer indices or boolean mask. + Integer indices are useful when dealing with sparse matrices + that cannot be indexed by boolean masks. 
Examples ======== @@ -50,13 +54,18 @@ class LeaveOneOut(object): [[1 2]] [[3 4]] [1] [2] """ self.n = n + self.indices = indices def __iter__(self): n = self.n for i in xrange(n): - test_index = np.zeros(n, dtype=np.bool) + test_index = np.zeros(n, dtype=np.bool) test_index[i] = True train_index = np.logical_not(test_index) + if self.indices: + ind = np.arange(n) + train_index = ind[train_index] + test_index = ind[test_index] yield train_index, test_index def __repr__(self): @@ -72,13 +81,13 @@ class LeaveOneOut(object): class LeavePOut(object): """Leave-P-Out cross validation iterator - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets """ - def __init__(self, n, p): + def __init__(self, n, p, indices=False): """Leave-P-Out cross validation iterator - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets Parameters =========== @@ -86,6 +95,10 @@ class LeavePOut(object): Total number of elements p: int Size test sets + indices: boolean, optional (default False) + Return train/test split with integer indices or boolean mask. + Integer indices are useful when dealing with sparse matrices + that cannot be indexed by boolean masks. Examples ======== @@ -110,6 +123,7 @@ class LeavePOut(object): """ self.n = n self.p = p + self.indices = indices def __iter__(self): n = self.n @@ -119,6 +133,10 @@ class LeavePOut(object): test_index = np.zeros(n, dtype=np.bool) test_index[np.array(idx)] = True train_index = np.logical_not(test_index) + if self.indices: + ind = np.arange(n) + train_index = ind[train_index] + test_index = ind[test_index] yield train_index, test_index def __repr__(self): @@ -137,13 +155,13 @@ class LeavePOut(object): class KFold(object): """K-Folds cross validation iterator - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets """ - def __init__(self, n, k): + def __init__(self, n, k, indices=False): """K-Folds cross validation iterator - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets Parameters ---------- @@ -151,6 +169,10 @@ class KFold(object): Total number of elements k: int number of folds + indices: boolean, optional (default False) + Return train/test split with integer indices or boolean mask. + Integer indices are useful when dealing with sparse matrices + that cannot be indexed by boolean masks. 
Examples -------- @@ -174,10 +196,11 @@ class KFold(object): All the folds have size trunc(n/k), the last one has the complementary """ assert k>0, ('cannot have k below 1') - assert k<n, ('cannot have k=%d greater than the number ' + assert k<=n, ('cannot have k=%d greater than the number ' 'of samples: %d'% (k, n)) self.n = n self.k = k + self.indices = indices def __iter__(self): n = self.n @@ -185,12 +208,16 @@ class KFold(object): j = ceil(n / k) for i in xrange(k): - test_index = np.zeros(n, dtype=np.bool) - if i<k-1: + test_index = np.zeros(n, dtype=np.bool) + if i < k-1: test_index[i*j:(i+1)*j] = True else: test_index[i*j:] = True train_index = np.logical_not(test_index) + if self.indices: + ind = np.arange(n) + train_index = ind[train_index] + test_index = ind[test_index] yield train_index, test_index def __repr__(self): @@ -208,19 +235,17 @@ class KFold(object): class StratifiedKFold(object): """Stratified K-Folds cross validation iterator - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets This cross-validation object is a variation of KFold, which returns stratified folds. The folds are made by preserving the percentage of samples for each class. """ - # XXX: Should maybe have an argument to raise when - # folds are not balanced - def __init__(self, y, k): + def __init__(self, y, k, indices=False): """K-Folds cross validation iterator - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets Parameters ---------- @@ -228,6 +253,10 @@ class StratifiedKFold(object): Samples to split in K folds k: int number of folds + indices: boolean, optional (default False) + Return train/test split with integer indices or boolean mask. + Integer indices are useful when dealing with sparse matrices + that cannot be indexed by boolean masks. 
Examples -------- @@ -253,38 +282,28 @@ class StratifiedKFold(object): y = np.asanyarray(y) n = y.shape[0] assert k>0, ValueError('cannot have k below 1') - assert k<n, ValueError('cannot have k=%d greater than the number ' + assert k<=n, ValueError('cannot have k=%d greater than the number ' 'of samples %d' % (k, n)) _, y_sorted = unique(y, return_inverse=True) assert k <= np.min(np.bincount(y_sorted)) self.y = y self.k = k + self.indices = indices def __iter__(self): y = self.y.copy() k = self.k - n = y.shape[0] - - classes = unique(y) - - idx_c = dict() - j_c = dict() - n_c = dict() - for c in classes: - idx_c[c] = np.where(y == c)[0] - n_c[c] = len(idx_c[c]) - j_c[c] = int(ceil(n_c[c] / k)) + n = y.size + idx = np.argsort(y) for i in xrange(k): - test_index = np.zeros(n, dtype=np.bool) - for c in classes: - if i<k-1: - test_index_c = range(i*j_c[c], (i+1)*j_c[c]) - else: - test_index_c = range(i*j_c[c], n_c[c]) - test_index[idx_c[c][test_index_c]] = True - + test_index = np.zeros(n, dtype=np.bool) + test_index[idx[i::k]] = True train_index = np.logical_not(test_index) + if self.indices: + ind = np.arange(n) + train_index = ind[train_index] + test_index = ind[test_index] yield train_index, test_index def __repr__(self): @@ -300,21 +319,26 @@ class StratifiedKFold(object): ############################################################################## + class LeaveOneLabelOut(object): """Leave-One-Label_Out cross-validation iterator - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets """ - def __init__(self, labels): + def __init__(self, labels, indices=False): """Leave-One-Label_Out cross validation - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets Parameters ---------- labels : list List of labels + indices: boolean, optional (default False) + Return train/test split with integer indices or boolean mask. + Integer indices are useful when dealing with sparse matrices + that cannot be indexed by boolean masks. Examples ---------- @@ -344,14 +368,19 @@ class LeaveOneLabelOut(object): """ self.labels = labels self.n_labels = unique(labels).size + self.indices = indices def __iter__(self): # We make a copy here to avoid side-effects during iteration labels = np.array(self.labels, copy=True) for i in unique(labels): - test_index = np.zeros(len(labels), dtype=np.bool) + test_index = np.zeros(len(labels), dtype=np.bool) test_index[labels==i] = True train_index = np.logical_not(test_index) + if self.indices: + ind = np.arange(len(labels)) + train_index = ind[train_index] + test_index = ind[test_index] yield train_index, test_index def __repr__(self): @@ -368,18 +397,22 @@ class LeaveOneLabelOut(object): class LeavePLabelOut(object): """Leave-P-Label_Out cross-validation iterator - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets """ - def __init__(self, labels, p): + def __init__(self, labels, p, indices=False): """Leave-P-Label_Out cross validation - Provides train/test indexes to split data in train test sets + Provides train/test indices to split data in train test sets Parameters ---------- labels : list - List of labels + List of labels + indices: boolean, optional (default False) + Return train/test split with integer indices or boolean mask. + Integer indices are useful when dealing with sparse matrices + that cannot be indexed by boolean masks. 
Examples
     ----------
@@ -412,6 +445,7 @@ class LeavePLabelOut(object):
         self.unique_labels = unique(self.labels)
         self.n_labels = self.unique_labels.size
         self.p = p
+        self.indices = indices
     def __iter__(self):
         # We make a copy here to avoid side-effects during iteration
@@ -426,6 +460,10 @@
             for l in unique_labels[idx]:
                 test_index[labels == l] = True
             train_index = np.logical_not(test_index)
+            if self.indices:
+                ind = np.arange(labels.size)
+                train_index = ind[train_index]
+                test_index = ind[test_index]
             yield train_index, test_index
     def __repr__(self):
@@ -477,7 +515,7 @@ def cross_val_score(estimator, X, y=None, score_func=None, cv=None, iid=False,
     cv: cross-validation generator, optional
         A cross-validation generator. If None, a 3-fold cross
         validation is used or 3-fold stratified cross-validation
-        when y is supplied.
+        when y is supplied and estimator is a classifier.
     iid: boolean, optional
         If True, the data is assumed to be identically distributed across
         the folds, and the loss minimized is the total loss per sample,
@@ -498,10 +536,9 @@ def cross_val_score(estimator, X, y=None, score_func=None, cv=None, iid=False,
         assert hasattr(estimator, 'score'), ValueError(
                 "If no score_func is specified, the estimator passed "
                 "should have a 'score' method. The estimator %s "
-                "does not." % estimator
-                )
+                "does not." % estimator)
     # We clone the estimator to make sure that all the folds are
-    # independent, and that it is pickable.
+    # independent, and that it is pickle-able.
     scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
                 delayed(_cross_val_score)(clone(estimator), X, y, score_func,
                                           train, test, iid)
@@ -509,3 +546,101 @@
     return np.array(scores)
+def _permutation_test_score(estimator, X, y, cv, score_func):
+    """Auxiliary function for permutation_test_score
+    """
+    y_test = list()
+    y_pred = list()
+    for train, test in cv:
+        y_test.append(y[test])
+        y_pred.append(estimator.fit(X[train], y[train]).predict(X[test]))
+    return score_func(np.ravel(y_test), np.ravel(y_pred))
+
+
+def _shuffle(y, labels, rng):
+    """Return a shuffled copy of y, optionally shuffling only within groups
+    of samples sharing the same label.
+    """
+    if labels is None:
+        ind = rng.permutation(y.size)
+    else:
+        ind = np.arange(labels.size)
+        for label in np.unique(labels):
+            this_mask = (labels == label)
+            ind[this_mask] = rng.permutation(ind[this_mask])
+    return y[ind]
+
+
+def permutation_test_score(estimator, X, y, score_func, cv=None,
+                           n_permutations=100, n_jobs=1, labels=None,
+                           rng=0, verbose=0):
+    """Evaluate the significance of a cross-validated score with permutations
+
+    Parameters
+    ----------
+    estimator: estimator object implementing 'fit'
+        The object to use to fit the data
+    X: array-like of shape at least 2D
+        The data to fit.
+    y: array-like
+        The target variable to try to predict in the case of
+        supervised learning.
+    score_func: callable
+        callable taking as arguments the test targets (y_test) and
+        the predicted targets (y_pred). Returns a float.
+    cv: cross-validation generator, optional
+        A cross-validation generator. If None, a 3-fold cross
+        validation is used or 3-fold stratified cross-validation
+        when the estimator is a classifier.
+    n_jobs: integer, optional
+        The number of CPUs to use to do the computation. -1 means
+        'all CPUs'.
+    labels: array-like of shape [n_samples] (optional)
+        Labels constrain the permutation among groups of samples with
+        the same label.
+    rng: RandomState or an int seed (0 by default)
+        A random number generator instance to define the state of the
+        random permutations generator.
+    verbose: integer, optional
+        The verbosity level
+
+    Returns
+    -------
+    score: float
+        The true score without permuting targets.
+    permutation_scores : array, shape = [n_permutations]
+        The scores obtained for each permutation.
+    pvalue: float
+        The p-value.
+
+    Notes
+    -----
+    This corresponds to Test 1 in:
+    Ojala and Garriga. Permutation Tests for Studying Classifier Performance.
+    The Journal of Machine Learning Research (2010) vol. 11
+    """
+    n_samples = len(X)
+    if cv is None:
+        if is_classifier(estimator):
+            cv = StratifiedKFold(y, k=3)
+        else:
+            cv = KFold(n_samples, k=3)
+
+    if rng is None:
+        rng = np.random.RandomState()
+    elif isinstance(rng, int):
+        rng = np.random.RandomState(rng)
+
+    # We clone the estimator to make sure that all the folds are
+    # independent, and that it is pickle-able.
+    score = _permutation_test_score(clone(estimator), X, y, cv, score_func)
+    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
+                delayed(_permutation_test_score)(clone(estimator), X,
+                                                 _shuffle(y, labels, rng),
+                                                 cv, score_func)
+                for _ in range(n_permutations))
+    permutation_scores = np.array(permutation_scores)
+    pvalue = (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1)
+    return score, permutation_scores, pvalue
+
+
+permutation_test_score.__test__ = False  # to avoid a problem with nosetests
diff --git a/scikits/learn/feature_extraction/image.py b/scikits/learn/feature_extraction/image.py
index cd5fb0865c21995a43ae7be6ba53d8c6e8076624..825dc4c689c67c7a814f5c68ef0d33121677e7b3 100644
--- a/scikits/learn/feature_extraction/image.py
+++ b/scikits/learn/feature_extraction/image.py
@@ -14,16 +14,16 @@ from ..utils.fixes import in1d
# From an image to a graph
def _make_edges_3d(n_x, n_y, n_z=1):
-    """ Returns a list of edges for a 3D image.
-
-        Parameters
-        ===========
-        n_x: integer
-            The size of the grid in the x direction.
-        n_y: integer
-            The size of the grid in the y direction.
-        n_z: integer, optional
-            The size of the grid in the z direction, defaults to 1
+    """Returns a list of edges for a 3D image.
+
+    Parameters
+    ===========
+    n_x: integer
+        The size of the grid in the x direction.
+    n_y: integer
+        The size of the grid in the y direction.
+    n_z: integer, optional
+        The size of the grid in the z direction, defaults to 1
     """
     vertices = np.arange(n_x*n_y*n_z).reshape((n_x, n_y, n_z))
     edges_deep = np.vstack((vertices[:, :, :-1].ravel(),
@@ -48,8 +48,7 @@ def _compute_gradient_3d(edges, img):
# XXX: Why mask the image after computing the weights?
def _mask_edges_weights(mask, edges, weights):
-    """ Given a image mask and the
-    """
+    """Apply a mask to weighted edges"""
     inds = np.arange(mask.size)
     inds = inds[mask.ravel()]
     ind_mask = np.logical_and(in1d(edges[0], inds),
@@ -61,23 +60,23 @@ def _mask_edges_weights(mask, edges, weights):
     return edges, weights
-def img_to_graph(img, mask=None,
-                 return_as=sparse.coo_matrix, dtype=None):
-    """ Create a graph of the pixel-to-pixel connections with the
-        gradient of the image as a the edge value.
-
-        Parameters
-        ===========
-        img: ndarray, 2D or 3D
-            2D or 3D image
-        mask : ndarray of booleans, optional
-            An optional mask of the image, to consider only part of the
-            pixels.
-        return_as: np.ndarray or a sparse matrix class, optional
-            The class to use to build the returned adjacency matrix.
-        dtype: None or dtype, optional
-            The data of the returned sparse matrix.
By default it is the - dtype of img +def img_to_graph(img, mask=None, return_as=sparse.coo_matrix, dtype=None): + """Graph of the pixel-to-pixel gradient connections + + Edges are weighted with the gradient values. + + Parameters + =========== + img: ndarray, 2D or 3D + 2D or 3D image + mask : ndarray of booleans, optional + An optional mask of the image, to consider only part of the + pixels. + return_as: np.ndarray or a sparse matrix class, optional + The class to use to build the returned adjacency matrix. + dtype: None or dtype, optional + The data of the returned sparse matrix. By default it is the + dtype of img """ img = np.atleast_3d(img) if dtype is None: @@ -104,3 +103,4 @@ def img_to_graph(img, mask=None, return return_as(graph) + diff --git a/scikits/learn/feature_extraction/text/dense.py b/scikits/learn/feature_extraction/text/dense.py index ec960193363f143be5c2ee0cd8cfa636aec465f8..28ee500e995aecb277432fbe9f0b3324c8413c8c 100644 --- a/scikits/learn/feature_extraction/text/dense.py +++ b/scikits/learn/feature_extraction/text/dense.py @@ -8,7 +8,6 @@ from operator import itemgetter import re import unicodedata import numpy as np -import scipy.sparse as sp from ...base import BaseEstimator ENGLISH_STOP_WORDS = set([ @@ -97,6 +96,7 @@ class RomanPreprocessor(object): DEFAULT_PREPROCESSOR = RomanPreprocessor() +DEFAULT_TOKEN_PATTERN = r"\b\w\w+\b" class WordNGramAnalyzer(BaseEstimator): """Simple analyzer: transform a text document into a sequence of word tokens @@ -105,20 +105,20 @@ class WordNGramAnalyzer(BaseEstimator): - lower case conversion - unicode accents removal - token extraction using unicode regexp word bounderies for token of - minimum size of 2 symbols + minimum size of 2 symbols (by default) - output token n-grams (unigram only by default) """ - token_pattern = re.compile(r"\b\w\w+\b", re.UNICODE) - def __init__(self, charset='utf-8', min_n=1, max_n=1, preprocessor=DEFAULT_PREPROCESSOR, - stop_words=ENGLISH_STOP_WORDS): + stop_words=ENGLISH_STOP_WORDS, + token_pattern=DEFAULT_TOKEN_PATTERN): self.charset = charset self.stop_words = stop_words self.min_n = min_n self.max_n = max_n self.preprocessor = preprocessor + self.token_pattern = token_pattern def analyze(self, text_document): if hasattr(text_document, 'read'): @@ -130,8 +130,10 @@ class WordNGramAnalyzer(BaseEstimator): text_document = self.preprocessor.preprocess(text_document) - # word boundaries tokenizer - tokens = self.token_pattern.findall(text_document) + # word boundaries tokenizer (cannot compile it in the __init__ because + # we want support for pickling and runtime parameter fitting) + compiled = re.compile(self.token_pattern, re.UNICODE) + tokens = compiled.findall(text_document) # handle token n-grams if self.min_n != 1 or self.max_n != 1: diff --git a/scikits/learn/feature_selection/rfe.py b/scikits/learn/feature_selection/rfe.py index 730e8b7a3dcead677f82e7fc23cbcd125559711b..cccf6064e1992d732413977c02fdd3404dca8b8a 100644 --- a/scikits/learn/feature_selection/rfe.py +++ b/scikits/learn/feature_selection/rfe.py @@ -8,9 +8,9 @@ import numpy as np from ..base import BaseEstimator + class RFE(BaseEstimator): - """ - Feature ranking with Recursive feature elimination + """Feature ranking with Recursive feature elimination Parameters ---------- @@ -64,7 +64,7 @@ class RFE(BaseEstimator): self.estimator = estimator def fit(self, X, y): - """Fit the RFE model according to the given training data and parameters. 
+ """Fit the RFE model Parameters ---------- @@ -80,11 +80,12 @@ class RFE(BaseEstimator): support_ = np.ones(n_features_total, dtype=np.bool) ranking_ = np.ones(n_features_total, dtype=np.int) while np.sum(support_) > self.n_features: - estimator.fit(X[:,support_], y) + estimator.fit(X[:, support_], y) # rank features based on coef_ (handle multi class) abs_coef_ = np.sum(estimator.coef_ ** 2, axis=0) sorted_abs_coef_ = np.sort(abs_coef_) - threshold = sorted_abs_coef_[np.int(np.sum(support_) * self.percentage)] + threshold = sorted_abs_coef_[np.int(np.sum(support_) * + self.percentage)] support_[support_] = abs_coef_ > threshold ranking_[support_] += 1 self.support_ = support_ @@ -100,7 +101,7 @@ class RFE(BaseEstimator): Vector, where n_samples in the number of samples and n_features is the number of features. """ - X_r = X[:,self.support_] + X_r = X[:, self.support_] return X_r.copy() if copy else X_r @@ -161,7 +162,7 @@ class RFECV(RFE): self.loss_func = loss_func def fit(self, X, y, cv=None): - """Fit the RFE model according to the given training data and parameters. + """Fit the RFE model with cross-validation The final size of the support is tuned by cross validation. @@ -181,7 +182,7 @@ class RFECV(RFE): clf = self.estimator n_models = np.max(self.ranking_) self.cv_scores_ = np.zeros(n_models) - self.n_features_ = np.bincount(self.ranking_)[::-1].cumsum()[-2::-1] + self.n_features_ = np.bincount(self.ranking_)[::-1].cumsum()[-2::-1] for train, test in cv: ranking_ = rfe.fit(X[train], y[train]).ranking_ @@ -189,11 +190,12 @@ class RFECV(RFE): # assert n_models == np.max(ranking_) for k in range(n_models): mask = ranking_ >= (k+1) - clf.fit(X[train][:,mask], y[train]) - y_pred = clf.predict(X[test][:,mask]) + clf.fit(X[train][:, mask], y[train]) + y_pred = clf.predict(X[test][:, mask]) self.cv_scores_[k] += self.loss_func(y[test], y_pred) - self.support_ = self.ranking_ >= (np.argmin(self.cv_scores_) + 1) + # Take the best model (if multiple models have the same accuracy + # use the last one ie the one with minimum number of features) + min_score = n_models - np.argmin(self.cv_scores_[::-1]) + self.support_ = self.ranking_ >= min_score return self - - diff --git a/scikits/learn/feature_selection/tests/test_feature_select.py b/scikits/learn/feature_selection/tests/test_feature_select.py index 1c8e32223d99d473cfc804148223794a59cf9199..e943e510e37b3bb5a68b64488a42495f0a7381d0 100644 --- a/scikits/learn/feature_selection/tests/test_feature_select.py +++ b/scikits/learn/feature_selection/tests/test_feature_select.py @@ -190,6 +190,9 @@ def test_select_percentile_regression(): gtruth = np.zeros(20) gtruth[:5]=1 assert_array_equal(support, gtruth) + X_2 = X.copy() + X_2[:, np.logical_not(support)] = 0 + assert_array_equal(X_2, univariate_filter.inverse_transform(X_r)) def test_select_percentile_regression_full(): diff --git a/scikits/learn/feature_selection/tests/test_rfe.py b/scikits/learn/feature_selection/tests/test_rfe.py index 12bdb73191ea96d288e1b1dcecdc454a95f1cd48..48f350d609b86e71ed7d89f5fe1f28f446f2ce85 100644 --- a/scikits/learn/feature_selection/tests/test_rfe.py +++ b/scikits/learn/feature_selection/tests/test_rfe.py @@ -2,7 +2,6 @@ Testing Recursive feature elimination """ - import numpy as np from ...svm import SVC @@ -11,7 +10,8 @@ from ... 
import datasets from ..rfe import RFECV from ...metrics import zero_one -################################################################################ + +############################################################################## # Loading a dataset iris = datasets.load_iris() X = iris.data @@ -24,14 +24,15 @@ E = random.normal(size=(len(X), 5)) # Add the noisy data to the informative features X = np.c_[X, E] + def test_rfe(): - """Check that rfe recoverse the correct features on IRIS dataset""" + """Check that rfe recovers the correct features on IRIS dataset""" svc = SVC(kernel='linear') - rfecv = RFECV(estimator=svc, n_features=4, percentage=0.1, loss_func=zero_one) - rfecv.fit(X, y, cv=StratifiedKFold(y, 2)) + rfecv = RFECV(estimator=svc, n_features=4, percentage=0.1, + loss_func=zero_one) + rfecv.fit(X, y, cv=StratifiedKFold(y, 3)) X_r = rfecv.transform(X) assert X_r.shape[1] == iris.data.shape[1] assert rfecv.support_.sum() == iris.data.shape[1] - diff --git a/scikits/learn/feature_selection/univariate_selection.py b/scikits/learn/feature_selection/univariate_selection.py index c5d965917dbe064112ece45b7627e27f69f2d19c..941a76a30f7a8dd6c080e6275bfd38a6f787a781 100644 --- a/scikits/learn/feature_selection/univariate_selection.py +++ b/scikits/learn/feature_selection/univariate_selection.py @@ -8,7 +8,7 @@ Univariate features selection. import numpy as np from scipy import stats -from ..base import BaseEstimator +from ..base import BaseEstimator, TransformerMixin ###################################################################### # Scoring functions @@ -164,7 +164,7 @@ def f_regression(X, y, center=True): ###################################################################### -class _AbstractUnivariateFilter(BaseEstimator): +class _AbstractUnivariateFilter(BaseEstimator, TransformerMixin): """ Abstract class, not meant to be used directly """ @@ -184,10 +184,11 @@ class _AbstractUnivariateFilter(BaseEstimator): self.score_func = score_func - def fit(self, X, y): + def fit(self, X, y, **params): """ Evaluate the function """ + self._set_params(**params) _scores = self.score_func(X, y) self._scores = _scores[0] self._pvalues = _scores[1] @@ -202,6 +203,20 @@ class _AbstractUnivariateFilter(BaseEstimator): return X[:, self.get_support()] + def inverse_transform(self, X_red): + """ Transform reduced data back in original feature space + """ + n_samples, _ = X_red.shape + support = self.get_support() + if n_samples == 1: + X = np.zeros((support.shape[0]), dtype=X_red.dtype) + X[support] = X_red + else: + X = np.zeros((n_samples, support.shape[0]), dtype=X_red.dtype) + X[:, support] = X_red + return X + + ###################################################################### # Specific filters ###################################################################### diff --git a/scikits/learn/gaussian_process/gaussian_process.py b/scikits/learn/gaussian_process/gaussian_process.py index 3a66ce02c5ba78145788d182c3ca7d8392369e52..722e9cc2dffe95be3667455d716bd6c1eb95c0ba 100644 --- a/scikits/learn/gaussian_process/gaussian_process.py +++ b/scikits/learn/gaussian_process/gaussian_process.py @@ -471,7 +471,7 @@ class GaussianProcess(BaseEstimator, RegressorMixin): self.Ft = par['Ft'] self.G = par['G'] - rt = solve_triangular(C, r.T, lower=True) + rt = solve_triangular(self.C, r.T, lower=True) if self.beta0 is None: # Universal Kriging @@ -503,11 +503,11 @@ class GaussianProcess(BaseEstimator, RegressorMixin): if eval_MSE: y, MSE = np.zeros(n_eval), np.zeros(n_eval) - for k in range(n_eval / 
batch_size): + for k in range(max(1, n_eval / batch_size)): batch_from = k * batch_size batch_to = min([(k + 1) * batch_size + 1, n_eval + 1]) y[batch_from:batch_to], MSE[batch_from:batch_to] = \ - self.predict(X[batch_from:batch_to][:], + self.predict(X[batch_from:batch_to], eval_MSE=eval_MSE, batch_size=None) return y, MSE @@ -515,11 +515,11 @@ class GaussianProcess(BaseEstimator, RegressorMixin): else: y = np.zeros(n_eval) - for k in range(n_eval / batch_size): + for k in range(max(1, n_eval / batch_size)): batch_from = k * batch_size batch_to = min([(k + 1) * batch_size + 1, n_eval + 1]) y[batch_from:batch_to] = \ - self.predict(X[batch_from:batch_to][:], + self.predict(X[batch_from:batch_to], eval_MSE=eval_MSE, batch_size=None) return y @@ -578,7 +578,7 @@ class GaussianProcess(BaseEstimator, RegressorMixin): if D is None: # Light storage mode (need to recompute D, ij and F) - D, ij = compute_componentwise_l1_cross_distances(X) + D, ij = compute_componentwise_l1_cross_distances(self.X) if np.min(np.sum(np.abs(D), axis=1)) == 0. \ and self.corr != correlation.pure_nugget: raise Exception("Multiple X are not allowed") diff --git a/scikits/learn/grid_search.py b/scikits/learn/grid_search.py index ca9a7580f6c3fdc8ae1b62a23580d52bd48bf433..1f3456d7c8aa644c8d2c06b025133a8903c9b694 100644 --- a/scikits/learn/grid_search.py +++ b/scikits/learn/grid_search.py @@ -156,31 +156,38 @@ class GridSearchCV(BaseEstimator): the folds, and the loss minimized is the total loss per sample, and not the mean loss across the folds. - Methods - ------- - fit(X, Y) : self - Fit the model + cv : crossvalidation generator + see scikits.learn.cross_val module - predict(X) : array - Predict using the model. + refit: boolean + refit the best estimator with the entire dataset Examples -------- - >>> import numpy as np - >>> from scikits.learn.cross_val import LeaveOneOut - >>> from scikits.learn.svm import SVR - >>> from scikits.learn.grid_search import GridSearchCV - >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) - >>> y = np.array([1, 1, 2, 2]) + >>> from scikits.learn import svm, grid_search, datasets + >>> iris = datasets.load_iris() >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]} - >>> svr = SVR() - >>> clf = GridSearchCV(svr, parameters, n_jobs=1) - >>> clf.fit(X, y).predict([[-0.8, -1]]) - array([ 1.13101459]) + >>> svr = svm.SVR() + >>> clf = grid_search.GridSearchCV(svr, parameters) + >>> clf.fit(iris.data, iris.target) # doctest: +ELLIPSIS + GridSearchCV(n_jobs=1, fit_params={}, loss_func=None, refit=True, cv=None, + iid=True, + estimator=SVR(kernel='rbf', C=1.0, probability=False, ... + ... + + Notes + ------ + + The parameters selected are those that maximize the score of the + left out data, unless an explicit score_func is passed in which + case it is used instead. If a loss function loss_func is passed, + it overrides the score functions and is minimized. 
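    As a sketch of the latter, any callable with the (y_true, y_pred)
    signature can be minimized; here the zero_one loss from
    scikits.learn.metrics is used purely for illustration::

        from scikits.learn import svm, grid_search, datasets
        from scikits.learn.metrics import zero_one

        iris = datasets.load_iris()
        clf = grid_search.GridSearchCV(svm.SVC(), {'C': [1, 10]},
                                       loss_func=zero_one)
        clf.fit(iris.data, iris.target)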
+ """ def __init__(self, estimator, param_grid, loss_func=None, score_func=None, - fit_params={}, n_jobs=1, iid=True): + fit_params={}, n_jobs=1, iid=True, refit=True, cv=None, + ): assert hasattr(estimator, 'fit') and (hasattr(estimator, 'predict') or hasattr(estimator, 'score')), ( "estimator should a be an estimator implementing 'fit' and " @@ -201,8 +208,10 @@ class GridSearchCV(BaseEstimator): self.n_jobs = n_jobs self.fit_params = fit_params self.iid = iid + self.refit = refit + self.cv = cv - def fit(self, X, y=None, refit=True, cv=None, **kw): + def fit(self, X, y=None, **params): """Run fit with all sets of parameters Returns the best classifier @@ -217,13 +226,10 @@ class GridSearchCV(BaseEstimator): y: array, [n_samples] or None Target vector relative to X, None for unsupervised problems - cv : crossvalidation generator - see scikits.learn.cross_val module - - refit: boolean - refit the best estimator with the entire dataset """ + self._set_params(**params) estimator = self.estimator + cv = self.cv if cv is None: if hasattr(X, 'shape'): n_samples = X.shape[0] @@ -260,7 +266,7 @@ class GridSearchCV(BaseEstimator): self.best_score = best_score - if refit: + if self.refit: # fit the best estimator using the entire dataset best_estimator.fit(X, y, **self.fit_params) diff --git a/scikits/learn/hmm.py b/scikits/learn/hmm.py index 0c40af99a759a9f0bd9bcc28bfef4f4538cda2fd..ff644601a1975d932e1bf64c9f02f3418ced2879 100644 --- a/scikits/learn/hmm.py +++ b/scikits/learn/hmm.py @@ -327,8 +327,6 @@ class _BaseHMM(BaseEstimator): small). You can fix this by getting more training data, or decreasing `covars_prior`. """ - obs = np.asanyarray(obs) - self._init(obs, init_params) logprob = [] @@ -679,11 +677,13 @@ class GaussianHMM(_BaseHMM): def _init(self, obs, params='stmc'): super(GaussianHMM, self)._init(obs, params=params) - if hasattr(self, 'n_features') and self.n_features != obs.shape[2]: + if (hasattr(self, 'n_features') + and self.n_features != obs[0].shape[1]): raise ValueError('Unexpected number of dimensions, got %s but ' - 'expected %s' % (obs.shape[2], self.n_features)) + 'expected %s' % (obs[0].shape[1], + self.n_features)) - self.n_features = obs.shape[2] + self.n_features = obs[0].shape[1] if 'm' in params: self._means = cluster.KMeans( diff --git a/scikits/learn/linear_model/__init__.py b/scikits/learn/linear_model/__init__.py index b508272bcc02e0ebf4df12ef11a9e1868eef8c14..d83bf4fe8352f94b731f34df02c98a37176b006e 100644 --- a/scikits/learn/linear_model/__init__.py +++ b/scikits/learn/linear_model/__init__.py @@ -22,7 +22,7 @@ from .least_angle import LARS, LassoLARS, lars_path from .coordinate_descent import Lasso, ElasticNet, LassoCV, ElasticNetCV, \ lasso_path, enet_path from .stochastic_gradient import SGDClassifier, SGDRegressor -from .ridge import Ridge +from .ridge import Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV from .logistic import LogisticRegression from . import sparse diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index 130a9d24825d55759fd509d397d71bdf8b02065b..7e542f14a93ecfa8d6204da481620fa3963caaeb 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -7,13 +7,18 @@ Generalized Linear models. # Olivier Grisel <olivier.grisel@ensta.org> # Vincent Michel <vincent.michel@inria.fr> # Peter Prettenhofer <peter.prettenhofer@gmail.com> +# Mathieu Blondel <mathieu@mblondel.org> # # License: BSD Style. 
import numpy as np
+import scipy.sparse as sp
from ..base import BaseEstimator, RegressorMixin, ClassifierMixin
from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber
+from ..utils.extmath import safe_sparse_dot
+from ..utils import safe_asanyarray
+
###
### TODO: intercept for all models
@@ -39,8 +44,8 @@ class LinearModel(BaseEstimator, RegressorMixin):
         C : array, shape = [n_samples]
             Returns predicted values.
         """
-        X = np.asanyarray(X)
-        return np.dot(X, self.coef_) + self.intercept_
+        X = safe_asanyarray(X)
+        return safe_sparse_dot(X, self.coef_) + self.intercept_
     @staticmethod
     def _center_data(X, y, fit_intercept):
@@ -50,9 +55,12 @@
         centered.
         """
         if fit_intercept:
-            Xmean = X.mean(axis=0)
+            if sp.issparse(X):
+                Xmean = np.zeros(X.shape[1])
+            else:
+                Xmean = X.mean(axis=0)
+                X = X - Xmean
             ymean = y.mean()
-            X = X - Xmean
             y = y - ymean
         else:
             Xmean = np.zeros(X.shape[1])
@@ -290,3 +298,19 @@ class BaseSGDRegressor(BaseSGD, RegressorMixin):
         """
         X = np.asanyarray(X)
         return np.dot(X, self.coef_) + self.intercept_
+
+
+class CoefSelectTransformerMixin(object):
+    """Mixin for linear models that can find sparse solutions.
+    """
+
+    def transform(self, X, threshold=1e-10):
+        if len(self.coef_.shape) == 1 or self.coef_.shape[1] == 1:
+            # 2-class case
+            coef = np.abs(np.ravel(self.coef_))
+        else:
+            # multi-class case
+            coef = np.mean(np.abs(self.coef_), axis=0)
+
+        # keep only the features whose weight is above the threshold
+        return X[:, coef > threshold]
+
diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py
index 28c59759e80e6c85b0c7955b3d5550c170b72082..24729be4649c399a6828f98862bc8a257ad9c356 100644
--- a/scikits/learn/linear_model/least_angle.py
+++ b/scikits/learn/linear_model/least_angle.py
@@ -16,7 +16,7 @@ from .base import LinearModel
from ..utils import arrayfuncs
def lars_path(X, y, Xy=None, Gram=None, max_features=None,
-              alpha_min=0, method="lar", overwrite_X=False,
+              alpha_min=0, method='lar', overwrite_X=False,
               overwrite_Gram=False, verbose=False):
     """ Compute Least Angle Regression and LASSO path
@@ -39,22 +39,26 @@ def lars_path(X, y, Xy=None, Gram=None, max_features=None,
         Minimum correlation along the path. It corresponds to the
         regularization parameter alpha parameter in the Lasso.
-    method: 'lar' or 'lasso'
-        Specifies the problem solved: the LAR or its variant the
-        LASSO-LARS that gives the solution of the LASSO problem
-        for any regularization parameter.
+    method: 'lar' | 'lasso'
+        Specifies the returned model. Select 'lar' for Least Angle
+        Regression, 'lasso' for the Lasso.
     Returns
     --------
-    alphas: array, shape: (k)
-        The alphas along the path
+    alphas: array, shape: (max_features + 1,)
+        Maximum of covariances (in absolute value) at each
+        iteration.
-    active: array, shape (?)
+    active: array, shape (max_features,)
         Indices of active variables at the end of the path.
-    coefs: array, shape (p, k)
+    coefs: array, shape (n_features, max_features+1)
         Coefficients along the path
+    See also
+    --------
+    :ref:`LassoLARS`, :ref:`LARS`
+
     Notes
     ------
     * http://en.wikipedia.org/wiki/Least-angle_regression
@@ -75,6 +79,7 @@
     # holds the sign of covariance
     sign_active = np.empty(max_features, dtype=np.int8)
     drop = False
+    eps = np.finfo(X.dtype).eps
     # will hold the cholesky factorization. Only lower part is
     # referenced.
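For orientation, the tuple documented above can be consumed directly; a
minimal sketch (the bundled diabetes dataset is used purely for
illustration)::

    from scikits.learn import datasets
    from scikits.learn.linear_model import lars_path

    diabetes = datasets.load_diabetes()
    alphas, active, coefs = lars_path(diabetes.data, diabetes.target,
                                      method='lasso')
    # one column of coefs per breakpoint along the regularization path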
@@ -103,8 +108,8 @@ def lars_path(X, y, Xy=None, Gram=None, max_features=None, while 1: if Cov.size: - imax = np.argmax(np.abs(Cov)) # TODO: rename - C_ = Cov[imax] + C_idx = np.argmax(np.abs(Cov)) + C_ = Cov[C_idx] C = np.fabs(C_) # to match a for computing gamma_ else: @@ -115,7 +120,17 @@ def lars_path(X, y, Xy=None, Gram=None, max_features=None, alphas[n_iter] = C / n_samples - if (C < alpha_min) or (n_active == max_features): + # Check for early stopping + if alphas[n_iter] < alpha_min: # interpolate + # interpolation factor 0 <= ss < 1 + ss = (alphas[n_iter-1] - alpha_min) / (alphas[n_iter-1] - + alphas[n_iter]) + coefs[n_iter] = coefs[n_iter-1] + ss*(coefs[n_iter] - + coefs[n_iter-1]) + alphas[n_iter] = alpha_min + break + + if n_active == max_features: break if not drop: @@ -129,9 +144,9 @@ def lars_path(X, y, Xy=None, Gram=None, max_features=None, # where u is the last added to the active set # sign_active[n_active] = np.sign(C_) - m, n = n_active, imax+n_active + m, n = n_active, C_idx+n_active - Cov[imax], Cov[0] = swap(Cov[imax], Cov[0]) + Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0]) indices[n], indices[m] = indices[m], indices[n] Cov = Cov[1:] # remove Cov[0] @@ -152,7 +167,8 @@ def lars_path(X, y, Xy=None, Gram=None, max_features=None, arrayfuncs.solve_triangular(L[:n_active, :n_active], L[n_active, :n_active]) v = np.dot(L[n_active, :n_active], L[n_active, :n_active]) - L[n_active, n_active] = np.sqrt(c - v) + diag = max(np.sqrt(np.abs(c - v)), eps) + L[n_active, n_active] = diag active.append(indices[n_active]) n_active += 1 @@ -182,24 +198,31 @@ def lars_path(X, y, Xy=None, Gram=None, max_features=None, corr_eq_dir = np.dot(Gram[:n_active, n_active:].T, least_squares) - # equation 2.13 + g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir)) g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir)) gamma_ = min(g1, g2, C/AA) - if method == 'lasso': - drop = False - z = - coefs[n_iter, active] / least_squares - z_pos = arrayfuncs.min_pos(z) - if z_pos < gamma_: - idx = np.where(z == z_pos)[0] - gamma_ = z_pos - drop = True + # TODO: better names for these variables: z + drop = False + z = - coefs[n_iter, active] / least_squares + z_pos = arrayfuncs.min_pos(z) + if z_pos < gamma_: + + # some coefficients have changed sign + idx = np.where(z == z_pos)[0] + + # update the sign, important for LAR + sign_active[idx] = -sign_active[idx] + + if method == 'lasso': gamma_ = z_pos + drop = True n_iter += 1 - if n_iter >= coefs.shape[0]: # resize - add_features = 2 * (max_features - n_active) # heuristic + if n_iter >= coefs.shape[0]: + # resize the coefs and alphas array + add_features = 2 * (max_features - n_active) coefs.resize((n_iter + add_features, n_features)) alphas.resize(n_iter + add_features) @@ -212,7 +235,8 @@ def lars_path(X, y, Xy=None, Gram=None, max_features=None, if n_active > n_features: break - if drop: + # See if any coefficient has changed sign + if drop and method == 'lasso': arrayfuncs.cholesky_delete(L[:n_active, :n_active], idx) @@ -251,19 +275,11 @@ def lars_path(X, y, Xy=None, Gram=None, max_features=None, temp = np.dot(X.T[drop_idx], residual) Cov = np.r_[temp, Cov] - # do an append to maintain size sign_active = np.delete(sign_active, idx) - sign_active = np.append(sign_active, 0.) + sign_active = np.append(sign_active, 0.) 
# just to maintain size if verbose: print "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx, n_active, abs(temp)) - if C < alpha_min: # interpolate - # interpolation factor 0 <= ss < 1 - ss = (alphas[n_iter-1] - alpha_min) / (alphas[n_iter-1] - - alphas[n_iter]) - coefs[n_iter] = coefs[n_iter-1] + ss*(coefs[n_iter] - coefs[n_iter-1]) - alphas[n_iter] = alpha_min - # resize coefs in case of early stop alphas = alphas[:n_iter+1] @@ -408,11 +424,11 @@ class LassoLARS (LARS): Examples -------- >>> from scikits.learn import linear_model - >>> clf = linear_model.LassoLARS(alpha=0.1) + >>> clf = linear_model.LassoLARS(alpha=0.01) >>> clf.fit([[-1,1], [0, 0], [1, 1]], [-1, 0, -1]) - LassoLARS(alpha=0.1, verbose=False, fit_intercept=True) + LassoLARS(alpha=0.01, verbose=False, fit_intercept=True) >>> print clf.coef_ - [ 0. 0.08350342] + [ 0. -0.72649658] References ---------- diff --git a/scikits/learn/linear_model/logistic.py b/scikits/learn/linear_model/logistic.py index e2e7eef4527fd274b7bf4f64e41e979ede56d365..6b5bf6eb5751913e0a82bbe84bd6d7f9f953d47a 100644 --- a/scikits/learn/linear_model/logistic.py +++ b/scikits/learn/linear_model/logistic.py @@ -1,10 +1,12 @@ import numpy as np from ..base import ClassifierMixin +from ..linear_model.base import CoefSelectTransformerMixin from ..svm.base import BaseLibLinear from ..svm import _liblinear -class LogisticRegression(BaseLibLinear, ClassifierMixin): +class LogisticRegression(BaseLibLinear, ClassifierMixin, + CoefSelectTransformerMixin): """ Logistic Regression. @@ -28,6 +30,17 @@ class LogisticRegression(BaseLibLinear, ClassifierMixin): Specifies if a constant (a.k.a. bias or intercept) should be added the decision function + intercept_scaling : float, default: 1 + when self.fit_intercept is True, instance vector x becomes + [x, self.intercept_scaling], + i.e. a "synthetic" feature with constant value equals to + intercept_scaling is appended to the instance vector. + The intercept becomes intercept_scaling * synthetic feature weight + Note! the synthetic feature weight is subject to l1/l2 regularization + as all other features. 
+    To lessen the effect of regularization on synthetic feature weight
+    (and therefore on the intercept) intercept_scaling has to be increased
+
     Attributes
     ----------
@@ -56,11 +69,11 @@
     """
     def __init__(self, penalty='l2', dual=False, eps=1e-4, C=1.0,
-                 fit_intercept=True):
+                 fit_intercept=True, intercept_scaling=1):
         super(LogisticRegression, self).__init__ (penalty=penalty,
             dual=dual, loss='lr', eps=eps, C=C,
-            fit_intercept=fit_intercept)
+            fit_intercept=fit_intercept, intercept_scaling=intercept_scaling)
     def predict_proba(self, X):
         """
diff --git a/scikits/learn/linear_model/ridge.py b/scikits/learn/linear_model/ridge.py
index 8a406ec6e6013748d8bd2910bbd81728b75d6851..36a9e50e56b33e083bfdb9c9bc0c81f91e2e1c96 100644
--- a/scikits/learn/linear_model/ridge.py
+++ b/scikits/learn/linear_model/ridge.py
@@ -2,10 +2,16 @@
Ridge regression
"""
+# Author: Mathieu Blondel <mathieu@mblondel.org>
+# License: Simplified BSD
+
import numpy as np
-from scipy import linalg
from .base import LinearModel
+from ..utils.extmath import safe_sparse_dot
+from ..utils import safe_asanyarray
+from ..preprocessing import LabelBinarizer
+from ..grid_search import GridSearchCV
class Ridge(LinearModel):
@@ -15,10 +21,13 @@
     Parameters
     ----------
     alpha : float
-        Small positive values of alpha improve the coditioning of the
+        Small positive values of alpha improve the conditioning of the
         problem and reduce the variance of the estimates.
+        Alpha corresponds to (2*C)^-1 in other linear models such as
+        LogisticRegression or LinearSVC.
+
     fit_intercept : boolean
-        wether to calculate the intercept for this model. If set
+        Whether to calculate the intercept for this model. If set
         to false, no intercept will be used in calculations
         (e.g. data is expected to be already centered).
@@ -39,7 +48,7 @@
         self.alpha = alpha
         self.fit_intercept = fit_intercept
-    def fit(self, X, y, **params):
+    def fit(self, X, y, sample_weight=1.0, solver="default", **params):
         """
         Fit Ridge regression model
@@ -47,34 +56,426 @@
         ----------
         X : numpy array of shape [n_samples,n_features]
             Training data
+
         y : numpy array of shape [n_samples]
             Target values
+        sample_weight : float or numpy array of shape [n_samples]
+            Sample weight
+
+        solver : 'default' | 'cg'
+            Solver to use in the computational routines. 'default'
+            will use the standard scipy.linalg.solve function, 'cg'
+            will use a conjugate gradient solver as found in
+            scipy.sparse.linalg.cg.
+
         Returns
         -------
         self : returns an instance of self.
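        A minimal sketch (toy data; solver='cg' should give the same
        coefficients as the default solver here)::

            import numpy as np
            from scikits.learn.linear_model import Ridge

            X = np.array([[0., 0.], [1., 1.], [2., 2.]])
            y = np.array([0., 1., 2.])
            clf = Ridge(alpha=1.0).fit(X, y)
            print clf.coef_, clf.intercept_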
""" self._set_params(**params) + self.solver = solver - X = np.asanyarray(X, dtype=np.float) + X = safe_asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) + X, y, Xmean, ymean = \ + LinearModel._center_data(X, y, self.fit_intercept) + + import scipy.sparse as sp + if sp.issparse(X): + self._solve_sparse(X, y, sample_weight) + else: + self._solve_dense(X, y, sample_weight) + + self._set_intercept(Xmean, ymean) + + return self + + def _solve_dense(self, X, y, sample_weight): n_samples, n_features = X.shape - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + if n_features > n_samples or \ + isinstance(sample_weight, np.ndarray) or \ + sample_weight != 1.0: - if n_samples > n_features: + # kernel ridge + # w = X.T * inv(X X^t + alpha*Id) y + A = np.dot(X, X.T) + A.flat[::n_samples + 1] += self.alpha * sample_weight + self.coef_ = np.dot(X.T, self._solve(A, y)) + else: + # ridge # w = inv(X^t X + alpha*Id) * X.T y A = np.dot(X.T, X) - A.flat[::n_features+1] += self.alpha - self.coef_ = linalg.solve(A, np.dot(X.T, y), - overwrite_a=True, sym_pos=True) + A.flat[::n_features + 1] += self.alpha + self.coef_ = self._solve(A, np.dot(X.T, y)) + + def _solve_sparse(self, X, y, sample_weight): + n_samples, n_features = X.shape + + import scipy.sparse as sp + if n_features > n_samples or \ + isinstance(sample_weight, np.ndarray) or \ + sample_weight != 1.0: + + I = sp.lil_matrix((n_samples, n_samples)) + I.setdiag(np.ones(n_samples) * self.alpha * sample_weight) + c = self._solve(X * X.T + I, y) + self.coef_ = X.T * c else: - # w = X.T * inv(X X^t + alpha*Id) y - A = np.dot(X, X.T) - A.flat[::n_samples+1] += self.alpha - self.coef_ = np.dot(X.T, linalg.solve(A, y, overwrite_a=True, - sym_pos=True)) + I = sp.lil_matrix((n_features, n_features)) + I.setdiag(np.ones(n_features) * self.alpha) + self.coef_ = self._solve(X.T * X + I, X.T * y) + + def _solve(self, A, b): + if self.solver == "cg": + # this solver cannot handle a 2-d b. + from scipy.sparse import linalg as sp_linalg + sol, error = sp_linalg.cg(A, b) + if error: + raise ValueError("Failed with error code %d" % error) + return sol + else: + import scipy.sparse as sp + # we are working with dense symmetric positive A + if sp.issparse(A): + A = A.todense() + from scipy import linalg + return linalg.solve(A, b, sym_pos=True, overwrite_a=True) + + +class RidgeClassifier(Ridge): + """Classifier using Ridge regression + + Parameters + ---------- + alpha : float + Small positive values of alpha improve the conditioning of the + problem and reduce the variance of the estimates. + Alpha corresponds to (2*C)^-1 in other linear models such as + LogisticRegression or LinearSVC. + + fit_intercept : boolean + Whether to calculate the intercept for this model. If set + to false, no intercept will be used in calculations + (e.g. data is expected to be already centered). + + Note + ---- + For multi-class classification, n_class classifiers are trained in + a one-versus-all approach. + """ + + def fit(self, X, y): + """ + Fit Ridge regression model. + + Parameters + ---------- + X : numpy array of shape [n_samples,n_features] + Training data + + y : numpy array of shape [n_samples] + Target values + + Returns + ------- + self : returns an instance of self. 
+ """ + self.label_binarizer = LabelBinarizer() + Y = self.label_binarizer.fit_transform(y) + Ridge.fit(self, X, Y) + return self + + def decision_function(self, X): + return Ridge.predict(self, X) + + def predict(self, X): + """ + Predict target values according to the fitted model. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + + Returns + ------- + C : array, shape = [n_samples] + """ + Y = self.decision_function(X) + return self.label_binarizer.inverse_transform(Y) + + +class _RidgeGCV(LinearModel): + """ + Ridge regression with built-in Generalized Cross-Validation, i.e. + efficient Leave-One-Out cross-validation. + + This class is not intended to be used directly. Use RidgeCV instead. + + Notes + ----- + + We want to solve (K + alpha*Id)c = y, + where K = X X^T is the kernel matrix. + + Let G = (K + alpha*Id)^-1. + + Dual solution: c = Gy + Primal solution: w = X^T c + + Compute eigendecomposition K = Q V Q^T. + Then G = Q (V + alpha*Id)^-1 Q^T, + where (V + alpha*Id) is diagonal. + It is thus inexpensive to inverse for many alphas. + + Let loov be the vector of prediction values for each example + when the model was fitted with all examples but this example. + + loov = (KGY - diag(KG)Y) / diag(I-KG) + + Let looe be the vector of prediction errors for each example + when the model was fitted with all examples but this example. + + looe = y - loov = c / diag(G) + + Reference + --------- + + http://cbcl.mit.edu/projects/cbcl/publications/ps/MIT-CSAIL-TR-2007-025.pdf + http://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf + """ + + def __init__(self, alphas=[0.1, 1.0, 10.0], fit_intercept=True, + score_func=None, loss_func=None): + self.alphas = np.asanyarray(alphas) + self.fit_intercept = fit_intercept + self.score_func = score_func + self.loss_func = loss_func + + def _pre_compute(self, X, y): + # even if X is very sparse, K is usually very dense + K = safe_sparse_dot(X, X.T, dense_output=True) + from scipy import linalg + v, Q = linalg.eigh(K) + return K, v, Q + + def _errors(self, v, Q, y, alpha): + G = np.dot(np.dot(Q, np.diag(1.0 / (v + alpha))), Q.T) + c = np.dot(G, y) + G_diag = np.diag(G) + # handle case when y is 2-d + G_diag = G_diag if len(y.shape) == 1 else G_diag[:, np.newaxis] + return (c / G_diag) ** 2, c + + def _values(self, K, v, Q, y, alpha): + n_samples = y.shape[0] + + G = np.dot(np.dot(Q, np.diag(1.0 / (v + alpha))), Q.T) + c = np.dot(G, y) + KG = np.dot(K, G) + #KG = np.dot(np.dot(Q, np.diag(v / (v + alpha))), Q.T) + KG_diag = np.diag(KG) + + denom = np.ones(n_samples) - KG_diag + if len(y.shape) == 2: + # handle case when y is 2-d + KG_diag = KG_diag[:, np.newaxis] + denom = denom[:, np.newaxis] + + num = np.dot(KG, y) - KG_diag * y + + return num / denom, c + + def fit(self, X, y, sample_weight=1.0): + """Fit Ridge regression model + + Parameters + ---------- + X : numpy array of shape [n_samples, n_features] + Training data + + y : numpy array of shape [n_samples] or [n_samples, n_responses] + Target values + + sample_weight : float or numpy array of shape [n_samples] + Sample weight + + Returns + ------- + self : Returns self. 
+ """ + X = safe_asanyarray(X, dtype=np.float) + y = np.asanyarray(y, dtype=np.float) + + n_samples = X.shape[0] + + X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + + K, v, Q = self._pre_compute(X, y) + n_y = 1 if len(y.shape) == 1 else y.shape[1] + M = np.zeros((n_samples * n_y, len(self.alphas))) + C = [] + + error = self.score_func is None and self.loss_func is None + + for i, alpha in enumerate(self.alphas): + if error: + out, c = self._errors(v, Q, y, sample_weight * alpha) + else: + out, c = self._values(K, v, Q, y, sample_weight * alpha) + M[:, i] = out.ravel() + C.append(c) + + if error: + best = M.mean(axis=0).argmin() + else: + func = self.score_func if self.score_func else self.loss_func + out = [func(y.ravel(), M[:, i]) for i in range(len(self.alphas))] + best = np.argmax(out) if self.score_func else np.argmin(out) + + self.best_alpha = self.alphas[best] + self.dual_coef_ = C[best] + self.coef_ = safe_sparse_dot(X.T, self.dual_coef_) self._set_intercept(Xmean, ymean) + return self + + +class RidgeCV(LinearModel): + """ + Ridge regression with built-in cross-validation. + + By default, it performs Generalized Cross-Validation, which is a form of + efficient Leave-One-Out cross-validation. Currently, only the n_features > + n_samples case is handled efficiently. + + Parameters + ---------- + alphas: numpy array of shape [n_alpha] + Array of alpha values to try. + Small positive values of alpha improve the conditioning of the + problem and reduce the variance of the estimates. + Alpha corresponds to (2*C)^-1 in other linear models such as + LogisticRegression or LinearSVC. + + fit_intercept : boolean + Whether to calculate the intercept for this model. If set + to false, no intercept will be used in calculations + (e.g. data is expected to be already centered). + + loss_func: callable, optional + function that takes 2 arguments and compares them in + order to evaluate the performance of prediciton (small is good) + if None is passed, the score of the estimator is maximized + + score_func: callable, optional + function that takes 2 arguments and compares them in + order to evaluate the performance of prediciton (big is good) + if None is passed, the score of the estimator is maximized + + See also + -------- + Ridge + """ + + def __init__(self, alphas=np.array([0.1, 1.0, 10.0]), fit_intercept=True, + score_func=None, loss_func=None, cv=None): + self.alphas = alphas + self.fit_intercept = fit_intercept + self.score_func = score_func + self.loss_func = loss_func + self.cv = cv + + def fit(self, X, y, sample_weight=1.0, **params): + """Fit Ridge regression model + + Parameters + ---------- + X : numpy array of shape [n_samples, n_features] + Training data + + y : numpy array of shape [n_samples] or [n_samples, n_responses] + Target values + + sample_weight : float or numpy array of shape [n_samples] + Sample weight + + cv : cross-validation generator, optional + If None, Generalized Cross-Validationn (efficient Leave-One-Out) + will be used. + + Returns + ------- + self : Returns self. + """ + self._set_params(**params) + + if self.cv is None: + estimator = _RidgeGCV(self.alphas, self.fit_intercept, + self.score_func, self.loss_func) + estimator.fit(X, y, sample_weight=sample_weight) + self.best_alpha = estimator.best_alpha + else: + parameters = {'alpha': self.alphas} + # FIXME: sample_weight must be split into training/validation data + # too! 
+ #fit_params = {'sample_weight' : sample_weight} + fit_params = {} + gs = GridSearchCV(Ridge(fit_intercept=self.fit_intercept), + parameters, fit_params=fit_params, cv=self.cv) + gs.fit(X, y) + estimator = gs.best_estimator + self.best_alpha = gs.best_estimator.alpha + + self.coef_ = estimator.coef_ + self.intercept_ = estimator.intercept_ + + return self + + +class RidgeClassifierCV(RidgeCV): + + def fit(self, X, y, sample_weight=1.0, class_weight={}, **params): + """ + Fit the ridge classifier. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Training vectors, where n_samples is the number of samples + and n_features is the number of features. + + y : array-like, shape = [n_samples] + Target values. + + class_weight : dict, optional + Weights associated with classes in the form + {class_label : weight}. If not given, all classes are + supposed to have weight one. + + sample_weight : float or numpy array of shape [n_samples] + Sample weight + + Returns + ------- + self : object + Returns self. + """ + self._set_params(**params) + sample_weight2 = np.array([class_weight.get(k, 1.0) for k in y]) + self.label_binarizer = LabelBinarizer() + Y = self.label_binarizer.fit_transform(y) + RidgeCV.fit(self, X, Y, + sample_weight=sample_weight * sample_weight2, + cv=self.cv) + return self + + def decision_function(self, X): + return RidgeCV.predict(self, X) + + def predict(self, X): + Y = self.decision_function(X) + return self.label_binarizer.inverse_transform(Y) diff --git a/scikits/learn/linear_model/setup.py b/scikits/learn/linear_model/setup.py index 707f673a567ac65d838558418db9cb7a641ee4d2..a8f7a079f628ceae4fb0a52ae7bac21dc6848a49 100644 --- a/scikits/learn/linear_model/setup.py +++ b/scikits/learn/linear_model/setup.py @@ -1,11 +1,9 @@ from os.path import join -import warnings import numpy -import sys def configuration(parent_package='', top_path=None): from numpy.distutils.misc_util import Configuration - from numpy.distutils.system_info import get_info, get_standard_file, BlasNotFoundError + from numpy.distutils.system_info import get_info config = Configuration('linear_model', parent_package, top_path) # cd fast needs CBLAS diff --git a/scikits/learn/linear_model/sparse/base.py b/scikits/learn/linear_model/sparse/base.py new file mode 100644 index 0000000000000000000000000000000000000000..01dd1910e8b613431ce465f0d88ad4af8ee7d9f2 --- /dev/null +++ b/scikits/learn/linear_model/sparse/base.py @@ -0,0 +1,24 @@ + +# Author: Mathieu Blondel <mathieu@mblondel.org> +# +# License: BSD Style. + +import numpy as np + +class CoefSelectTransformerMixin(object): + """Mixin for linear models that can find sparse solutions. 
+ """ + + def transform(self, X, threshold=1e-10): + import scipy.sparse as sp + X = sp.csc_matrix(X) + ind = np.arange(X.shape[0]) + + if len(self.coef_.shape) == 1 or self.coef_.shape[1] == 1: + # 2-class case + coef = np.ravel(self.coef_) + else: + # multi-class case + coef = np.mean(self.coef_, axis=0) + + return X[:, ind[coef <= threshold]] diff --git a/scikits/learn/linear_model/sparse/logistic.py b/scikits/learn/linear_model/sparse/logistic.py index 0cea75125aa4a18851dc5c1b40fe1f44e9468b70..641469ff79c5883d5b4f677d891ba66b103da91e 100644 --- a/scikits/learn/linear_model/sparse/logistic.py +++ b/scikits/learn/linear_model/sparse/logistic.py @@ -9,9 +9,11 @@ import numpy as np from ...base import ClassifierMixin from ...svm.sparse.base import SparseBaseLibLinear +from ...linear_model.sparse.base import CoefSelectTransformerMixin from ...svm._liblinear import csr_predict_prob -class LogisticRegression(SparseBaseLibLinear, ClassifierMixin): +class LogisticRegression(SparseBaseLibLinear, ClassifierMixin, + CoefSelectTransformerMixin): """ Logistic Regression. @@ -35,6 +37,17 @@ class LogisticRegression(SparseBaseLibLinear, ClassifierMixin): Specifies if a constant (a.k.a. bias or intercept) should be added the decision function + intercept_scaling : float, default: 1 + when self.fit_intercept is True, instance vector x becomes + [x, self.intercept_scaling], + i.e. a "synthetic" feature with constant value equals to + intercept_scaling is appended to the instance vector. + The intercept becomes intercept_scaling * synthetic feature weight + Note! the synthetic feature weight is subject to l1/l2 regularization + as all other features. + To lessen the effect of regularization on synthetic feature weight + (and therefore on the intercept) intercept_scaling has to be increased + Attributes ---------- @@ -63,11 +76,11 @@ class LogisticRegression(SparseBaseLibLinear, ClassifierMixin): """ def __init__(self, penalty='l2', dual=False, eps=1e-4, C=1.0, - fit_intercept=True): + fit_intercept=True, intercept_scaling=1): super(LogisticRegression, self).__init__ (penalty=penalty, dual=dual, loss='lr', eps=eps, C=C, - fit_intercept=fit_intercept) + fit_intercept=fit_intercept, intercept_scaling=intercept_scaling) def predict_proba(self, X): """ diff --git a/scikits/learn/linear_model/tests/test_least_angle.py b/scikits/learn/linear_model/tests/test_least_angle.py index bbaf4df951291674ed691a5bba97b4b37c1b6682..022905793fd70517a94151296e73d4b1d6fe774d 100644 --- a/scikits/learn/linear_model/tests/test_least_angle.py +++ b/scikits/learn/linear_model/tests/test_least_angle.py @@ -75,6 +75,19 @@ def test_lasso_gives_lstsq_solution(): assert_array_almost_equal(coef_lstsq , coef_path_[:,-1]) +def test_collinearity(): + """Check that lars_path is robust to collinearity in input""" + + X = np.array([[3., 3., 1.], + [2., 2., 0.], + [1., 1., 0]]) + y = np.array([1., 0., 0]) + + _, _, coef_path_ = linear_model.lars_path(X, y) + assert (not np.isnan(coef_path_).any()) + assert_array_almost_equal(np.dot(X, coef_path_[:,-1]), y) + + def test_singular_matrix(): """ Test when input is a singular matrix @@ -98,6 +111,22 @@ def test_lasso_lars_vs_lasso_cd(verbose=False): error = np.linalg.norm(c - lasso_cd.coef_) assert error < 0.01 +def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): + """ + Test that LassoLars and Lasso using coordinate descent give the + same results when early stopping is used. 
+    (test: before, in the middle, and in the last part of the path)
+    """
+    alphas_min = [10, 0.9, 1e-4]
+    for alpha_min in alphas_min:
+        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
+                                                       alpha_min=alpha_min)
+        lasso_cd = linear_model.Lasso(fit_intercept=False)
+        lasso_cd.alpha = alphas[-1]
+        lasso_cd.fit(X, y, tol=1e-8)
+        error = np.linalg.norm(lasso_path[:,-1] - lasso_cd.coef_)
+        assert error < 0.01
+
 
 if __name__ == '__main__':
     import nose
diff --git a/scikits/learn/linear_model/tests/test_logistic.py b/scikits/learn/linear_model/tests/test_logistic.py
index 1dcb876fdfee75b1cb5e44e19bf3fa1c267bab46..ace415e5f31e8c8b01aeee271851a3c1e5c650fc 100644
--- a/scikits/learn/linear_model/tests/test_logistic.py
+++ b/scikits/learn/linear_model/tests/test_logistic.py
@@ -1,7 +1,7 @@
 import numpy as np
 from numpy.testing import assert_array_equal, \
-     assert_array_almost_equal
+     assert_array_almost_equal, assert_almost_equal
 
 import nose
 from nose.tools import assert_raises
@@ -60,6 +60,15 @@ def test_inconsistent_input():
                   logistic.LogisticRegression().fit(X_, y_).predict,
                   np.random.random((3,12)))
 
+def test_transform():
+    clf = logistic.LogisticRegression(penalty="l1")
+    clf.fit(iris.data, iris.target)
+    X_new = clf.transform(iris.data)
+    clf = logistic.LogisticRegression()
+    clf.fit(X_new, iris.target)
+    pred = clf.predict(X_new)
+    assert np.mean(pred == iris.target) >= 0.75
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()
diff --git a/scikits/learn/linear_model/tests/test_ridge.py b/scikits/learn/linear_model/tests/test_ridge.py
index 1a7673d6768ae040fc3ebd35cd677574afaa2268..327683691c662bb5564505598f9121e59c3afc07 100644
--- a/scikits/learn/linear_model/tests/test_ridge.py
+++ b/scikits/learn/linear_model/tests/test_ridge.py
@@ -1,10 +1,40 @@
 import numpy as np
+import scipy.sparse as sp
 
-from numpy.testing import assert_almost_equal
+from numpy.testing import assert_almost_equal, assert_array_almost_equal, \
+        assert_equal, assert_array_equal
 
-from ..ridge import Ridge
-from ..base import LinearRegression
+from scikits.learn import datasets
+from scikits.learn.metrics import mean_square_error
+from scikits.learn.linear_model.base import LinearRegression
+
+from scikits.learn.linear_model.ridge import Ridge
+from scikits.learn.linear_model.ridge import _RidgeGCV
+from scikits.learn.linear_model.ridge import RidgeCV
+from scikits.learn.linear_model.ridge import RidgeClassifier
+from scikits.learn.linear_model.ridge import RidgeClassifierCV
+
+
+from scikits.learn.cross_val import KFold
+
+diabetes = datasets.load_diabetes()
+
+X_diabetes, y_diabetes = diabetes.data, diabetes.target
+ind = np.arange(X_diabetes.shape[0])
+np.random.shuffle(ind)
+ind = ind[:200]
+X_diabetes, y_diabetes = X_diabetes[ind], y_diabetes[ind]
+
+iris = datasets.load_iris()
+
+X_iris = sp.csr_matrix(iris.data)
+y_iris = iris.target
+
+np.random.seed(0)
+
+DENSE_FILTER = lambda X: X
+SPARSE_FILTER = lambda X: sp.csr_matrix(X)
 
 def test_ridge():
     """Ridge regression convergence test using score
@@ -16,7 +46,6 @@ def test_ridge():
 
     # With more samples than features
     n_samples, n_features = 6, 5
-    np.random.seed(0)
     y = np.random.randn(n_samples)
     X = np.random.randn(n_samples, n_features)
 
@@ -24,15 +53,19 @@
     ridge = Ridge(alpha=alpha)
     ridge.fit(X, y)
     assert ridge.score(X, y) > 0.5
 
+    ridge.fit(X, y, sample_weight=np.ones(n_samples))
+    assert ridge.score(X, y) > 0.5
+
     # With more features than samples
     n_samples, n_features = 5, 10
-    np.random.seed(0)
     y = np.random.randn(n_samples)
     X = np.random.randn(n_samples,
n_features) ridge = Ridge(alpha=alpha) ridge.fit(X, y) assert ridge.score(X, y) > .9 + ridge.fit(X, y, sample_weight=np.ones(n_samples)) + assert ridge.score(X, y) > 0.9 def test_toy_ridge_object(): """Test BayesianRegression ridge classifier @@ -46,6 +79,17 @@ def test_toy_ridge_object(): X_test = [[1], [2], [3], [4]] assert_almost_equal(clf.predict(X_test), [1., 2, 3, 4]) + assert_equal(len(clf.coef_.shape), 1) + assert_equal(type(clf.intercept_), np.float64) + + Y = np.vstack((Y,Y)).T + + clf.fit(X, Y) + X_test = [[1], [2], [3], [4]] + + assert_equal(len(clf.coef_.shape), 2) + assert_equal(type(clf.intercept_), np.ndarray) + def test_ridge_vs_lstsq(): """On alpha=0., Ridge and OLS yield the same solution.""" @@ -67,4 +111,124 @@ def test_ridge_vs_lstsq(): ols.fit (X, y, fit_intercept=False) assert_almost_equal(ridge.coef_, ols.coef_) +def _test_ridge_loo(filter_): + # test that can work with both dense or sparse matrices + n_samples = X_diabetes.shape[0] + + ret = [] + + ridge_gcv = _RidgeGCV(fit_intercept=False) + ridge = Ridge(fit_intercept=False) + + # generalized cross-validation (efficient leave-one-out) + K, v, Q = ridge_gcv._pre_compute(X_diabetes, y_diabetes) + errors, c = ridge_gcv._errors(v, Q, y_diabetes, 1.0) + values, c = ridge_gcv._values(K, v, Q, y_diabetes, 1.0) + + # brute-force leave-one-out: remove one example at a time + errors2 = [] + values2 = [] + for i in range(n_samples): + sel = np.arange(n_samples) != i + X_new = X_diabetes[sel] + y_new = y_diabetes[sel] + ridge.fit(X_new, y_new) + value = ridge.predict([X_diabetes[i]])[0] + error = (y_diabetes[i] - value) ** 2 + errors2.append(error) + values2.append(value) + + # check that efficient and brute-force LOO give same results + assert_almost_equal(errors, errors2) + assert_almost_equal(values, values2) + + # check best alpha + ridge_gcv.fit(filter_(X_diabetes), y_diabetes) + best_alpha = ridge_gcv.best_alpha + ret.append(best_alpha) + + # check that we get same best alpha with custom loss_func + ridge_gcv2 = _RidgeGCV(fit_intercept=False, loss_func=mean_square_error) + ridge_gcv2.fit(filter_(X_diabetes), y_diabetes) + assert_equal(ridge_gcv2.best_alpha, best_alpha) + + # check that we get same best alpha with sample weights + ridge_gcv.fit(filter_(X_diabetes), y_diabetes, + sample_weight=np.ones(n_samples)) + assert_equal(ridge_gcv.best_alpha, best_alpha) + + # simulate several responses + Y = np.vstack((y_diabetes,y_diabetes)).T + + ridge_gcv.fit(filter_(X_diabetes), Y) + Y_pred = ridge_gcv.predict(filter_(X_diabetes)) + ridge_gcv.fit(filter_(X_diabetes), y_diabetes) + y_pred = ridge_gcv.predict(filter_(X_diabetes)) + + assert_array_almost_equal(np.vstack((y_pred,y_pred)).T, + Y_pred) + + return ret + +def _test_ridge_cv(filter_): + n_samples = X_diabetes.shape[0] + + ridge_cv = RidgeCV() + ridge_cv.fit(filter_(X_diabetes), y_diabetes) + ridge_cv.predict(filter_(X_diabetes)) + + assert_equal(len(ridge_cv.coef_.shape), 1) + assert_equal(type(ridge_cv.intercept_), np.float64) + + cv = KFold(n_samples, 5) + ridge_cv.fit(filter_(X_diabetes), y_diabetes, cv=cv) + ridge_cv.predict(filter_(X_diabetes)) + + assert_equal(len(ridge_cv.coef_.shape), 1) + assert_equal(type(ridge_cv.intercept_), np.float64) + +def _test_ridge_diabetes(filter_): + ridge = Ridge(fit_intercept=False) + ridge.fit(filter_(X_diabetes), y_diabetes) + return np.round(ridge.score(filter_(X_diabetes), y_diabetes), 5) + +def _test_multi_ridge_diabetes(filter_): + # simulate several responses + Y = np.vstack((y_diabetes,y_diabetes)).T + + ridge = 
Ridge(fit_intercept=False) + ridge.fit(filter_(X_diabetes), Y) + Y_pred = ridge.predict(filter_(X_diabetes)) + ridge.fit(filter_(X_diabetes), y_diabetes) + y_pred = ridge.predict(filter_(X_diabetes)) + assert_array_almost_equal(np.vstack((y_pred,y_pred)).T, + Y_pred) + +def _test_ridge_classifiers(filter_): + for clf in (RidgeClassifier(), RidgeClassifierCV()): + clf.fit(filter_(X_iris), y_iris) + y_pred = clf.predict(filter_(X_iris)) + assert np.mean(y_iris == y_pred) >= 0.8 + + clf = RidgeClassifierCV() + n_samples = X_iris.shape[0] + cv = KFold(n_samples, 5) + clf.fit(filter_(X_iris), y_iris, cv=cv) + y_pred = clf.predict(filter_(X_iris)) + assert np.mean(y_iris == y_pred) >= 0.8 + +def test_dense_sparse(): + for test_func in (_test_ridge_loo, + _test_ridge_cv, + _test_ridge_diabetes, + _test_multi_ridge_diabetes, + _test_ridge_classifiers): + # test dense matrix + ret_dense = test_func(DENSE_FILTER) + # test sparse matrix + ret_sp = test_func(SPARSE_FILTER) + # test that the outputs are the same + assert_array_equal(ret_dense, ret_sp) + + diff --git a/scikits/learn/metrics/__init__.py b/scikits/learn/metrics/__init__.py index 78e3131f9e013868b99e7254cbd8be32c7cca7af..4b761ca18ae898dddbb4380dfd47af695a05e8c5 100644 --- a/scikits/learn/metrics/__init__.py +++ b/scikits/learn/metrics/__init__.py @@ -4,7 +4,9 @@ pairwise metrics or distances computation """ from .metrics import confusion_matrix, roc_curve, auc, precision_score, \ - recall_score, fbeta_score, f1_score, \ + recall_score, fbeta_score, f1_score, zero_one_score, \ precision_recall_fscore_support, classification_report, \ precision_recall_curve, explained_variance_score, r2_score, \ zero_one, mean_square_error + +from .pairwise import euclidean_distances diff --git a/scikits/learn/metrics/metrics.py b/scikits/learn/metrics/metrics.py index 6d8100107b210830d0eaafa278eb325a572adee9..3aabe702411ea3fdbd94d3b02cdc6c639cf20e1e 100644 --- a/scikits/learn/metrics/metrics.py +++ b/scikits/learn/metrics/metrics.py @@ -9,6 +9,7 @@ better # Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr> # Mathieu Blondel <mathieu@mblondel.org> +# Olivier Grisel <olivier.grisel@ensta.org> # License: BSD Style. import numpy as np @@ -256,7 +257,7 @@ def f1_score(y_true, y_pred, pos_label=1): score at 0. The relative contribution of precision and recall to the f1 score are equal. - :math:`F_1 = 2 \cdot \frac{p \cdot r}{p + r}` + F_1 = 2 * (precision * recall) / (precision + recall) See: http://en.wikipedia.org/wiki/F1_score @@ -534,6 +535,26 @@ def r2_score(y_true, y_pred): ((y_true - y_true.mean())**2).sum()) +def zero_one_score(y_true, y_pred): + """Zero-One classification score + + Positive integer (number of good classifications). + The best performance is 1. + + Return the percentage of good predictions. + + Parameters + ---------- + y_true : array-like + + y_pred : array-like + + Returns + ------- + score : integer + """ + return np.mean(y_pred == y_true) + ############################################################################### # Loss functions diff --git a/scikits/learn/metrics/pairwise.py b/scikits/learn/metrics/pairwise.py index cc9c486ac67ce20e728a45790dd86d8d2b0960eb..4b60e2c3d61677a3a0c22acc4bea509a6ae2b443 100644 --- a/scikits/learn/metrics/pairwise.py +++ b/scikits/learn/metrics/pairwise.py @@ -8,8 +8,7 @@ sets of points. 
 import numpy as np
 
-
-def euclidian_distances(X, Y):
+def euclidean_distances(X, Y, Y_norm_squared=None, squared=False):
     """
     Considering the rows of X (and Y=X) as vectors, compute the
     distance matrix between each pair of vectors.
 
     Parameters
     ----------
     X: array of shape (n_samples_1, n_features)
 
@@ -20,39 +19,64 @@
     Y: array of shape (n_samples_2, n_features)
 
+    Y_norm_squared: array [n_samples_2], optional
+        pre-computed (Y**2).sum(axis=1)
+
+    squared: boolean, optional
+        This routine will return squared Euclidean distances instead.
+
     Returns
     -------
     distances: array of shape (n_samples_1, n_samples_2)
 
     Examples
     --------
-    >>> from scikits.learn.metrics.pairwise import euclidian_distances
+    >>> from scikits.learn.metrics.pairwise import euclidean_distances
     >>> X = [[0, 1], [1, 1]]
     >>> # distance between rows of X
-    >>> euclidian_distances(X, X)
+    >>> euclidean_distances(X, X)
     array([[ 0.,  1.],
            [ 1.,  0.]])
     >>> # get distance to origin
-    >>> euclidian_distances(X, [[0, 0]])
+    >>> euclidean_distances(X, [[0, 0]])
     array([[ 1.        ],
           [ 1.41421356]])
     """
-    # shortcut in the common case euclidean_distances(X, X)
-    compute_Y = X is not Y
-
-    X = np.asanyarray(X)
-    Y = np.asanyarray(Y)
+    # should not need X_norm_squared because if you could precompute that as
+    # well as Y, then you should just pre-compute the output and not even
+    # call this function.
+    if X is Y:
+        X = Y = np.asanyarray(X)
+    else:
+        X = np.asanyarray(X)
+        Y = np.asanyarray(Y)
 
     if X.shape[1] != Y.shape[1]:
         raise ValueError("Incompatible dimension for X and Y matrices")
 
     XX = np.sum(X * X, axis=1)[:, np.newaxis]
-    if compute_Y:
-        YY = np.sum(Y * Y, axis=1)[np.newaxis, :]
-    else:
+    if X is Y:  # shortcut in the common case euclidean_distances(X, X)
         YY = XX.T
+    elif Y_norm_squared is None:
+        YY = Y.copy()
+        YY **= 2
+        YY = np.sum(YY, axis=1)[np.newaxis, :]
+    else:
+        YY = np.asanyarray(Y_norm_squared)
+        if YY.shape != (Y.shape[0],):
+            raise ValueError("Incompatible dimension for Y and Y_norm_squared")
+        YY = YY[np.newaxis, :]
+
+    # TODO:
+    # a faster cython implementation would do the dot product first,
+    # and then add XX, add YY, and do the clipping of negative values in
+    # a single pass over the output matrix.
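+
+    # the expansion below: ||x - y||^2 = ||x||^2 + ||y||^2 - 2 <x, y>, so
+    # the precomputed row norms XX and YY only need the Gram matrix X Y^T
+    # to become squared distances; np.maximum then clips small negative
+    # values caused by floating point cancellation.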
distances = XX + YY # Using broadcasting distances -= 2 * np.dot(X, Y.T) distances = np.maximum(distances, 0) - return np.sqrt(distances) + if squared: + return distances + else: + return np.sqrt(distances) + +euclidian_distances = euclidean_distances # both spelling for backward compat diff --git a/scikits/learn/metrics/tests/test_metrics.py b/scikits/learn/metrics/tests/test_metrics.py index 212e4b2a1a8f4eaf21b80230e38f66fc86c31f37..58b3a7e5dd1742ed2a251476bbb75ad80ea3973d 100644 --- a/scikits/learn/metrics/tests/test_metrics.py +++ b/scikits/learn/metrics/tests/test_metrics.py @@ -203,7 +203,7 @@ avg / total 0.62 0.61 0.56 75 assert_equal(report, expected_report) -def test_precision_recall_curve(): +def _test_precision_recall_curve(): """Test Precision-Recall and aread under PR curve""" y_true, _, probas_pred = make_prediction(binary=True) diff --git a/scikits/learn/metrics/tests/test_pairwise.py b/scikits/learn/metrics/tests/test_pairwise.py index 26b1c200b5259e00c2f6210e7ca13695c31c361e..0d21cdf679ce0a924aa22ffa0c58b2d66f951d6c 100644 --- a/scikits/learn/metrics/tests/test_pairwise.py +++ b/scikits/learn/metrics/tests/test_pairwise.py @@ -1,12 +1,12 @@ from numpy.testing import assert_array_almost_equal -from ..pairwise import euclidian_distances +from ..pairwise import euclidean_distances -def test_euclidian_distances(): +def test_euclidean_distances(): """Check that the pairwise euclidian distances computation""" X = [[0]] Y = [[1], [2]] - D = euclidian_distances(X, Y) + D = euclidean_distances(X, Y) assert_array_almost_equal(D, [[1., 2.]]) diff --git a/scikits/learn/mixture.py b/scikits/learn/mixture.py index 9798f8552b12a16c2bc98649b2e18816acd313ed..53f0be891073c118ce37e8026d3bedb8d9c8f72b 100644 --- a/scikits/learn/mixture.py +++ b/scikits/learn/mixture.py @@ -79,33 +79,36 @@ def lmvnpdf(obs, means, covars, cvtype='diag'): return lmvnpdf_dict[cvtype](obs, means, covars) -def sample_gaussian(mean, covar, cvtype='diag', n=1): +def sample_gaussian(mean, covar, cvtype='diag', n_samples=1): """Generate random samples from a Gaussian distribution. Parameters ---------- mean : array_like, shape (n_features,) Mean of the distribution. - covars : array_like + + covars : array_like, optional Covariance of the distribution. The shape depends on `cvtype`: scalar if 'spherical', (D) if 'diag', (D, D) if 'tied', or 'full' - cvtype : string + + cvtype : string, optional Type of the covariance parameters. Must be one of 'spherical', 'tied', 'diag', 'full'. Defaults to 'diag'. - n : int - Number of samples to generate. + + n_samples : int, optional + Number of samples to generate. Defaults to 1. Returns ------- - obs : array, shape (n, n_features) + obs : array, shape (n_features, n_samples) Randomly generated sample """ - ndim = len(mean) - rand = np.random.randn(ndim, n) - if n == 1: - rand.shape = (ndim,) + n_dim = len(mean) + rand = np.random.randn(n_dim, n_samples) + if n_samples == 1: + rand.shape = (n_dim,) if cvtype == 'spherical': rand *= np.sqrt(covar) @@ -134,9 +137,10 @@ class GMM(BaseEstimator): Parameters ---------- - n_states : int - Number of mixture components. - cvtype : string (read-only) + n_states : int, optional + Number of mixture components. Defaults to 1. + + cvtype : string (read-only), optional String describing the type of covariance parameters to use. Must be one of 'spherical', 'tied', 'diag', 'full'. Defaults to 'diag'. 
@@ -300,15 +304,15 @@ class GMM(BaseEstimator): Parameters ---------- - obs : array_like, shape (n, n_features) + obs : array_like, shape (n_samples, n_features) List of n_features-dimensional data points. Each row corresponds to a single data point. Returns ------- - logprob : array_like, shape (n,) + logprob : array_like, shape (n_samples,) Log probabilities of each data point in `obs` - posteriors: array_like, shape (n, n_states) + posteriors: array_like, shape (n_samples, n_states) Posterior probabilities of each mixture component for each observation """ @@ -324,13 +328,13 @@ class GMM(BaseEstimator): Parameters ---------- - obs : array_like, shape (n, n_features) + obs : array_like, shape (n_samples, n_features) List of n_features-dimensional data points. Each row corresponds to a single data point. Returns ------- - logprob : array_like, shape (n,) + logprob : array_like, shape (n_samples,) Log probabilities of each data point in `obs` """ logprob, posteriors = self.eval(obs) @@ -347,9 +351,9 @@ class GMM(BaseEstimator): Returns ------- - logprobs : array_like, shape (n,) + logprobs : array_like, shape (n_samples,) Log probability of each point in `obs` under the model. - components : array_like, shape (n,) + components : array_like, shape (n_samples,) Index of the most likelihod mixture components for each observation """ logprob, posteriors = self.eval(obs) @@ -364,7 +368,7 @@ class GMM(BaseEstimator): Returns ------- - C : array, shape = [n_samples] + C : array, shape = (n_samples,) """ logprob, components = self.decode(X) return components @@ -379,38 +383,46 @@ class GMM(BaseEstimator): Returns ------- - T : array-like, shape = [n_samples, n_states] + T : array-like, shape = (n_samples, n_states) Returns the probability of the sample for each Gaussian (state) in the model. """ logprob, posteriors = self.eval(X) return posteriors - def rvs(self, n=1): + def rvs(self, n_samples=1): """Generate random samples from the model. Parameters ---------- - n : int - Number of samples to generate. + n_samples : int, optional + Number of samples to generate. Defaults to 1. 
Returns ------- - obs : array_like, shape (n, n_features) + obs : array_like, shape (n_samples, n_features) List of samples """ weight_pdf = self.weights weight_cdf = np.cumsum(weight_pdf) - obs = np.empty((n, self.n_features)) - for x in xrange(n): - rand = np.random.rand() - c = (weight_cdf > rand).argmax() - if self._cvtype == 'tied': - cv = self._covars - else: - cv = self._covars[c] - obs[x] = sample_gaussian(self._means[c], cv, self._cvtype) + obs = np.empty((n_samples, self.n_features)) + rand = np.random.rand(n_samples) + # decide which component to use for each sample + comps = weight_cdf.searchsorted(rand) + # for each component, generate all needed samples + for comp in xrange(self._n_states): + # occurrences of current component in obs + comp_in_obs = (comp==comps) + # number of those occurrences + num_comp_in_obs = comp_in_obs.sum() + if num_comp_in_obs > 0: + if self._cvtype == 'tied': + cv = self._covars + else: + cv = self._covars[comp] + obs[comp_in_obs] = sample_gaussian( + self._means[comp], cv, self._cvtype, num_comp_in_obs).T return obs def fit(self, X, n_iter=10, min_covar=1e-3, thresh=1e-2, params='wmc', @@ -514,11 +526,11 @@ class GMM(BaseEstimator): def _lmvnpdfdiag(obs, means=0.0, covars=1.0): - nobs, ndim = obs.shape + n_obs, n_dim = obs.shape # (x-y).T A (x-y) = x.T A x - 2x.T A y + y.T A y #lpr = -0.5 * (np.tile((np.sum((means**2) / covars, 1) - # + np.sum(np.log(covars), 1))[np.newaxis,:], (nobs,1)) - lpr = -0.5 * (ndim * np.log(2 * np.pi) + np.sum(np.log(covars), 1) + # + np.sum(np.log(covars), 1))[np.newaxis,:], (n_obs,1)) + lpr = -0.5 * (n_dim * np.log(2 * np.pi) + np.sum(np.log(covars), 1) + np.sum((means ** 2) / covars, 1) - 2 * np.dot(obs, (means / covars).T) + np.dot(obs ** 2, (1.0 / covars).T)) @@ -534,10 +546,10 @@ def _lmvnpdfspherical(obs, means=0.0, covars=1.0): def _lmvnpdftied(obs, means, covars): from scipy import linalg - nobs, ndim = obs.shape + n_obs, n_dim = obs.shape # (x-y).T A (x-y) = x.T A x - 2x.T A y + y.T A y icv = linalg.pinv(covars) - lpr = -0.5 * (ndim * np.log(2 * np.pi) + np.log(linalg.det(covars)) + lpr = -0.5 * (n_dim * np.log(2 * np.pi) + np.log(linalg.det(covars)) + np.sum(obs * np.dot(obs, icv), 1)[:,np.newaxis] - 2 * np.dot(np.dot(obs, icv), means.T) + np.sum(means * np.dot(means, icv), 1)) @@ -556,20 +568,20 @@ def _lmvnpdffull(obs, means, covars): else: # slower, but works solve_triangular = linalg.solve - nobs, ndim = obs.shape + n_obs, n_dim = obs.shape nmix = len(means) - log_prob = np.empty((nobs,nmix)) + log_prob = np.empty((n_obs,nmix)) for c, (mu, cv) in enumerate(itertools.izip(means, covars)): cv_chol = linalg.cholesky(cv, lower=True) cv_log_det = 2*np.sum(np.log(np.diagonal(cv_chol))) cv_sol = solve_triangular(cv_chol, (obs - mu).T, lower=True).T log_prob[:, c] = -.5 * (np.sum(cv_sol**2, axis=1) + \ - ndim * np.log(2 * np.pi) + cv_log_det) + n_dim * np.log(2 * np.pi) + cv_log_det) return log_prob -def _validate_covars(covars, cvtype, nmix, ndim): +def _validate_covars(covars, cvtype, nmix, n_dim): from scipy import linalg if cvtype == 'spherical': if len(covars) != nmix: @@ -577,21 +589,21 @@ def _validate_covars(covars, cvtype, nmix, ndim): elif np.any(covars <= 0): raise ValueError("'spherical' covars must be non-negative") elif cvtype == 'tied': - if covars.shape != (ndim, ndim): - raise ValueError("'tied' covars must have shape (ndim, ndim)") + if covars.shape != (n_dim, n_dim): + raise ValueError("'tied' covars must have shape (n_dim, n_dim)") elif (not np.allclose(covars, covars.T) or 
np.any(linalg.eigvalsh(covars) <= 0)): raise ValueError("'tied' covars must be symmetric, " "positive-definite") elif cvtype == 'diag': - if covars.shape != (nmix, ndim): - raise ValueError("'diag' covars must have shape (nmix, ndim)") + if covars.shape != (nmix, n_dim): + raise ValueError("'diag' covars must have shape (nmix, n_dim)") elif np.any(covars <= 0): raise ValueError("'diag' covars must be non-negative") elif cvtype == 'full': - if covars.shape != (nmix, ndim, ndim): + if covars.shape != (nmix, n_dim, n_dim): raise ValueError("'full' covars must have shape " - "(nmix, ndim, ndim)") + "(nmix, n_dim, n_dim)") for n,cv in enumerate(covars): if (not np.allclose(cv, cv.T) or np.any(linalg.eigvalsh(cv) <= 0)): diff --git a/scikits/learn/neighbors.py b/scikits/learn/neighbors.py index f181339e58e66f71e85013c07e87c730eef0df06..07fca967db8ca255728b0c7d7da03074cdc3fa08 100644 --- a/scikits/learn/neighbors.py +++ b/scikits/learn/neighbors.py @@ -1,115 +1,151 @@ -""" -k-Nearest Neighbor Algorithm. +"""Nearest Neighbor related algorithms""" + +# Author: Fabian Pedregosa <fabian.pedregosa@inria.fr> +# Alexandre Gramfort <alexandre.gramfort@inria.fr> +# +# License: BSD, (C) INRIA -Uses BallTree algorithm, which is an efficient way to perform fast -neighbor searches in high dimensionality. -""" import numpy as np -from scipy import stats -from scipy import linalg from .base import BaseEstimator, ClassifierMixin, RegressorMixin -from .ball_tree import BallTree +from .ball_tree import BallTree, knn_brute -class Neighbors(BaseEstimator, ClassifierMixin): +class NeighborsClassifier(BaseEstimator, ClassifierMixin): """Classifier implementing k-Nearest Neighbor Algorithm. Parameters ---------- - data : array-like, shape (n, k) - The data points to be indexed. This array is not copied, and so - modifying this data will result in bogus results. - labels : array - An array representing labels for the data (only arrays of - integers are supported). - n_neighbors : int - default number of neighbors. - window_size : int + n_neighbors : int, optional + Default number of neighbors. Defaults to 5. + + window_size : int, optional Window size passed to BallTree + algorithm : {'auto', 'ball_tree', 'brute', 'brute_inplace'}, optional + Algorithm used to compute the nearest neighbors. 'ball_tree' + will construct a BallTree, 'brute' and 'brute_inplace' will + perform brute-force search.'auto' will guess the most + appropriate based on current dataset. + Examples -------- - >>> samples = [[0.,0.,1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]] - >>> labels = [0,0,1,1] - >>> from scikits.learn.neighbors import Neighbors - >>> neigh = Neighbors(n_neighbors=3) + >>> samples = [[0, 0, 1], [1, 0, 0]] + >>> labels = [0, 1] + >>> from scikits.learn.neighbors import NeighborsClassifier + >>> neigh = NeighborsClassifier(n_neighbors=1) >>> neigh.fit(samples, labels) - Neighbors(n_neighbors=3, window_size=1) + NeighborsClassifier(n_neighbors=1, window_size=1, algorithm='auto') >>> print neigh.predict([[0,0,0]]) - [ 0.] + [1] - Notes - ----- + See also + -------- + BallTree + + References + ---------- http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm """ - def __init__(self, n_neighbors=5, window_size=1): - """Internally uses the ball tree datastructure and algorithm for fast - neighbors lookups on high dimensional datasets. 
- """ + def __init__(self, n_neighbors=5, algorithm='auto', window_size=1): self.n_neighbors = n_neighbors self.window_size = window_size + self.algorithm = algorithm + + + def fit(self, X, Y, **params): + """ + Fit the model using X, y as training data. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Training data. - def fit(self, X, Y=()): - # we need Y to be an integer, because after we'll use it an index - self.Y = np.asanyarray(Y, dtype=np.int) - self.ball_tree = BallTree(X, self.window_size) + y : array-like, shape = [n_samples] + Target values, array of integer values. + + params : list of keyword, optional + Overwrite keywords from __init__ + """ + X = np.asanyarray(X) + self._y = np.asanyarray(Y) + self._set_params(**params) + + if self.algorithm == 'ball_tree' or \ + (self.algorithm == 'auto' and X.shape[1] < 20): + self.ball_tree = BallTree(X, self.window_size) + else: + self.ball_tree = None + self._fit_X = X return self - def kneighbors(self, data, n_neighbors=None): + + def kneighbors(self, data, return_distance=True, **params): """Finds the K-neighbors of a point. + Returns distance + Parameters ---------- point : array-like The new point. + n_neighbors : int Number of neighbors to get (default is the value passed to the constructor). + return_distance : boolean, optional. Defaults to True. + If False, distances will not be returned + Returns ------- dist : array - Array representing the lengths to point. + Array representing the lengths to point, only present if + return_distance=True + ind : array - Array representing the indices of the nearest points in the - population matrix. + Indices of the nearest points in the population matrix. Examples -------- - In the following example, we construnct a Neighbors class from an - array representing our data set and ask who's the closest point to - [1,1,1] + In the following example, we construnct a NeighborsClassifier + class from an array representing our data set and ask who's + the closest point to [1,1,1] >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]] >>> labels = [0, 0, 1] - >>> from scikits.learn.neighbors import Neighbors - >>> neigh = Neighbors(n_neighbors=1) + >>> from scikits.learn.neighbors import NeighborsClassifier + >>> neigh = NeighborsClassifier(n_neighbors=1) >>> neigh.fit(samples, labels) - Neighbors(n_neighbors=1, window_size=1) + NeighborsClassifier(n_neighbors=1, window_size=1, algorithm='auto') >>> print neigh.kneighbors([1., 1., 1.]) - (array(0.5), array(2)) + (array([ 0.5]), array([2])) As you can see, it returns [0.5], and [2], which means that the element is at distance 0.5 and is the third element of samples (indexes start at 0). You can also query for multiple points: - >>> print neigh.kneighbors([[0., 1., 0.], [1., 0., 1.]]) - (array([ 0.5 , 1.11803399]), array([1, 2])) + >>> X = [[0., 1., 0.], [1., 0., 1.]] + >>> neigh.kneighbors(X, return_distance=False) + array([[1], + [2]]) """ - if n_neighbors is None: - n_neighbors = self.n_neighbors - return self.ball_tree.query(data, k=n_neighbors) + self._set_params(**params) + return self.ball_tree.query( + data, k=self.n_neighbors, return_distance=return_distance) + - def predict(self, T, n_neighbors=None): + def predict(self, X, **params): """Predict the class labels for the provided data. Parameters ---------- - test: array + X: array A 2-D array representing the test point. + n_neighbors : int Number of neighbors to get (default is the value passed to the constructor). 
@@ -118,68 +154,70 @@ class Neighbors(BaseEstimator, ClassifierMixin): ------- labels: array List of class labels (one for each data sample). - - Examples - -------- - >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]] - >>> labels = [0, 0, 1] - >>> from scikits.learn.neighbors import Neighbors - >>> neigh = Neighbors(n_neighbors=1) - >>> neigh.fit(samples, labels) - Neighbors(n_neighbors=1, window_size=1) - >>> print neigh.predict([.2, .1, .2]) - 0 - >>> print neigh.predict([[0., -1., 0.], [3., 2., 0.]]) - [0 1] """ - T = np.asanyarray(T) - if n_neighbors is None: - n_neighbors = self.n_neighbors - return _predict_from_BallTree(self.ball_tree, self.Y, T, n_neighbors) + X = np.atleast_2d(X) + self._set_params(**params) + # .. get neighbors .. + if self.ball_tree is None: + if self.algorithm == 'brute_inplace': + neigh_ind = knn_brute(self._fit_X, X, self.n_neighbors) + else: + from .metrics import euclidean_distances + dist = euclidean_distances( + X, self._fit_X, squared=True) + neigh_ind = dist.argsort(axis=1)[:, :self.n_neighbors] + else: + neigh_ind = self.ball_tree.query( + X, self.n_neighbors, return_distance=False) -def _predict_from_BallTree(ball_tree, Y, test, n_neighbors): - """Predict target from BallTree object containing the data points. + # .. most popular label .. + pred_labels = self._y[neigh_ind] + from scipy import stats + mode, _ = stats.mode(pred_labels, axis=1) + return mode.flatten().astype(np.int) - This is a helper method, not meant to be used directly. It will - not check that input is of the correct type. - """ - Y_ = Y[ball_tree.query(test, k=n_neighbors, return_distance=False)] - if n_neighbors == 1: - return Y_ - return (stats.mode(Y_, axis=1)[0]).ravel() ############################################################################### -# Neighbors Barycenter class for regression problems +# NeighborsRegressor class for regression problems -class NeighborsBarycenter(BaseEstimator, RegressorMixin): +class NeighborsRegressor(NeighborsClassifier, RegressorMixin): """Regression based on k-Nearest Neighbor Algorithm. The target is predicted by local interpolation of the targets associated of the k-Nearest Neighbors in the training set. - The interpolation weights correspond to barycenter weights. + + Different modes for estimating the result can be set via parameter + mode. 'barycenter' will apply the weights that best reconstruct + the point from its neighbors while 'mean' will apply constant + weights to each point. Parameters ---------- - X : array-like, shape (n_samples, n_features) - The data points to be indexed. This array is not copied, and so - modifying this data will result in bogus results. - y : array - An array representing labels for the data (only arrays of - integers are supported). - n_neighbors : int - default number of neighbors. - window_size : int + n_neighbors : int, optional + Default number of neighbors. Defaults to 5. + + window_size : int, optional Window size passed to BallTree + mode : {'mean', 'barycenter'}, optional + Weights to apply to labels. + + algorithm : {'auto', 'ball_tree', 'brute', 'brute_inplace'}, optional + Algorithm used to compute the nearest neighbors. 'ball_tree' + will construct a BallTree, 'brute' and 'brute_inplace' will + perform brute-force search.'auto' will guess the most + appropriate based on current dataset. 
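The brute-force path of predict above is just squared-distance ranking followed by a majority vote. The same steps in isolation, as a sketch rather than the class's code (the data, labels and k below are made up):

import numpy as np
from scipy import stats

np.random.seed(0)
X_train = np.random.randn(50, 3)
y_train = (X_train[:, 0] > 0).astype(np.int)
X_test = np.random.randn(10, 3)
k = 5

# squared distances via the same expansion euclidean_distances uses
d2 = (X_test ** 2).sum(axis=1)[:, np.newaxis] \
    + (X_train ** 2).sum(axis=1) - 2 * np.dot(X_test, X_train.T)

# indices of the k nearest training points, then a majority vote over
# their labels, mirroring the argsort + stats.mode calls in predict
neigh_ind = d2.argsort(axis=1)[:, :k]
mode, _ = stats.mode(y_train[neigh_ind], axis=1)
y_pred = mode.ravel().astype(np.int)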
+ Examples -------- >>> X = [[0], [1], [2], [3]] >>> y = [0, 0, 1, 1] - >>> from scikits.learn.neighbors import NeighborsBarycenter - >>> neigh = NeighborsBarycenter(n_neighbors=2) + >>> from scikits.learn.neighbors import NeighborsRegressor + >>> neigh = NeighborsRegressor(n_neighbors=2) >>> neigh.fit(X, y) - NeighborsBarycenter(n_neighbors=2, window_size=1) + NeighborsRegressor(n_neighbors=2, window_size=1, mode='mean', + algorithm='auto') >>> print neigh.predict([[1.5]]) [ 0.5] @@ -188,26 +226,24 @@ class NeighborsBarycenter(BaseEstimator, RegressorMixin): http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm """ - def __init__(self, n_neighbors=5, window_size=1): - """Internally uses the ball tree datastructure and algorithm for fast - neighbors lookups on high dimensional datasets. - """ + + def __init__(self, n_neighbors=5, mode='mean', algorithm='auto', + window_size=1): self.n_neighbors = n_neighbors self.window_size = window_size + self.mode = mode + self.algorithm = algorithm - def fit(self, X, y, copy=True): - self._y = np.array(y, copy=copy) - self.ball_tree = BallTree(X, self.window_size) - return self - def predict(self, T, n_neighbors=None): + def predict(self, X, **params): """Predict the target for the provided data. Parameters ---------- - T : array + X : array A 2-D array representing the test data. - n_neighbors : int + + n_neighbors : int, optional Number of neighbors to get (default is the value passed to the constructor). @@ -215,79 +251,99 @@ class NeighborsBarycenter(BaseEstimator, RegressorMixin): ------- y: array List of target values (one for each data sample). - - Examples - -------- - >>> X = [[0], [1], [2]] - >>> y = [0, 0, 1] - >>> from scikits.learn.neighbors import NeighborsBarycenter - >>> neigh = NeighborsBarycenter(n_neighbors=2) - >>> neigh.fit(X, y) - NeighborsBarycenter(n_neighbors=2, window_size=1) - >>> print neigh.predict([[.5], [1.5]]) - [ 0. 0.5] """ - T = np.asanyarray(T) - if T.ndim == 1: - T = T[:,None] - if n_neighbors is None: - n_neighbors = self.n_neighbors - A = kneighbors_graph(T, n_neighbors=n_neighbors, weight="barycenter", - ball_tree=self.ball_tree).tocsr() - return A * self._y + X = np.atleast_2d(np.asanyarray(X)) + self._set_params(**params) + + # .. get neighbors .. + if self.ball_tree is None: + if self.algorithm == 'brute_inplace': + neigh_ind = knn_brute(self._fit_X, X, self.n_neighbors) + else: + from .metrics.pairwise import euclidean_distances + dist = euclidean_distances( + X, self._fit_X, squared=False) + neigh_ind = dist.argsort(axis=1)[:, :self.n_neighbors] + neigh = self._fit_X[neigh_ind] + else: + neigh_ind = self.ball_tree.query( + X, self.n_neighbors, return_distance=False) + neigh = self.ball_tree.data[neigh_ind] + + # .. return labels .. + if self.mode == 'barycenter': + W = barycenter_weights(X, neigh) + return (W * self._y[neigh_ind]).sum(axis=1) + + elif self.mode == 'mean': + return np.mean(self._y[neigh_ind], axis=1) + + else: + raise ValueError( + 'Unsupported mode, must be one of "barycenter" or ' + '"mean" but got %s instead' % self.mode) ############################################################################### # Utils k-NN based Functions -def barycenter_weights(x, X_neighbors, tol=1e-3): - """Computes barycenter weights +def barycenter_weights(X, Z, cond=None): + """ + Compute barycenter weights of X from Y along the first axis. - We estimate the weights to assign to each point in X_neighbors - to recover the point x. The barycenter weights sum to 1. 
- If x do not belong to the span of X_neighbors, it's the - projection of x onto the span that is recovered. + We estimate the weights to assign to each point in Y[i] to recover + the point X[i]. The barycenter weights sum to 1. Parameters ---------- - x : array - a 1D array + X : array-like, shape (n_samples, n_dim) - X_neighbors : array - a 2D array containing samples + Z : array-like, shape (n_samples, n_neighbors, n_dim) - tol : float - tolerance + cond: float, optional + Cutoff for small singular values; used to determine effective + rank of Z[i]. Singular values smaller than ``rcond * + largest_singular_value`` are considered zero. Returns ------- - array of barycenter weights that sum to 1 + B : array-like, shape (n_samples, n_neighbors) - Examples - -------- - >>> from scikits.learn.neighbors import barycenter_weights - >>> X_neighbors, x = [[0], [2]], [0.5] - >>> barycenter_weights(x, X_neighbors) - array([ 0.74968789, 0.25031211]) + Notes + ----- + See developers note for more information. """ - x = np.asanyarray(x) - X_neighbors = np.asanyarray(X_neighbors) - if x.ndim == 1: - x = x[None,:] - if X_neighbors.ndim == 1: - X_neighbors = X_neighbors[:,None] - z = x - X_neighbors - gram = np.dot(z, z.T) - # Add constant on diagonal to avoid singular matrices - diag_stride = gram.shape[0] + 1 - gram.flat[::diag_stride] += tol * np.trace(gram) - w = linalg.solve(gram, np.ones(len(X_neighbors))) - w /= np.sum(w) - return w - - -def kneighbors_graph(X, n_neighbors, weight=None, ball_tree=None, - window_size=1): - """Computes the (weighted) graph of k-Neighbors +# +# .. local variables .. +# + from scipy import linalg + X, Z = map(np.asanyarray, (X, Z)) + n_samples, n_neighbors = X.shape[0], Z.shape[1] + if X.dtype.kind == 'i': + X = X.astype(np.float) + B = np.empty((n_samples, n_neighbors), dtype=X.dtype) + v = np.ones(n_neighbors, dtype=X.dtype) + rank_update, = linalg.get_blas_funcs(('ger',), (X,)) + +# +# .. constrained least squares .. +# + v[0] -= np.sqrt(n_neighbors) + B[:, 0] = 1. / np.sqrt(n_neighbors) + if n_neighbors <= 1: + return B + alpha = - 1. / (n_neighbors - np.sqrt(n_neighbors)) + for i, A in enumerate(Z.transpose(0, 2, 1)): + C = rank_update(alpha, np.dot(A, v), v, a=A) + B[i, 1:] = linalg.lstsq( + C[:, 1:], X[i] - C[:, 0] / np.sqrt(n_neighbors), cond=cond, + overwrite_a=True, overwrite_b=True)[0].ravel() + B[i] = rank_update(alpha, v, np.dot(v.T, B[i]), a=B[i]) + + return B + + +def kneighbors_graph(X, n_neighbors, mode='connectivity'): + """Computes the (weighted) graph of k-Neighbors for points in X Parameters ---------- @@ -297,63 +353,64 @@ def kneighbors_graph(X, n_neighbors, weight=None, ball_tree=None, n_neighbors : int Number of neighbors for each sample. - weight : None (default) - Weights to apply on graph edges. If weight is None - then no weighting is applied (1 for each edge). - If weight equals "distance" the edge weight is the - euclidian distance. If weight equals "barycenter" - the weights are barycenter weights estimated by - solving a linear system for each point. - - ball_tree : None or instance of precomputed BallTree - - window_size : int - Window size pass to the BallTree + mode : {'connectivity', 'distance', 'barycenter'} + Type of returned matrix: 'connectivity' will return the + connectivity matrix with ones and zeros, in 'distance' the + edges are euclidian distance between points. In 'barycenter' + they are barycenter weights estimated by solving a linear + system for each point. 
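For mode='barycenter', each row solves the constrained least squares problem min ||x - sum_j w_j z_j||^2 subject to sum_j w_j = 1. The patch's barycenter_weights above eliminates the constraint with a Householder-style trick and linalg.lstsq; the naive Gram-matrix formulation below, which mirrors the removed implementation (the regularization term is illustrative), is easier to read:

import numpy as np
from scipy import linalg

def barycenter_weights_naive(x, Z, reg=1e-3):
    # with z_j = Z[j] - x the objective becomes w^T G w, G_jk = <z_j, z_k>;
    # the Lagrange condition gives G w = 1, then renormalize to sum to 1
    z = Z - x
    G = np.dot(z, z.T)
    # small ridge on the diagonal in case the neighborhood is degenerate
    G.flat[::G.shape[0] + 1] += reg * np.trace(G)
    w = linalg.solve(G, np.ones(len(Z)), sym_pos=True)
    return w / w.sum()

# reconstruct 0.5 from neighbors 0 and 2: weights close to [0.75, 0.25]
w = barycenter_weights_naive(np.array([0.5]), np.array([[0.], [2.]]))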
     Returns
     -------
-    A : sparse matrix, shape = [n_samples, n_samples]
-        A is returned as LInked List Sparse matrix
-        A[i,j] = weight of edge that connects i to j
+    A : CSR sparse matrix, shape = [n_samples, n_samples]
+        A[i,j] is assigned the weight of edge that connects i to j.
 
     Examples
     --------
-    >>> X = [[0], [2], [1]]
+    >>> X = [[0], [3], [1]]
     >>> from scikits.learn.neighbors import kneighbors_graph
     >>> A = kneighbors_graph(X, 2)
     >>> A.todense()
     matrix([[ 1.,  0.,  1.],
             [ 0.,  1.,  1.],
-            [ 0.,  1.,  1.]])
+            [ 1.,  0.,  1.]])
     """
+
+#
+# .. local variables ..
+#
     from scipy import sparse
     X = np.asanyarray(X)
     n_samples = X.shape[0]
-    if ball_tree is None:
-        ball_tree = BallTree(X, window_size)
-    A = sparse.lil_matrix((n_samples, ball_tree.size))
-    dist, ind = ball_tree.query(X, k=n_neighbors)
-    if weight is None:
-        for i, li in enumerate(ind):
-            if n_neighbors > 1:
-                A[i, list(li)] = np.ones(n_neighbors)
-            else:
-                A[i, li] = 1.0
-    elif weight is "distance":
-        for i, li in enumerate(ind):
-            if n_neighbors > 1:
-                A[i, list(li)] = dist[i, :]
-            else:
-                A[i, li] = dist[i, 0]
-    elif weight is "barycenter":
-        # XXX : the next loop could be done in parallel
-        # by parallelizing groups of indices
-        for i, li in enumerate(ind):
-            if n_neighbors > 1:
-                X_i = ball_tree.data[li]
-                A[i, list(li)] = barycenter_weights(X[i], X_i)
-            else:
-                A[i, li] = 1.0
+    ball_tree = BallTree(X)
+    n_nonzero = n_neighbors * n_samples
+    A_indptr = np.arange(0, n_nonzero + 1, n_neighbors)
+
+#
+# .. construct CSR matrix ..
+#
+    if mode == 'connectivity':
+        A_data = np.ones((n_samples, n_neighbors))
+        A_ind = ball_tree.query(
+            X, k=n_neighbors, return_distance=False)
+
+    elif mode == 'distance':
+        data, ind = ball_tree.query(X, k=n_neighbors + 1)
+        A_data, A_ind = data[:, 1:], ind[:, 1:]
+
+    elif mode == 'barycenter':
+        ind = ball_tree.query(
+            X, k=n_neighbors + 1, return_distance=False)
+        A_ind = ind[:, 1:]
+        A_data = barycenter_weights(X, X[A_ind])
+
     else:
-        raise ValueError("Unknown weight type")
+        raise ValueError(
+            'Unsupported mode, must be one of "connectivity", '
+            '"distance" or "barycenter" but got %s instead' % mode)
+
+    A = sparse.csr_matrix(
+        (A_data.reshape(-1), A_ind.reshape(-1), A_indptr),
+        shape=(n_samples, n_samples))
+
     return A
diff --git a/scikits/learn/pca.py b/scikits/learn/pca.py
index 38fc4fe1bbe180cb63a251b2e108d59821d6c74c..8cb7bdb17ce69f9975d7f564b293499a27c623bd 100644
--- a/scikits/learn/pca.py
+++ b/scikits/learn/pca.py
@@ -66,12 +66,12 @@ def _assess_dimension_(spectrum, rank, n_samples, dim):
     spectrum_ = spectrum.copy()
     spectrum_[rank:dim] = v
     for i in range(rank):
-        for j in range (i + 1, dim):
+        for j in range(i + 1, dim):
             pa += (np.log((spectrum[i] - spectrum[j]) *
                           (1. / spectrum_[j] - 1. / spectrum_[i]))
                    + np.log(n_samples))
 
-    ll = pu + pl + pv + pp -pa / 2 - rank * np.log(n_samples) / 2
+    ll = pu + pl + pv + pp - pa / 2 - rank * np.log(n_samples) / 2
 
     return ll
 
@@ -104,16 +104,17 @@ class PCA(BaseEstimator):
 
     Parameters
     ----------
-    X: array-like, shape (n_samples, n_features)
-        Training vector, where n_samples in the number of samples and
-        n_features is the number of features.
-
     n_components: int, none or string
         Number of components to keep.
if n_components is not set all components are kept: n_components == min(n_samples, n_features) + if n_components == 'mle', Minka's MLE is used to guess the dimension + if 0 < n_components < 1, select the number of components such that + the explained variance ratio is greater + than n_components + copy: bool If False, data passed to fit are overwritten @@ -165,10 +166,22 @@ class PCA(BaseEstimator): self.whiten = whiten def fit(self, X, **params): - """Fit the model to the data X""" + """Fit the model from data in X. + + Parameters + ---------- + X: array-like, shape (n_samples, n_features) + Training vector, where n_samples in the number of samples + and n_features is the number of features. + + Returns + ------- + self : object + Returns the instance itself. + """ self._set_params(**params) X = np.atleast_2d(X) - n_samples = X.shape[0] + n_samples, n_features = X.shape if self.copy: X = X.copy() # Center data @@ -189,6 +202,13 @@ class PCA(BaseEstimator): self.n_components = _infer_dimension_(self.explained_variance_, n_samples, X.shape[1]) + elif 0 < self.n_components and self.n_components < 1.0: + # number of components for which the cumulated explained variance + # percentage is superior to the desired threshold + n_remove = np.sum(self.explained_variance_ratio_.cumsum() >= + self.n_components) - 1 + self.n_components = n_features - n_remove + if self.n_components is not None: self.components_ = self.components_[:, :self.n_components] self.explained_variance_ = \ @@ -200,9 +220,17 @@ class PCA(BaseEstimator): def transform(self, X): """Apply the dimension reduction learned on the train data.""" - Xr = X - self.mean_ - Xr = np.dot(Xr, self.components_) - return Xr + X_transformed = X - self.mean_ + X_transformed = np.dot(X_transformed, self.components_) + return X_transformed + + def inverse_transform(self, X): + """Return an input X_original whose transform would be X + + Note: if whitening is enabled, inverse_transform does not compute the + exact inverse operation as transform. + """ + return np.dot(X, self.components_.T) + self.mean_ class ProbabilisticPCA(PCA): @@ -228,13 +256,14 @@ class ProbabilisticPCA(PCA): if self.dim <= self.n_components: delta = np.zeros(self.dim) elif homoscedastic: - delta = (Xr ** 2).sum() / (n_samples*(self.dim)) * np.ones(self.dim) + delta = (Xr ** 2).sum() * np.ones(self.dim) \ + / (n_samples * self.dim) else: delta = (Xr ** 2).mean(0) / (self.dim - self.n_components) self.covariance_ = np.diag(delta) for k in range(self.n_components): - add_cov = np.dot( - self.components_[:, k:k+1], self.components_[:, k:k+1].T) + add_cov = np.dot( + self.components_[:, k:k + 1], self.components_[:, k:k + 1].T) self.covariance_ += self.explained_variance_[k] * add_cov return self @@ -273,10 +302,6 @@ class RandomizedPCA(BaseEstimator): Parameters ---------- - X: array-like or scipy.sparse matrix, shape (n_samples, n_features) - Training vector, where n_samples in the number of samples and - n_features is the number of features. - n_components: int Maximum number of components to keep: default is 50. 
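The 0 < n_components < 1 branch added to PCA.fit above turns a variance-ratio threshold into a component count. Its arithmetic in isolation (the ratio values here are made up):

import numpy as np

# sorted explained variance ratios, as PCA.fit computes them
explained_variance_ratio_ = np.array([0.6, 0.25, 0.1, 0.05])
n_features = 4
threshold = 0.9    # i.e. a fractional n_components

# the added branch: count the components at or past the threshold and
# drop all but the first of them
n_remove = np.sum(explained_variance_ratio_.cumsum() >= threshold) - 1
n_components = n_features - n_remove
assert n_components == 3   # cumsum [0.6, 0.85, 0.95, 1.0] crosses at 3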
@@ -306,15 +331,6 @@ class RandomizedPCA(BaseEstimator): k is not set then all components are stored and the sum of explained variances is equal to 1.0 - References - ----- - Finding structure with randomness: Stochastic algorithms for constructing - approximate matrix decompositions - Halko, et al., 2009 (arXiv:909) - - A randomized algorithm for the decomposition of matrices - Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert - Examples -------- >>> import numpy as np @@ -330,6 +346,19 @@ class RandomizedPCA(BaseEstimator): -------- PCA ProbabilisticPCA + + Notes + ----- + References: + + * Finding structure with randomness: Stochastic algorithms for + constructing approximate matrix decompositions Halko, et al., 2009 + (arXiv:909) + + * A randomized algorithm for the decomposition of matrices + Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert + + """ def __init__(self, n_components, copy=True, iterated_power=3, @@ -341,7 +370,19 @@ class RandomizedPCA(BaseEstimator): self.mean_ = None def fit(self, X, **params): - """Fit the model to the data X""" + """Fit the model to the data X. + + Parameters + ---------- + X: array-like or scipy.sparse matrix, shape (n_samples, n_features) + Training vector, where n_samples is the number of samples and + n_features is the number of features. + + Returns + ------- + self : object + Returns the instance itself. + """ self._set_params(**params) n_samples = X.shape[0] @@ -378,4 +419,10 @@ class RandomizedPCA(BaseEstimator): X = safe_sparse_dot(X, self.components_) return X + def inverse_transform(self, X): + """Return a reconstructed input whose transform would be X""" + X_original = safe_sparse_dot(X, self.components_.T) + if self.mean_ is not None: + X_original = X_original + self.mean_ + return X_original diff --git a/scikits/learn/pipeline.py b/scikits/learn/pipeline.py index 7b5734caeb7bdf083dccc02771b32dd5b2ac8c08..a2826f39780d7ce11b07f24b3489441196908fe8 100644 --- a/scikits/learn/pipeline.py +++ b/scikits/learn/pipeline.py @@ -176,19 +176,3 @@ class Pipeline(BaseEstimator): Xt = transform.transform(Xt) return self.steps[-1][-1].score(Xt, y) - def get_support(self): - support_ = None - for name, transform in self.steps[:-1]: - if hasattr(transform, 'get_support'): - support_ = transform.get_support() - if support_ is None: - support_ = np.ones(self.steps[-1][-1].coef_.shape, dtype=np.bool) - return support_ - - @property - def coef_(self): - support_ = self.get_support() - coef = np.zeros(support_.shape, dtype=np.float) - coef[support_] = self.steps[-1][-1].coef_ - return coef - diff --git a/scikits/learn/preprocessing/__init__.py b/scikits/learn/preprocessing/__init__.py index ec394ad66e426123af51a0950d4ebb662844856f..6e6bc2a3c68582b4b5e333d16b24bdd992acea4f 100644 --- a/scikits/learn/preprocessing/__init__.py +++ b/scikits/learn/preprocessing/__init__.py @@ -7,7 +7,7 @@ import numpy as np -from ..base import BaseEstimator +from ..base import BaseEstimator, TransformerMixin def _mean_and_std(X, axis=0, with_std=True): @@ -55,13 +55,13 @@ class Scaler(BaseEstimator): def __init__(self, with_std=True): self.with_std = with_std - def fit(self, X, y=None, **params): + def fit(self, X, **params): self._set_params(**params) self.mean_, self.std_ = _mean_and_std(X, axis=0, with_std=self.with_std) return self - def transform(self, X, y=None, copy=True): + def transform(self, X, copy=True): if copy: X = X.copy() # We are taking a view of the X array and modifying it @@ -74,11 +74,11 @@ class Scaler(BaseEstimator): class
Normalizer(BaseEstimator): """Normalize vectors such that they sum to 1""" - def fit(self, X, y=None, **params): + def fit(self, X, **params): self._set_params(**params) return self - def transform(self, X, y=None, copy=True): + def transform(self, X, copy=True): if copy: X = X.copy() norms = X.sum(axis=1)[:, np.newaxis] @@ -91,15 +91,15 @@ class Normalizer(BaseEstimator): class LengthNormalizer(BaseEstimator): """Normalize vectors to unit vectors""" - def fit(self, X, y=None, **params): + def fit(self, X, **params): self._set_params(**params) return self - def transform(self, X, y=None, copy=True): + def transform(self, X, copy=True): if copy: X = X.copy() - norms = np.sqrt(np.sum(X ** 2, axis=1))[:,np.newaxis] + norms = np.sqrt(np.sum(X ** 2, axis=1))[:, np.newaxis] norms[norms == 0.0] = 1.0 X /= norms @@ -112,11 +112,11 @@ class Binarizer(BaseEstimator): def __init__(self, threshold=0.0): self.threshold = threshold - def fit(self, X, y=None, **params): + def fit(self, X, **params): self._set_params(**params) return self - def transform(self, X, y=None, copy=True): + def transform(self, X, copy=True): if copy: X = X.copy() @@ -127,3 +127,106 @@ class Binarizer(BaseEstimator): return X + +class LabelBinarizer(BaseEstimator, TransformerMixin): + """Binarize labels in a one-vs-all fashion. + + Several regression and binary classification algorithms are available in the + scikit. A simple way to extend these algorithms to the multi-class + classification case is to use the so-called one-vs-all scheme. + + At learning time, this simply consists of learning one regressor or binary + classifier per class. In doing so, one needs to convert multi-class labels + to binary labels (belongs or does not belong to the class). LabelBinarizer + makes this process easy with the transform method. + + At prediction time, one assigns the class for which the corresponding model + gave the greatest confidence. LabelBinarizer makes this easy with the + inverse_transform method. + + Attributes + ---------- + classes_ : array of shape [n_class] + Holds the label for each class. + + Examples + -------- + >>> from scikits.learn import preprocessing + >>> clf = preprocessing.LabelBinarizer() + >>> clf.fit([1,2,6,4,2]) + LabelBinarizer() + >>> clf.classes_ + array([1, 2, 4, 6]) + >>> clf.transform([1, 6]) + array([[ 1., 0., 0., 0.], + [ 0., 0., 0., 1.]]) + """ + + def fit(self, y): + """Fit label binarizer + + Parameters + ---------- + y : numpy array of shape [n_samples] + Target values + + Returns + ------- + self : returns an instance of self. + """ + self.classes_ = np.unique(y) + return self + + def transform(self, y): + """Transform multi-class labels to binary labels + + The output of transform is sometimes referred to as the + 1-of-K coding scheme.
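For instance, reusing the estimator fitted in the class docstring above (so that clf.classes_ is array([1, 2, 4, 6])), the expected indicator rows are:

>>> clf.transform([2, 4])
array([[ 0., 1., 0., 0.],
       [ 0., 0., 1., 0.]])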
+ + Parameters + ---------- + y : numpy array of shape [n_samples] + Target values + + Returns + ------- + Y : numpy array of shape [n_samples, n_classes] + """ + if len(self.classes_) == 2: + Y = np.zeros((len(y), 1)) + Y[y == self.classes_[1], 0] = 1 + return Y + + elif len(self.classes_) >= 2: + Y = np.zeros((len(y), len(self.classes_))) + for i, k in enumerate(self.classes_): + Y[y == k, i] = 1 + return Y + + else: + raise ValueError("Wrong number of classes: %d" % len(self.classes_)) + + def inverse_transform(self, Y): + """Transform binary labels back to multi-class labels + + Parameters + ---------- + Y : numpy array of shape [n_samples, n_classes] + Target values + + Returns + ------- + y : numpy array of shape [n_samples] + + Notes + ----- + In the case when the binary labels are fractional (probabilistic), + inverse_transform chooses the class with the greatest value. Typically, + this allows one to use the output of a linear model's decision_function + method directly as the input of inverse_transform. + """ + if len(Y.shape) == 1 or Y.shape[1] == 1: + y = np.array(Y.ravel() > 0, dtype=int) + else: + y = Y.argmax(axis=1) + return self.classes_[y] diff --git a/scikits/learn/preprocessing/tests/test_preprocessing.py b/scikits/learn/preprocessing/tests/test_preprocessing.py index 65b8ad7f31f0628689d9816cb6f57420955204de..df658d9642195dd1761709b8d48601175b493a07 100644 --- a/scikits/learn/preprocessing/tests/test_preprocessing.py +++ b/scikits/learn/preprocessing/tests/test_preprocessing.py @@ -6,15 +6,21 @@ from numpy.testing import assert_array_almost_equal, assert_array_equal, \ assert_almost_equal, assert_equal from scikits.learn.preprocessing import Scaler, scale, Normalizer, \ - LengthNormalizer, Binarizer + LengthNormalizer, Binarizer, \ + LabelBinarizer from scikits.learn.preprocessing.sparse import Normalizer as SparseNormalizer from scikits.learn.preprocessing.sparse import LengthNormalizer as \ SparseLengthNormalizer from scikits.learn.preprocessing.sparse import Binarizer as SparseBinarizer +from scikits.learn import datasets +from scikits.learn.linear_model.stochastic_gradient import SGDClassifier + np.random.seed(0) +iris = datasets.load_iris() + def toarray(a): if hasattr(a, "toarray"): a = a.toarray() @@ -123,3 +129,36 @@ def test_binarizer(): assert_equal(np.sum(X_bin==0), 2) assert_equal(np.sum(X_bin==1), 4) +def test_label_binarizer(): + lb = LabelBinarizer() + + # two-class case + inp = np.array([0, 1, 1, 0]) + expected = np.array([[0, 1, 1, 0]]).T + got = lb.fit_transform(inp) + assert_array_equal(expected, got) + assert_array_equal(lb.inverse_transform(got), inp) + + # multi-class case + inp = np.array([3, 2, 1, 2, 0]) + expected = np.array([[0, 0, 0, 1], + [0, 0, 1, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [1, 0, 0, 0]]) + got = lb.fit_transform(inp) + assert_array_equal(expected, got) + assert_array_equal(lb.inverse_transform(got), inp) + +def test_label_binarizer_iris(): + lb = LabelBinarizer() + Y = lb.fit_transform(iris.target) + clfs = [SGDClassifier().fit(iris.data, Y[:, k]) + for k in range(len(lb.classes_))] + Y_pred = np.array([clf.decision_function(iris.data) for clf in clfs]).T + y_pred = lb.inverse_transform(Y_pred) + accuracy = np.mean(iris.target == y_pred) + y_pred2 = SGDClassifier().fit(iris.data, iris.target).predict(iris.data) + accuracy2 = np.mean(iris.target == y_pred2) + assert_almost_equal(accuracy, accuracy2) + diff --git a/scikits/learn/setup.py b/scikits/learn/setup.py index b2e940a486a73e66103b892ca5c98c39f2016ef2..2a903f27a20bc5c9b9b1ee5e0062a120e9c24f79 100644 --- a/scikits/learn/setup.py
+++ b/scikits/learn/setup.py @@ -1,7 +1,6 @@ from os.path import join import warnings import numpy -import sys def configuration(parent_package='', top_path=None): @@ -36,12 +35,7 @@ def configuration(parent_package='', top_path=None): ('NO_ATLAS_INFO', 1) in blas_info.get('define_macros', [])): config.add_library('cblas', sources=[join('src', 'cblas', '*.c')]) - cblas_libs = ['cblas'] - blas_info.pop('libraries', None) warnings.warn(BlasNotFoundError.__doc__) - else: - cblas_libs = blas_info.pop('libraries', []) - config.add_extension('ball_tree', sources=[join('src', 'BallTree.cpp')], diff --git a/scikits/learn/src/BallTree.cpp b/scikits/learn/src/BallTree.cpp index e63c0d785ac6c2d8d6d2b24d9cef67ee1a0dd060..f98cdfa6684d4ab9e89b56fd412ac9326b7bb8e4 100644 --- a/scikits/learn/src/BallTree.cpp +++ b/scikits/learn/src/BallTree.cpp @@ -243,31 +243,6 @@ BallTree_query(BallTreeObject *self, PyObject *args, PyObject *kwds){ } } - //if only one neighbor is requested, then resize the neighbors array - if(k==1){ - PyArray_Dims dims; - dims.ptr = PyArray_DIMS(arr); - dims.len = PyArray_NDIM(arr)-1; - - //PyArray_Resize returns None - this needs to be picked - // up and dereferenced. - PyObject *NoneObj = PyArray_Resize( (PyArrayObject*)nbrs, &dims, - 0, NPY_ANYORDER ); - if (NoneObj == NULL){ - goto fail; - } - Py_DECREF(NoneObj); - - if(return_distance){ - NoneObj = PyArray_Resize( (PyArrayObject*)dist, &dims, - 0, NPY_ANYORDER ); - if (NoneObj == NULL){ - goto fail; - } - Py_DECREF(NoneObj); - } - } - if(return_distance){ Py_DECREF(arr_iter); Py_DECREF(nbrs_iter); @@ -737,22 +712,6 @@ BallTree_knn_brute(PyObject *self, PyObject *args, PyObject *kwds){ for(int i=0;i<N;i++) delete Points[i]; - //if only one neighbor is requested, then resize the neighbors array - if(k==1){ - PyArray_Dims dims; - dims.ptr = PyArray_DIMS(arr2); - dims.len = PyArray_NDIM(arr2)-1; - - //PyArray_Resize returns None - this needs to be picked - // up and dereferenced. - PyObject *NoneObj = PyArray_Resize( (PyArrayObject*)nbrs, &dims, - 0, NPY_ANYORDER ); - if (NoneObj == NULL){ - goto fail; - } - Py_DECREF(NoneObj); - } - return nbrs; fail: diff --git a/scikits/learn/svm/base.py b/scikits/learn/svm/base.py index 1a4a8f348a0b2aeb339f182d81855031a801f2bc..3be7947c743688e23d5b103bcfbc0e0d79ef3a8a 100644 --- a/scikits/learn/svm/base.py +++ b/scikits/learn/svm/base.py @@ -3,7 +3,8 @@ import numpy as np from ._libsvm import libsvm_train, libsvm_predict, libsvm_predict_proba, \ libsvm_decision_function, set_verbosity_wrap from . 
import _liblinear -from ..base import BaseEstimator, RegressorMixin, ClassifierMixin +from ..base import BaseEstimator + def _get_class_weight(class_weight, y): """ @@ -12,7 +13,7 @@ def _get_class_weight(class_weight, y): if class_weight == 'auto': uy = np.unique(y) weight_label = np.asarray(uy, dtype=np.int32, order='C') - weight = np.array([1.0 / np.sum(y==i) for i in uy], + weight = np.array([1.0 / np.sum(y == i) for i in uy], dtype=np.float64, order='C') weight *= uy.shape[0] / np.sum(weight) else: @@ -38,14 +39,13 @@ class BaseLibSVM(BaseEstimator): def __init__(self, impl, kernel, degree, gamma, coef0, cache_size, eps, C, nu, p, shrinking, probability): - assert impl in self._svm_types, \ - "impl should be one of %s, %s was given" % ( - self._svm_types, impl) + if impl not in self._svm_types: + raise ValueError("impl should be one of %s, %s was given" % ( + self._svm_types, impl)) - assert kernel in self._kernel_types or \ - hasattr(kernel, '__call__'), \ - "kernel should be one of %s or a callable, " \ - "%s was given." % ( self._kernel_types, kernel) + if not (kernel in self._kernel_types or hasattr(kernel, '__call__')): + raise ValueError("kernel should be one of %s or a callable, " \ + "%s was given." % (self._kernel_types, kernel)) self.kernel = kernel self.impl = impl @@ -75,7 +75,6 @@ class BaseLibSVM(BaseEstimator): _X = X return kernel_type, _X - def fit(self, X, y, class_weight={}, sample_weight=[], **params): """ Fit the SVM model according to the given training data and @@ -123,7 +122,7 @@ class BaseLibSVM(BaseEstimator): self.class_weight, self.class_weight_label = \ _get_class_weight(class_weight, y) - + # check dimensions solver_type = self._svm_types.index(self.impl) if solver_type != 2 and _X.shape[0] != y.shape[0]: @@ -131,14 +130,19 @@ "X has %s samples, but y has %s." % \ (_X.shape[0], y.shape[0])) + if self.kernel == "precomputed" and X.shape[0] != X.shape[1]: + raise ValueError("X.shape[0] should be equal to X.shape[1]") + if (kernel_type in [1, 2]) and (self.gamma == 0): # if custom gamma is not provided ... - self.gamma = 1.0/_X.shape[0] + self.gamma = 1.0 / _X.shape[0] + + self.shape_fit_ = X.shape self.support_, self.support_vectors_, self.n_support_, \ self.dual_coef_, self.intercept_, self.label_, self.probA_, \ self.probB_ = \ - libsvm_train( _X, y, solver_type, kernel_type, self.degree, + libsvm_train(_X, y, solver_type, kernel_type, self.degree, self.gamma, self.coef0, self.eps, self.C, self.nu, self.cache_size, self.p, self.class_weight_label, self.class_weight, @@ -147,30 +151,38 @@ return self - def predict(self, T): + def predict(self, X): """ This function does classification or regression on an array of - test vectors T. + test vectors X. For a classification model, the predicted class for each - sample in T is returned. For a regression model, the function - value of T calculated is returned. + sample in X is returned. For a regression model, the function + value calculated for X is returned. For a one-class model, +1 or -1 is returned.
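The precomputed-kernel shape checks added in this hunk pin down the Gram matrix conventions: fit expects a square (n_train, n_train) matrix, while predict expects one row per test sample and one column per training sample. A minimal sketch of those shapes, assuming the dense SVC accepts kernel='precomputed' as these checks imply (the data below are made up):

import numpy as np
from scikits.learn.svm import SVC

X_train = np.random.rand(20, 5)
X_test = np.random.rand(8, 5)
y_train = np.arange(20) % 2  # two arbitrary classes

# square Gram matrix at fit time: (n_train, n_train)
gram_train = np.dot(X_train, X_train.T)
clf = SVC(kernel='precomputed').fit(gram_train, y_train)

# rectangular Gram matrix at predict time: (n_test, n_train); any other
# width now raises ValueError instead of failing deeper in libsvm
gram_test = np.dot(X_test, X_train.T)
y_pred = clf.predict(gram_test)  # shape (8,)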
Parameters ---------- - T : array-like, shape = [n_samples, n_features] - + X : array-like, shape = [n_samples, n_features] Returns ------- C : array, shape = [n_samples] """ - T = np.atleast_2d(np.asanyarray(T, dtype=np.float64, order='C')) - kernel_type, T = self._get_kernel(T) - - return libsvm_predict (T, self.support_vectors_, + X = np.atleast_2d(np.asanyarray(X, dtype=np.float64, order='C')) + n_samples, n_features = X.shape + kernel_type, X = self._get_kernel(X) + + if self.kernel == "precomputed": + if X.shape[1] != self.shape_fit_[0]: + raise ValueError("X.shape[1] should be equal to the number of " + "samples at training time!") + elif n_features != self.shape_fit_[1]: + raise ValueError("X.shape[1] should be equal to the number of " + "features at training time!") + + return libsvm_predict(X, self.support_vectors_, self.dual_coef_, self.intercept_, self._svm_types.index(self.impl), kernel_type, self.degree, self.gamma, self.coef0, self.eps, @@ -250,7 +262,7 @@ class BaseLibSVM(BaseEstimator): def decision_function(self, T): """ - Calculate the distance of the samples in T to the separating hyperplane. + Calculate the distance of the samples T to the separating hyperplane. Parameters ---------- @@ -265,7 +277,7 @@ class BaseLibSVM(BaseEstimator): T = np.atleast_2d(np.asanyarray(T, dtype=np.float64, order='C')) kernel_type, T = self._get_kernel(T) - dec_func = libsvm_decision_function (T, self.support_vectors_, + dec_func = libsvm_decision_function(T, self.support_vectors_, self.dual_coef_, self.intercept_, self._svm_types.index(self.impl), kernel_type, self.degree, self.gamma, self.coef0, self.eps, @@ -276,7 +288,6 @@ class BaseLibSVM(BaseEstimator): self.support_, self.label_, self.probA_, self.probB_) - if self.impl != 'one_class': # libsvm has the convention of returning negative values for # rightmost labels, so we invert the sign since our label_ is @@ -298,24 +309,25 @@ class BaseLibLinear(BaseEstimator): """ _solver_type_dict = { - 'PL2_LLR_D0' : 0, # L2 penalty, logistic regression - 'PL2_LL2_D1' : 1, # L2 penalty, L2 loss, dual form - 'PL2_LL2_D0' : 2, # L2 penalty, L2 loss, primal form - 'PL2_LL1_D1' : 3, # L2 penalty, L1 Loss, dual form - 'MC_SVC' : 4, # Multi-class Support Vector Classification - 'PL1_LL2_D0' : 5, # L1 penalty, L2 Loss, primal form - 'PL1_LLR_D0' : 6, # L1 penalty, logistic regression - 'PL2_LLR_D1' : 7, # L2 penalty, logistic regression, dual form + 'PL2_LLR_D0' : 0, # L2 penalty, logistic regression + 'PL2_LL2_D1' : 1, # L2 penalty, L2 loss, dual form + 'PL2_LL2_D0' : 2, # L2 penalty, L2 loss, primal form + 'PL2_LL1_D1' : 3, # L2 penalty, L1 Loss, dual form + 'MC_SVC' : 4, # Multi-class Support Vector Classification + 'PL1_LL2_D0' : 5, # L1 penalty, L2 Loss, primal form + 'PL1_LLR_D0' : 6, # L1 penalty, logistic regression + 'PL2_LLR_D1' : 7, # L2 penalty, logistic regression, dual form } def __init__(self, penalty='l2', loss='l2', dual=True, eps=1e-4, C=1.0, - multi_class=False, fit_intercept=True): + multi_class=False, fit_intercept=True, intercept_scaling=1): self.penalty = penalty self.loss = loss self.dual = dual self.eps = eps self.C = C self.fit_intercept = fit_intercept + self.intercept_scaling = intercept_scaling self.multi_class = multi_class # Check that the arguments given are valid: @@ -328,14 +340,14 @@ class BaseLibLinear(BaseEstimator): if self.multi_class: solver_type = 'MC_SVC' else: - solver_type = "P%s_L%s_D%d" % ( + solver_type = "P%s_L%s_D%d" % ( self.penalty.upper(), self.loss.upper(), int(self.dual)) if not solver_type in 
self._solver_type_dict: raise ValueError('Not supported set of arguments: ' + solver_type) return self._solver_type_dict[solver_type] - def fit(self, X, y, class_weight={},**params): + def fit(self, X, y, class_weight={}, **params): """ Fit the model according to the given training data and parameters. @@ -386,44 +398,83 @@ class BaseLibLinear(BaseEstimator): X = np.asanyarray(X, dtype=np.float64, order='C') self._check_n_features(X) - return _liblinear.predict_wrap(X, self.raw_coef_, + coef = self.raw_coef_ + + return _liblinear.predict_wrap(X, coef, self._get_solver_type(), self.eps, self.C, self.class_weight_label, self.class_weight, self.label_, self._get_bias()) + def decision_function(self, X): + """ + Return the decision function of X according to the trained + model. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + + Returns + ------- + T : array-like, shape = [n_samples, n_class] + Returns the decision function of the sample for each class + in the model. + """ + X = np.atleast_2d(np.asanyarray(X, dtype=np.float64, order='C')) + self._check_n_features(X) + + dec_func = _liblinear.decision_function_wrap( + X, self.raw_coef_, self._get_solver_type(), self.eps, + self.C, self.class_weight_label, self.class_weight, + self.label_, self._get_bias()) + + if len(self.label_) <= 2: + # in the two-class case, the decision sign needs to be flipped + # due to liblinear's design + return -dec_func + else: + return dec_func + def _check_n_features(self, X): n_features = self.raw_coef_.shape[1] - if self.fit_intercept > 0: n_features -= 1 + if self.fit_intercept: + n_features -= 1 if X.shape[1] != n_features: raise ValueError("X.shape[1] should be %d, not %d." % (n_features, X.shape[1])) + @property def intercept_(self): - if self.fit_intercept > 0: - return self.raw_coef_[:,-1] + if self.fit_intercept: + ret = self.intercept_scaling * self.raw_coef_[:, -1] + if len(self.label_) <= 2: + ret *= -1 + return ret return 0.0 @property def coef_(self): - if self.fit_intercept > 0: - return self.raw_coef_[:,:-1] - return self.raw_coef_ + if self.fit_intercept: + ret = self.raw_coef_[:, : -1] + else: + ret = self.raw_coef_ + if len(self.label_) <= 2: + return -ret + else: + return ret def predict_proba(self, T): # only available for logistic regression raise NotImplementedError( 'liblinear does not provide this functionality') - def _get_bias(self): - """ - Due to some pecularities in libliner, parameter bias must be a - double indicating if the intercept should be computed: - positive for true, negative for false - """ - return int (self.fit_intercept) - .5 + if self.fit_intercept: + return self.intercept_scaling + else: + return -1.0 set_verbosity_wrap(0) diff --git a/scikits/learn/svm/liblinear.py b/scikits/learn/svm/liblinear.py index 7bc02de95d4a2c441c686b5d3059bbb5300f8df5..4232fc24bb5f7dfbfc05d78757ca2a8b8da7e46c 100644 --- a/scikits/learn/svm/liblinear.py +++ b/scikits/learn/svm/liblinear.py @@ -1,9 +1,10 @@ from ..base import ClassifierMixin +from ..linear_model.base import CoefSelectTransformerMixin from .base import BaseLibLinear -class LinearSVC(BaseLibLinear, ClassifierMixin): +class LinearSVC(BaseLibLinear, ClassifierMixin, CoefSelectTransformerMixin): """Linear Support Vector Classification. Similar to SVC with parameter kernel='linear', but uses internally @@ -34,6 +35,17 @@ class LinearSVC(BaseLibLinear, ClassifierMixin): perform multi-class SVM by Cramer and Singer. If active, options loss, penalty and dual will be ignored.
+ intercept_scaling : float, default: 1 + when self.fit_intercept is True, instance vector x becomes + [x, self.intercept_scaling], + i.e. a "synthetic" feature with constant value equal to + intercept_scaling is appended to the instance vector. + The intercept becomes intercept_scaling * synthetic feature weight. + Note! the synthetic feature weight is subject to l1/l2 regularization + like all other features. + To lessen the effect of regularization on the synthetic feature weight + (and therefore on the intercept), intercept_scaling has to be increased. + Attributes ---------- `coef_` : array, shape = [n_features] if n_classes == 2 else [n_classes, n_features] diff --git a/scikits/learn/svm/sparse/base.py b/scikits/learn/svm/sparse/base.py index d20b53aaff8d5493cf0b18ae1e68015f45934b88..62b3d3ed18a4f8ee137de5c94f4d9f6545ecb3cb 100644 --- a/scikits/learn/svm/sparse/base.py +++ b/scikits/learn/svm/sparse/base.py @@ -1,6 +1,5 @@ import numpy as np -from ...base import ClassifierMixin from ..base import BaseLibSVM, BaseLibLinear, _get_class_weight from ._libsvm_sparse import libsvm_sparse_train, \ @@ -8,9 +7,10 @@ from ._libsvm_sparse import libsvm_sparse_train, \ from .. import _liblinear + class SparseBaseLibSVM(BaseLibSVM): - _kernel_types = ['linear', 'poly', 'rbf', 'sigmoid'] + _kernel_types = ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'] _svm_types = ['c_svc', 'nu_svc', 'one_class', 'epsilon_svr', 'nu_svr'] def __init__(self, impl, kernel, degree, gamma, coef0, cache_size, @@ -22,7 +22,7 @@ class SparseBaseLibSVM(BaseLibSVM): assert kernel in self._kernel_types, \ "kernel should be one of %s, "\ - "%s was given." % ( self._kernel_types, kernel) + "%s was given." % (self._kernel_types, kernel) self.kernel = kernel self.impl = impl @@ -38,20 +38,19 @@ class SparseBaseLibSVM(BaseLibSVM): self.probability = probability # container for when we call fit - self._support_data = np.empty (0, dtype=np.float64, order='C') - self._support_indices = np.empty (0, dtype=np.int32, order='C') - self._support_indptr = np.empty (0, dtype=np.int32, order='C') + self._support_data = np.empty(0, dtype=np.float64, order='C') + self._support_indices = np.empty(0, dtype=np.int32, order='C') + self._support_indptr = np.empty(0, dtype=np.int32, order='C') # strictly speaking, dual_coef is not sparse (see Notes above) - self._dual_coef_data = np.empty (0, dtype=np.float64, order='C') - self._dual_coef_indices = np.empty (0, dtype=np.int32, order='C') - self._dual_coef_indptr = np.empty (0, dtype=np.int32, order='C') - self.intercept_ = np.empty (0, dtype=np.float64, order='C') + self._dual_coef_data = np.empty(0, dtype=np.float64, order='C') + self._dual_coef_indices = np.empty(0, dtype=np.int32, order='C') + self._dual_coef_indptr = np.empty(0, dtype=np.int32, order='C') + self.intercept_ = np.empty(0, dtype=np.float64, order='C') # only used in classification self.n_support = np.empty(0, dtype=np.int32, order='C') - def fit(self, X, y, class_weight={}, sample_weight=[], **params): """ Fit the SVM model according to the given training data and @@ -93,7 +92,7 @@ class SparseBaseLibSVM(BaseLibSVM): import scipy.sparse X = scipy.sparse.csr_matrix(X) X.data = np.asanyarray(X.data, dtype=np.float64, order='C') - y = np.asanyarray(y, dtype=np.float64, order='C') + y = np.asanyarray(y, dtype=np.float64, order='C') sample_weight = np.asanyarray(sample_weight, dtype=np.float64, order='C') @@ -105,9 +104,9 @@ class SparseBaseLibSVM(BaseLibSVM): if (kernel_type == 2) and (self.gamma == 0): # if custom gamma is not provided
... - self.gamma = 1.0/X.shape[0] + self.gamma = 1.0 / X.shape[0] - self.label_, self.probA_, self.probB_ = libsvm_sparse_train ( + self.label_, self.probA_, self.probB_ = libsvm_sparse_train( X.shape[1], X.data, X.indices, X.indptr, y, solver_type, kernel_type, self.degree, self.gamma, self.coef0, self.eps, self.C, self._support_data, @@ -129,7 +128,7 @@ class SparseBaseLibSVM(BaseLibSVM): self.support_vectors_ = scipy.sparse.csr_matrix((self._support_data, self._support_indices, self._support_indptr), - (n_SV, X.shape[1]) ) + (n_SV, X.shape[1])) self.dual_coef_ = scipy.sparse.csr_matrix((self._dual_coef_data, dual_coef_indices, @@ -138,7 +137,6 @@ ) return self - def predict(self, T): """ This function does classification or regression on an array of @@ -163,15 +161,15 @@ class SparseBaseLibSVM(BaseLibSVM): T.data = np.asanyarray(T.data, dtype=np.float64, order='C') kernel_type = self._kernel_types.index(self.kernel) - return libsvm_sparse_predict (T.data, T.indices, T.indptr, + return libsvm_sparse_predict(T.data, T.indices, T.indptr, self.support_vectors_.data, self.support_vectors_.indices, self.support_vectors_.indptr, self.dual_coef_.data, self.intercept_, self._svm_types.index(self.impl), kernel_type, self.degree, self.gamma, self.coef0, self.eps, - self.C, self.class_weight_label, self.class_weight, self.nu, - self.cache_size, self.p, self.shrinking, + self.C, self.class_weight_label, self.class_weight, + self.nu, self.cache_size, self.p, self.shrinking, self.probability, self.n_support, self.label_, self.probA_, self.probB_) @@ -209,8 +207,9 @@ class SparseBaseLibLinear(BaseLibLinear): _liblinear.csr_train_wrap(X.shape[1], X.data, X.indices, X.indptr, y, self._get_solver_type(), - self.eps, self._get_bias(), self.C, self.class_weight_label, - self.class_weight) + self.eps, self._get_bias(), self.C, + self.class_weight_label, self.class_weight) + return self def predict(self, X): @@ -239,5 +238,37 @@ class SparseBaseLibLinear(BaseLibLinear): self.class_weight, self.label_, self._get_bias()) + def decision_function(self, X): + """ + Return the decision function of X according to the trained + model. + + Parameters + ---------- + X : sparse matrix, shape = [n_samples, n_features] + + Returns + ------- + T : array-like, shape = [n_samples, n_class] + Returns the decision function of the sample for each class + in the model. + """ + import scipy.sparse + X = scipy.sparse.csr_matrix(X) + self._check_n_features(X) + X.data = np.asanyarray(X.data, dtype=np.float64, order='C') + + dec_func = _liblinear.csr_decision_function_wrap( + X.shape[1], X.data, X.indices, X.indptr, self.raw_coef_, + self._get_solver_type(), self.eps, self.C, + self.class_weight_label, self.class_weight, self.label_, + self._get_bias()) + + if len(self.label_) <= 2: + # in the two-class case, the decision sign needs to be flipped + # due to liblinear's design + return -dec_func + else: + return dec_func set_verbosity_wrap(0) diff --git a/scikits/learn/svm/sparse/liblinear.py b/scikits/learn/svm/sparse/liblinear.py index acac283d77e5b50d331fc8f969c167b8d82bf283..d23f2ef350ee08b2b1015205d5dff429d80130d3 100644 --- a/scikits/learn/svm/sparse/liblinear.py +++ b/scikits/learn/svm/sparse/liblinear.py @@ -2,12 +2,12 @@ import numpy as np from ...base import ClassifierMixin +from ...linear_model.sparse.base import CoefSelectTransformerMixin from .base import SparseBaseLibLinear from ..
import _liblinear -from scipy import sparse - -class LinearSVC(SparseBaseLibLinear, ClassifierMixin): +class LinearSVC(SparseBaseLibLinear, ClassifierMixin, + CoefSelectTransformerMixin): """ Linear Support Vector Classification, Sparse Version @@ -32,6 +33,17 @@ class LinearSVC(SparseBaseLibLinear, ClassifierMixin): Select the algorithm to either solve the dual or primal optimization problem. + intercept_scaling : float, default: 1 + when self.fit_intercept is True, instance vector x becomes + [x, self.intercept_scaling], + i.e. a "synthetic" feature with constant value equal to + intercept_scaling is appended to the instance vector. + The intercept becomes intercept_scaling * synthetic feature weight. + Note! the synthetic feature weight is subject to l1/l2 regularization + like all other features. + To lessen the effect of regularization on the synthetic feature weight + (and therefore on the intercept), intercept_scaling has to be increased. + Attributes ---------- `coef_` : array, shape = [n_features] if n_classes == 2 else [n_classes, n_features] diff --git a/scikits/learn/svm/sparse/libsvm.py b/scikits/learn/svm/sparse/libsvm.py index 8592e4267311a965fca997d9604766c04b645d18..8c19f99f776d23676480496bf752bb7aa9c9c851 100644 --- a/scikits/learn/svm/sparse/libsvm.py +++ b/scikits/learn/svm/sparse/libsvm.py @@ -1,5 +1,5 @@ -from ..base import ClassifierMixin, RegressorMixin +from ...base import ClassifierMixin, RegressorMixin from .base import SparseBaseLibSVM diff --git a/scikits/learn/svm/src/liblinear/_liblinear.c b/scikits/learn/svm/src/liblinear/_liblinear.c index 483ffc9a441f4181ddb6cc8aa4e4300574189160..53e0ebc193eed7997d87419907cccb57cbf6ad33 100644 --- a/scikits/learn/svm/src/liblinear/_liblinear.c +++ b/scikits/learn/svm/src/liblinear/_liblinear.c @@ -1,4 +1,4 @@ -/* Generated by Cython 0.12.1 on Fri Oct 22 11:47:09 2010 */ +/* Generated by Cython 0.12.1 on Mon Feb 21 09:17:21 2011 */ #define PY_SSIZE_T_CLEAN #include "Python.h" @@ -658,13 +658,16 @@ static char __pyx_k_5[] = "unknown dtype code in numpy.pxd (%d)"; static char __pyx_k_6[] = "Format string allocated too short, see comment in numpy.pxd"; static char __pyx_k_7[] = "Format string allocated too short."; static char __pyx_k_8[] = "\nWrapper for liblinear\n\nAuthor: fabian.pedregosa@inria.fr\n"; -static char __pyx_k_9[] = "train_wrap (line 51)"; -static char __pyx_k_10[] = "csr_train_wrap (line 102)"; -static char __pyx_k_11[] = "csr_predict_wrap (line 185)"; -static char __pyx_k_12[] = "predict_prob_wrap (line 225)"; -static char __pyx_k_13[] = "csr_predict_prob (line 279)"; +static char __pyx_k_9[] = "train_wrap (line 59)"; +static char __pyx_k_10[] = "csr_train_wrap (line 111)"; +static char __pyx_k_11[] = "csr_decision_function_wrap (line 202)"; +static char __pyx_k_12[] = "csr_decision_function_wrap"; +static char __pyx_k_13[] = "csr_predict_wrap (line 272)"; +static char __pyx_k_14[] = "predict_prob_wrap (line 312)"; +static char __pyx_k_15[] = "csr_predict_prob (line 366)"; static char __pyx_k__B[] = "B"; static char __pyx_k__C[] = "C"; +static char __pyx_k__F[] = "F"; static char __pyx_k__H[] = "H"; static char __pyx_k__I[] = "I"; static char __pyx_k__L[] = "L"; @@ -700,6 +703,7 @@ static char __pyx_k__int32[] = "int32"; static char __pyx_k__label[] = "label"; static char __pyx_k__names[] = "names"; static char __pyx_k__numpy[] = "numpy"; +static char __pyx_k__order[] = "order"; static char __pyx_k__range[] = "range"; static char __pyx_k__shape[] = "shape"; static char __pyx_k__fields[] = "fields"; @@ -734,8 +738,10
@@ static char __pyx_k__predict_prob_wrap[] = "predict_prob_wrap"; static PyObject *__pyx_kp_s_1; static PyObject *__pyx_kp_u_10; static PyObject *__pyx_kp_u_11; -static PyObject *__pyx_kp_u_12; +static PyObject *__pyx_n_s_12; static PyObject *__pyx_kp_u_13; +static PyObject *__pyx_kp_u_14; +static PyObject *__pyx_kp_u_15; static PyObject *__pyx_kp_u_2; static PyObject *__pyx_kp_u_3; static PyObject *__pyx_kp_u_4; @@ -744,6 +750,7 @@ static PyObject *__pyx_kp_u_6; static PyObject *__pyx_kp_u_7; static PyObject *__pyx_kp_u_9; static PyObject *__pyx_n_s__C; +static PyObject *__pyx_n_s__F; static PyObject *__pyx_n_s__MemoryError; static PyObject *__pyx_n_s__RuntimeError; static PyObject *__pyx_n_s__T; @@ -783,6 +790,7 @@ static PyObject *__pyx_n_s__ndim; static PyObject *__pyx_n_s__np; static PyObject *__pyx_n_s__numpy; static PyObject *__pyx_n_s__obj; +static PyObject *__pyx_n_s__order; static PyObject *__pyx_n_s__predict_prob_wrap; static PyObject *__pyx_n_s__range; static PyObject *__pyx_n_s__readonly; @@ -795,9 +803,10 @@ static PyObject *__pyx_n_s__type_num; static PyObject *__pyx_n_s__weight; static PyObject *__pyx_n_s__weight_label; static PyObject *__pyx_int_1; +static PyObject *__pyx_int_2; static PyObject *__pyx_int_15; -/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":51 +/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":59 * * * def train_wrap ( np.ndarray[np.float64_t, ndim=2, mode='c'] X, # <<<<<<<<<<<<<< @@ -852,9 +861,9 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject PyObject *__pyx_t_2 = NULL; PyObject *__pyx_t_3 = NULL; PyObject *__pyx_t_4 = NULL; - PyArrayObject *__pyx_t_5 = NULL; - int __pyx_t_6; - PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_5 = NULL; + PyArrayObject *__pyx_t_6 = NULL; + int __pyx_t_7; PyObject *__pyx_t_8 = NULL; PyObject *__pyx_t_9 = NULL; PyObject *__pyx_t_10 = NULL; @@ -887,54 +896,54 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__Y); if (likely(values[1])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__solver_type); if (likely(values[2])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__eps); if (likely(values[3])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__bias); if (likely(values[4])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + 
__Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__C); if (likely(values[5])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight_label); if (likely(values[6])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 7: values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight); if (likely(values[7])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "train_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "train_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } __pyx_v_X = ((PyArrayObject *)values[0]); __pyx_v_Y = ((PyArrayObject *)values[1]); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_bias = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(values[5]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(values[5]); if (unlikely((__pyx_v_C == 
(double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)values[6]); __pyx_v_weight = ((PyArrayObject *)values[7]); } else if (PyTuple_GET_SIZE(__pyx_args) != 8) { @@ -942,16 +951,16 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject } else { __pyx_v_X = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 0)); __pyx_v_Y = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 1)); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 3)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 4)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 5)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 3)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 4)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 5)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 6)); __pyx_v_weight = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 7)); } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_wrap", 1, 8, 8, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_liblinear.train_wrap"); return NULL; @@ -968,36 +977,36 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject __pyx_bstruct_Y.buf = NULL; __pyx_bstruct_weight_label.buf = NULL; __pyx_bstruct_weight.buf = NULL; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_X), __pyx_ptype_5numpy_ndarray, 1, "X", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_Y), __pyx_ptype_5numpy_ndarray, 1, "Y", 0))) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_X), __pyx_ptype_5numpy_ndarray, 1, "X", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_Y), __pyx_ptype_5numpy_ndarray, 1, "Y", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_X, (PyObject*)__pyx_v_X, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_X, (PyObject*)__pyx_v_X, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_X = __pyx_bstruct_X.strides[0]; __pyx_bstride_1_X = __pyx_bstruct_X.strides[1]; __pyx_bshape_0_X = __pyx_bstruct_X.shape[0]; __pyx_bshape_1_X = __pyx_bstruct_X.shape[1]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_Y, (PyObject*)__pyx_v_Y, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_Y, (PyObject*)__pyx_v_Y, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_Y = __pyx_bstruct_Y.strides[0]; __pyx_bshape_0_Y = __pyx_bstruct_Y.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight_label = 
__pyx_bstruct_weight_label.strides[0]; __pyx_bshape_0_weight_label = __pyx_bstruct_weight_label.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight = __pyx_bstruct_weight.strides[0]; __pyx_bshape_0_weight = __pyx_bstruct_weight.shape[0]; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":65 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":73 * cdef int len_w * * problem = set_problem(X.data, Y.data, X.shape, bias) # <<<<<<<<<<<<<< @@ -1006,7 +1015,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject */ __pyx_v_problem = set_problem(__pyx_v_X->data, __pyx_v_Y->data, __pyx_v_X->dimensions, __pyx_v_bias); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":67 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":75 * problem = set_problem(X.data, Y.data, X.shape, bias) * * param = set_parameter(solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) # <<<<<<<<<<<<<< @@ -1015,7 +1024,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject */ __pyx_v_param = set_parameter(__pyx_v_solver_type, __pyx_v_eps, __pyx_v_C, (__pyx_v_weight->dimensions[0]), __pyx_v_weight_label->data, __pyx_v_weight->data); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":69 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":77 * param = set_parameter(solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) * * error_msg = check_parameter(problem, param) # <<<<<<<<<<<<<< @@ -1024,7 +1033,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject */ __pyx_v_error_msg = check_parameter(__pyx_v_problem, __pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":70 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":78 * * error_msg = check_parameter(problem, param) * if error_msg: # <<<<<<<<<<<<<< @@ -1034,7 +1043,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject __pyx_t_1 = (__pyx_v_error_msg != 0); if (__pyx_t_1) { - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":71 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":79 * error_msg = check_parameter(problem, param) * if error_msg: * free_problem(problem) # <<<<<<<<<<<<<< @@ -1043,7 +1052,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject */ free_problem(__pyx_v_problem); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":72 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":80 * if error_msg: * free_problem(problem) * free_parameter(param) # <<<<<<<<<<<<<< @@ -1052,50 +1061,50 @@ static PyObject 
*__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject */ free_parameter(__pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":73 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":81 * free_problem(problem) * free_parameter(param) * raise ValueError(error_msg) # <<<<<<<<<<<<<< * * # early return */ - __pyx_t_2 = __Pyx_PyBytes_FromString(__pyx_v_error_msg); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBytes_FromString(__pyx_v_error_msg); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_2)); __Pyx_GIVEREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_builtin_ValueError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_builtin_ValueError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_Raise(__pyx_t_2, 0, 0); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L6; } __pyx_L6:; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":76 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":84 * * # early return * model = train(problem, param) # <<<<<<<<<<<<<< * - * cdef np.ndarray[np.float64_t, ndim=2, mode='c'] w + * # coef matrix holder created as fortran since that's what's used in liblinear */ __pyx_v_model = train(__pyx_v_problem, __pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":79 - * - * cdef np.ndarray[np.float64_t, ndim=2, mode='c'] w + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":88 + * # coef matrix holder created as fortran since that's what's used in liblinear + * cdef np.ndarray[np.float64_t, ndim=2, mode='fortran'] w * cdef int nr_class = get_nr_class(model) # <<<<<<<<<<<<<< * cdef int nr_feature = get_nr_feature(model) * if bias > 0: nr_feature = nr_feature + 1 */ __pyx_v_nr_class = get_nr_class(__pyx_v_model); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":80 - * cdef np.ndarray[np.float64_t, ndim=2, mode='c'] w + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":89 + * cdef np.ndarray[np.float64_t, ndim=2, mode='fortran'] w * cdef int nr_class = get_nr_class(model) * cdef int nr_feature = get_nr_feature(model) # <<<<<<<<<<<<<< * if bias > 0: nr_feature = nr_feature + 1 @@ -1103,12 +1112,12 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject */ 
__pyx_v_nr_feature = get_nr_feature(__pyx_v_model); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":81 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":90 * cdef int nr_class = get_nr_class(model) * cdef int nr_feature = get_nr_feature(model) * if bias > 0: nr_feature = nr_feature + 1 # <<<<<<<<<<<<<< * if nr_class == 2: - * w = np.empty((1, nr_feature)) + * w = np.empty((1, nr_feature),order='F') */ __pyx_t_1 = (__pyx_v_bias > 0); if (__pyx_t_1) { @@ -1117,31 +1126,31 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject } __pyx_L7:; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":82 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":91 * cdef int nr_feature = get_nr_feature(model) * if bias > 0: nr_feature = nr_feature + 1 * if nr_class == 2: # <<<<<<<<<<<<<< - * w = np.empty((1, nr_feature)) + * w = np.empty((1, nr_feature),order='F') * copy_w(w.data, model, nr_feature) */ __pyx_t_1 = (__pyx_v_nr_class == 2); if (__pyx_t_1) { - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":83 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":92 * if bias > 0: nr_feature = nr_feature + 1 * if nr_class == 2: - * w = np.empty((1, nr_feature)) # <<<<<<<<<<<<<< + * w = np.empty((1, nr_feature),order='F') # <<<<<<<<<<<<<< * copy_w(w.data, model, nr_feature) * else: */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__empty); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__empty); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyInt_FromLong(__pyx_v_nr_feature); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_nr_feature); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(__pyx_int_1); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_int_1); @@ -1149,42 +1158,46 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno 
= __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); + __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_4)); + if (PyDict_SetItem(__pyx_t_4, ((PyObject *)__pyx_n_s__order), ((PyObject *)__pyx_n_s__F)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_3, __pyx_t_2, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_5 = ((PyArrayObject *)__pyx_t_4); + __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); { __Pyx_BufFmt_StackElem __pyx_stack[1]; __Pyx_SafeReleaseBuffer(&__pyx_bstruct_w); - __pyx_t_6 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack); - if (unlikely(__pyx_t_6 < 0)) { - PyErr_Fetch(&__pyx_t_7, &__pyx_t_8, &__pyx_t_9); - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_v_w, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) { - Py_XDECREF(__pyx_t_7); Py_XDECREF(__pyx_t_8); Py_XDECREF(__pyx_t_9); + __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack); + if (unlikely(__pyx_t_7 < 0)) { + PyErr_Fetch(&__pyx_t_8, &__pyx_t_9, &__pyx_t_10); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_v_w, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_8); Py_XDECREF(__pyx_t_9); Py_XDECREF(__pyx_t_10); __Pyx_RaiseBufferFallbackError(); } else { - PyErr_Restore(__pyx_t_7, __pyx_t_8, __pyx_t_9); + PyErr_Restore(__pyx_t_8, __pyx_t_9, __pyx_t_10); } } __pyx_bstride_0_w = __pyx_bstruct_w.strides[0]; __pyx_bstride_1_w = __pyx_bstruct_w.strides[1]; __pyx_bshape_0_w = __pyx_bstruct_w.shape[0]; __pyx_bshape_1_w = __pyx_bstruct_w.shape[1]; - if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_t_5 = 0; + __pyx_t_6 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_w)); - __pyx_v_w = ((PyArrayObject *)__pyx_t_4); - __pyx_t_4 = 0; + __pyx_v_w = ((PyArrayObject *)__pyx_t_5); + __pyx_t_5 = 0; - /* 
"/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":84 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":93 * if nr_class == 2: - * w = np.empty((1, nr_feature)) + * w = np.empty((1, nr_feature),order='F') * copy_w(w.data, model, nr_feature) # <<<<<<<<<<<<<< * else: * len_w = (nr_class) * nr_feature @@ -1194,75 +1207,79 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject } /*else*/ { - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":86 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":95 * copy_w(w.data, model, nr_feature) * else: * len_w = (nr_class) * nr_feature # <<<<<<<<<<<<<< - * w = np.empty((nr_class, nr_feature)) + * w = np.empty((nr_class, nr_feature),order='F') * copy_w(w.data, model, len_w) */ __pyx_v_len_w = (__pyx_v_nr_class * __pyx_v_nr_feature); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":87 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":96 * else: * len_w = (nr_class) * nr_feature - * w = np.empty((nr_class, nr_feature)) # <<<<<<<<<<<<<< + * w = np.empty((nr_class, nr_feature),order='F') # <<<<<<<<<<<<<< * copy_w(w.data, model, len_w) * */ - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_4 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__empty); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__empty); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_2 = PyInt_FromLong(__pyx_v_nr_feature); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyInt_FromLong(__pyx_v_nr_feature); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = PyTuple_New(2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_10, 1, __pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); + 
__Pyx_GIVEREF(__pyx_t_2); + __pyx_t_5 = 0; + __pyx_t_2 = 0; + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_4 = 0; __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_10); - __Pyx_GIVEREF(__pyx_t_10); - __pyx_t_10 = 0; - __pyx_t_10 = PyObject_Call(__pyx_t_2, __pyx_t_3, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_3)); + if (PyDict_SetItem(__pyx_t_3, ((PyObject *)__pyx_n_s__order), ((PyObject *)__pyx_n_s__F)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_4, __pyx_t_2, ((PyObject *)__pyx_t_3)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_5 = ((PyArrayObject *)__pyx_t_10); + __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); { __Pyx_BufFmt_StackElem __pyx_stack[1]; __Pyx_SafeReleaseBuffer(&__pyx_bstruct_w); - __pyx_t_6 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack); - if (unlikely(__pyx_t_6 < 0)) { - PyErr_Fetch(&__pyx_t_9, &__pyx_t_8, &__pyx_t_7); - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_v_w, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) { - Py_XDECREF(__pyx_t_9); Py_XDECREF(__pyx_t_8); Py_XDECREF(__pyx_t_7); + __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack); + if (unlikely(__pyx_t_7 < 0)) { + PyErr_Fetch(&__pyx_t_10, &__pyx_t_9, &__pyx_t_8); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_v_w, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_10); Py_XDECREF(__pyx_t_9); Py_XDECREF(__pyx_t_8); __Pyx_RaiseBufferFallbackError(); } else { - PyErr_Restore(__pyx_t_9, __pyx_t_8, __pyx_t_7); + PyErr_Restore(__pyx_t_10, __pyx_t_9, __pyx_t_8); } } __pyx_bstride_0_w = __pyx_bstruct_w.strides[0]; __pyx_bstride_1_w = __pyx_bstruct_w.strides[1]; __pyx_bshape_0_w = 
__pyx_bstruct_w.shape[0]; __pyx_bshape_1_w = __pyx_bstruct_w.shape[1]; - if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_t_5 = 0; + __pyx_t_6 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_w)); - __pyx_v_w = ((PyArrayObject *)__pyx_t_10); - __pyx_t_10 = 0; + __pyx_v_w = ((PyArrayObject *)__pyx_t_5); + __pyx_t_5 = 0; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":88 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":97 * len_w = (nr_class) * nr_feature - * w = np.empty((nr_class, nr_feature)) + * w = np.empty((nr_class, nr_feature),order='F') * copy_w(w.data, model, len_w) # <<<<<<<<<<<<<< * * cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label @@ -1271,64 +1288,64 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject } __pyx_L8:; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":91 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":100 * * cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label * label = np.empty(nr_class, dtype=np.int32) # <<<<<<<<<<<<<< * copy_label(label.data, model, nr_class) * */ - __pyx_t_10 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_10, __pyx_n_s__empty); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_3 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__empty); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_10); - __Pyx_GIVEREF(__pyx_t_10); - __pyx_t_10 = 0; - __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_10)); - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + 
__pyx_t_5 = 0; + __pyx_t_5 = PyDict_New(); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_5)); + __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__int32); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__int32); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (PyDict_SetItem(__pyx_t_10, ((PyObject *)__pyx_n_s__dtype), __pyx_t_11) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_5, ((PyObject *)__pyx_n_s__dtype), __pyx_t_11) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_11 = PyEval_CallObjectWithKeywords(__pyx_t_3, __pyx_t_2, ((PyObject *)__pyx_t_10)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyEval_CallObjectWithKeywords(__pyx_t_3, __pyx_t_2, ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(((PyObject *)__pyx_t_10)); __pyx_t_10 = 0; - if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_12 = ((PyArrayObject *)__pyx_t_11); { __Pyx_BufFmt_StackElem __pyx_stack[1]; __Pyx_SafeReleaseBuffer(&__pyx_bstruct_label); - __pyx_t_6 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_t_12, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack); - if (unlikely(__pyx_t_6 < 0)) { - PyErr_Fetch(&__pyx_t_7, &__pyx_t_8, &__pyx_t_9); + __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_t_12, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_7 < 0)) { + PyErr_Fetch(&__pyx_t_8, &__pyx_t_9, &__pyx_t_10); if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) { - Py_XDECREF(__pyx_t_7); Py_XDECREF(__pyx_t_8); Py_XDECREF(__pyx_t_9); + Py_XDECREF(__pyx_t_8); Py_XDECREF(__pyx_t_9); Py_XDECREF(__pyx_t_10); __Pyx_RaiseBufferFallbackError(); } else { - PyErr_Restore(__pyx_t_7, __pyx_t_8, __pyx_t_9); + PyErr_Restore(__pyx_t_8, __pyx_t_9, __pyx_t_10); } } __pyx_bstride_0_label = __pyx_bstruct_label.strides[0]; __pyx_bshape_0_label = 
__pyx_bstruct_label.shape[0]; - if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_12 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_label)); __pyx_v_label = ((PyArrayObject *)__pyx_t_11); __pyx_t_11 = 0; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":92 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":101 * cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label * label = np.empty(nr_class, dtype=np.int32) * copy_label(label.data, model, nr_class) # <<<<<<<<<<<<<< @@ -1337,7 +1354,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject */ copy_label(__pyx_v_label->data, __pyx_v_model, __pyx_v_nr_class); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":95 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":104 * * ### FREE * free_and_destroy_model(&model) # <<<<<<<<<<<<<< @@ -1346,7 +1363,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject */ free_and_destroy_model((&__pyx_v_model)); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":96 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":105 * ### FREE * free_and_destroy_model(&model) * free_problem(problem) # <<<<<<<<<<<<<< @@ -1355,7 +1372,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject */ free_problem(__pyx_v_problem); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":97 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":106 * free_and_destroy_model(&model) * free_problem(problem) * free_parameter(param) # <<<<<<<<<<<<<< @@ -1364,7 +1381,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject */ free_parameter(__pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":100 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":109 * # destroy_param(param) don't call this or it will destroy weight_label and weight * * return w, label # <<<<<<<<<<<<<< @@ -1372,7 +1389,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject * def csr_train_wrap ( int n_features, */ __Pyx_XDECREF(__pyx_r); - __pyx_t_11 = PyTuple_New(2); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyTuple_New(2); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_INCREF(((PyObject *)__pyx_v_w)); PyTuple_SET_ITEM(__pyx_t_11, 0, ((PyObject *)__pyx_v_w)); @@ -1390,7 +1407,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject __Pyx_XDECREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_10); + __Pyx_XDECREF(__pyx_t_5); __Pyx_XDECREF(__pyx_t_11); { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); @@ -1423,7 +1440,7 @@ static PyObject *__pyx_pf_10_liblinear_train_wrap(PyObject *__pyx_self, PyObject return __pyx_r; } -/* 
"/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":102 +/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":111 * return w, label * * def csr_train_wrap ( int n_features, # <<<<<<<<<<<<<< @@ -1485,9 +1502,9 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb PyObject *__pyx_t_2 = NULL; PyObject *__pyx_t_3 = NULL; PyObject *__pyx_t_4 = NULL; - PyArrayObject *__pyx_t_5 = NULL; - int __pyx_t_6; - PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_5 = NULL; + PyArrayObject *__pyx_t_6 = NULL; + int __pyx_t_7; PyObject *__pyx_t_8 = NULL; PyObject *__pyx_t_9 = NULL; PyObject *__pyx_t_10 = NULL; @@ -1523,95 +1540,95 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__X_values); if (likely(values[1])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__X_indices); if (likely(values[2])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__X_indptr); if (likely(values[3])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__Y); if (likely(values[4])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__solver_type); if (likely(values[5])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__eps); if (likely(values[6])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 7: values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__bias); if (likely(values[7])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 7); {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 8: values[8] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__C); if (likely(values[8])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 9: values[9] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight_label); if (likely(values[9])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 9); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 9); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 10: values[10] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight); if (likely(values[10])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 10); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, 10); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "csr_train_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "csr_train_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } - __pyx_v_n_features = __Pyx_PyInt_AsInt(values[0]); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_n_features = __Pyx_PyInt_AsInt(values[0]); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_X_values = ((PyArrayObject *)values[1]); __pyx_v_X_indices = ((PyArrayObject *)values[2]); __pyx_v_X_indptr = ((PyArrayObject *)values[3]); __pyx_v_Y = ((PyArrayObject *)values[4]); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[5]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(values[6]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_bias = __pyx_PyFloat_AsDouble(values[7]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(values[8]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = 
__Pyx_PyInt_AsInt(values[5]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(values[6]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(values[7]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(values[8]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)values[9]); __pyx_v_weight = ((PyArrayObject *)values[10]); } else if (PyTuple_GET_SIZE(__pyx_args) != 11) { goto __pyx_L5_argtuple_error; } else { - __pyx_v_n_features = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 0)); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_n_features = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 0)); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_X_values = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 1)); __pyx_v_X_indices = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 2)); __pyx_v_X_indptr = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 3)); __pyx_v_Y = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 4)); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 5)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 6)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 7)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 8)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 5)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 6)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 7)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 8)); if (unlikely((__pyx_v_C == (double)-1) && 
PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 9)); __pyx_v_weight = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 10)); } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_train_wrap", 1, 11, 11, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_liblinear.csr_train_wrap"); return NULL; @@ -1632,50 +1649,50 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb __pyx_bstruct_Y.buf = NULL; __pyx_bstruct_weight_label.buf = NULL; __pyx_bstruct_weight.buf = NULL; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_X_values), __pyx_ptype_5numpy_ndarray, 1, "X_values", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_X_indices), __pyx_ptype_5numpy_ndarray, 1, "X_indices", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_X_indptr), __pyx_ptype_5numpy_ndarray, 1, "X_indptr", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_Y), __pyx_ptype_5numpy_ndarray, 1, "Y", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_X_values), __pyx_ptype_5numpy_ndarray, 1, "X_values", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 112; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_X_indices), __pyx_ptype_5numpy_ndarray, 1, "X_indices", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_X_indptr), __pyx_ptype_5numpy_ndarray, 1, "X_indptr", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_Y), __pyx_ptype_5numpy_ndarray, 1, "Y", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;} { __Pyx_BufFmt_StackElem 
__pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_X_values, (PyObject*)__pyx_v_X_values, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_X_values, (PyObject*)__pyx_v_X_values, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_X_values = __pyx_bstruct_X_values.strides[0]; __pyx_bshape_0_X_values = __pyx_bstruct_X_values.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_X_indices, (PyObject*)__pyx_v_X_indices, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_X_indices, (PyObject*)__pyx_v_X_indices, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_X_indices = __pyx_bstruct_X_indices.strides[0]; __pyx_bshape_0_X_indices = __pyx_bstruct_X_indices.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_X_indptr, (PyObject*)__pyx_v_X_indptr, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_X_indptr, (PyObject*)__pyx_v_X_indptr, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_X_indptr = __pyx_bstruct_X_indptr.strides[0]; __pyx_bshape_0_X_indptr = __pyx_bstruct_X_indptr.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_Y, (PyObject*)__pyx_v_Y, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_Y, (PyObject*)__pyx_v_Y, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_Y = __pyx_bstruct_Y.strides[0]; __pyx_bshape_0_Y = __pyx_bstruct_Y.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight_label = __pyx_bstruct_weight_label.strides[0]; __pyx_bshape_0_weight_label = __pyx_bstruct_weight_label.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight = __pyx_bstruct_weight.strides[0]; __pyx_bshape_0_weight = __pyx_bstruct_weight.shape[0]; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":122 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":131 * * problem = csr_set_problem(X_values.data, X_indices.shape, * X_indices.data, X_indptr.shape, X_indptr.data, Y.data, n_features, bias) # <<<<<<<<<<<<<< @@ -1684,7 +1701,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb */ __pyx_v_problem = csr_set_problem(__pyx_v_X_values->data, __pyx_v_X_indices->dimensions, __pyx_v_X_indices->data, __pyx_v_X_indptr->dimensions, __pyx_v_X_indptr->data, __pyx_v_Y->data, __pyx_v_n_features, __pyx_v_bias); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":124 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":133 * X_indices.data, X_indptr.shape, X_indptr.data, Y.data, n_features, bias) * * param = set_parameter(solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) # <<<<<<<<<<<<<< @@ -1693,7 +1710,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb */ __pyx_v_param = set_parameter(__pyx_v_solver_type, __pyx_v_eps, __pyx_v_C, (__pyx_v_weight->dimensions[0]), __pyx_v_weight_label->data, __pyx_v_weight->data); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":126 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":135 * param = set_parameter(solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) * * error_msg = check_parameter(problem, param) # <<<<<<<<<<<<<< @@ -1702,7 +1719,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb */ __pyx_v_error_msg = check_parameter(__pyx_v_problem, __pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":127 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":136 * * error_msg = check_parameter(problem, param) * if error_msg: # <<<<<<<<<<<<<< @@ -1712,7 +1729,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb __pyx_t_1 = (__pyx_v_error_msg != 0); if (__pyx_t_1) { - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":128 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":137 * error_msg = check_parameter(problem, param) * if error_msg: * free_problem(problem) # <<<<<<<<<<<<<< @@ -1721,7 +1738,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb */ 
free_problem(__pyx_v_problem); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":129 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":138 * if error_msg: * free_problem(problem) * free_parameter(param) # <<<<<<<<<<<<<< @@ -1730,50 +1747,50 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb */ free_parameter(__pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":130 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":139 * free_problem(problem) * free_parameter(param) * raise ValueError(error_msg) # <<<<<<<<<<<<<< * * # early return */ - __pyx_t_2 = __Pyx_PyBytes_FromString(__pyx_v_error_msg); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBytes_FromString(__pyx_v_error_msg); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_2)); __Pyx_GIVEREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_builtin_ValueError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_builtin_ValueError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_Raise(__pyx_t_2, 0, 0); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L6; } __pyx_L6:; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":133 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":142 * * # early return * model = train(problem, param) # <<<<<<<<<<<<<< * - * cdef np.ndarray[np.float64_t, ndim=2, mode='c'] w + * # fortran order since that's what liblinear does */ __pyx_v_model = train(__pyx_v_problem, __pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":136 - * - * cdef np.ndarray[np.float64_t, ndim=2, mode='c'] w + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":146 + * # fortran order since that's what liblinear does + * cdef np.ndarray[np.float64_t, ndim=2, mode='fortran'] w * cdef int nr_class = get_nr_class(model) # <<<<<<<<<<<<<< * cdef int nr_feature = n_features * if bias > 0: nr_feature = nr_feature + 1 */ __pyx_v_nr_class = get_nr_class(__pyx_v_model); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":137 - * cdef np.ndarray[np.float64_t, ndim=2, mode='c'] w + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":147 + * cdef np.ndarray[np.float64_t, ndim=2, 
mode='fortran'] w * cdef int nr_class = get_nr_class(model) * cdef int nr_feature = n_features # <<<<<<<<<<<<<< * if bias > 0: nr_feature = nr_feature + 1 @@ -1781,12 +1798,12 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb */ __pyx_v_nr_feature = __pyx_v_n_features; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":138 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":148 * cdef int nr_class = get_nr_class(model) * cdef int nr_feature = n_features * if bias > 0: nr_feature = nr_feature + 1 # <<<<<<<<<<<<<< * if nr_class == 2: - * w = np.empty((1, nr_feature)) + * w = np.empty((1, nr_feature),order='F') */ __pyx_t_1 = (__pyx_v_bias > 0); if (__pyx_t_1) { @@ -1795,31 +1812,31 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb } __pyx_L7:; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":139 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":149 * cdef int nr_feature = n_features * if bias > 0: nr_feature = nr_feature + 1 * if nr_class == 2: # <<<<<<<<<<<<<< - * w = np.empty((1, nr_feature)) + * w = np.empty((1, nr_feature),order='F') * copy_w(w.data, model, nr_feature) */ __pyx_t_1 = (__pyx_v_nr_class == 2); if (__pyx_t_1) { - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":140 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":150 * if bias > 0: nr_feature = nr_feature + 1 * if nr_class == 2: - * w = np.empty((1, nr_feature)) # <<<<<<<<<<<<<< + * w = np.empty((1, nr_feature),order='F') # <<<<<<<<<<<<<< * copy_w(w.data, model, nr_feature) * else: */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__empty); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__empty); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyInt_FromLong(__pyx_v_nr_feature); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_nr_feature); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(__pyx_int_1); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_int_1); @@ -1827,292 +1844,1221 @@ static PyObject *__pyx_pf_10_liblinear_csr_train_wrap(PyObject *__pyx_self, PyOb PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = 
PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); + __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_4)); + if (PyDict_SetItem(__pyx_t_4, ((PyObject *)__pyx_n_s__order), ((PyObject *)__pyx_n_s__F)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_3, __pyx_t_2, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_5 = ((PyArrayObject *)__pyx_t_4); + __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); { __Pyx_BufFmt_StackElem __pyx_stack[1]; __Pyx_SafeReleaseBuffer(&__pyx_bstruct_w); - __pyx_t_6 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack); - if (unlikely(__pyx_t_6 < 0)) { - PyErr_Fetch(&__pyx_t_7, &__pyx_t_8, &__pyx_t_9); - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_v_w, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) { - Py_XDECREF(__pyx_t_7); Py_XDECREF(__pyx_t_8); Py_XDECREF(__pyx_t_9); + __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack); + if (unlikely(__pyx_t_7 < 0)) { + PyErr_Fetch(&__pyx_t_8, &__pyx_t_9, &__pyx_t_10); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_v_w, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_8); Py_XDECREF(__pyx_t_9); Py_XDECREF(__pyx_t_10); __Pyx_RaiseBufferFallbackError(); } else { - PyErr_Restore(__pyx_t_7, __pyx_t_8, __pyx_t_9); + PyErr_Restore(__pyx_t_8, __pyx_t_9, __pyx_t_10); } } __pyx_bstride_0_w = __pyx_bstruct_w.strides[0]; __pyx_bstride_1_w = __pyx_bstruct_w.strides[1]; __pyx_bshape_0_w = __pyx_bstruct_w.shape[0]; __pyx_bshape_1_w = __pyx_bstruct_w.shape[1]; - if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
150; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } + __pyx_t_6 = 0; + __Pyx_DECREF(((PyObject *)__pyx_v_w)); + __pyx_v_w = ((PyArrayObject *)__pyx_t_5); + __pyx_t_5 = 0; + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":151 + * if nr_class == 2: + * w = np.empty((1, nr_feature),order='F') + * copy_w(w.data, model, nr_feature) # <<<<<<<<<<<<<< + * else: + * len_w = (nr_class * nr_feature) + */ + copy_w(__pyx_v_w->data, __pyx_v_model, __pyx_v_nr_feature); + goto __pyx_L8; + } + /*else*/ { + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":153 + * copy_w(w.data, model, nr_feature) + * else: + * len_w = (nr_class * nr_feature) # <<<<<<<<<<<<<< + * w = np.empty((nr_class, nr_feature),order='F') + * copy_w(w.data, model, len_w) + */ + __pyx_v_len_w = (__pyx_v_nr_class * __pyx_v_nr_feature); + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":154 + * else: + * len_w = (nr_class * nr_feature) + * w = np.empty((nr_class, nr_feature),order='F') # <<<<<<<<<<<<<< + * copy_w(w.data, model, len_w) + * + */ + __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_4 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__empty); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_2 = PyInt_FromLong(__pyx_v_nr_feature); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); + __Pyx_GIVEREF(__pyx_t_2); __pyx_t_5 = 0; + __pyx_t_2 = 0; + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_3)); + if (PyDict_SetItem(__pyx_t_3, ((PyObject *)__pyx_n_s__order), ((PyObject *)__pyx_n_s__F)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_4, __pyx_t_2, ((PyObject *)__pyx_t_3)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} + __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_w); + __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack); + if (unlikely(__pyx_t_7 < 0)) { + PyErr_Fetch(&__pyx_t_10, &__pyx_t_9, &__pyx_t_8); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_v_w, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_10); Py_XDECREF(__pyx_t_9); Py_XDECREF(__pyx_t_8); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_10, __pyx_t_9, __pyx_t_8); + } + } + __pyx_bstride_0_w = __pyx_bstruct_w.strides[0]; __pyx_bstride_1_w = __pyx_bstruct_w.strides[1]; + __pyx_bshape_0_w = __pyx_bstruct_w.shape[0]; __pyx_bshape_1_w = __pyx_bstruct_w.shape[1]; + if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_t_6 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_w)); - __pyx_v_w = ((PyArrayObject *)__pyx_t_4); - __pyx_t_4 = 0; + __pyx_v_w = ((PyArrayObject *)__pyx_t_5); + __pyx_t_5 = 0; + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":155 + * len_w = (nr_class * nr_feature) + * w = np.empty((nr_class, nr_feature),order='F') + * copy_w(w.data, model, len_w) # <<<<<<<<<<<<<< + * + * cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label + */ + copy_w(__pyx_v_w->data, __pyx_v_model, __pyx_v_len_w); + } + __pyx_L8:; + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":158 + * + * cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label + * label = np.empty((nr_class), dtype=np.int32) # <<<<<<<<<<<<<< + * copy_label(label.data, model, nr_class) + * + */ + __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_3 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__empty); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_5); + __Pyx_GIVEREF(__pyx_t_5); + __pyx_t_5 = 0; + __pyx_t_5 = PyDict_New(); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_5)); + __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_11 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__int32); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (PyDict_SetItem(__pyx_t_5, 
((PyObject *)__pyx_n_s__dtype), __pyx_t_11) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + __pyx_t_11 = PyEval_CallObjectWithKeywords(__pyx_t_3, __pyx_t_2, ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = ((PyArrayObject *)__pyx_t_11); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_label); + __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_t_12, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_7 < 0)) { + PyErr_Fetch(&__pyx_t_8, &__pyx_t_9, &__pyx_t_10); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_8); Py_XDECREF(__pyx_t_9); Py_XDECREF(__pyx_t_10); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_8, __pyx_t_9, __pyx_t_10); + } + } + __pyx_bstride_0_label = __pyx_bstruct_label.strides[0]; + __pyx_bshape_0_label = __pyx_bstruct_label.shape[0]; + if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_t_12 = 0; + __Pyx_DECREF(((PyObject *)__pyx_v_label)); + __pyx_v_label = ((PyArrayObject *)__pyx_t_11); + __pyx_t_11 = 0; + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":159 + * cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label + * label = np.empty((nr_class), dtype=np.int32) + * copy_label(label.data, model, nr_class) # <<<<<<<<<<<<<< + * + * ### FREE + */ + copy_label(__pyx_v_label->data, __pyx_v_model, __pyx_v_nr_class); + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":162 + * + * ### FREE + * free_and_destroy_model(&model) # <<<<<<<<<<<<<< + * free_problem(problem) + * free_parameter(param) + */ + free_and_destroy_model((&__pyx_v_model)); + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":163 + * ### FREE + * free_and_destroy_model(&model) + * free_problem(problem) # <<<<<<<<<<<<<< + * free_parameter(param) + * # destroy_param(param) don't call this or it will destroy weight_label and weight + */ + free_problem(__pyx_v_problem); + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":164 + * free_and_destroy_model(&model) + * free_problem(problem) + * free_parameter(param) # <<<<<<<<<<<<<< + * # destroy_param(param) don't call this or it will destroy weight_label and weight + * + */ + free_parameter(__pyx_v_param); + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":167 + * # destroy_param(param) don't call this or it will destroy weight_label and weight + * + * return w, label # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_11 = PyTuple_New(2); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __Pyx_INCREF(((PyObject *)__pyx_v_w)); + PyTuple_SET_ITEM(__pyx_t_11, 0, ((PyObject *)__pyx_v_w)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_w)); + __Pyx_INCREF(((PyObject *)__pyx_v_label)); + PyTuple_SET_ITEM(__pyx_t_11, 1, ((PyObject *)__pyx_v_label)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_label)); + __pyx_r = __pyx_t_11; + __pyx_t_11 = 0; + goto __pyx_L0; + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_11); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight_label); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_indices); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_indptr); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_values); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_w); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_Y); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_label); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("_liblinear.csr_train_wrap"); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight_label); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_indices); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_indptr); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_values); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_w); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_Y); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_label); + __pyx_L2:; + __Pyx_DECREF((PyObject *)__pyx_v_w); + __Pyx_DECREF((PyObject *)__pyx_v_label); + __Pyx_DECREF((PyObject *)__pyx_v_X_values); + __Pyx_DECREF((PyObject *)__pyx_v_X_indices); + __Pyx_DECREF((PyObject *)__pyx_v_X_indptr); + __Pyx_DECREF((PyObject *)__pyx_v_Y); + __Pyx_DECREF((PyObject *)__pyx_v_weight_label); + __Pyx_DECREF((PyObject *)__pyx_v_weight); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":170 + * + * + * def decision_function_wrap( # <<<<<<<<<<<<<< + * np.ndarray[np.float64_t, ndim=2, mode='c'] T, + * np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_, + */ + +static PyObject *__pyx_pf_10_liblinear_decision_function_wrap(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyObject *__pyx_pf_10_liblinear_decision_function_wrap(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyArrayObject *__pyx_v_T = 0; + PyArrayObject *__pyx_v_coef_ = 0; + int __pyx_v_solver_type; + double __pyx_v_eps; + double __pyx_v_C; + PyArrayObject *__pyx_v_weight_label = 0; + PyArrayObject *__pyx_v_weight = 0; + PyArrayObject *__pyx_v_label = 0; + double __pyx_v_bias; + PyArrayObject *__pyx_v_dec_values; + struct parameter *__pyx_v_param; + struct model *__pyx_v_model; + PyObject *__pyx_v_n_class; + Py_buffer __pyx_bstruct_dec_values; + Py_ssize_t __pyx_bstride_0_dec_values = 0; + Py_ssize_t __pyx_bstride_1_dec_values = 0; + Py_ssize_t __pyx_bshape_0_dec_values = 0; + Py_ssize_t __pyx_bshape_1_dec_values = 0; + Py_buffer __pyx_bstruct_weight; + Py_ssize_t __pyx_bstride_0_weight = 0; + Py_ssize_t __pyx_bshape_0_weight = 0; + Py_buffer __pyx_bstruct_weight_label; + Py_ssize_t __pyx_bstride_0_weight_label = 0; + 
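The generated block above completes csr_train_wrap. Reassembled from the _liblinear.pyx fragments quoted in the comments (source lines 150-167), the Cython it compiles down to is approximately the following sketch, not a verbatim excerpt; copy_w, copy_label and the free_* routines are the liblinear C helpers declared earlier in the .pyx file:

    if nr_class == 2:
        # binary problem: liblinear keeps a single weight vector
        w = np.empty((1, nr_feature), order='F')
        copy_w(w.data, model, nr_feature)
    else:
        # one weight vector per class, Fortran-ordered to match
        # liblinear's internal layout
        len_w = nr_class * nr_feature
        w = np.empty((nr_class, nr_feature), order='F')
        copy_w(w.data, model, len_w)

    cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label
    label = np.empty((nr_class), dtype=np.int32)
    copy_label(label.data, model, nr_class)

    ### FREE
    free_and_destroy_model(&model)
    free_problem(problem)
    free_parameter(param)
    # destroy_param(param) is deliberately not called: it would also
    # destroy weight_label and weight, which are borrowed numpy buffers

    return w, label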
Py_ssize_t __pyx_bshape_0_weight_label = 0; + Py_buffer __pyx_bstruct_label; + Py_ssize_t __pyx_bstride_0_label = 0; + Py_ssize_t __pyx_bshape_0_label = 0; + Py_buffer __pyx_bstruct_coef_; + Py_ssize_t __pyx_bstride_0_coef_ = 0; + Py_ssize_t __pyx_bstride_1_coef_ = 0; + Py_ssize_t __pyx_bshape_0_coef_ = 0; + Py_ssize_t __pyx_bshape_1_coef_ = 0; + Py_buffer __pyx_bstruct_T; + Py_ssize_t __pyx_bstride_0_T = 0; + Py_ssize_t __pyx_bstride_1_T = 0; + Py_ssize_t __pyx_bshape_0_T = 0; + Py_ssize_t __pyx_bshape_1_T = 0; + PyObject *__pyx_r = NULL; + PyObject *__pyx_t_1 = NULL; + int __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyArrayObject *__pyx_t_7 = NULL; + int __pyx_t_8; + PyObject *__pyx_t_9 = NULL; + PyObject *__pyx_t_10 = NULL; + PyObject *__pyx_t_11 = NULL; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__T,&__pyx_n_s__coef_,&__pyx_n_s__solver_type,&__pyx_n_s__eps,&__pyx_n_s__C,&__pyx_n_s__weight_label,&__pyx_n_s__weight,&__pyx_n_s__label,&__pyx_n_s__bias,0}; + __Pyx_RefNannySetupContext("decision_function_wrap"); + __pyx_self = __pyx_self; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); + PyObject* values[9] = {0,0,0,0,0,0,0,0,0}; + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8); + case 8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7); + case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); + case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); + case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 0: + values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__T); + if (likely(values[0])) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__coef_); + if (likely(values[1])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("decision_function_wrap", 1, 9, 9, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 2: + values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__solver_type); + if (likely(values[2])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("decision_function_wrap", 1, 9, 9, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 3: + values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__eps); + if (likely(values[3])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("decision_function_wrap", 1, 9, 9, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 4: + values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__C); + if (likely(values[4])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("decision_function_wrap", 1, 9, 9, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 5: + values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight_label); + if (likely(values[5])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("decision_function_wrap", 1, 9, 9, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 6: + values[6] = PyDict_GetItem(__pyx_kwds, 
__pyx_n_s__weight); + if (likely(values[6])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("decision_function_wrap", 1, 9, 9, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 7: + values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__label); + if (likely(values[7])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("decision_function_wrap", 1, 9, 9, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 8: + values[8] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__bias); + if (likely(values[8])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("decision_function_wrap", 1, 9, 9, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "decision_function_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + __pyx_v_T = ((PyArrayObject *)values[0]); + __pyx_v_coef_ = ((PyArrayObject *)values[1]); + __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_weight_label = ((PyArrayObject *)values[5]); + __pyx_v_weight = ((PyArrayObject *)values[6]); + __pyx_v_label = ((PyArrayObject *)values[7]); + __pyx_v_bias = __pyx_PyFloat_AsDouble(values[8]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } else if (PyTuple_GET_SIZE(__pyx_args) != 9) { + goto __pyx_L5_argtuple_error; + } else { + __pyx_v_T = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 0)); + __pyx_v_coef_ = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 1)); + __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 3)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 4)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_weight_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 5)); + __pyx_v_weight = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 6)); + __pyx_v_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 7)); + __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 8)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; 
goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("decision_function_wrap", 1, 9, 9, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_L3_error:; + __Pyx_AddTraceback("_liblinear.decision_function_wrap"); + return NULL; + __pyx_L4_argument_unpacking_done:; + __Pyx_INCREF((PyObject *)__pyx_v_T); + __Pyx_INCREF((PyObject *)__pyx_v_coef_); + __Pyx_INCREF((PyObject *)__pyx_v_weight_label); + __Pyx_INCREF((PyObject *)__pyx_v_weight); + __Pyx_INCREF((PyObject *)__pyx_v_label); + __pyx_v_dec_values = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); + __pyx_v_n_class = Py_None; __Pyx_INCREF(Py_None); + __pyx_bstruct_dec_values.buf = NULL; + __pyx_bstruct_T.buf = NULL; + __pyx_bstruct_coef_.buf = NULL; + __pyx_bstruct_weight_label.buf = NULL; + __pyx_bstruct_weight.buf = NULL; + __pyx_bstruct_label.buf = NULL; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T), __pyx_ptype_5numpy_ndarray, 1, "T", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_coef_), __pyx_ptype_5numpy_ndarray, 1, "coef_", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 172; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_label), __pyx_ptype_5numpy_ndarray, 1, "label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 176; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T, (PyObject*)__pyx_v_T, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_T = __pyx_bstruct_T.strides[0]; __pyx_bstride_1_T = __pyx_bstruct_T.strides[1]; + __pyx_bshape_0_T = __pyx_bstruct_T.shape[0]; __pyx_bshape_1_T = __pyx_bstruct_T.shape[1]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_coef_, (PyObject*)__pyx_v_coef_, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_coef_ = __pyx_bstruct_coef_.strides[0]; __pyx_bstride_1_coef_ = __pyx_bstruct_coef_.strides[1]; + __pyx_bshape_0_coef_ = __pyx_bstruct_coef_.shape[0]; __pyx_bshape_1_coef_ = __pyx_bstruct_coef_.shape[1]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_weight_label = __pyx_bstruct_weight_label.strides[0]; + 
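The long argument-unpacking and buffer-validation stretch above is what Cython expands from a short def header. Only the first two parameter declarations are quoted verbatim in the comments (source lines 170-172); the rest is inferred here from the keyword table and the PyBUF dtype and contiguity checks, so treat this as a reconstruction:

    def decision_function_wrap(
            np.ndarray[np.float64_t, ndim=2, mode='c'] T,
            np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_,
            int solver_type, double eps, double C,
            np.ndarray[np.int32_t, ndim=1, mode='c'] weight_label,
            np.ndarray[np.float64_t, ndim=1, mode='c'] weight,
            np.ndarray[np.int32_t, ndim=1, mode='c'] label,
            double bias):
        # set_parameter/set_model calls as quoted at source lines 184-186
        param = set_parameter(
            solver_type, eps, C, weight.shape[0],
            weight_label.data, weight.data)
        model = set_model(param, coef_.data, coef_.shape, label.data, bias)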
__pyx_bshape_0_weight_label = __pyx_bstruct_weight_label.shape[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_weight = __pyx_bstruct_weight.strides[0]; + __pyx_bshape_0_weight = __pyx_bstruct_weight.shape[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_label = __pyx_bstruct_label.strides[0]; + __pyx_bshape_0_label = __pyx_bstruct_label.shape[0]; + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":184 + * + * param = set_parameter( + * solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) # <<<<<<<<<<<<<< + * + * model = set_model(param, coef_.data, coef_.shape, label.data, bias) + */ + __pyx_v_param = set_parameter(__pyx_v_solver_type, __pyx_v_eps, __pyx_v_C, (__pyx_v_weight->dimensions[0]), __pyx_v_weight_label->data, __pyx_v_weight->data); + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":186 + * solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) + * + * model = set_model(param, coef_.data, coef_.shape, label.data, bias) # <<<<<<<<<<<<<< + * + * n_class = label.shape[0] + */ + __pyx_v_model = set_model(__pyx_v_param, __pyx_v_coef_->data, __pyx_v_coef_->dimensions, __pyx_v_label->data, __pyx_v_bias); + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":188 + * model = set_model(param, coef_.data, coef_.shape, label.data, bias) + * + * n_class = label.shape[0] # <<<<<<<<<<<<<< + * if n_class <= 2: n_class = 1 + * dec_values = np.empty((T.shape[0], n_class), dtype=np.float64) + */ + __pyx_t_1 = __Pyx_PyInt_to_py_npy_intp((__pyx_v_label->dimensions[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_v_n_class); + __pyx_v_n_class = __pyx_t_1; + __pyx_t_1 = 0; + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":189 + * + * n_class = label.shape[0] + * if n_class <= 2: n_class = 1 # <<<<<<<<<<<<<< + * dec_values = np.empty((T.shape[0], n_class), dtype=np.float64) + * + */ + __pyx_t_1 = PyObject_RichCompare(__pyx_v_n_class, __pyx_int_2, Py_LE); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (__pyx_t_2) { + __Pyx_INCREF(__pyx_int_1); + __Pyx_DECREF(__pyx_v_n_class); + __pyx_v_n_class = __pyx_int_1; + goto __pyx_L6; + } + __pyx_L6:; + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":190 + * n_class = label.shape[0] + * if n_class <= 2: n_class = 1 + * dec_values = np.empty((T.shape[0], n_class), dtype=np.float64) # 
<<<<<<<<<<<<<< + * + * if copy_predict_values(T.data, model, T.shape, dec_values.data, n_class) < 0: + */ + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__empty); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyInt_to_py_npy_intp((__pyx_v_T->dimensions[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + __Pyx_INCREF(__pyx_v_n_class); + PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_v_n_class); + __Pyx_GIVEREF(__pyx_v_n_class); + __pyx_t_1 = 0; + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_4); + __Pyx_GIVEREF(__pyx_t_4); + __pyx_t_4 = 0; + __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_4)); + __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_6 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__float64); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (PyDict_SetItem(__pyx_t_4, ((PyObject *)__pyx_n_s__dtype), __pyx_t_6) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = PyEval_CallObjectWithKeywords(__pyx_t_3, __pyx_t_1, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = ((PyArrayObject *)__pyx_t_6); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_dec_values); + __pyx_t_8 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_dec_values, (PyObject*)__pyx_t_7, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack); + if (unlikely(__pyx_t_8 < 0)) { + PyErr_Fetch(&__pyx_t_9, &__pyx_t_10, &__pyx_t_11); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_dec_values, (PyObject*)__pyx_v_dec_values, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) { + 
Py_XDECREF(__pyx_t_9); Py_XDECREF(__pyx_t_10); Py_XDECREF(__pyx_t_11); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_9, __pyx_t_10, __pyx_t_11); + } + } + __pyx_bstride_0_dec_values = __pyx_bstruct_dec_values.strides[0]; __pyx_bstride_1_dec_values = __pyx_bstruct_dec_values.strides[1]; + __pyx_bshape_0_dec_values = __pyx_bstruct_dec_values.shape[0]; __pyx_bshape_1_dec_values = __pyx_bstruct_dec_values.shape[1]; + if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_t_7 = 0; + __Pyx_DECREF(((PyObject *)__pyx_v_dec_values)); + __pyx_v_dec_values = ((PyArrayObject *)__pyx_t_6); + __pyx_t_6 = 0; + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":192 + * dec_values = np.empty((T.shape[0], n_class), dtype=np.float64) + * + * if copy_predict_values(T.data, model, T.shape, dec_values.data, n_class) < 0: # <<<<<<<<<<<<<< + * raise MemoryError("We've run out of memory") + * + */ + __pyx_t_8 = __Pyx_PyInt_AsInt(__pyx_v_n_class); if (unlikely((__pyx_t_8 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = (copy_predict_values(__pyx_v_T->data, __pyx_v_model, __pyx_v_T->dimensions, __pyx_v_dec_values->data, __pyx_t_8) < 0); + if (__pyx_t_2) { + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":193 + * + * if copy_predict_values(T.data, model, T.shape, dec_values.data, n_class) < 0: + * raise MemoryError("We've run out of memory") # <<<<<<<<<<<<<< + * + * ### FREE + */ + __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_1)); + PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_kp_s_1)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_1)); + __pyx_t_4 = PyObject_Call(__pyx_builtin_MemoryError, __pyx_t_6, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_Raise(__pyx_t_4, 0, 0); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + goto __pyx_L7; + } + __pyx_L7:; + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":196 + * + * ### FREE + * free_parameter(param) # <<<<<<<<<<<<<< + * free_and_destroy_model(&model) + * return dec_values + */ + free_parameter(__pyx_v_param); + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":197 + * ### FREE + * free_parameter(param) + * free_and_destroy_model(&model) # <<<<<<<<<<<<<< + * return dec_values + * + */ + free_and_destroy_model((&__pyx_v_model)); + + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":198 + * free_parameter(param) + * free_and_destroy_model(&model) + * return dec_values # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject *)__pyx_v_dec_values)); + __pyx_r = ((PyObject *)__pyx_v_dec_values); + goto __pyx_L0; + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + { PyObject *__pyx_type, 
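Once the model is set up, the heart of the dense wrapper is only a few lines of Cython. Stitched together from the fragments quoted at source lines 188-198, it reads roughly:

    n_class = label.shape[0]
    if n_class <= 2: n_class = 1    # binary case: one decision column
    dec_values = np.empty((T.shape[0], n_class), dtype=np.float64)

    # copy_predict_values fills dec_values row by row; a negative
    # return value signals an allocation failure inside the C code
    if copy_predict_values(T.data, model, T.shape,
                           dec_values.data, n_class) < 0:
        raise MemoryError("We've run out of memory")

    ### FREE
    free_parameter(param)
    free_and_destroy_model(&model)
    return dec_values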
*__pyx_value, *__pyx_tb; + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_dec_values); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight_label); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_label); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_coef_); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_T); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("_liblinear.decision_function_wrap"); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_dec_values); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight_label); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_label); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_coef_); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_T); + __pyx_L2:; + __Pyx_DECREF((PyObject *)__pyx_v_dec_values); + __Pyx_DECREF(__pyx_v_n_class); + __Pyx_DECREF((PyObject *)__pyx_v_T); + __Pyx_DECREF((PyObject *)__pyx_v_coef_); + __Pyx_DECREF((PyObject *)__pyx_v_weight_label); + __Pyx_DECREF((PyObject *)__pyx_v_weight); + __Pyx_DECREF((PyObject *)__pyx_v_label); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":202 + * + * + * def csr_decision_function_wrap( # <<<<<<<<<<<<<< + * int n_features, + * np.ndarray[np.float64_t, ndim=1, mode='c'] T_values, + */ + +static PyObject *__pyx_pf_10_liblinear_csr_decision_function_wrap(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static char __pyx_doc_10_liblinear_csr_decision_function_wrap[] = "\n Predict from model\n\n Test data given in CSR format\n "; +static PyObject *__pyx_pf_10_liblinear_csr_decision_function_wrap(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + int __pyx_v_n_features; + PyArrayObject *__pyx_v_T_values = 0; + PyArrayObject *__pyx_v_T_indices = 0; + PyArrayObject *__pyx_v_T_indptr = 0; + PyArrayObject *__pyx_v_coef_ = 0; + int __pyx_v_solver_type; + double __pyx_v_eps; + double __pyx_v_C; + PyArrayObject *__pyx_v_weight_label = 0; + PyArrayObject *__pyx_v_weight = 0; + PyArrayObject *__pyx_v_label = 0; + double __pyx_v_bias; + PyArrayObject *__pyx_v_dec_values; + struct parameter *__pyx_v_param; + struct model *__pyx_v_model; + PyObject *__pyx_v_n_class; + Py_buffer __pyx_bstruct_weight_label; + Py_ssize_t __pyx_bstride_0_weight_label = 0; + Py_ssize_t __pyx_bshape_0_weight_label = 0; + Py_buffer __pyx_bstruct_weight; + Py_ssize_t __pyx_bstride_0_weight = 0; + Py_ssize_t __pyx_bshape_0_weight = 0; + Py_buffer __pyx_bstruct_T_indices; + Py_ssize_t __pyx_bstride_0_T_indices = 0; + Py_ssize_t __pyx_bshape_0_T_indices = 0; + Py_buffer __pyx_bstruct_T_values; + Py_ssize_t __pyx_bstride_0_T_values = 0; + Py_ssize_t __pyx_bshape_0_T_values = 0; + Py_buffer __pyx_bstruct_coef_; + Py_ssize_t __pyx_bstride_0_coef_ = 0; + Py_ssize_t __pyx_bstride_1_coef_ = 0; + Py_ssize_t __pyx_bshape_0_coef_ = 0; + Py_ssize_t __pyx_bshape_1_coef_ = 0; + Py_buffer __pyx_bstruct_T_indptr; + Py_ssize_t __pyx_bstride_0_T_indptr = 0; + Py_ssize_t __pyx_bshape_0_T_indptr = 0; + Py_buffer __pyx_bstruct_dec_values; + Py_ssize_t __pyx_bstride_0_dec_values = 0; + Py_ssize_t __pyx_bstride_1_dec_values = 0; + Py_ssize_t __pyx_bshape_0_dec_values = 0; + Py_ssize_t __pyx_bshape_1_dec_values = 0; + Py_buffer __pyx_bstruct_label; + Py_ssize_t __pyx_bstride_0_label = 0; + Py_ssize_t __pyx_bshape_0_label = 0; + PyObject 
*__pyx_r = NULL; + PyObject *__pyx_t_1 = NULL; + int __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyArrayObject *__pyx_t_7 = NULL; + int __pyx_t_8; + PyObject *__pyx_t_9 = NULL; + PyObject *__pyx_t_10 = NULL; + PyObject *__pyx_t_11 = NULL; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__n_features,&__pyx_n_s__T_values,&__pyx_n_s__T_indices,&__pyx_n_s__T_indptr,&__pyx_n_s__coef_,&__pyx_n_s__solver_type,&__pyx_n_s__eps,&__pyx_n_s__C,&__pyx_n_s__weight_label,&__pyx_n_s__weight,&__pyx_n_s__label,&__pyx_n_s__bias,0}; + __Pyx_RefNannySetupContext("csr_decision_function_wrap"); + __pyx_self = __pyx_self; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args = PyDict_Size(__pyx_kwds); + PyObject* values[12] = {0,0,0,0,0,0,0,0,0,0,0,0}; + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 12: values[11] = PyTuple_GET_ITEM(__pyx_args, 11); + case 11: values[10] = PyTuple_GET_ITEM(__pyx_args, 10); + case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9); + case 9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8); + case 8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7); + case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); + case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); + case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 0: + values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__n_features); + if (likely(values[0])) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__T_values); + if (likely(values[1])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 2: + values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__T_indices); + if (likely(values[2])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 3: + values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__T_indptr); + if (likely(values[3])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 4: + values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__coef_); + if (likely(values[4])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 5: + values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__solver_type); + if (likely(values[5])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 6: + values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__eps); + if (likely(values[6])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + 
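csr_decision_function_wrap takes the same model parameters but receives the test matrix decomposed into the three CSR arrays. Reconstructed from the docstring, the keyword-name table above, and the buffer checks that follow (only n_features, T_values and the opening comment lines are quoted directly, so the remaining declarations are inferred), the header is approximately:

    def csr_decision_function_wrap(
            int n_features,
            np.ndarray[np.float64_t, ndim=1, mode='c'] T_values,
            np.ndarray[np.int32_t, ndim=1, mode='c'] T_indices,
            np.ndarray[np.int32_t, ndim=1, mode='c'] T_indptr,
            np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_,
            int solver_type, double eps, double C,
            np.ndarray[np.int32_t, ndim=1, mode='c'] weight_label,
            np.ndarray[np.float64_t, ndim=1, mode='c'] weight,
            np.ndarray[np.int32_t, ndim=1, mode='c'] label,
            double bias):
        """
        Predict from model

        Test data given in CSR format
        """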
case 7: + values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__C); + if (likely(values[7])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 8: + values[8] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight_label); + if (likely(values[8])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 9: + values[9] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight); + if (likely(values[9])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 9); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 10: + values[10] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__label); + if (likely(values[10])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 10); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 11: + values[11] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__bias); + if (likely(values[11])) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, 11); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "csr_decision_function_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + __pyx_v_n_features = __Pyx_PyInt_AsInt(values[0]); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 203; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_T_values = ((PyArrayObject *)values[1]); + __pyx_v_T_indices = ((PyArrayObject *)values[2]); + __pyx_v_T_indptr = ((PyArrayObject *)values[3]); + __pyx_v_coef_ = ((PyArrayObject *)values[4]); + __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[5]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(values[6]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(values[7]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_weight_label = ((PyArrayObject *)values[8]); + __pyx_v_weight = ((PyArrayObject *)values[9]); + __pyx_v_label = ((PyArrayObject *)values[10]); + __pyx_v_bias = __pyx_PyFloat_AsDouble(values[11]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } else if (PyTuple_GET_SIZE(__pyx_args) != 12) { + goto __pyx_L5_argtuple_error; + } else { + __pyx_v_n_features = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 0)); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 203; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_T_values = ((PyArrayObject 
*)PyTuple_GET_ITEM(__pyx_args, 1)); + __pyx_v_T_indices = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 2)); + __pyx_v_T_indptr = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 3)); + __pyx_v_coef_ = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 4)); + __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 5)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 6)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 7)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_weight_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 8)); + __pyx_v_weight = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 9)); + __pyx_v_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 10)); + __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 11)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("csr_decision_function_wrap", 1, 12, 12, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_L3_error:; + __Pyx_AddTraceback("_liblinear.csr_decision_function_wrap"); + return NULL; + __pyx_L4_argument_unpacking_done:; + __Pyx_INCREF((PyObject *)__pyx_v_T_values); + __Pyx_INCREF((PyObject *)__pyx_v_T_indices); + __Pyx_INCREF((PyObject *)__pyx_v_T_indptr); + __Pyx_INCREF((PyObject *)__pyx_v_coef_); + __Pyx_INCREF((PyObject *)__pyx_v_weight_label); + __Pyx_INCREF((PyObject *)__pyx_v_weight); + __Pyx_INCREF((PyObject *)__pyx_v_label); + __pyx_v_dec_values = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); + __pyx_v_n_class = Py_None; __Pyx_INCREF(Py_None); + __pyx_bstruct_dec_values.buf = NULL; + __pyx_bstruct_T_values.buf = NULL; + __pyx_bstruct_T_indices.buf = NULL; + __pyx_bstruct_T_indptr.buf = NULL; + __pyx_bstruct_coef_.buf = NULL; + __pyx_bstruct_weight_label.buf = NULL; + __pyx_bstruct_weight.buf = NULL; + __pyx_bstruct_label.buf = NULL; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_values), __pyx_ptype_5numpy_ndarray, 1, "T_values", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_indices), __pyx_ptype_5numpy_ndarray, 1, "T_indices", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_indptr), __pyx_ptype_5numpy_ndarray, 1, "T_indptr", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 206; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_coef_), __pyx_ptype_5numpy_ndarray, 1, "coef_", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 207; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 209; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_label), __pyx_ptype_5numpy_ndarray, 1, "label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 211; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_values, (PyObject*)__pyx_v_T_values, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_T_values = __pyx_bstruct_T_values.strides[0]; + __pyx_bshape_0_T_values = __pyx_bstruct_T_values.shape[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_indices, (PyObject*)__pyx_v_T_indices, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_T_indices = __pyx_bstruct_T_indices.strides[0]; + __pyx_bshape_0_T_indices = __pyx_bstruct_T_indices.shape[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_indptr, (PyObject*)__pyx_v_T_indptr, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_T_indptr = __pyx_bstruct_T_indptr.strides[0]; + __pyx_bshape_0_T_indptr = __pyx_bstruct_T_indptr.shape[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_coef_, (PyObject*)__pyx_v_coef_, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_coef_ = __pyx_bstruct_coef_.strides[0]; __pyx_bstride_1_coef_ = __pyx_bstruct_coef_.strides[1]; + __pyx_bshape_0_coef_ = __pyx_bstruct_coef_.shape[0]; __pyx_bshape_1_coef_ = __pyx_bstruct_coef_.shape[1]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_weight_label = __pyx_bstruct_weight_label.strides[0]; + __pyx_bshape_0_weight_label = __pyx_bstruct_weight_label.shape[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_weight = __pyx_bstruct_weight.strides[0]; + __pyx_bshape_0_weight = __pyx_bstruct_weight.shape[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, 
(PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_bstride_0_label = __pyx_bstruct_label.strides[0]; + __pyx_bshape_0_label = __pyx_bstruct_label.shape[0]; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":141 - * if nr_class == 2: - * w = np.empty((1, nr_feature)) - * copy_w(w.data, model, nr_feature) # <<<<<<<<<<<<<< - * else: - * len_w = (nr_class * nr_feature) + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":224 + * + * param = set_parameter( + * solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) # <<<<<<<<<<<<<< + * + * model = set_model(param, coef_.data, coef_.shape, label.data, bias) */ - copy_w(__pyx_v_w->data, __pyx_v_model, __pyx_v_nr_feature); - goto __pyx_L8; - } - /*else*/ { + __pyx_v_param = set_parameter(__pyx_v_solver_type, __pyx_v_eps, __pyx_v_C, (__pyx_v_weight->dimensions[0]), __pyx_v_weight_label->data, __pyx_v_weight->data); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":143 - * copy_w(w.data, model, nr_feature) - * else: - * len_w = (nr_class * nr_feature) # <<<<<<<<<<<<<< - * w = np.empty((nr_class, nr_feature)) - * copy_w(w.data, model, len_w) + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":226 + * solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) + * + * model = set_model(param, coef_.data, coef_.shape, label.data, bias) # <<<<<<<<<<<<<< + * + * n_class = label.shape[0] */ - __pyx_v_len_w = (__pyx_v_nr_class * __pyx_v_nr_feature); + __pyx_v_model = set_model(__pyx_v_param, __pyx_v_coef_->data, __pyx_v_coef_->dimensions, __pyx_v_label->data, __pyx_v_bias); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":144 - * else: - * len_w = (nr_class * nr_feature) - * w = np.empty((nr_class, nr_feature)) # <<<<<<<<<<<<<< - * copy_w(w.data, model, len_w) + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":228 + * model = set_model(param, coef_.data, coef_.shape, label.data, bias) + * + * n_class = label.shape[0] # <<<<<<<<<<<<<< + * if n_class <= 2: n_class = 1 * */ - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__empty); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyInt_FromLong(__pyx_v_nr_feature); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = PyTuple_New(2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_10, 1, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_4 = 
0; - __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_3); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_10); - __Pyx_GIVEREF(__pyx_t_10); - __pyx_t_10 = 0; - __pyx_t_10 = PyObject_Call(__pyx_t_2, __pyx_t_3, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_5 = ((PyArrayObject *)__pyx_t_10); - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_w); - __pyx_t_6 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack); - if (unlikely(__pyx_t_6 < 0)) { - PyErr_Fetch(&__pyx_t_9, &__pyx_t_8, &__pyx_t_7); - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_w, (PyObject*)__pyx_v_w, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) { - Py_XDECREF(__pyx_t_9); Py_XDECREF(__pyx_t_8); Py_XDECREF(__pyx_t_7); - __Pyx_RaiseBufferFallbackError(); - } else { - PyErr_Restore(__pyx_t_9, __pyx_t_8, __pyx_t_7); - } - } - __pyx_bstride_0_w = __pyx_bstruct_w.strides[0]; __pyx_bstride_1_w = __pyx_bstruct_w.strides[1]; - __pyx_bshape_0_w = __pyx_bstruct_w.shape[0]; __pyx_bshape_1_w = __pyx_bstruct_w.shape[1]; - if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __pyx_t_5 = 0; - __Pyx_DECREF(((PyObject *)__pyx_v_w)); - __pyx_v_w = ((PyArrayObject *)__pyx_t_10); - __pyx_t_10 = 0; + __pyx_t_1 = __Pyx_PyInt_to_py_npy_intp((__pyx_v_label->dimensions[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 228; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_v_n_class); + __pyx_v_n_class = __pyx_t_1; + __pyx_t_1 = 0; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":145 - * len_w = (nr_class * nr_feature) - * w = np.empty((nr_class, nr_feature)) - * copy_w(w.data, model, len_w) # <<<<<<<<<<<<<< + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":229 * - * cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label + * n_class = label.shape[0] + * if n_class <= 2: n_class = 1 # <<<<<<<<<<<<<< + * + * dec_values = np.empty((T_indptr.shape[0] - 1, n_class), dtype=np.float64) */ - copy_w(__pyx_v_w->data, __pyx_v_model, __pyx_v_len_w); + __pyx_t_1 = PyObject_RichCompare(__pyx_v_n_class, __pyx_int_2, Py_LE); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 229; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 229; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (__pyx_t_2) { + __Pyx_INCREF(__pyx_int_1); + __Pyx_DECREF(__pyx_v_n_class); + __pyx_v_n_class = __pyx_int_1; + goto __pyx_L6; } - __pyx_L8:; + __pyx_L6:; - /* 
"/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":148 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":231 + * if n_class <= 2: n_class = 1 * - * cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label - * label = np.empty((nr_class), dtype=np.int32) # <<<<<<<<<<<<<< - * copy_label(label.data, model, nr_class) + * dec_values = np.empty((T_indptr.shape[0] - 1, n_class), dtype=np.float64) # <<<<<<<<<<<<<< * + * if csr_copy_predict_values( */ - __pyx_t_10 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_10, __pyx_n_s__empty); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__empty); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_10); - __Pyx_GIVEREF(__pyx_t_10); - __pyx_t_10 = 0; - __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(((PyObject *)__pyx_t_10)); - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyInt_FromLong(((__pyx_v_T_indptr->dimensions[0]) - 1)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__int32); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_11); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (PyDict_SetItem(__pyx_t_10, ((PyObject *)__pyx_n_s__dtype), __pyx_t_11) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_11 = PyEval_CallObjectWithKeywords(__pyx_t_3, __pyx_t_2, ((PyObject *)__pyx_t_10)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_11); + PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + __Pyx_INCREF(__pyx_v_n_class); + PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_v_n_class); + 
__Pyx_GIVEREF(__pyx_v_n_class); + __pyx_t_1 = 0; + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_4); + __Pyx_GIVEREF(__pyx_t_4); + __pyx_t_4 = 0; + __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(((PyObject *)__pyx_t_4)); + __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_6 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__float64); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (PyDict_SetItem(__pyx_t_4, ((PyObject *)__pyx_n_s__dtype), __pyx_t_6) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = PyEval_CallObjectWithKeywords(__pyx_t_3, __pyx_t_1, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(((PyObject *)__pyx_t_10)); __pyx_t_10 = 0; - if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_12 = ((PyArrayObject *)__pyx_t_11); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = ((PyArrayObject *)__pyx_t_6); { __Pyx_BufFmt_StackElem __pyx_stack[1]; - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_label); - __pyx_t_6 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_t_12, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack); - if (unlikely(__pyx_t_6 < 0)) { - PyErr_Fetch(&__pyx_t_7, &__pyx_t_8, &__pyx_t_9); - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) { - Py_XDECREF(__pyx_t_7); Py_XDECREF(__pyx_t_8); Py_XDECREF(__pyx_t_9); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_dec_values); + __pyx_t_8 = __Pyx_GetBufferAndValidate(&__pyx_bstruct_dec_values, (PyObject*)__pyx_t_7, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack); + if (unlikely(__pyx_t_8 < 0)) { + PyErr_Fetch(&__pyx_t_9, &__pyx_t_10, &__pyx_t_11); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_dec_values, (PyObject*)__pyx_v_dec_values, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_9); Py_XDECREF(__pyx_t_10); Py_XDECREF(__pyx_t_11); __Pyx_RaiseBufferFallbackError(); } else { - PyErr_Restore(__pyx_t_7, __pyx_t_8, __pyx_t_9); + PyErr_Restore(__pyx_t_9, __pyx_t_10, 
__pyx_t_11); } } - __pyx_bstride_0_label = __pyx_bstruct_label.strides[0]; - __pyx_bshape_0_label = __pyx_bstruct_label.shape[0]; - if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_bstride_0_dec_values = __pyx_bstruct_dec_values.strides[0]; __pyx_bstride_1_dec_values = __pyx_bstruct_dec_values.strides[1]; + __pyx_bshape_0_dec_values = __pyx_bstruct_dec_values.shape[0]; __pyx_bshape_1_dec_values = __pyx_bstruct_dec_values.shape[1]; + if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_t_12 = 0; - __Pyx_DECREF(((PyObject *)__pyx_v_label)); - __pyx_v_label = ((PyArrayObject *)__pyx_t_11); - __pyx_t_11 = 0; + __pyx_t_7 = 0; + __Pyx_DECREF(((PyObject *)__pyx_v_dec_values)); + __pyx_v_dec_values = ((PyArrayObject *)__pyx_t_6); + __pyx_t_6 = 0; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":149 - * cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label - * label = np.empty((nr_class), dtype=np.int32) - * copy_label(label.data, model, nr_class) # <<<<<<<<<<<<<< + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":236 + * n_features, T_values.shape, T_values.data, T_indices.shape, + * T_indices.data, T_indptr.shape, T_indptr.data, model, + * dec_values.data, n_class) < 0: # <<<<<<<<<<<<<< + * raise MemoryError("We've run out of of memory") * - * ### FREE */ - copy_label(__pyx_v_label->data, __pyx_v_model, __pyx_v_nr_class); + __pyx_t_8 = __Pyx_PyInt_AsInt(__pyx_v_n_class); if (unlikely((__pyx_t_8 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = (csr_copy_predict_values(__pyx_v_n_features, __pyx_v_T_values->dimensions, __pyx_v_T_values->data, __pyx_v_T_indices->dimensions, __pyx_v_T_indices->data, __pyx_v_T_indptr->dimensions, __pyx_v_T_indptr->data, __pyx_v_model, __pyx_v_dec_values->data, __pyx_t_8) < 0); + if (__pyx_t_2) { - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":152 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":237 + * T_indices.data, T_indptr.shape, T_indptr.data, model, + * dec_values.data, n_class) < 0: + * raise MemoryError("We've run out of of memory") # <<<<<<<<<<<<<< * * ### FREE - * free_and_destroy_model(&model) # <<<<<<<<<<<<<< - * free_problem(problem) - * free_parameter(param) */ - free_and_destroy_model((&__pyx_v_model)); + __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 237; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_INCREF(((PyObject *)__pyx_kp_s_1)); + PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_kp_s_1)); + __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_1)); + __pyx_t_4 = PyObject_Call(__pyx_builtin_MemoryError, __pyx_t_6, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 237; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_Raise(__pyx_t_4, 0, 0); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 237; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + goto __pyx_L7; + } + __pyx_L7:; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":153 + /* 
"/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":240 + * * ### FREE + * free_parameter(param) # <<<<<<<<<<<<<< * free_and_destroy_model(&model) - * free_problem(problem) # <<<<<<<<<<<<<< - * free_parameter(param) - * # destroy_param(param) don't call this or it will destroy weight_label and weight + * return dec_values */ - free_problem(__pyx_v_problem); + free_parameter(__pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":154 - * free_and_destroy_model(&model) - * free_problem(problem) - * free_parameter(param) # <<<<<<<<<<<<<< - * # destroy_param(param) don't call this or it will destroy weight_label and weight + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":241 + * ### FREE + * free_parameter(param) + * free_and_destroy_model(&model) # <<<<<<<<<<<<<< + * return dec_values * */ - free_parameter(__pyx_v_param); + free_and_destroy_model((&__pyx_v_model)); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":157 - * # destroy_param(param) don't call this or it will destroy weight_label and weight + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":242 + * free_parameter(param) + * free_and_destroy_model(&model) + * return dec_values # <<<<<<<<<<<<<< * - * return w, label # <<<<<<<<<<<<<< * - * def predict_wrap(np.ndarray[np.float64_t, ndim=2, mode='c'] T, */ __Pyx_XDECREF(__pyx_r); - __pyx_t_11 = PyTuple_New(2); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_11); - __Pyx_INCREF(((PyObject *)__pyx_v_w)); - PyTuple_SET_ITEM(__pyx_t_11, 0, ((PyObject *)__pyx_v_w)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_w)); - __Pyx_INCREF(((PyObject *)__pyx_v_label)); - PyTuple_SET_ITEM(__pyx_t_11, 1, ((PyObject *)__pyx_v_label)); - __Pyx_GIVEREF(((PyObject *)__pyx_v_label)); - __pyx_r = __pyx_t_11; - __pyx_t_11 = 0; + __Pyx_INCREF(((PyObject *)__pyx_v_dec_values)); + __pyx_r = ((PyObject *)__pyx_v_dec_values); goto __pyx_L0; __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_10); - __Pyx_XDECREF(__pyx_t_11); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight); __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight_label); - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_indices); - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_indptr); - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_values); - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_w); - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_Y); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_T_indices); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_T_values); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_coef_); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_T_indptr); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_dec_values); __Pyx_SafeReleaseBuffer(&__pyx_bstruct_label); __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} - __Pyx_AddTraceback("_liblinear.csr_train_wrap"); + __Pyx_AddTraceback("_liblinear.csr_decision_function_wrap"); __pyx_r = NULL; goto __pyx_L2; __pyx_L0:; - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight); __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight_label); - 
__Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_indices); - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_indptr); - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_X_values); - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_w); - __Pyx_SafeReleaseBuffer(&__pyx_bstruct_Y); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_weight); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_T_indices); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_T_values); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_coef_); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_T_indptr); + __Pyx_SafeReleaseBuffer(&__pyx_bstruct_dec_values); __Pyx_SafeReleaseBuffer(&__pyx_bstruct_label); __pyx_L2:; - __Pyx_DECREF((PyObject *)__pyx_v_w); - __Pyx_DECREF((PyObject *)__pyx_v_label); - __Pyx_DECREF((PyObject *)__pyx_v_X_values); - __Pyx_DECREF((PyObject *)__pyx_v_X_indices); - __Pyx_DECREF((PyObject *)__pyx_v_X_indptr); - __Pyx_DECREF((PyObject *)__pyx_v_Y); + __Pyx_DECREF((PyObject *)__pyx_v_dec_values); + __Pyx_DECREF(__pyx_v_n_class); + __Pyx_DECREF((PyObject *)__pyx_v_T_values); + __Pyx_DECREF((PyObject *)__pyx_v_T_indices); + __Pyx_DECREF((PyObject *)__pyx_v_T_indptr); + __Pyx_DECREF((PyObject *)__pyx_v_coef_); __Pyx_DECREF((PyObject *)__pyx_v_weight_label); __Pyx_DECREF((PyObject *)__pyx_v_weight); + __Pyx_DECREF((PyObject *)__pyx_v_label); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":159 - * return w, label +/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":245 * - * def predict_wrap(np.ndarray[np.float64_t, ndim=2, mode='c'] T, # <<<<<<<<<<<<<< - * np.ndarray[np.float64_t, ndim=2, mode='c'] coef_, - * int solver_type, double eps, double C, + * + * def predict_wrap( # <<<<<<<<<<<<<< + * np.ndarray[np.float64_t, ndim=2, mode='c'] T, + * np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_, */ static PyObject *__pyx_pf_10_liblinear_predict_wrap(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ @@ -2191,79 +3137,79 @@ static PyObject *__pyx_pf_10_liblinear_predict_wrap(PyObject *__pyx_self, PyObje values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__coef_); if (likely(values[1])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__solver_type); if (likely(values[2])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__eps); if (likely(values[3])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__C); if (likely(values[4])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 4); {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight_label); if (likely(values[5])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight); if (likely(values[6])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 7: values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__label); if (likely(values[7])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 8: values[8] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__bias); if (likely(values[8])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "predict_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "predict_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } __pyx_v_T = ((PyArrayObject *)values[0]); __pyx_v_coef_ = ((PyArrayObject *)values[1]); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 248; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 248; 
__pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 248; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)values[5]); __pyx_v_weight = ((PyArrayObject *)values[6]); __pyx_v_label = ((PyArrayObject *)values[7]); - __pyx_v_bias = __pyx_PyFloat_AsDouble(values[8]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(values[8]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else if (PyTuple_GET_SIZE(__pyx_args) != 9) { goto __pyx_L5_argtuple_error; } else { __pyx_v_T = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 0)); __pyx_v_coef_ = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 1)); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 3)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 4)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 248; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 3)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 248; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 4)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 248; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 5)); __pyx_v_weight = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 6)); __pyx_v_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 7)); - __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 8)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 8)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_wrap", 1, 9, 9, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = 
__LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_liblinear.predict_wrap"); return NULL; @@ -2280,43 +3226,43 @@ static PyObject *__pyx_pf_10_liblinear_predict_wrap(PyObject *__pyx_self, PyObje __pyx_bstruct_weight_label.buf = NULL; __pyx_bstruct_weight.buf = NULL; __pyx_bstruct_label.buf = NULL; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T), __pyx_ptype_5numpy_ndarray, 1, "T", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_coef_), __pyx_ptype_5numpy_ndarray, 1, "coef_", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 160; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 162; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 163; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_label), __pyx_ptype_5numpy_ndarray, 1, "label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 164; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T), __pyx_ptype_5numpy_ndarray, 1, "T", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_coef_), __pyx_ptype_5numpy_ndarray, 1, "coef_", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_label), __pyx_ptype_5numpy_ndarray, 1, "label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T, (PyObject*)__pyx_v_T, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T, (PyObject*)__pyx_v_T, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_T = __pyx_bstruct_T.strides[0]; __pyx_bstride_1_T = __pyx_bstruct_T.strides[1]; __pyx_bshape_0_T = __pyx_bstruct_T.shape[0]; __pyx_bshape_1_T = __pyx_bstruct_T.shape[1]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_coef_, (PyObject*)__pyx_v_coef_, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_coef_, (PyObject*)__pyx_v_coef_, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_coef_ = __pyx_bstruct_coef_.strides[0]; __pyx_bstride_1_coef_ = __pyx_bstruct_coef_.strides[1]; __pyx_bshape_0_coef_ = __pyx_bstruct_coef_.shape[0]; __pyx_bshape_1_coef_ = __pyx_bstruct_coef_.shape[1]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight_label = __pyx_bstruct_weight_label.strides[0]; __pyx_bshape_0_weight_label = __pyx_bstruct_weight_label.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight = __pyx_bstruct_weight.strides[0]; __pyx_bshape_0_weight = __pyx_bstruct_weight.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 245; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_label = __pyx_bstruct_label.strides[0]; __pyx_bshape_0_label = __pyx_bstruct_label.shape[0]; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":171 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":258 * cdef model *model * * param = set_parameter(solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) # <<<<<<<<<<<<<< @@ -2325,7 +3271,7 @@ static PyObject *__pyx_pf_10_liblinear_predict_wrap(PyObject *__pyx_self, PyObje */ __pyx_v_param = set_parameter(__pyx_v_solver_type, __pyx_v_eps, __pyx_v_C, (__pyx_v_weight->dimensions[0]), __pyx_v_weight_label->data, __pyx_v_weight->data); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":173 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":260 * param = 
set_parameter(solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) * * model = set_model(param, coef_.data, coef_.shape, label.data, bias) # <<<<<<<<<<<<<< @@ -2334,40 +3280,40 @@ static PyObject *__pyx_pf_10_liblinear_predict_wrap(PyObject *__pyx_self, PyObje */ __pyx_v_model = set_model(__pyx_v_param, __pyx_v_coef_->data, __pyx_v_coef_->dimensions, __pyx_v_label->data, __pyx_v_bias); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":175 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":262 * model = set_model(param, coef_.data, coef_.shape, label.data, bias) * * dec_values = np.empty(T.shape[0], dtype=np.int32) # <<<<<<<<<<<<<< * if copy_predict(T.data, model, T.shape, dec_values.data) < 0: * raise MemoryError("We've run out of of memory") */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__empty); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__empty); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_to_py_npy_intp((__pyx_v_T->dimensions[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_to_py_npy_intp((__pyx_v_T->dimensions[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__int32); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__int32); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__dtype), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__dtype), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_2, __pyx_t_3, ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_2, __pyx_t_3, ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); { __Pyx_BufFmt_StackElem __pyx_stack[1]; @@ -2384,14 +3330,14 @@ static PyObject *__pyx_pf_10_liblinear_predict_wrap(PyObject *__pyx_self, PyObje } __pyx_bstride_0_dec_values = __pyx_bstruct_dec_values.strides[0]; __pyx_bshape_0_dec_values = __pyx_bstruct_dec_values.shape[0]; - if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 175; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_6 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_dec_values)); __pyx_v_dec_values = ((PyArrayObject *)__pyx_t_5); __pyx_t_5 = 0; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":176 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":263 * * dec_values = np.empty(T.shape[0], dtype=np.int32) * if copy_predict(T.data, model, T.shape, dec_values.data) < 0: # <<<<<<<<<<<<<< @@ -2401,29 +3347,29 @@ static PyObject *__pyx_pf_10_liblinear_predict_wrap(PyObject *__pyx_self, PyObje __pyx_t_11 = (copy_predict(__pyx_v_T->data, __pyx_v_model, __pyx_v_T->dimensions, __pyx_v_dec_values->data) < 0); if (__pyx_t_11) { - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":177 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":264 * dec_values = np.empty(T.shape[0], dtype=np.int32) * if copy_predict(T.data, model, T.shape, dec_values.data) < 0: * raise MemoryError("We've run out of of memory") # <<<<<<<<<<<<<< * * ### FREE */ - __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_INCREF(((PyObject *)__pyx_kp_s_1)); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_kp_s_1)); 
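
[reviewer note] As the quoted pyx source shows, predict_wrap allocates one int32 label per row of T, hands raw buffers to copy_predict, and turns a negative return into a MemoryError before freeing the parameter and model structs. A sketch of that allocate/call/check pattern in plain Python; predict_wrap_sketch and the copy_predict callable are hypothetical stand-ins for the Cython wrapper and the C routine, not part of this patch:

    import numpy as np

    def predict_wrap_sketch(T, copy_predict):
        # one predicted label per sample, int32 as in the pyx source
        dec_values = np.empty(T.shape[0], dtype=np.int32)
        if copy_predict(T, dec_values) < 0:   # C routine fills dec_values in place
            raise MemoryError("We've run out of memory")
        return dec_values

    # toy usage: a stub that labels every sample 0 and reports success
    labels = predict_wrap_sketch(np.zeros((4, 2)), lambda T, out: out.fill(0) or 0)
    assert labels.tolist() == [0, 0, 0, 0]
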
__Pyx_GIVEREF(((PyObject *)__pyx_kp_s_1)); - __pyx_t_1 = PyObject_Call(__pyx_builtin_MemoryError, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_builtin_MemoryError, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_Raise(__pyx_t_1, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L6; } __pyx_L6:; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":180 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":267 * * ### FREE * free_parameter(param) # <<<<<<<<<<<<<< @@ -2432,7 +3378,7 @@ static PyObject *__pyx_pf_10_liblinear_predict_wrap(PyObject *__pyx_self, PyObje */ free_parameter(__pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":181 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":268 * ### FREE * free_parameter(param) * free_and_destroy_model(&model) # <<<<<<<<<<<<<< @@ -2441,7 +3387,7 @@ static PyObject *__pyx_pf_10_liblinear_predict_wrap(PyObject *__pyx_self, PyObje */ free_and_destroy_model((&__pyx_v_model)); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":182 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":269 * free_parameter(param) * free_and_destroy_model(&model) * return dec_values # <<<<<<<<<<<<<< @@ -2492,7 +3438,7 @@ static PyObject *__pyx_pf_10_liblinear_predict_wrap(PyObject *__pyx_self, PyObje return __pyx_r; } -/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":185 +/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":272 * * * def csr_predict_wrap( # <<<<<<<<<<<<<< @@ -2587,103 +3533,103 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_wrap(PyObject *__pyx_self, Py values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__T_values); if (likely(values[1])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__T_indices); if (likely(values[2])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__T_indptr); if (likely(values[3])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; 
__pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__coef_); if (likely(values[4])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__solver_type); if (likely(values[5])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__eps); if (likely(values[6])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 7: values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__C); if (likely(values[7])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 8: values[8] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight_label); if (likely(values[8])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 9: values[9] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight); if (likely(values[9])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 9); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 9); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 10: values[10] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__label); if (likely(values[10])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 10); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 10); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 11: values[11] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__bias); if (likely(values[11])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 11); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, 11); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args 
> 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "csr_predict_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "csr_predict_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } - __pyx_v_n_features = __Pyx_PyInt_AsInt(values[0]); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 186; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_n_features = __Pyx_PyInt_AsInt(values[0]); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_T_values = ((PyArrayObject *)values[1]); __pyx_v_T_indices = ((PyArrayObject *)values[2]); __pyx_v_T_indptr = ((PyArrayObject *)values[3]); __pyx_v_coef_ = ((PyArrayObject *)values[4]); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[5]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(values[6]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(values[7]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[5]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(values[6]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(values[7]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)values[8]); __pyx_v_weight = ((PyArrayObject *)values[9]); __pyx_v_label = ((PyArrayObject *)values[10]); - __pyx_v_bias = __pyx_PyFloat_AsDouble(values[11]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(values[11]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else if (PyTuple_GET_SIZE(__pyx_args) != 12) { goto __pyx_L5_argtuple_error; } else { - __pyx_v_n_features = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 0)); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 186; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_n_features = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 0)); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L3_error;} 
__pyx_v_T_values = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 1)); __pyx_v_T_indices = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 2)); __pyx_v_T_indptr = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 3)); __pyx_v_coef_ = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 4)); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 5)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 6)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 7)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 5)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 6)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 7)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 8)); __pyx_v_weight = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 9)); __pyx_v_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 10)); - __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 11)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 11)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_wrap", 1, 12, 12, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_liblinear.csr_predict_wrap"); return NULL; @@ -2704,57 +3650,57 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_wrap(PyObject *__pyx_self, Py __pyx_bstruct_weight_label.buf = NULL; __pyx_bstruct_weight.buf = NULL; __pyx_bstruct_label.buf = NULL; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_values), __pyx_ptype_5numpy_ndarray, 1, "T_values", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_indices), __pyx_ptype_5numpy_ndarray, 1, "T_indices", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if 
(unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_indptr), __pyx_ptype_5numpy_ndarray, 1, "T_indptr", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_coef_), __pyx_ptype_5numpy_ndarray, 1, "coef_", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_label), __pyx_ptype_5numpy_ndarray, 1, "label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_values), __pyx_ptype_5numpy_ndarray, 1, "T_values", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_indices), __pyx_ptype_5numpy_ndarray, 1, "T_indices", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_indptr), __pyx_ptype_5numpy_ndarray, 1, "T_indptr", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_coef_), __pyx_ptype_5numpy_ndarray, 1, "coef_", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 277; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_label), __pyx_ptype_5numpy_ndarray, 1, "label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_values, (PyObject*)__pyx_v_T_values, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_values, (PyObject*)__pyx_v_T_values, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_T_values = __pyx_bstruct_T_values.strides[0]; __pyx_bshape_0_T_values = __pyx_bstruct_T_values.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_indices, (PyObject*)__pyx_v_T_indices, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_indices, (PyObject*)__pyx_v_T_indices, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_T_indices = __pyx_bstruct_T_indices.strides[0]; __pyx_bshape_0_T_indices = __pyx_bstruct_T_indices.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_indptr, (PyObject*)__pyx_v_T_indptr, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_indptr, (PyObject*)__pyx_v_T_indptr, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_T_indptr = __pyx_bstruct_T_indptr.strides[0]; __pyx_bshape_0_T_indptr = __pyx_bstruct_T_indptr.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_coef_, (PyObject*)__pyx_v_coef_, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_coef_, (PyObject*)__pyx_v_coef_, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_coef_ = __pyx_bstruct_coef_.strides[0]; __pyx_bstride_1_coef_ = __pyx_bstruct_coef_.strides[1]; __pyx_bshape_0_coef_ = __pyx_bstruct_coef_.shape[0]; __pyx_bshape_1_coef_ = __pyx_bstruct_coef_.shape[1]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight_label = __pyx_bstruct_weight_label.strides[0]; __pyx_bshape_0_weight_label = __pyx_bstruct_weight_label.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } 
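
[reviewer note] Note the coef_ buffer request changing from PyBUF_C_CONTIGUOUS to PyBUF_F_CONTIGUOUS here (and in predict_wrap above), matching the mode='c' to mode='fortran' switch in the pyx signatures: the wrappers now expect a column-major coefficient matrix. On the NumPy side the distinction looks like this:

    import numpy as np

    coef_c = np.ones((3, 4))               # NumPy's default layout is C order
    coef_f = np.asfortranarray(coef_c)     # column-major copy, same values
    assert coef_c.flags['C_CONTIGUOUS']
    assert coef_f.flags['F_CONTIGUOUS']
    # a Cython buffer declared mode='fortran' would reject coef_c but accept coef_f
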
__pyx_bstride_0_weight = __pyx_bstruct_weight.strides[0]; __pyx_bshape_0_weight = __pyx_bstruct_weight.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_label = __pyx_bstruct_label.strides[0]; __pyx_bshape_0_label = __pyx_bstruct_label.shape[0]; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":206 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":293 * cdef model *model * * param = set_parameter(solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) # <<<<<<<<<<<<<< @@ -2763,7 +3709,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_wrap(PyObject *__pyx_self, Py */ __pyx_v_param = set_parameter(__pyx_v_solver_type, __pyx_v_eps, __pyx_v_C, (__pyx_v_weight->dimensions[0]), __pyx_v_weight_label->data, __pyx_v_weight->data); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":208 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":295 * param = set_parameter(solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) * * model = set_model(param, coef_.data, coef_.shape, label.data, bias) # <<<<<<<<<<<<<< @@ -2772,40 +3718,40 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_wrap(PyObject *__pyx_self, Py */ __pyx_v_model = set_model(__pyx_v_param, __pyx_v_coef_->data, __pyx_v_coef_->dimensions, __pyx_v_label->data, __pyx_v_bias); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":210 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":297 * model = set_model(param, coef_.data, coef_.shape, label.data, bias) * * dec_values = np.empty(T_indptr.shape[0] - 1, dtype=np.int32) # <<<<<<<<<<<<<< * if csr_copy_predict(n_features, T_values.shape, T_values.data, * T_indices.shape, T_indices.data, */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__empty); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__empty); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyInt_FromLong(((__pyx_v_T_indptr->dimensions[0]) - 1)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(((__pyx_v_T_indptr->dimensions[0]) - 1)); if (unlikely(!__pyx_t_1)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__int32); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetAttr(__pyx_t_4, __pyx_n_s__int32); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__dtype), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__dtype), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_2, __pyx_t_3, ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_2, __pyx_t_3, ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); { __Pyx_BufFmt_StackElem __pyx_stack[1]; @@ -2822,14 +3768,14 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_wrap(PyObject *__pyx_self, Py } __pyx_bstride_0_dec_values = __pyx_bstruct_dec_values.strides[0]; __pyx_bshape_0_dec_values = __pyx_bstruct_dec_values.shape[0]; - if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_6 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_dec_values)); __pyx_v_dec_values = ((PyArrayObject *)__pyx_t_5); __pyx_t_5 = 0; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":214 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":301 * T_indices.shape, T_indices.data, * T_indptr.shape, T_indptr.data, * model, dec_values.data) < 0: # <<<<<<<<<<<<<< @@ -2839,29 +3785,29 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_wrap(PyObject *__pyx_self, Py __pyx_t_11 = (csr_copy_predict(__pyx_v_n_features, __pyx_v_T_values->dimensions, __pyx_v_T_values->data, __pyx_v_T_indices->dimensions, __pyx_v_T_indices->data, __pyx_v_T_indptr->dimensions, __pyx_v_T_indptr->data, __pyx_v_model, __pyx_v_dec_values->data) < 0); if (__pyx_t_11) { - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":215 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":302 * T_indptr.shape, T_indptr.data, * model, dec_values.data) < 0: * raise MemoryError("We've run out of of memory") # <<<<<<<<<<<<<< * * ### FREE */ - __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 215; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_INCREF(((PyObject *)__pyx_kp_s_1)); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_kp_s_1)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_1)); - __pyx_t_1 = PyObject_Call(__pyx_builtin_MemoryError, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 215; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_builtin_MemoryError, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_Raise(__pyx_t_1, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 215; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L6; } __pyx_L6:; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":218 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":305 * * ### FREE * free_parameter(param) # <<<<<<<<<<<<<< @@ -2870,7 +3816,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_wrap(PyObject *__pyx_self, Py */ free_parameter(__pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":219 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":306 * ### FREE * free_parameter(param) * free_and_destroy_model(&model) # <<<<<<<<<<<<<< @@ -2879,7 +3825,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_wrap(PyObject *__pyx_self, Py */ free_and_destroy_model((&__pyx_v_model)); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":220 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":307 * free_parameter(param) * free_and_destroy_model(&model) * return dec_values # <<<<<<<<<<<<<< @@ -2936,11 
+3882,11 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_wrap(PyObject *__pyx_self, Py return __pyx_r; } -/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":225 +/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":312 * * * def predict_prob_wrap(np.ndarray[np.float64_t, ndim=2, mode='c'] T, # <<<<<<<<<<<<<< - * np.ndarray[np.float64_t, ndim=2, mode='c'] coef_, + * np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_, * int solver_type, double eps, double C, */ @@ -3024,79 +3970,79 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__coef_); if (likely(values[1])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__solver_type); if (likely(values[2])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__eps); if (likely(values[3])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__C); if (likely(values[4])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight_label); if (likely(values[5])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight); if (likely(values[6])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 7: values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__label); if (likely(values[7])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 
7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 8: values[8] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__bias); if (likely(values[8])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "predict_prob_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "predict_prob_wrap") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } __pyx_v_T = ((PyArrayObject *)values[0]); __pyx_v_coef_ = ((PyArrayObject *)values[1]); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[2]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)values[5]); __pyx_v_weight = ((PyArrayObject *)values[6]); __pyx_v_label = ((PyArrayObject *)values[7]); - __pyx_v_bias = __pyx_PyFloat_AsDouble(values[8]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(values[8]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else if (PyTuple_GET_SIZE(__pyx_args) != 9) { goto __pyx_L5_argtuple_error; } else { __pyx_v_T = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 0)); __pyx_v_coef_ = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 1)); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 3)); if (unlikely((__pyx_v_eps == 
(double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 4)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 3)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 4)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 5)); __pyx_v_weight = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 6)); __pyx_v_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 7)); - __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 8)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 8)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("predict_prob_wrap", 1, 9, 9, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_liblinear.predict_prob_wrap"); return NULL; @@ -3113,43 +4059,43 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P __pyx_bstruct_weight_label.buf = NULL; __pyx_bstruct_weight.buf = NULL; __pyx_bstruct_label.buf = NULL; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T), __pyx_ptype_5numpy_ndarray, 1, "T", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_coef_), __pyx_ptype_5numpy_ndarray, 1, "coef_", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 228; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 229; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_label), __pyx_ptype_5numpy_ndarray, 1, "label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T), __pyx_ptype_5numpy_ndarray, 1, "T", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_coef_), __pyx_ptype_5numpy_ndarray, 1, "coef_", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 315; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_label), __pyx_ptype_5numpy_ndarray, 1, "label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T, (PyObject*)__pyx_v_T, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T, (PyObject*)__pyx_v_T, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_T = __pyx_bstruct_T.strides[0]; __pyx_bstride_1_T = __pyx_bstruct_T.strides[1]; __pyx_bshape_0_T = __pyx_bstruct_T.shape[0]; __pyx_bshape_1_T = __pyx_bstruct_T.shape[1]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_coef_, (PyObject*)__pyx_v_coef_, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_coef_, (PyObject*)__pyx_v_coef_, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_coef_ = __pyx_bstruct_coef_.strides[0]; __pyx_bstride_1_coef_ = __pyx_bstruct_coef_.strides[1]; __pyx_bshape_0_coef_ = __pyx_bstruct_coef_.shape[0]; __pyx_bshape_1_coef_ = __pyx_bstruct_coef_.shape[1]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight_label = __pyx_bstruct_weight_label.strides[0]; __pyx_bshape_0_weight_label = __pyx_bstruct_weight_label.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if 
(unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight = __pyx_bstruct_weight.strides[0]; __pyx_bshape_0_weight = __pyx_bstruct_weight.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_label = __pyx_bstruct_label.strides[0]; __pyx_bshape_0_label = __pyx_bstruct_label.shape[0]; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":262 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":349 * cdef model *model * * param = set_parameter(solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) # <<<<<<<<<<<<<< @@ -3158,7 +4104,7 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P */ __pyx_v_param = set_parameter(__pyx_v_solver_type, __pyx_v_eps, __pyx_v_C, (__pyx_v_weight->dimensions[0]), __pyx_v_weight_label->data, __pyx_v_weight->data); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":264 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":351 * param = set_parameter(solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) * * model = set_model(param, coef_.data, coef_.shape, label.data, bias) # <<<<<<<<<<<<<< @@ -3167,7 +4113,7 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P */ __pyx_v_model = set_model(__pyx_v_param, __pyx_v_coef_->data, __pyx_v_coef_->dimensions, __pyx_v_label->data, __pyx_v_bias); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":266 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":353 * model = set_model(param, coef_.data, coef_.shape, label.data, bias) * * cdef int nr_class = get_nr_class(model) # <<<<<<<<<<<<<< @@ -3176,23 +4122,23 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P */ __pyx_v_nr_class = get_nr_class(__pyx_v_model); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":267 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":354 * * cdef int nr_class = get_nr_class(model) * dec_values = np.empty((T.shape[0], nr_class), dtype=np.float64) # <<<<<<<<<<<<<< * if copy_prob_predict(T.data, model, T.shape, dec_values.data) < 0: * raise MemoryError("We've run out of of memory") */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if 
(unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__empty); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__empty); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_to_py_npy_intp((__pyx_v_T->dimensions[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_to_py_npy_intp((__pyx_v_T->dimensions[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); @@ -3200,26 +4146,26 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P __Pyx_GIVEREF(__pyx_t_3); __pyx_t_1 = 0; __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__float64); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__float64); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_t_4, ((PyObject *)__pyx_n_s__dtype), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_4, ((PyObject *)__pyx_n_s__dtype), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_2, __pyx_t_3, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_2, __pyx_t_3, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; - if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); { __Pyx_BufFmt_StackElem __pyx_stack[1]; @@ -3236,14 +4182,14 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P } __pyx_bstride_0_dec_values = __pyx_bstruct_dec_values.strides[0]; __pyx_bstride_1_dec_values = __pyx_bstruct_dec_values.strides[1]; __pyx_bshape_0_dec_values = __pyx_bstruct_dec_values.shape[0]; __pyx_bshape_1_dec_values = __pyx_bstruct_dec_values.shape[1]; - if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_6 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_dec_values)); __pyx_v_dec_values = ((PyArrayObject *)__pyx_t_5); __pyx_t_5 = 0; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":268 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":355 * cdef int nr_class = get_nr_class(model) * dec_values = np.empty((T.shape[0], nr_class), dtype=np.float64) * if copy_prob_predict(T.data, model, T.shape, dec_values.data) < 0: # <<<<<<<<<<<<<< @@ -3253,29 +4199,29 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P __pyx_t_11 = (copy_prob_predict(__pyx_v_T->data, __pyx_v_model, __pyx_v_T->dimensions, __pyx_v_dec_values->data) < 0); if (__pyx_t_11) { - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":269 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":356 * dec_values = np.empty((T.shape[0], nr_class), dtype=np.float64) * if copy_prob_predict(T.data, model, T.shape, dec_values.data) < 0: * raise MemoryError("We've run out of of memory") # <<<<<<<<<<<<<< * * ### FREE */ - __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = 
PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_INCREF(((PyObject *)__pyx_kp_s_1)); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_kp_s_1)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_1)); - __pyx_t_4 = PyObject_Call(__pyx_builtin_MemoryError, __pyx_t_5, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(__pyx_builtin_MemoryError, __pyx_t_5, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_Raise(__pyx_t_4, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L6; } __pyx_L6:; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":272 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":359 * * ### FREE * free_parameter(param) # <<<<<<<<<<<<<< @@ -3284,7 +4230,7 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P */ free_parameter(__pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":273 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":360 * ### FREE * free_parameter(param) * free_and_destroy_model(&model) # <<<<<<<<<<<<<< @@ -3293,7 +4239,7 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P */ free_and_destroy_model((&__pyx_v_model)); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":275 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":362 * free_and_destroy_model(&model) * * return dec_values # <<<<<<<<<<<<<< @@ -3344,7 +4290,7 @@ static PyObject *__pyx_pf_10_liblinear_predict_prob_wrap(PyObject *__pyx_self, P return __pyx_r; } -/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":279 +/* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":366 * * * def csr_predict_prob( # <<<<<<<<<<<<<< @@ -3442,103 +4388,103 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_prob(PyObject *__pyx_self, Py values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__T_values); if (likely(values[1])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__T_indices); if (likely(values[2])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__T_indptr); if (likely(values[3])) kw_args--; else { - 
__Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__coef_); if (likely(values[4])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__solver_type); if (likely(values[5])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__eps); if (likely(values[6])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 7: values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__C); if (likely(values[7])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 8: values[8] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight_label); if (likely(values[8])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 9: values[9] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__weight); if (likely(values[9])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 9); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 9); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 10: values[10] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__label); if (likely(values[10])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 10); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 10); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 11: values[11] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__bias); if (likely(values[11])) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 11); 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, 11); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "csr_predict_prob") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, PyTuple_GET_SIZE(__pyx_args), "csr_predict_prob") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } - __pyx_v_n_features = __Pyx_PyInt_AsInt(values[0]); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_n_features = __Pyx_PyInt_AsInt(values[0]); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_T_values = ((PyArrayObject *)values[1]); __pyx_v_T_indices = ((PyArrayObject *)values[2]); __pyx_v_T_indptr = ((PyArrayObject *)values[3]); __pyx_v_coef_ = ((PyArrayObject *)values[4]); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[5]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(values[6]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(values[7]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(values[5]); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(values[6]); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(values[7]); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)values[8]); __pyx_v_weight = ((PyArrayObject *)values[9]); __pyx_v_label = ((PyArrayObject *)values[10]); - __pyx_v_bias = __pyx_PyFloat_AsDouble(values[11]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(values[11]); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else if (PyTuple_GET_SIZE(__pyx_args) != 12) { goto __pyx_L5_argtuple_error; } else { - __pyx_v_n_features = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 0)); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = 
__LINE__; goto __pyx_L3_error;} + __pyx_v_n_features = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 0)); if (unlikely((__pyx_v_n_features == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_T_values = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 1)); __pyx_v_T_indices = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 2)); __pyx_v_T_indptr = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 3)); __pyx_v_coef_ = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 4)); - __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 5)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 6)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 7)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_solver_type = __Pyx_PyInt_AsInt(PyTuple_GET_ITEM(__pyx_args, 5)); if (unlikely((__pyx_v_solver_type == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_eps = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 6)); if (unlikely((__pyx_v_eps == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_C = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 7)); if (unlikely((__pyx_v_C == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_weight_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 8)); __pyx_v_weight = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 9)); __pyx_v_label = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 10)); - __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 11)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_bias = __pyx_PyFloat_AsDouble(PyTuple_GET_ITEM(__pyx_args, 11)); if (unlikely((__pyx_v_bias == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("csr_predict_prob", 1, 12, 12, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_liblinear.csr_predict_prob"); return NULL; @@ -3559,57 +4505,57 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_prob(PyObject *__pyx_self, Py __pyx_bstruct_weight_label.buf = NULL; __pyx_bstruct_weight.buf = NULL; __pyx_bstruct_label.buf = NULL; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_values), __pyx_ptype_5numpy_ndarray, 1, "T_values", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_indices), __pyx_ptype_5numpy_ndarray, 1, "T_indices", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_indptr), __pyx_ptype_5numpy_ndarray, 1, "T_indptr", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_coef_), __pyx_ptype_5numpy_ndarray, 1, "coef_", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_label), __pyx_ptype_5numpy_ndarray, 1, "label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 288; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_values), __pyx_ptype_5numpy_ndarray, 1, "T_values", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_indices), __pyx_ptype_5numpy_ndarray, 1, "T_indices", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_T_indptr), __pyx_ptype_5numpy_ndarray, 1, "T_indptr", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_coef_), __pyx_ptype_5numpy_ndarray, 1, "coef_", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight_label), __pyx_ptype_5numpy_ndarray, 1, "weight_label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_weight), __pyx_ptype_5numpy_ndarray, 1, "weight", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_label), __pyx_ptype_5numpy_ndarray, 1, "label", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_values, (PyObject*)__pyx_v_T_values, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_values, (PyObject*)__pyx_v_T_values, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_T_values = __pyx_bstruct_T_values.strides[0]; __pyx_bshape_0_T_values = 
__pyx_bstruct_T_values.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_indices, (PyObject*)__pyx_v_T_indices, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_indices, (PyObject*)__pyx_v_T_indices, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_T_indices = __pyx_bstruct_T_indices.strides[0]; __pyx_bshape_0_T_indices = __pyx_bstruct_T_indices.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_indptr, (PyObject*)__pyx_v_T_indptr, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_T_indptr, (PyObject*)__pyx_v_T_indptr, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_T_indptr = __pyx_bstruct_T_indptr.strides[0]; __pyx_bshape_0_T_indptr = __pyx_bstruct_T_indptr.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_coef_, (PyObject*)__pyx_v_coef_, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_coef_, (PyObject*)__pyx_v_coef_, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_F_CONTIGUOUS, 2, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_coef_ = __pyx_bstruct_coef_.strides[0]; __pyx_bstride_1_coef_ = __pyx_bstruct_coef_.strides[1]; __pyx_bshape_0_coef_ = __pyx_bstruct_coef_.shape[0]; __pyx_bshape_1_coef_ = __pyx_bstruct_coef_.shape[1]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight_label, (PyObject*)__pyx_v_weight_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight_label = __pyx_bstruct_weight_label.strides[0]; __pyx_bshape_0_weight_label = __pyx_bstruct_weight_label.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_weight, (PyObject*)__pyx_v_weight, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_weight = __pyx_bstruct_weight.strides[0]; __pyx_bshape_0_weight = __pyx_bstruct_weight.shape[0]; { __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_bstruct_label, (PyObject*)__pyx_v_label, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_bstride_0_label = __pyx_bstruct_label.strides[0]; __pyx_bshape_0_label = __pyx_bstruct_label.shape[0]; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":301 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":388 * * param = set_parameter(solver_type, eps, C, weight.shape[0], * weight_label.data, weight.data) # <<<<<<<<<<<<<< @@ -3618,7 +4564,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_prob(PyObject *__pyx_self, Py */ __pyx_v_param = set_parameter(__pyx_v_solver_type, __pyx_v_eps, __pyx_v_C, (__pyx_v_weight->dimensions[0]), __pyx_v_weight_label->data, __pyx_v_weight->data); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":303 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":390 * weight_label.data, weight.data) * * model = set_model(param, coef_.data, coef_.shape, label.data, bias) # <<<<<<<<<<<<<< @@ -3627,7 +4573,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_prob(PyObject *__pyx_self, Py */ __pyx_v_model = set_model(__pyx_v_param, __pyx_v_coef_->data, __pyx_v_coef_->dimensions, __pyx_v_label->data, __pyx_v_bias); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":304 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":391 * * model = set_model(param, coef_.data, coef_.shape, label.data, bias) * cdef int nr_class = get_nr_class(model) # <<<<<<<<<<<<<< @@ -3636,23 +4582,23 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_prob(PyObject *__pyx_self, Py */ __pyx_v_nr_class = get_nr_class(__pyx_v_model); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":305 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":392 * model = set_model(param, coef_.data, coef_.shape, label.data, bias) * cdef int nr_class = get_nr_class(model) * dec_values = np.empty((T_indptr.shape[0]-1, nr_class), dtype=np.float64) # <<<<<<<<<<<<<< * * if csr_copy_predict_proba(n_features, T_values.shape, T_values.data, */ - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = 
PyObject_GetAttr(__pyx_t_1, __pyx_n_s__empty); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__empty); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyInt_FromLong(((__pyx_v_T_indptr->dimensions[0]) - 1)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(((__pyx_v_T_indptr->dimensions[0]) - 1)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_v_nr_class); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); @@ -3660,26 +4606,26 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_prob(PyObject *__pyx_self, Py __Pyx_GIVEREF(__pyx_t_3); __pyx_t_1 = 0; __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__float64); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__float64); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_t_4, ((PyObject *)__pyx_n_s__dtype), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(PyDict_SetItem(__pyx_t_4, ((PyObject *)__pyx_n_s__dtype), __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_2, __pyx_t_3, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyEval_CallObjectWithKeywords(__pyx_t_2, __pyx_t_3, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; - if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); { __Pyx_BufFmt_StackElem __pyx_stack[1]; @@ -3696,14 +4642,14 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_prob(PyObject *__pyx_self, Py } __pyx_bstride_0_dec_values = __pyx_bstruct_dec_values.strides[0]; __pyx_bstride_1_dec_values = __pyx_bstruct_dec_values.strides[1]; __pyx_bshape_0_dec_values = __pyx_bstruct_dec_values.shape[0]; __pyx_bshape_1_dec_values = __pyx_bstruct_dec_values.shape[1]; - if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_6 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_dec_values)); __pyx_v_dec_values = ((PyArrayObject *)__pyx_t_5); __pyx_t_5 = 0; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":310 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":397 * T_indices.shape, T_indices.data, * T_indptr.shape, T_indptr.data, * model, dec_values.data) < 0: # <<<<<<<<<<<<<< @@ -3713,29 +4659,29 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_prob(PyObject *__pyx_self, Py __pyx_t_11 = (csr_copy_predict_proba(__pyx_v_n_features, __pyx_v_T_values->dimensions, __pyx_v_T_values->data, __pyx_v_T_indices->dimensions, __pyx_v_T_indices->data, __pyx_v_T_indptr->dimensions, __pyx_v_T_indptr->data, __pyx_v_model, __pyx_v_dec_values->data) < 0); if (__pyx_t_11) { - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":311 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":398 * T_indptr.shape, T_indptr.data, * model, dec_values.data) < 0: * raise MemoryError("We've run out of of memory") # <<<<<<<<<<<<<< * * ### FREE */ - __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 398; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_INCREF(((PyObject *)__pyx_kp_s_1)); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_kp_s_1)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_1)); - __pyx_t_4 = 
PyObject_Call(__pyx_builtin_MemoryError, __pyx_t_5, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(__pyx_builtin_MemoryError, __pyx_t_5, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 398; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_Raise(__pyx_t_4, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 398; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L6; } __pyx_L6:; - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":314 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":401 * * ### FREE * free_parameter(param) # <<<<<<<<<<<<<< @@ -3744,7 +4690,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_prob(PyObject *__pyx_self, Py */ free_parameter(__pyx_v_param); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":315 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":402 * ### FREE * free_parameter(param) * free_and_destroy_model(&model) # <<<<<<<<<<<<<< @@ -3752,7 +4698,7 @@ static PyObject *__pyx_pf_10_liblinear_csr_predict_prob(PyObject *__pyx_self, Py */ free_and_destroy_model((&__pyx_v_model)); - /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":316 + /* "/home/fabian/dev/scikit-learn/scikits/learn/svm/src/liblinear/_liblinear.pyx":403 * free_parameter(param) * free_and_destroy_model(&model) * return dec_values # <<<<<<<<<<<<<< @@ -5742,6 +6688,8 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py static struct PyMethodDef __pyx_methods[] = { {__Pyx_NAMESTR("train_wrap"), (PyCFunction)__pyx_pf_10_liblinear_train_wrap, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_10_liblinear_train_wrap)}, {__Pyx_NAMESTR("csr_train_wrap"), (PyCFunction)__pyx_pf_10_liblinear_csr_train_wrap, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_10_liblinear_csr_train_wrap)}, + {__Pyx_NAMESTR("decision_function_wrap"), (PyCFunction)__pyx_pf_10_liblinear_decision_function_wrap, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("csr_decision_function_wrap"), (PyCFunction)__pyx_pf_10_liblinear_csr_decision_function_wrap, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_10_liblinear_csr_decision_function_wrap)}, {__Pyx_NAMESTR("predict_wrap"), (PyCFunction)__pyx_pf_10_liblinear_predict_wrap, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, {__Pyx_NAMESTR("csr_predict_wrap"), (PyCFunction)__pyx_pf_10_liblinear_csr_predict_wrap, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_10_liblinear_csr_predict_wrap)}, {__Pyx_NAMESTR("predict_prob_wrap"), (PyCFunction)__pyx_pf_10_liblinear_predict_prob_wrap, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_10_liblinear_predict_prob_wrap)}, @@ -5769,8 +6717,10 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_s_1, __pyx_k_1, sizeof(__pyx_k_1), 0, 0, 1, 0}, {&__pyx_kp_u_10, __pyx_k_10, sizeof(__pyx_k_10), 0, 1, 0, 0}, {&__pyx_kp_u_11, __pyx_k_11, sizeof(__pyx_k_11), 0, 1, 0, 0}, - {&__pyx_kp_u_12, __pyx_k_12, sizeof(__pyx_k_12), 0, 1, 0, 0}, + {&__pyx_n_s_12, __pyx_k_12, sizeof(__pyx_k_12), 0, 0, 1, 1}, {&__pyx_kp_u_13, __pyx_k_13, sizeof(__pyx_k_13), 0, 1, 0, 0}, + {&__pyx_kp_u_14, 
__pyx_k_14, sizeof(__pyx_k_14), 0, 1, 0, 0}, + {&__pyx_kp_u_15, __pyx_k_15, sizeof(__pyx_k_15), 0, 1, 0, 0}, {&__pyx_kp_u_2, __pyx_k_2, sizeof(__pyx_k_2), 0, 1, 0, 0}, {&__pyx_kp_u_3, __pyx_k_3, sizeof(__pyx_k_3), 0, 1, 0, 0}, {&__pyx_kp_u_4, __pyx_k_4, sizeof(__pyx_k_4), 0, 1, 0, 0}, @@ -5779,6 +6729,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_u_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 1, 0, 0}, {&__pyx_kp_u_9, __pyx_k_9, sizeof(__pyx_k_9), 0, 1, 0, 0}, {&__pyx_n_s__C, __pyx_k__C, sizeof(__pyx_k__C), 0, 0, 1, 1}, + {&__pyx_n_s__F, __pyx_k__F, sizeof(__pyx_k__F), 0, 0, 1, 1}, {&__pyx_n_s__MemoryError, __pyx_k__MemoryError, sizeof(__pyx_k__MemoryError), 0, 0, 1, 1}, {&__pyx_n_s__RuntimeError, __pyx_k__RuntimeError, sizeof(__pyx_k__RuntimeError), 0, 0, 1, 1}, {&__pyx_n_s__T, __pyx_k__T, sizeof(__pyx_k__T), 0, 0, 1, 1}, @@ -5818,6 +6769,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s__np, __pyx_k__np, sizeof(__pyx_k__np), 0, 0, 1, 1}, {&__pyx_n_s__numpy, __pyx_k__numpy, sizeof(__pyx_k__numpy), 0, 0, 1, 1}, {&__pyx_n_s__obj, __pyx_k__obj, sizeof(__pyx_k__obj), 0, 0, 1, 1}, + {&__pyx_n_s__order, __pyx_k__order, sizeof(__pyx_k__order), 0, 0, 1, 1}, {&__pyx_n_s__predict_prob_wrap, __pyx_k__predict_prob_wrap, sizeof(__pyx_k__predict_prob_wrap), 0, 0, 1, 1}, {&__pyx_n_s__range, __pyx_k__range, sizeof(__pyx_k__range), 0, 0, 1, 1}, {&__pyx_n_s__readonly, __pyx_k__readonly, sizeof(__pyx_k__readonly), 0, 0, 1, 1}, @@ -5832,8 +6784,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_ValueError = __Pyx_GetName(__pyx_b, __pyx_n_s__ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_MemoryError = __Pyx_GetName(__pyx_b, __pyx_n_s__MemoryError); if (!__pyx_builtin_MemoryError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_ValueError = __Pyx_GetName(__pyx_b, __pyx_n_s__ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_MemoryError = __Pyx_GetName(__pyx_b, __pyx_n_s__MemoryError); if (!__pyx_builtin_MemoryError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_range = __Pyx_GetName(__pyx_b, __pyx_n_s__range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetName(__pyx_b, __pyx_n_s__RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 786; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; @@ -5844,6 +6796,7 @@ static int __Pyx_InitCachedBuiltins(void) { static int __Pyx_InitGlobals(void) { if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __pyx_int_15 = PyInt_FromLong(15); if (unlikely(!__pyx_int_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;}; return 0; __pyx_L1_error:; @@ -5951,27 +6904,34 @@ PyMODINIT_FUNC PyInit__liblinear(void) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_kp_u_10), __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_GetAttr(__pyx_m, __pyx_n_s__csr_predict_wrap); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_m, __pyx_n_s_12); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_2, "__doc__"); __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_kp_u_11), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__predict_prob_wrap); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__csr_predict_wrap); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __pyx_t_2 = __Pyx_GetAttrString(__pyx_t_3, "__doc__"); __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_kp_u_12), __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_kp_u_13), __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_GetAttr(__pyx_m, __pyx_n_s__csr_predict_prob); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_m, __pyx_n_s__predict_prob_wrap); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = __Pyx_GetAttrString(__pyx_t_2, "__doc__"); __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_kp_u_13), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_kp_u_14), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = PyObject_GetAttr(__pyx_m, __pyx_n_s__csr_predict_prob); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_GetAttrString(__pyx_t_3, "__doc__"); + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_kp_u_15), __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (PyObject_SetAttr(__pyx_m, __pyx_n_s____test__, ((PyObject *)__pyx_t_1)) < 
0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; diff --git a/scikits/learn/svm/src/liblinear/_liblinear.pyx b/scikits/learn/svm/src/liblinear/_liblinear.pyx index bb4b3add762cd562547e5f165143801c6021b504..763437ac581601bce0413b09489784cef99ada26 100644 --- a/scikits/learn/svm/src/liblinear/_liblinear.pyx +++ b/scikits/learn/svm/src/liblinear/_liblinear.pyx @@ -31,17 +31,25 @@ cdef extern from "liblinear_helper.c": model *set_model(parameter *, char *, np.npy_intp *, char *, double) int copy_predict(char *, model *, np.npy_intp *, char *) - int csr_copy_predict (np.npy_intp n_features, np.npy_intp *data_size, char *data, np.npy_intp *index_size, - char *index, np.npy_intp *intptr_size, char *intptr, model *model, - char *dec_values) - int csr_copy_predict_proba(np.npy_intp n_features, np.npy_intp *data_size, - char *data, np.npy_intp *index_size, - char *index, np.npy_intp *indptr_shape, - char *indptr, model *model_, - char *dec_values) + int csr_copy_predict( + np.npy_intp n_features, np.npy_intp *data_size, char *data, + np.npy_intp *index_size, char *index, np.npy_intp + *intptr_size, char *intptr, model *model, char *dec_values) + + int csr_copy_predict_values( + np.npy_intp n_features, np.npy_intp *data_size, char *data, np.npy_intp + *index_size, char *index, np.npy_intp *indptr_shape, char + *intptr, model *model_, char *dec_values, int nr_class) + + + int csr_copy_predict_proba( + np.npy_intp n_features, np.npy_intp *data_size, char *data, + np.npy_intp *index_size, char *index, np.npy_intp + *indptr_shape, char *indptr, model *model_, char *dec_values) int copy_prob_predict(char *, model *, np.npy_intp *, char *) + int copy_predict_values(char *, model *, np.npy_intp *, char *, int) int copy_label(char *, model *, int) double get_bias(model *) void free_problem (problem *) @@ -75,16 +83,17 @@ def train_wrap ( np.ndarray[np.float64_t, ndim=2, mode='c'] X, # early return model = train(problem, param) - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] w + # coef matrix holder created as fortran since that's what's used in liblinear + cdef np.ndarray[np.float64_t, ndim=2, mode='fortran'] w cdef int nr_class = get_nr_class(model) cdef int nr_feature = get_nr_feature(model) if bias > 0: nr_feature = nr_feature + 1 if nr_class == 2: - w = np.empty((1, nr_feature)) + w = np.empty((1, nr_feature),order='F') copy_w(w.data, model, nr_feature) else: len_w = (nr_class) * nr_feature - w = np.empty((nr_class, nr_feature)) + w = np.empty((nr_class, nr_feature),order='F') copy_w(w.data, model, len_w) cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label @@ -132,16 +141,17 @@ def csr_train_wrap ( int n_features, # early return model = train(problem, param) - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] w + # fortran order since that's what liblinear does + cdef np.ndarray[np.float64_t, ndim=2, mode='fortran'] w cdef int nr_class = get_nr_class(model) cdef int nr_feature = n_features if bias > 0: nr_feature = nr_feature + 1 if nr_class == 2: - w = np.empty((1, nr_feature)) + w = np.empty((1, nr_feature),order='F') copy_w(w.data, model, nr_feature) else: len_w = (nr_class * nr_feature) - w = np.empty((nr_class, nr_feature)) + w = np.empty((nr_class, nr_feature),order='F') copy_w(w.data, model, len_w) cdef np.ndarray[np.int32_t, ndim=1, mode='c'] label @@ -156,13 +166,90 @@ def csr_train_wrap ( int n_features, return w, label -def predict_wrap(np.ndarray[np.float64_t, ndim=2, mode='c'] T, - 
np.ndarray[np.float64_t, ndim=2, mode='c'] coef_, - int solver_type, double eps, double C, - np.ndarray[np.int32_t, ndim=1, mode='c'] weight_label, - np.ndarray[np.float64_t, ndim=1, mode='c'] weight, - np.ndarray[np.int32_t, ndim=1, mode='c'] label, - double bias): + +def decision_function_wrap( + np.ndarray[np.float64_t, ndim=2, mode='c'] T, + np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_, + int solver_type, double eps, double C, + np.ndarray[np.int32_t, ndim=1, mode='c'] weight_label, + np.ndarray[np.float64_t, ndim=1, mode='c'] weight, + np.ndarray[np.int32_t, ndim=1, mode='c'] label, + double bias): + + cdef np.ndarray[np.float64_t, ndim=2, mode='c'] dec_values + cdef parameter *param + cdef model *model + + param = set_parameter( + solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) + + model = set_model(param, coef_.data, coef_.shape, label.data, bias) + + n_class = label.shape[0] + if n_class <= 2: n_class = 1 + dec_values = np.empty((T.shape[0], n_class), dtype=np.float64) + + if copy_predict_values(T.data, model, T.shape, dec_values.data, n_class) < 0: + raise MemoryError("We've run out of of memory") + + ### FREE + free_parameter(param) + free_and_destroy_model(&model) + return dec_values + + + +def csr_decision_function_wrap( + int n_features, + np.ndarray[np.float64_t, ndim=1, mode='c'] T_values, + np.ndarray[np.int32_t, ndim=1, mode='c'] T_indices, + np.ndarray[np.int32_t, ndim=1, mode='c'] T_indptr, + np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_, + int solver_type, double eps, double C, + np.ndarray[np.int32_t, ndim=1, mode='c'] weight_label, + np.ndarray[np.float64_t, ndim=1, mode='c'] weight, + np.ndarray[np.int32_t, ndim=1, mode='c'] label, + double bias): + """ + Predict from model + + Test data given in CSR format + """ + + cdef np.ndarray[np.float64_t, ndim=2, mode='c'] dec_values + cdef parameter *param + cdef model *model + + param = set_parameter( + solver_type, eps, C, weight.shape[0], weight_label.data, weight.data) + + model = set_model(param, coef_.data, coef_.shape, label.data, bias) + + n_class = label.shape[0] + if n_class <= 2: n_class = 1 + + dec_values = np.empty((T_indptr.shape[0] - 1, n_class), dtype=np.float64) + + if csr_copy_predict_values( + n_features, T_values.shape, T_values.data, T_indices.shape, + T_indices.data, T_indptr.shape, T_indptr.data, model, + dec_values.data, n_class) < 0: + raise MemoryError("We've run out of of memory") + + ### FREE + free_parameter(param) + free_and_destroy_model(&model) + return dec_values + + +def predict_wrap( + np.ndarray[np.float64_t, ndim=2, mode='c'] T, + np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_, + int solver_type, double eps, double C, + np.ndarray[np.int32_t, ndim=1, mode='c'] weight_label, + np.ndarray[np.float64_t, ndim=1, mode='c'] weight, + np.ndarray[np.int32_t, ndim=1, mode='c'] label, + double bias): cdef np.ndarray[np.int32_t, ndim=1, mode='c'] dec_values cdef parameter *param @@ -180,14 +267,14 @@ def predict_wrap(np.ndarray[np.float64_t, ndim=2, mode='c'] T, free_parameter(param) free_and_destroy_model(&model) return dec_values - + def csr_predict_wrap( int n_features, np.ndarray[np.float64_t, ndim=1, mode='c'] T_values, np.ndarray[np.int32_t, ndim=1, mode='c'] T_indices, np.ndarray[np.int32_t, ndim=1, mode='c'] T_indptr, - np.ndarray[np.float64_t, ndim=2, mode='c'] coef_, + np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_, int solver_type, double eps, double C, np.ndarray[np.int32_t, ndim=1, mode='c'] weight_label, np.ndarray[np.float64_t, 
ndim=1, mode='c'] weight, @@ -223,7 +310,7 @@ def csr_predict_wrap( def predict_prob_wrap(np.ndarray[np.float64_t, ndim=2, mode='c'] T, - np.ndarray[np.float64_t, ndim=2, mode='c'] coef_, + np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_, int solver_type, double eps, double C, np.ndarray[np.int32_t, ndim=1, mode='c'] weight_label, np.ndarray[np.float64_t, ndim=1, mode='c'] weight, @@ -281,7 +368,7 @@ def csr_predict_prob( np.ndarray[np.float64_t, ndim=1, mode='c'] T_values, np.ndarray[np.int32_t, ndim=1, mode='c'] T_indices, np.ndarray[np.int32_t, ndim=1, mode='c'] T_indptr, - np.ndarray[np.float64_t, ndim=2, mode='c'] coef_, + np.ndarray[np.float64_t, ndim=2, mode='fortran'] coef_, int solver_type, double eps, double C, np.ndarray[np.int32_t, ndim=1, mode='c'] weight_label, np.ndarray[np.float64_t, ndim=1, mode='c'] weight, diff --git a/scikits/learn/svm/src/liblinear/liblinear_helper.c b/scikits/learn/svm/src/liblinear/liblinear_helper.c index f5f98bc1b23cb538f69436e12a3caaf5f850ba09..1be2a908d15f88ed4cf49730a81fd5f24233cea4 100644 --- a/scikits/learn/svm/src/liblinear/liblinear_helper.c +++ b/scikits/learn/svm/src/liblinear/liblinear_helper.c @@ -64,7 +64,7 @@ struct feature_node **dense_to_sparse (double *x, npy_intp *dims, double bias) /* - * Convert scipy.sparse.csr to libsvm's sparse data structure +c * Convert scipy.sparse.csr to libsvm's sparse data structure */ struct feature_node **csr_to_sparse (double *values, npy_intp *shape_indices, int *indices, npy_intp *shape_indptr, int *indptr, double bias, @@ -182,17 +182,19 @@ struct model * set_model(struct parameter *param, char *coef, npy_intp *dims, memcpy(model->w, coef, len_w * sizeof(double)); model->nr_feature = bias > 0 ? k - 1 : k; - model->nr_class = m; + model->param = *param; model->bias = bias; return model; } + void copy_w(char *data, struct model *model, int len) { - memcpy(data, model->w, len * sizeof(double)); + memcpy(data, model->w, len * sizeof(double)); + } double get_bias(struct model *model) @@ -255,6 +257,48 @@ int csr_copy_predict(npy_intp n_features, npy_intp *data_size, char *data, return 0; } +int copy_predict_values (char *predict, struct model *model_, + npy_intp *predict_dims, char *dec_values, int nr_class) +{ + npy_intp i; + struct feature_node **predict_nodes; + predict_nodes = dense_to_sparse((double *) predict, predict_dims, model_->bias); + if (predict_nodes == NULL) + return -1; + for(i=0; i<predict_dims[0]; ++i) { + predict_values(model_, predict_nodes[i], + ((double *) dec_values) + i*nr_class); + free(predict_nodes[i]); + } + + free(predict_nodes); + return 0; +} + +int csr_copy_predict_values(npy_intp n_features, npy_intp *data_size, + char *data, npy_intp *index_size, char + *index, npy_intp *indptr_shape, char + *intptr, struct model *model_, char + *dec_values, int nr_class) { + + int *t = (int *) dec_values; + struct feature_node **predict_nodes; + npy_intp i; + + predict_nodes = csr_to_sparse((double *) data, index_size, + (int *) index, indptr_shape, (int *) intptr, model_->bias, n_features); + + if (predict_nodes == NULL) + return -1; + for (i = 0; i < indptr_shape[0] - 1; ++i) { + predict_values(model_, predict_nodes[i], + ((double *) dec_values) + i*nr_class); + free(predict_nodes[i]); + } + free(predict_nodes); + return 0; +} + int copy_prob_predict(char *predict, struct model *model_, npy_intp *predict_dims, char *dec_values) diff --git a/scikits/learn/svm/src/liblinear/linear.cpp b/scikits/learn/svm/src/liblinear/linear.cpp index 
baf08d02e4ecc7a1b464275c17e132a5894e6beb..7319648ec44ca2f727bcc99805a8987bb5dc5acf 100644 --- a/scikits/learn/svm/src/liblinear/linear.cpp +++ b/scikits/learn/svm/src/liblinear/linear.cpp @@ -1,3 +1,10 @@ +/* + Modified 2011: + + - Make labels sorted in group_classes, Dan Yamins. + + */ + #include <math.h> #include <stdio.h> #include <stdlib.h> @@ -1689,12 +1696,11 @@ static void group_classes(const problem *prob, int *nr_class_ret, int **label_re int *label = Malloc(int,max_nr_class); int *count = Malloc(int,max_nr_class); int *data_label = Malloc(int,l); - int i; + int i,j, this_label, this_count; for(i=0;i<l;i++) { - int this_label = prob->y[i]; - int j; + this_label = (int)prob->y[i]; for(j=0;j<nr_class;j++) { if(this_label == label[j]) @@ -1718,6 +1724,38 @@ static void group_classes(const problem *prob, int *nr_class_ret, int **label_re } } + /* START MOD: Sort labels and apply to array count --dyamins */ + + for(j=1; j<nr_class; j++) + { + i = j-1; + this_label = label[j]; + this_count = count[j]; + while(i>=0 && label[i] > this_label) + { + label[i+1] = label[i]; + count[i+1] = count[i]; + i--; + } + label[i+1] = this_label; + count[i+1] = this_count; + } + + for (i=0; i <l; i++) + { + j = 0; + this_label = (int)prob->y[i]; + while(this_label != label[j]) + { + j++; + } + data_label[i] = j; + + } + + /* END MOD */ + + int *start = Malloc(int,nr_class); start[0] = 0; for(i=1;i<nr_class;i++) @@ -2009,7 +2047,9 @@ int predict_values(const struct model *model_, const struct feature_node *x, dou // the dimension of testing data may exceed that of training if(idx<=n) for(i=0;i<nr_w;i++) - dec_values[i] += w[(idx-1)*nr_w+i]*lx->value; + dec_values[i] += w[(idx-1)*nr_w+i]*lx->value; + + } if(nr_class==2) @@ -2018,7 +2058,7 @@ int predict_values(const struct model *model_, const struct feature_node *x, dou { int dec_max_idx = 0; for(i=1;i<nr_class;i++) - { + { if(dec_values[i] > dec_values[dec_max_idx]) dec_max_idx = i; } diff --git a/scikits/learn/svm/tests/test_sparse.py b/scikits/learn/svm/tests/test_sparse.py index c0c2f9b3df7b0ed4b6ec32cf9eea5c974bee78a1..e4579dbc2e1067ff10721a2bc0aa18fcebc9920a 100644 --- a/scikits/learn/svm/tests/test_sparse.py +++ b/scikits/learn/svm/tests/test_sparse.py @@ -6,6 +6,7 @@ from numpy.testing import assert_array_almost_equal, \ from nose.tools import assert_raises from scikits.learn.datasets.samples_generator import test_dataset_classif +from . 
import test_svm # test sample 1 X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]) @@ -22,6 +23,12 @@ true_result2 = [1, 2, 3] iris = datasets.load_iris() +# permute +perm = np.random.permutation(iris.target.size) +iris.data = iris.data[perm] +iris.target = iris.target[perm] +# sparsify +iris.data = scipy.sparse.csr_matrix(iris.data) def test_SVC(): @@ -35,10 +42,10 @@ def test_SVC(): assert scipy.sparse.issparse(sp_clf.support_vectors_) assert_array_almost_equal(clf.support_vectors_, sp_clf.support_vectors_.todense()) - assert scipy.sparse.issparse (sp_clf.dual_coef_) + assert scipy.sparse.issparse(sp_clf.dual_coef_) assert_array_almost_equal(clf.dual_coef_, sp_clf.dual_coef_.todense()) - assert scipy.sparse.issparse (sp_clf.coef_) + assert scipy.sparse.issparse(sp_clf.coef_) assert_array_almost_equal(clf.coef_, sp_clf.coef_.todense()) assert_array_almost_equal(clf.predict(T), sp_clf.predict(T)) @@ -55,30 +62,30 @@ def test_SVC_iris(): """Test the sparse SVC with the iris dataset""" for k in ('linear', 'rbf'): sp_clf = svm.sparse.SVC(kernel=k).fit(iris.data, iris.target) - clf = svm.SVC(kernel=k).fit(iris.data, iris.target) + clf = svm.SVC(kernel=k).fit(iris.data.todense(), iris.target) assert_array_almost_equal(clf.support_vectors_, sp_clf.support_vectors_.todense()) assert_array_almost_equal(clf.dual_coef_, sp_clf.dual_coef_.todense()) - assert_array_almost_equal(clf.predict(iris.data), sp_clf.predict(iris.data)) + assert_array_almost_equal( + clf.predict(iris.data.todense()), sp_clf.predict(iris.data)) if k == 'linear': assert_array_almost_equal(clf.coef_, sp_clf.coef_.todense()) - def test_error(): """ Test that it gives proper exception on deficient input """ # impossible value of C - assert_raises (ValueError, svm.SVC(C=-1).fit, X, Y) + assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y) # impossible value of nu clf = svm.sparse.NuSVC(nu=0.0) assert_raises(ValueError, clf.fit, X, Y) - Y2 = Y[:-1] # wrong dimensions for labels + Y2 = Y[:-1] # wrong dimensions for labels assert_raises(ValueError, clf.fit, X, Y2) - assert_raises(AssertionError, svm.SVC, X, Y2) + assert_raises(ValueError, svm.SVC, X, Y2) clf = svm.sparse.SVC() clf.fit(X, Y) @@ -94,43 +101,50 @@ def test_LinearSVC(): assert sp_clf.fit_intercept - assert_array_almost_equal (clf.raw_coef_, sp_clf.raw_coef_, decimal=4) + assert_array_almost_equal(clf.raw_coef_, sp_clf.raw_coef_, decimal=4) - assert_array_almost_equal (clf.predict(X), sp_clf.predict(X)) + assert_array_almost_equal(clf.predict(X), sp_clf.predict(X)) clf.fit(X2, Y2) sp_clf.fit(X2, Y2) - assert_array_almost_equal (clf.raw_coef_, sp_clf.raw_coef_, decimal=4) + assert_array_almost_equal(clf.raw_coef_, sp_clf.raw_coef_, decimal=4) def test_LinearSVC_iris(): """Test the sparse LinearSVC with the iris dataset""" - iris = datasets.load_iris() + sp_clf = svm.sparse.LinearSVC().fit(iris.data, iris.target) - clf = svm.LinearSVC().fit(iris.data, iris.target) + clf = svm.LinearSVC().fit(iris.data.todense(), iris.target) assert_array_almost_equal(clf.label_, sp_clf.label_) - assert_equal (clf.fit_intercept, sp_clf.fit_intercept) + assert_equal(clf.fit_intercept, sp_clf.fit_intercept) assert_array_almost_equal(clf.raw_coef_, sp_clf.raw_coef_, decimal=1) - assert_array_almost_equal(clf.predict(iris.data), sp_clf.predict(iris.data)) + assert_array_almost_equal( + clf.predict(iris.data.todense()), sp_clf.predict(iris.data)) + + # check decision_function + pred = np.argmax(sp_clf.decision_function(iris.data), 1) + assert_array_almost_equal(pred, 
clf.predict(iris.data.todense())) + def test_weight(): """ Test class weights """ - X_, y_ = test_dataset_classif(n_samples=200, n_features=100, param=[5,1], + X_, y_ = test_dataset_classif(n_samples=200, n_features=100, param=[5, 1], seed=0) X_ = scipy.sparse.csr_matrix(X_) for clf in (linear_model.sparse.LogisticRegression(), svm.sparse.LinearSVC(), svm.sparse.SVC()): - clf.fit(X_[:180], y_[:180], class_weight={0:5}) + clf.fit(X_[:180], y_[:180], class_weight={0: 5}) y_pred = clf.predict(X_[180:]) assert np.sum(y_pred == y_[180:]) >= 11 + def test_sample_weights(): """ Test weights on individual samples @@ -139,11 +153,18 @@ def test_sample_weights(): clf.fit(X, Y) assert_array_equal(clf.predict(X[2]), [1.]) - sample_weight=[.1]*3 + [10]*3 + sample_weight = [.1] * 3 + [10] * 3 clf.fit(X, Y, sample_weight=sample_weight) assert_array_equal(clf.predict(X[2]), [2.]) +def test_sparse_liblinear_intercept_handling(): + """ + Test that sparse liblinear honours intercept_scaling param + """ + test_svm.test_dense_liblinear_intercept_handling(svm.sparse.LinearSVC) + + if __name__ == '__main__': import nose nose.runmodule() diff --git a/scikits/learn/svm/tests/test_svm.py b/scikits/learn/svm/tests/test_svm.py index d971b7f66926801ac96dfee39d841cd9906e35ef..18bbab4818d611f0e8f8c4cc38ecc8f85ec5ac78 100644 --- a/scikits/learn/svm/tests/test_svm.py +++ b/scikits/learn/svm/tests/test_svm.py @@ -24,13 +24,14 @@ perm = np.random.permutation(iris.target.size) iris.data = iris.data[perm] iris.target = iris.target[perm] + def test_libsvm_parameters(): """ Test parameters on classes that make use of libsvm. """ clf = svm.SVC(kernel='linear').fit(X, Y) - assert_array_equal(clf.dual_coef_, [[ 0.25, -.25]]) + assert_array_equal(clf.dual_coef_, [[0.25, -.25]]) assert_array_equal(clf.support_, [1, 3]) assert_array_equal(clf.support_vectors_, (X[1], X[3])) assert_array_equal(clf.intercept_, [0.]) @@ -77,7 +78,7 @@ def test_precomputed(): KT = np.zeros_like(KT) for i in range(len(T)): for j in clf.support_: - KT[i,j] = np.dot(T[i], X[j]) + KT[i, j] = np.dot(T[i], X[j]) pred = clf.predict(KT) assert_array_equal(pred, true_result) @@ -113,7 +114,7 @@ def test_precomputed(): K = np.zeros_like(K) for i in range(len(iris.data)): for j in clf.support_: - K[i,j] = np.dot(iris.data[i], iris.data[j]) + K[i, j] = np.dot(iris.data[i], iris.data[j]) pred = clf.predict(K) assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2) @@ -122,6 +123,20 @@ def test_precomputed(): clf.fit(iris.data, iris.target) assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2) +def test_sanity_checks_fit(): + clf = svm.SVC(kernel='precomputed') + assert_raises(ValueError, clf.fit, X, Y) + +def test_sanity_checks_predict(): + Xt = np.array(X).T + + clf = svm.SVC(kernel='precomputed') + clf.fit(np.dot(X, Xt), Y) + assert_raises(ValueError, clf.predict, X) + + clf = svm.SVC() + clf.fit(X, Y) + assert_raises(ValueError, clf.predict, Xt) def test_SVR(): """ @@ -149,8 +164,8 @@ def test_SVR(): decimal=3) assert_raises(NotImplementedError, lambda: clf.coef_) assert_array_almost_equal(clf.support_vectors_, X) - assert_array_almost_equal(clf.intercept_, [ 1.49997261]) - assert_array_almost_equal(pred, [ 1.10001274, 1.86682485, 1.73300377]) + assert_array_almost_equal(clf.intercept_, [1.49997261]) + assert_array_almost_equal(pred, [1.10001274, 1.86682485, 1.73300377]) def test_oneclass(): @@ -163,7 +178,7 @@ def test_oneclass(): assert_array_almost_equal(pred, [1, -1, -1]) assert_array_almost_equal(clf.intercept_, [-1.351], decimal=3) - 
assert_array_almost_equal(clf.dual_coef_, [[ 0.750, 0.749, 0.749, 0.750]], + assert_array_almost_equal(clf.dual_coef_, [[0.750, 0.749, 0.749, 0.750]], decimal=3) assert_raises(NotImplementedError, lambda: clf.coef_) @@ -201,8 +216,8 @@ def test_probability(): T = [[0, 0, 0, 0], [2, 2, 2, 2]] assert_array_almost_equal(clf.predict_proba(T), - [[ 0.993, 0.003, 0.002], - [ 0.740, 0.223 , 0.035]], + [[0.993, 0.003, 0.002], + [0.740, 0.223, 0.035]], decimal=2) assert_almost_equal(clf.predict_proba(T), @@ -228,7 +243,6 @@ def test_decision_function(): data = iris.data[0] sv_start = np.r_[0, np.cumsum(clf.n_support_)] - n_features = iris.data.shape[1] n_class = 3 kvalue = np.dot(data, clf.support_vectors_.T) @@ -236,11 +250,11 @@ def test_decision_function(): dec = np.empty(n_class * (n_class - 1) / 2) p = 0 for i in range(n_class): - for j in range(i+1, n_class): - coef1 = clf.dual_coef_[j-1] + for j in range(i + 1, n_class): + coef1 = clf.dual_coef_[j - 1] coef2 = clf.dual_coef_[i] - idx1 = slice(sv_start[i], sv_start[i+1]) - idx2 = slice(sv_start[j], sv_start[j+1]) + idx1 = slice(sv_start[i], sv_start[i + 1]) + idx2 = slice(sv_start[j], sv_start[j + 1]) s = np.dot(coef1[idx1], kvalue[idx1]) + \ np.dot(coef2[idx2], kvalue[idx2]) + \ clf.intercept_[p] @@ -260,10 +274,10 @@ def test_weight(): # so all predicted values belong to class 2 assert_array_almost_equal(clf.predict(X), [2] * 6) - X_, y_ = test_dataset_classif(n_samples=200, n_features=100, param=[5,1], + X_, y_ = test_dataset_classif(n_samples=200, n_features=100, param=[5, 1], seed=0) for clf in (linear_model.LogisticRegression(), svm.LinearSVC(), svm.SVC()): - clf.fit(X_[:180], y_[:180], class_weight={0:5}) + clf.fit(X_[: 180], y_[: 180], class_weight={0: 5}) y_pred = clf.predict(X_[180:]) assert np.sum(y_pred == y_[180:]) >= 11 @@ -276,7 +290,7 @@ def test_sample_weights(): clf.fit(X, Y) assert_array_equal(clf.predict(X[2]), [1.]) - sample_weight=[.1]*3 + [10]*3 + sample_weight = [.1] * 3 + [10] * 3 clf.fit(X, Y, sample_weight=sample_weight) assert_array_equal(clf.predict(X[2]), [2.]) @@ -298,7 +312,7 @@ def test_auto_weight(): assert_array_almost_equal(clf.coef_, clf_auto.coef_, 6) # build an very very imbalanced dataset out of iris data - X_0 = X[y == 0,:] + X_0 = X[y == 0, :] y_0 = y[y == 0] X_imbalanced = np.vstack([X] + [X_0] * 10) @@ -319,15 +333,15 @@ def test_error(): Test that it gives proper exception on deficient input """ # impossible value of C - assert_raises (ValueError, svm.SVC(C=-1).fit, X, Y) + assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y) # impossible value of nu clf = svm.NuSVC(nu=0.0) assert_raises(ValueError, clf.fit, X, Y) - Y2 = Y[:-1] # wrong dimensions for labels + Y2 = Y[:-1] # wrong dimensions for labels assert_raises(ValueError, clf.fit, X, Y2) - assert_raises(AssertionError, svm.SVC, X, Y2) + assert_raises(ValueError, svm.SVC, X, Y2) # Test with arrays that are non-contiguous. 
Xf = np.asfortranarray(X) @@ -346,7 +360,7 @@ def test_LinearSVC(): assert clf.fit_intercept assert_array_equal(clf.predict(T), true_result) - assert_array_almost_equal(clf.intercept_, [0], decimal=5) + assert_array_almost_equal(clf.intercept_, [0], decimal=3) # the same with l1 penalty clf = svm.LinearSVC(penalty='l1', dual=False).fit(X, Y) @@ -360,14 +374,86 @@ def test_LinearSVC(): clf = svm.LinearSVC(penalty='l2', loss='l1', dual=True).fit(X, Y) assert_array_equal(clf.predict(T), true_result) + # test also decision function + dec = clf.decision_function(T).ravel() + res = (dec > 0).astype(np.int) + 1 + assert_array_equal(res, true_result) + def test_LinearSVC_iris(): """ Test that LinearSVC gives plausible predictions on the iris dataset """ clf = svm.LinearSVC().fit(iris.data, iris.target) - assert np.mean(clf.predict(iris.data) == iris.target) > 0.95 + assert np.mean(clf.predict(iris.data) == iris.target) > 0.8 + + dec = clf.decision_function(iris.data) + pred = np.argmax(dec, 1) + assert_array_equal(pred, clf.predict(iris.data)) + +def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC): + """ + Test that dense liblinear honours intercept_scaling param + """ + X = [[2, 1], + [3, 1], + [1, 3], + [2, 3]] + y = [0, 0, 1, 1] + clf = classifier(fit_intercept=True, penalty='l1', loss='l2', + dual=False, C=1, eps=1e-7) + assert clf.intercept_scaling == 1, clf.intercept_scaling + assert clf.fit_intercept + + # when intercept_scaling is low the intercept value is highly "penalized" + # by regularization + clf.intercept_scaling = 1 + clf.fit(X, y) + assert_almost_equal(clf.intercept_, 0, decimal=5) + + # when intercept_scaling is sufficiently high, the intercept value + # is not affected by regularization + clf.intercept_scaling = 100 + clf.fit(X, y) + intercept1 = clf.intercept_ + assert intercept1 < -1 + + # when intercept_scaling is sufficiently high, the intercept value + # doesn't depend on intercept_scaling value + clf.intercept_scaling = 1000 + clf.fit(X, y) + intercept2 = clf.intercept_ + assert_array_almost_equal(intercept1, intercept2, decimal=2) + + +def test_liblinear_predict(): + """ + Test liblinear predict + + Sanity check, test that predict implemented in python + returns the same as the one in libliblinear + + """ + # multi-class case + clf = svm.LinearSVC().fit(iris.data, iris.target) + weights = clf.coef_.T + bias = clf.intercept_ + H = np.dot(iris.data, weights) + bias + assert_array_equal(clf.predict(iris.data), H.argmax(axis=1)) + + # binary-class case + X = [[2, 1], + [3, 1], + [1, 3], + [2, 3]] + y = [0, 0, 1, 1] + + clf = svm.LinearSVC().fit(X, y) + weights = np.ravel(clf.coef_) + bias = clf.intercept_ + H = np.dot(X, weights) + bias + assert_array_equal(clf.predict(X), (H > 0).astype(int)) if __name__ == '__main__': import nose diff --git a/scikits/learn/tests/test_cross_val.py b/scikits/learn/tests/test_cross_val.py index 871fc76d368a367979126e6aa52344ff03795ab7..acf0a0426772276e5ea129c382652e42d1cd367c 100644 --- a/scikits/learn/tests/test_cross_val.py +++ b/scikits/learn/tests/test_cross_val.py @@ -4,14 +4,22 @@ import numpy as np import nose +from nose.tools import assert_true from ..base import BaseEstimator +from ..datasets import load_iris +from ..metrics import zero_one_score +from ..cross_val import StratifiedKFold +from ..svm import SVC from .. 
import cross_val +from ..cross_val import permutation_test_score + class MockClassifier(BaseEstimator): """Dummy classifier to test the cross-validation """ + def __init__(self, a=0): self.a = a @@ -29,12 +37,12 @@ class MockClassifier(BaseEstimator): X = np.ones((10, 2)) y = np.arange(10)/2 -################################################################################ +############################################################################## # Tests def test_kfold(): # Check that errors are raise if there is not enough samples - nose.tools.assert_raises(AssertionError, cross_val.KFold, 3, 3) + nose.tools.assert_raises(AssertionError, cross_val.KFold, 3, 4) y = [0, 0, 1, 1, 2] nose.tools.assert_raises(AssertionError, cross_val.StratifiedKFold, y, 3) @@ -45,5 +53,48 @@ def test_cross_val_score(): clf.a = a # Smoke test score = cross_val.cross_val_score(clf, X, y) - np.testing.assert_array_equal(score, clf.score(X, y)) + np.testing.assert_array_equal(score, clf.score(X, y)) + + +def test_permutation_score(): + iris = load_iris() + X = iris.data + y = iris.target + svm = SVC(kernel='linear') + cv = StratifiedKFold(y, 2) + + score, scores, pvalue = permutation_test_score(svm, X, y, + zero_one_score, cv) + assert_true(score > 0.9) + np.testing.assert_almost_equal(pvalue, 0.0, 1) + + score_label, _, pvalue_label = permutation_test_score(svm, X, y, + zero_one_score, + cv, labels=np.ones(y.size), + rng=0) + assert_true(score_label == score) + assert_true(pvalue_label == pvalue) + + # set random y + y = np.mod(np.arange(len(y)), 3) + + score, scores, pvalue = permutation_test_score(svm, X, y, + zero_one_score, cv) + assert_true(score < 0.5) + assert_true(pvalue > 0.4) + +def test_cross_val_generator_with_indices(): + X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) + y = np.array([1, 1, 2, 2]) + labels = np.array([1, 2, 3, 4]) + loo = cross_val.LeaveOneOut(4, indices=True) + lpo = cross_val.LeavePOut(4, 2, indices=True) + kf = cross_val.KFold(4, 2, indices=True) + skf = cross_val.StratifiedKFold(y, 2, indices=True) + lolo = cross_val.LeaveOneLabelOut(labels, indices=True) + lopo = cross_val.LeavePLabelOut(labels, 2, indices=True) + for cv in [loo, lpo, kf, skf, lolo, lopo]: + for train, test in cv: + X_train, X_test = X[train], X[test] + y_train, y_test = y[train], y[test] diff --git a/scikits/learn/tests/test_hmm.py b/scikits/learn/tests/test_hmm.py index 41b93f0c83138f3c8134c31045c33ca3e7cd417d..1f138107db6c32b8b031b37169ece67c16911d41 100644 --- a/scikits/learn/tests/test_hmm.py +++ b/scikits/learn/tests/test_hmm.py @@ -330,6 +330,16 @@ class GaussianHMMTester(GaussianHMMParams): % (self.cvtype, params, trainll, np.diff(trainll))) self.assertTrue(np.all(np.diff(trainll) > -0.5)) + def test_fit_works_on_sequences_of_different_length(self): + obs = [np.random.rand(3, self.n_features), + np.random.rand(4, self.n_features), + np.random.rand(5, self.n_features)] + + h = hmm.GaussianHMM(self.n_states, self.cvtype) + # This shouldn't raise + # ValueError: setting an array element with a sequence. 
+ h.fit(obs) + def test_fit_with_priors(self, params='stmc', n_iter=10, verbose=False): startprob_prior = 10 * self.startprob + 2.0 @@ -612,6 +622,16 @@ class TestGMMHMM(GMMHMMParams, SeedRandomNumberGeneratorTestCase): np.diff(trainll)) self.assertTrue(np.all(np.diff(trainll) > -0.5)) + def test_fit_works_on_sequences_of_different_length(self): + obs = [np.random.rand(3, self.n_features), + np.random.rand(4, self.n_features), + np.random.rand(5, self.n_features)] + + h = hmm.GMMHMM(self.n_states, cvtype=self.cvtype) + # This shouldn't raise + # ValueError: setting an array element with a sequence. + h.fit(obs) + class TestGMMHMMWithSphericalCovars(TestGMMHMM): cvtype = 'spherical' diff --git a/scikits/learn/tests/test_init.py b/scikits/learn/tests/test_init.py new file mode 100644 index 0000000000000000000000000000000000000000..34101c833b1c4bf83479d5b7601f380659063f36 --- /dev/null +++ b/scikits/learn/tests/test_init.py @@ -0,0 +1,25 @@ +#emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +#ex: set sts=4 ts=4 sw=4 noet: + +# Basic unittests to test functioning of module's top-level + +__author__ = 'Yaroslav Halchenko' +__license__ = 'BSD' + + +from nose.tools import assert_true, assert_false, assert_equal, \ + assert_raises + +try: + from scikits.learn import * + _top_import_error = None +except Exception, e: + _top_import_error = e + +def test_import_skl(): + """Test either above import has failed for some reason + + "import *" is discouraged outside of the module level, hence we + rely on setting up the variable above + """ + assert_equal(_top_import_error, None) diff --git a/scikits/learn/tests/test_mixture.py b/scikits/learn/tests/test_mixture.py index 6a02eee9e391caed7b3769a61df0a195698528be..6135452c13014b8a2e6afccaa9de0e1f33840ecf 100644 --- a/scikits/learn/tests/test_mixture.py +++ b/scikits/learn/tests/test_mixture.py @@ -62,22 +62,26 @@ def test_sample_gaussian(): mu = np.random.randint(10) * np.random.rand(n_features) cv = (np.random.rand(n_features) + 1.0) ** 2 - samples = mixture.sample_gaussian(mu, cv, cvtype='diag', n=n_samples) + samples = mixture.sample_gaussian( + mu, cv, cvtype='diag', n_samples=n_samples) assert np.allclose(samples.mean(axis), mu, atol=0.3) assert np.allclose(samples.var(axis), cv, atol=0.5) # the same for spherical covariances cv = (np.random.rand() + 1.0) ** 2 - samples = mixture.sample_gaussian(mu, cv, cvtype='spherical', n=n_samples) + samples = mixture.sample_gaussian( + mu, cv, cvtype='spherical', n_samples=n_samples) assert np.allclose(samples.mean(axis), mu, atol=0.3) - assert np.allclose(samples.var(axis), np.repeat(cv, n_features), atol=0.5) + assert np.allclose( + samples.var(axis), np.repeat(cv, n_features), atol=0.5) # and for full covariances A = np.random.randn(n_features, n_features) cv = np.dot(A.T, A) + np.eye(n_features) - samples = mixture.sample_gaussian(mu, cv, cvtype='full', n=n_samples) + samples = mixture.sample_gaussian( + mu, cv, cvtype='full', n_samples=n_samples) assert np.allclose(samples.mean(axis), mu, atol=0.3) assert np.allclose(np.cov(samples), cv, atol=1.) @@ -219,7 +223,7 @@ class GMMTester(): g._covars = 20 * self.covars[self.cvtype] # Create a training set by sampling from the predefined distribution. 
- train_obs = g.rvs(n=100) + train_obs = g.rvs(n_samples=100) g.fit(train_obs, n_iter=0, init_params=params) diff --git a/scikits/learn/tests/test_neighbors.py b/scikits/learn/tests/test_neighbors.py index 42836da6e0c2d7742a5fd784e99e041358290f03..79577a3ea651a84193c4c23cb74fdc92e0fb2215 100644 --- a/scikits/learn/tests/test_neighbors.py +++ b/scikits/learn/tests/test_neighbors.py @@ -1,8 +1,13 @@ +import numpy as np +from numpy.testing import assert_array_almost_equal, assert_array_equal -from numpy.testing import assert_array_equal, assert_array_almost_equal, \ - assert_equal +from scikits.learn import neighbors, datasets -from .. import neighbors +# load and shuffle iris dataset +iris = datasets.load_iris() +perm = np.random.permutation(iris.target.size) +iris.data = iris.data[perm] +iris.target = iris.target[perm] def test_neighbors_1D(): @@ -13,73 +18,109 @@ def test_neighbors_1D(): """ # some constants n = 6 - n_2 = n/2 X = [[x] for x in range(0, n)] - Y = [0]*n_2 + [1]*n_2 - - # n_neighbors = 1 - knn = neighbors.Neighbors(n_neighbors=1) - knn.fit(X, Y) - test = [[i + 0.01] for i in range(0, n_2)] + \ - [[i - 0.01] for i in range(n_2, n)] - assert_array_equal(knn.predict(test), [0, 0, 0, 1, 1, 1]) - # same as before, but using predict() instead of Neighbors object - - # n_neighbors = 3 - knn = neighbors.Neighbors(n_neighbors=3) - knn.fit(X, Y) - assert_array_equal(knn.predict([[i +0.01] for i in range(0, n_2)]), - [0 for i in range(n_2)]) - assert_array_equal(knn.predict([[i-0.01] for i in range(n_2, n)]), - [1 for i in range(n_2)]) - - -def test_neighbors_2D(): + Y = [0]*(n/2) + [1]*(n/2) + + for s in ('auto', 'ball_tree', 'brute', 'inplace'): + # n_neighbors = 1 + knn = neighbors.NeighborsClassifier(n_neighbors=1, algorithm=s) + knn.fit(X, Y) + test = [[i + 0.01] for i in range(0, n/2)] + \ + [[i - 0.01] for i in range(n/2, n)] + assert_array_equal(knn.predict(test), [0]*3 + [1]*3) + + # n_neighbors = 2 + knn = neighbors.NeighborsClassifier(n_neighbors=2, algorithm=s) + knn.fit(X, Y) + assert_array_equal(knn.predict(test), [0]*4 + [1]*2) + + # n_neighbors = 3 + knn = neighbors.NeighborsClassifier(n_neighbors=3, algorithm=s) + knn.fit(X, Y) + assert_array_equal(knn.predict([[i +0.01] for i in range(0, n/2)]), + [0 for i in range(n/2)]) + assert_array_equal(knn.predict([[i-0.01] for i in range(n/2, n)]), + [1 for i in range(n/2)]) + + +def test_neighbors_iris(): """ - Nearest Neighbor in the plane. + Sanity checks on the iris dataset Puts three points of each label in the plane and performs a nearest neighbor query on points near the decision boundary. 
""" - X = ( - (0, 1), (1, 1), (1, 0), # label 0 - (-1, 0), (-1, -1), (0, -1)) # label 1 - n_2 = len(X)/2 - Y = [0]*n_2 + [1]*n_2 - knn = neighbors.Neighbors() - knn.fit(X, Y) - prediction = knn.predict([[0, .1], [0, -.1], [.1, 0], [-.1, 0]]) - assert_array_equal(prediction, [0, 1, 0, 1]) + for s in ('auto', 'ball_tree', 'brute', 'inplace'): + clf = neighbors.NeighborsClassifier() + clf.fit(iris.data, iris.target, n_neighbors=1, algorithm=s) + assert_array_equal(clf.predict(iris.data), iris.target) + clf.fit(iris.data, iris.target, n_neighbors=9, algorithm=s) + assert np.mean(clf.predict(iris.data)== iris.target) > 0.95 -def test_neighbors_barycenter(): - """ - NeighborsBarycenter for regression using k-NN - """ - X = [[0], [1], [2], [3]] - y = [0, 0, 1, 1] - neigh = neighbors.NeighborsBarycenter(n_neighbors=2) - neigh.fit(X, y) - assert_equal(neigh.predict([[1.5]]), 0.5) + for m in ('barycenter', 'mean'): + rgs = neighbors.NeighborsRegressor() + rgs.fit(iris.data, iris.target, mode=m, algorithm=s) + assert np.mean( + rgs.predict(iris.data).round() == iris.target) > 0.95 def test_kneighbors_graph(): """ Test kneighbors_graph to build the k-Nearest Neighbor graph. """ - X = [[0], [1.01], [2]] - A = neighbors.kneighbors_graph(X, 2, weight=None) - assert_array_equal(A.todense(), - [[1, 1, 0], [0, 1, 1], [0, 1, 1]]) - A = neighbors.kneighbors_graph(X, 2, weight="distance") - assert_array_almost_equal(A.todense(), - [[0, 1.01, 0], [0, 0, 0.99], [0, 0.99, 0]], 4) - A = neighbors.kneighbors_graph(X, 2, weight="barycenter") - assert_array_almost_equal(A.todense(), - [[0.99, 0, 0], [0, 0.99, 0], [0, 0, 0.99]], 2) - - # Also check corner cases - A = neighbors.kneighbors_graph(X, 3, weight=None) - A = neighbors.kneighbors_graph(X, 3, weight="distance") - A = neighbors.kneighbors_graph(X, 3, weight="barycenter") + X = [[0, 1], [1.01, 1.], [2, 0]] + + # n_neighbors = 1 + A = neighbors.kneighbors_graph(X, 1, mode='connectivity') + assert_array_equal(A.todense(), np.eye(A.shape[0])) + + A = neighbors.kneighbors_graph(X, 1, mode='distance') + assert_array_almost_equal( + A.todense(), + [[ 0. , 1.01 , 0. ], + [ 1.01 , 0. , 0. ], + [ 0. , 1.40716026, 0. ]]) + + A = neighbors.kneighbors_graph(X, 1, mode='barycenter') + assert_array_almost_equal( + A.todense(), + [[ 0., 1., 0.], + [ 1., 0., 0.], + [ 0., 1., 0.]]) + + # n_neigbors = 2 + A = neighbors.kneighbors_graph(X, 2, mode='connectivity') + assert_array_equal( + A.todense(), + [[ 1., 1., 0.], + [ 1., 1., 0.], + [ 0., 1., 1.]]) + + A = neighbors.kneighbors_graph(X, 2, mode='distance') + assert_array_almost_equal( + A.todense(), + [[ 0. , 1.01 , 2.23606798], + [ 1.01 , 0. , 1.40716026], + [ 2.23606798, 1.40716026, 0. ]]) + + A = neighbors.kneighbors_graph(X, 2, mode='barycenter') + # check that columns sum to one + assert_array_almost_equal(np.sum(A.todense(), 1), np.ones((3, 1))) + assert_array_almost_equal( + A.todense(), + [[ 0. , 1.5049745 , -0.5049745 ], + [ 0.596 , 0. , 0.404 ], + [-0.98019802, 1.98019802, 0. 
]]) + + # n_neighbors = 3 + A = neighbors.kneighbors_graph(X, 3, mode='connectivity') + assert_array_almost_equal( + A.todense(), + [[1, 1, 1], [1, 1, 1], [1, 1, 1]]) + + +if __name__ == '__main__': + import nose + nose.runmodule() diff --git a/scikits/learn/tests/test_pca.py b/scikits/learn/tests/test_pca.py index 713e66746e165bf83a775f06bbecd4b95947159e..ef09cb7b0fdc69de32e6f284a30b4211b8f9bba9 100644 --- a/scikits/learn/tests/test_pca.py +++ b/scikits/learn/tests/test_pca.py @@ -69,17 +69,42 @@ def test_whitening(): def test_pca_check_projection(): """Test that the projection of data is correct""" + np.random.seed(0) n, p = 100, 3 X = randn(n, p) * .1 X[:10] += np.array([3, 4, 5]) Xt = 0.1 * randn(1, p) + np.array([3, 4, 5]) Yt = PCA(n_components=2).fit(X).transform(Xt) - Yt /= np.sqrt((Yt**2).sum()) + Yt /= np.sqrt((Yt ** 2).sum()) np.testing.assert_almost_equal(np.abs(Yt[0][0]), 1., 1) +def test_pca_inverse(): + """Test that the projection of data can be inverted""" + np.random.seed(0) + n, p = 50, 3 + X = randn(n, p) # spherical data + X[:, 1] *= .00001 # make middle component relatively small + X += [5, 4, 3] # make a large mean + + # same check that we can find the original data from the transformed + # signal (since the data is almost of rank n_components) + pca = PCA(n_components=2).fit(X) + Y = pca.transform(X) + Y_inverse = pca.inverse_transform(Y) + assert_almost_equal(X, Y_inverse, decimal=3) + + # same as above with whitening (approximate reconstruction) + pca = PCA(n_components=2, whiten=True) + pca.fit(X) + Y = pca.transform(X) + Y_inverse = pca.inverse_transform(Y) + relative_max_delta = (np.abs(X - Y_inverse) / np.abs(X).mean()).max() + assert_almost_equal(relative_max_delta, 0.11, decimal=2) + + def test_randomized_pca_check_projection(): """Test that the projection by RandomizedPCA on dense data is correct""" n, p = 100, 3 @@ -93,8 +118,32 @@ def test_randomized_pca_check_projection(): np.testing.assert_almost_equal(np.abs(Yt[0][0]), 1., 1) +def test_randomized_pca_inverse(): + """Test that RandomizedPCA is inversible on dense data""" + np.random.seed(0) + n, p = 50, 3 + X = randn(n, p) # spherical data + X[:, 1] *= .00001 # make middle component relatively small + X += [5, 4, 3] # make a large mean + + # same check that we can find the original data from the transformed signal + # (since the data is almost of rank n_components) + pca = RandomizedPCA(n_components=2).fit(X) + Y = pca.transform(X) + Y_inverse = pca.inverse_transform(Y) + assert_almost_equal(X, Y_inverse, decimal=2) + + # same as above with whitening (approximate reconstruction) + pca = RandomizedPCA(n_components=2, whiten=True).fit(X) + Y = pca.transform(X) + Y_inverse = pca.inverse_transform(Y) + relative_max_delta = (np.abs(X - Y_inverse) / np.abs(X).mean()).max() + assert_almost_equal(relative_max_delta, 0.11, decimal=2) + + def test_sparse_randomized_pca_check_projection(): """Test that the projection by RandomizedPCA on sparse data is correct""" + np.random.seed(0) n, p = 100, 3 X = randn(n, p) * .1 X[:10] += np.array([3, 4, 5]) @@ -108,14 +157,41 @@ def test_sparse_randomized_pca_check_projection(): np.testing.assert_almost_equal(np.abs(Yt[0][0]), 1., 1) +def test_sparse_randomized_pca_inverse(): + """Test that RandomizedPCA is inversible on sparse data""" + np.random.seed(0) + n, p = 50, 3 + X = randn(n, p) # spherical data + X[:, 1] *= .00001 # make middle component relatively small + # no large means because the sparse version of randomized pca does not do + # centering to avoid breaking the 
sparsity + X = csr_matrix(X) + + # same check that we can find the original data from the transformed signal + # (since the data is almost of rank n_components) + pca = RandomizedPCA(n_components=2).fit(X) + Y = pca.transform(X) + Y_inverse = pca.inverse_transform(Y) + assert_almost_equal(X.todense(), Y_inverse, decimal=2) + + # same as above with whitening (approximate reconstruction) + pca = RandomizedPCA(n_components=2, whiten=True).fit(X) + Y = pca.transform(X) + Y_inverse = pca.inverse_transform(Y) + relative_max_delta = (np.abs(X.todense() - Y_inverse) + / np.abs(X).mean()).max() + # XXX: this does not seam to work as expected: + assert_almost_equal(relative_max_delta, 0.91, decimal=2) + + def test_pca_dim(): """Check automated dimensionality setting""" + np.random.seed(0) n, p = 100, 5 X = randn(n, p) * .1 X[:10] += np.array([3, 4, 5, 1, 2]) - pca = PCA(n_components='mle') - pca.fit(X) - assert_true(pca.n_components == 1) + pca = PCA(n_components='mle').fit(X) + assert_equal(pca.n_components, 1) def test_infer_dim_1(): @@ -164,6 +240,15 @@ def test_infer_dim_3(): spect = pca.explained_variance_ assert_true(_infer_dimension_(spect, n, p) > 2) +def test_infer_dim_by_explained_variance(): + X = iris.data + pca = PCA(n_components=0.95) + pca.fit(X) + assert_equal(pca.n_components, 2) + + pca = PCA(n_components=0.01) + pca.fit(X) + assert_equal(pca.n_components, 1) def test_probabilistic_pca_1(): """Test that probabilistic PCA yields a reasonable score""" diff --git a/scikits/learn/tests/test_pipeline.py b/scikits/learn/tests/test_pipeline.py index a51a3ae6ce92ac4aeda612f8b4ab22430ca5e105..3f53a680e8808cfa7e08b30945ffc48c4438ef31 100644 --- a/scikits/learn/tests/test_pipeline.py +++ b/scikits/learn/tests/test_pipeline.py @@ -93,7 +93,3 @@ def test_pipeline_methods(): pipe.predict_proba(X) pipe.predict_log_proba(X) pipe.score(X, y) - support_ = pipe.get_support() - assert np.sum(support_) == 2 - coef_ = pipe.coef_ - assert np.size(coef_) == 4 diff --git a/scikits/learn/utils/__init__.py b/scikits/learn/utils/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..056d8eed898b124f9249a9b65b76338b2feecb30 100644 --- a/scikits/learn/utils/__init__.py +++ b/scikits/learn/utils/__init__.py @@ -0,0 +1,11 @@ + +import numpy as np +import scipy.sparse as sp + +def safe_asanyarray(X, dtype=None, order=None): + if sp.issparse(X): + return X + #return type(X)(X, dtype) + else: + return np.asanyarray(X, dtype, order) + diff --git a/scikits/learn/utils/extmath.py b/scikits/learn/utils/extmath.py index 495ceb271c9e1f61477ee0bd66ab2ad3c0cd1ce9..d25846a81a5206c9cd6ef2a59916e92f20cdb74b 100644 --- a/scikits/learn/utils/extmath.py +++ b/scikits/learn/utils/extmath.py @@ -87,11 +87,14 @@ def density(w, **kwargs): return d -def safe_sparse_dot(a, b): +def safe_sparse_dot(a, b, dense_output=False): """Dot product that handle the sparse matrix case correctly""" from scipy import sparse if sparse.issparse(a) or sparse.issparse(b): - return a * b + ret = a * b + if dense_output and hasattr(ret, "toarray"): + ret = ret.toarray() + return ret else: return np.dot(a,b) diff --git a/scikits/learn/utils/src/cholesky_delete.c b/scikits/learn/utils/src/cholesky_delete.c index 7117ba0a5c14de7a04537a15f42bc174e7fbd994..66d587b819a04987d0522c3a867cb65b80af1627 100644 --- a/scikits/learn/utils/src/cholesky_delete.c +++ b/scikits/learn/utils/src/cholesky_delete.c @@ -18,27 +18,27 @@ int double_cholesky_delete (int m, int n, double *L, int go_out) { double c, s; /* delete row go_out */ - double * _L = L + (go_out * m); + 
double *L1 = L + (go_out * m); int i; for (i = go_out; i < n - 1; ++i) { - cblas_dcopy (i + 2, _L + m , 1, _L, 1); - _L += m; + cblas_dcopy (i + 2, L1 + m , 1, L1, 1); + L1 += m; } - _L = L + (go_out * m); + L1 = L + (go_out * m); for (i=go_out; i < n - 1; ++i) { - cblas_drotg (_L + i, _L + i + 1, &c, &s); - if (_L[i] < 0) { + cblas_drotg (L1 + i, L1 + i + 1, &c, &s); + if (L1[i] < 0) { /* Diagonals cannot be negative */ - _L[i] = copysign(_L[i], 1.0); + L1[i] = copysign(L1[i], 1.0); c = -c; s = -s; } - _L[i+1] = 0.; /* just for cleanup */ - _L += m; + L1[i+1] = 0.; /* just for cleanup */ + L1 += m; - cblas_drot (n - (i + 2), _L + i, m, _L + i + 1, + cblas_drot (n - (i + 2), L1 + i, m, L1 + i + 1, m, c, s); } diff --git a/setup.py b/setup.py index 386db4493c067c52c5fd93ec6140db2be5c03aaf..9d2a4039d13337fc3c618e3d20f6fe26588846ba 100644 --- a/setup.py +++ b/setup.py @@ -9,13 +9,13 @@ import os DISTNAME = 'scikits.learn' DESCRIPTION = 'A set of python modules for machine learning and data mining' -LONG_DESCRIPTION = descr +LONG_DESCRIPTION = open('README.rst').read() MAINTAINER = 'Fabian Pedregosa' MAINTAINER_EMAIL = 'fabian.pedregosa@inria.fr' URL = 'http://scikit-learn.sourceforge.net' LICENSE = 'new BSD' DOWNLOAD_URL = 'http://sourceforge.net/projects/scikit-learn/files/' -VERSION = '0.6' +VERSION = '0.7.1' import setuptools # we are using a setuptools namespace from numpy.distutils.core import setup diff --git a/site.cfg b/site.cfg index 461af4710ab1adb37e008294079ec4b963bec05a..9055c7c25da37687867f239237784691fe743b1f 100644 --- a/site.cfg +++ b/site.cfg @@ -1,10 +1,4 @@ -# Uncomment to link against system-wide libsvm -# [libsvm] -# libraries=svm -# library_dirs=/usr/lib -# include_dirs=/usr/include/libsvm-2.0/libsvm - # Uncomment to link against the MKL library on windows # [mkl] # include_dirs=C:\Program Files\Intel\MKL\10.2.5.035\include