diff --git a/scikits/learn/glm/benchmarks/bench_lars.py b/scikits/learn/glm/benchmarks/bench_lars.py
deleted file mode 100644
index bb5e83d5c5ee1484a966813250d5cb338b753c22..0000000000000000000000000000000000000000
--- a/scikits/learn/glm/benchmarks/bench_lars.py
+++ /dev/null
@@ -1,22 +0,0 @@
-"""
-Benchmark for the LARS algorithm.
-
-Work in progress
-"""
-
-from datetime import datetime
-import numpy as np
-from scikits.learn import glm
-
-n, m = 100, 50000
-
-X = np.random.randn(n, m)
-y = np.random.randn(n)
-
-if __name__ == '__main__':
-    print "Computing regularization path using the LARS ..."
-    start = datetime.now()
-    alphas, active, path = glm.lars_path(X, y, method='lasso')
-    print "This took ", datetime.now() - start
-
-
diff --git a/scikits/learn/svm/base.py b/scikits/learn/svm/base.py
index 707b0ff62f64dd14b4c02cd8f5b936cdaa97ec21..0cc36bafb42832efc78966dec5152f10f514e408 100644
--- a/scikits/learn/svm/base.py
+++ b/scikits/learn/svm/base.py
@@ -12,7 +12,7 @@ class BaseLib(BaseEstimator):
             self.weight_label = np.asarray(uy, dtype=np.int32, order='C')
             self.weight = np.array([1.0 / np.sum(y==i) for i in uy],
                                    dtype=np.float64, order='C')
-            self.weight *= y.shape[0] / np.sum(self.weight)
+            self.weight *= uy.shape[0] / np.sum(self.weight)
         else:
             self.weight = np.asarray(class_weight.values(),
                                      dtype=np.float64, order='C')
@@ -77,7 +77,7 @@ class BaseLibSVM(BaseLib):
         X : array-like, shape = [n_samples, n_features]
             Training vector, where n_samples is the number of samples and
             n_features is the number of features.
-        y : array, shape = [n_samples]
+        y : array-like, shape = [n_samples]
             Target values (integers in classification, real numbers in
             regression)
         class_weight : dict, {class_label : weight} or "auto"
@@ -311,10 +311,10 @@ class BaseLibLinear(BaseLib):
         Parameters
         ----------
-        X : array-like, shape = [nsamples, nfeatures]
-            Training vector, where nsamples in the number of samples and
-            nfeatures is the number of features.
-        y : array, shape = [nsamples]
+        X : array-like, shape = [n_samples, n_features]
+            Training vector, where n_samples is the number of samples and
+            n_features is the number of features.
+        y : array-like, shape = [n_samples]
             Target vector relative to X
         class_weight : dict , {class_label : weight}
             Weights associated with classes. If not given, all classes
             are supposed to have weight one.
diff --git a/scikits/learn/svm/tests/test_svm.py b/scikits/learn/svm/tests/test_svm.py
index dea35aad3232b8b1c12984b8db1a4bb949353567..f29d76395b051527195ad9dad06654352a2638a9 100644
--- a/scikits/learn/svm/tests/test_svm.py
+++ b/scikits/learn/svm/tests/test_svm.py
@@ -256,12 +256,16 @@ def test_auto_weight():
     # compute reference metrics on iris dataset that is quite balanced by
     # default
     X, y = iris.data, iris.target
-    clf = svm.SVC().fit(X, y)
-    assert_almost_equal(metrics.f1_score(y, clf.predict(X)), 0.94, 2)
+    clf = svm.SVC(kernel="linear").fit(X, y)
+    assert_almost_equal(metrics.f1_score(y, clf.predict(X)), 0.99, 2)
 
     # make the same prediction using automated class_weight
-    clf = svm.SVC().fit(X, y, class_weight="auto")
-    assert_almost_equal(metrics.f1_score(y, clf.predict(X)), 0.99, 2)
+    clf_auto = svm.SVC(kernel="linear").fit(X, y, class_weight="auto")
+    assert_almost_equal(metrics.f1_score(y, clf_auto.predict(X)), 0.99, 2)
+
+    # Make sure that in the balanced case it does not change anything
+    # to use "auto"
+    assert_array_almost_equal(clf.coef_, clf_auto.coef_, 6)
 
     # build an very very imbalanced dataset out of iris data
     X_0 = X[y == 0,:]
@@ -277,7 +281,7 @@ def test_auto_weight():
     # fit a model with auto class_weight enabled
     clf = svm.SVC().fit(X_imbalanced, y_imbalanced, class_weight="auto")
     y_pred = clf.predict(X)
-    assert_almost_equal(metrics.f1_score(y, y_pred), 0.99, 2)
+    assert_almost_equal(metrics.f1_score(y, y_pred), 0.92, 2)
 
 
 def test_error():
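
The one-line change in svm/base.py is the substance of this patch: with class_weight="auto", each class gets a weight proportional to the inverse of its count, and the rescaling should make the weights sum to the number of classes (uy.shape[0]) rather than the number of samples (y.shape[0]), so that a perfectly balanced problem ends up with a weight of exactly 1.0 per class. That is what the new assert_array_almost_equal(clf.coef_, clf_auto.coef_, 6) check in test_svm.py verifies. The sketch below is not part of the patch; it reproduces the computation standalone with plain NumPy, and the helper name auto_class_weight is made up for illustration.

import numpy as np

def auto_class_weight(y):
    """Return (classes, weights) with weights proportional to 1 / class count."""
    uy = np.unique(y)
    weight = np.array([1.0 / np.sum(y == c) for c in uy], dtype=np.float64)
    # Rescale so the weights sum to the number of classes (uy.shape[0]);
    # a perfectly balanced y therefore gets a weight of exactly 1.0 per class.
    weight *= uy.shape[0] / np.sum(weight)
    return uy, weight

if __name__ == '__main__':
    # Balanced case: every class gets weight 1.0, so "auto" changes nothing.
    print(auto_class_weight(np.array([0, 0, 1, 1, 2, 2])))   # weights [1.0, 1.0, 1.0]
    # Imbalanced case: the rare class gets a proportionally larger weight.
    print(auto_class_weight(np.array([0, 0, 0, 0, 1])))      # weights [0.4, 1.6]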