Skip to content
Snippets Groups Projects
Commit 545f47c7 authored by Alexandre Gramfort
Browse files

FIX : example of dense vs sparse Lasso on dense and sparse data

parent bb2e0663
Branches
Tags
No related merge requests found
......@@ -4,9 +4,8 @@ Lasso on dense and sparse data
==============================
We show that glm.Lasso and glm.sparse.Lasso
provide the same results.
XXX : At the end of the day it should also lead to a speed improvement
provide the same results and that in the case of
sparse data glm.sparse.Lasso improves the speed.
"""
......@@ -20,10 +19,10 @@ from scikits.learn.glm import Lasso as DenseLasso
###############################################################################
# The two Lasso implementation on Dense data
# The two Lasso implementations on Dense data
print "--- Dense matrices"
n_samples, n_features = 100, 10000
n_samples, n_features = 200, 10000
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
......@@ -44,27 +43,26 @@ print "Distance between coefficients : %s" % linalg.norm(sparse_lasso.coef_
- dense_lasso.coef_)
###############################################################################
# The two Lasso implementation on Sparse data
# The two Lasso implementations on Sparse data
print "--- Sparse matrices"
Xs = sparse.coo_matrix(X)
mask = Xs.data > 2 # Sparsify data matrix
col = Xs.col[mask]
row = Xs.row[mask]
Xs = X.copy()
Xs[Xs < 2.5] = 0.0
Xs = sparse.coo_matrix(Xs)
Xs = Xs.tocsc()
print "Matrix density : %s %%" % (mask.sum() / float(X.size) * 100)
print "Matrix density : %s %%" % (Xs.nnz / float(X.size) * 100)
alpha = 0.1
sparse_lasso = SparseLasso(alpha=alpha, fit_intercept=False)
dense_lasso = DenseLasso(alpha=alpha, fit_intercept=False)
t0 = time()
sparse_lasso.fit(Xs, y, maxit=1000, tol=0.0)
sparse_lasso.fit(Xs, y, maxit=1000)
print "Sparse Lasso done in %fs" % (time() - t0)
t0 = time()
dense_lasso.fit(Xs.todense(), y, maxit=1000, tol=0.0)
dense_lasso.fit(Xs.todense(), y, maxit=1000)
print "Dense Lasso done in %fs" % (time() - t0)
print "Distance between coefficients : %s" % linalg.norm(sparse_lasso.coef_
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment