From fc5da45c9e94bc18a55cd592a70bf1c21da75852 Mon Sep 17 00:00:00 2001 From: Olivier Grisel <olivier.grisel@ensta.org> Date: Sun, 28 Nov 2010 16:27:47 +0100 Subject: [PATCH] new benchmark: comparing SVD implementations --- benchmarks/plot_bench_svd.py | 87 ++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 benchmarks/plot_bench_svd.py diff --git a/benchmarks/plot_bench_svd.py b/benchmarks/plot_bench_svd.py new file mode 100644 index 0000000000..bf65bde79b --- /dev/null +++ b/benchmarks/plot_bench_svd.py @@ -0,0 +1,87 @@ +"""Benchmarks of Singular Values Decomposition (Exact and Approximate)""" +import gc +from time import time +import numpy as np +from collections import defaultdict + +from scipy.linalg import svd +from scikits.learn.utils.extmath import fast_svd + + +def make_data(n_samples, n_features, rank=None): + """Build a fixed rank matrix with logspaced singular values""" + np.random.seed(42) + if rank is None: + rank = min(n_samples, n_samples) / 10 + 1 + + s = np.identity(rank) * np.logspace(10, 10e-5, rank) + X = np.dot(np.random.randn(n_samples, rank), s) + X = np.dot(X, np.random.randn(rank, n_features)) + return X + + +def compute_bench(data_gen, samples_range, features_range, q=3): + + it = 0 + + results = defaultdict(lambda: []) + + max_it = len(samples_range) * len(features_range) + for n_samples in samples_range: + for n_features in features_range: + it += 1 + print '====================' + print 'Iteration %03d of %03d' % (it, max_it) + print '====================' + X = make_data(n_samples, n_features) + rank = min(n_samples, n_samples) / 10 + 1 + + gc.collect() + print "benching scipy svd: " + tstart = time() + svd(X, full_matrices=False) + results['scipy svd'].append(time() - tstart) + + gc.collect() + print "benching scikit-learn fast_svd: q=0" + tstart = time() + fast_svd(X, rank, q=0) + results['scikit-learn fast_svd (q=0)'].append(time() - tstart) + + gc.collect() + print "benching scikit-learn fast_svd: q=%d " % q + tstart = time() + fast_svd(X, rank, q=q) + results['scikit-learn fast_svd (q=%d)' % q].append(time() - tstart) + + return results + + +if __name__ == '__main__': + from scikits.learn.linear_model import Lasso, LassoLARS + from mpl_toolkits.mplot3d import axes3d + import matplotlib.pyplot as plt + + samples_range = np.linspace(2, 1000, 4) + features_range = np.linspace(2, 1000, 4) + results = compute_bench(make_data, samples_range, features_range) + + fig = plt.figure() + ax = fig.gca(projection='3d') + for c, (label, timings) in zip('rbg', sorted(results.iteritems())): + X, Y = np.meshgrid(samples_range, features_range) + Z = np.asarray(timings).reshape(samples_range.shape[0], + features_range.shape[0]) + # plot the actual surface + ax.plot_surface(X, Y, Z, rstride=8, cstride=8, alpha=0.3, + color=c) + # dummy point plot to stick the legend to since surface plot do not + # support legends (yet?) + ax.plot([1], [1], [1], color=c, label=label) + + ax.set_xlabel('n_samples') + ax.set_ylabel('n_features') + ax.set_zlabel('time (s)') + ax.legend() + plt.show() + -- GitLab