From dc956b4d0250a67459af145934a6583c71526820 Mon Sep 17 00:00:00 2001 From: Fabian Pedregosa <fabian.pedregosa@inria.fr> Date: Thu, 16 Dec 2010 17:32:42 +0100 Subject: [PATCH] FIX: second argument in euclidean_distances. This method failed when second argument was not given. Slightly changed the API to always take a second argument without speed penalization and add optional argument axis. Added test. --- scikits/learn/metrics/pairwise.py | 43 +++++++++++++------- scikits/learn/metrics/tests/test_pairwise.py | 1 + 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/scikits/learn/metrics/pairwise.py b/scikits/learn/metrics/pairwise.py index ef32cae797..4dc2c05e76 100644 --- a/scikits/learn/metrics/pairwise.py +++ b/scikits/learn/metrics/pairwise.py @@ -9,36 +9,49 @@ sets of points. import numpy as np -def euclidian_distances(X, Y=None): +def euclidian_distances(X, Y): """ Considering the rows of X (and Y=X) as vectors, compute the - distance matrix between each pair of vector + distance matrix between each pair of vectors. Parameters ---------- - X, array of shape (n_samples_1, n_features) + X: array of shape (n_samples_1, n_features) - Y, array of shape (n_samples_2, n_features), default None - if Y is None, then Y=X is used instead + Y: array of shape (n_samples_2, n_features) Returns ------- - distances, array of shape (n_samples_1, n_samples_2) - """ + distances: array of shape (n_samples_1, n_samples_2) + + Examples + -------- + >>> X = [[0, 1], [1, 1]] + >>> # distance between rows of X + >>> euclidian_distances(X, X) + array([[ 0., 1.], + [ 1., 0.]]) + >>> # get distance to origin + >>> euclidian_distances(X, [[0, 0]]) + array([[ 1. 
], + [ 1.41421356]]) + """ + # shortcut in the common case euclidean_distances(X, X) + compute_Y = X is not Y + X = np.asanyarray(X) Y = np.asanyarray(Y) - if Y is None: - Y = X + if X.shape[1] != Y.shape[1]: - raise ValueError, "incompatible dimension for X and Y matrices" + raise ValueError("Incompatible dimension for X and Y matrices") XX = np.sum(X * X, axis=1)[:, np.newaxis] - if Y is None: - YY = XX.T - else: + if compute_Y: YY = np.sum(Y * Y, axis=1)[np.newaxis, :] + else: + YY = XX.T + distances = XX + YY # Using broadcasting distances -= 2 * np.dot(X, Y.T) distances = np.maximum(distances, 0) - distances = np.sqrt(distances) - return distances + return np.sqrt(distances) diff --git a/scikits/learn/metrics/tests/test_pairwise.py b/scikits/learn/metrics/tests/test_pairwise.py index 1d8b1dc54e..26b1c200b5 100644 --- a/scikits/learn/metrics/tests/test_pairwise.py +++ b/scikits/learn/metrics/tests/test_pairwise.py @@ -9,3 +9,4 @@ def test_euclidian_distances(): Y = [[1], [2]] D = euclidian_distances(X, Y) assert_array_almost_equal(D, [[1., 2.]]) + -- GitLab