diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py
index 89477fea0a9d0466322ff89b0061bf319530f1d7..e18658b7025a0dfe184f6f4950143beff62b4358 100644
--- a/sklearn/neighbors/base.py
+++ b/sklearn/neighbors/base.py
@@ -41,6 +41,23 @@ def _check_weights(weights):
                      "'distance', or a callable function")
 
 
+def _dist_to_weight(dist):
+    """Return inverse-distance weights, mapping zero distances to inf.
+
+    Replaces a plain ``1. / dist``, which emits a divide-by-zero warning
+    when any distance is exactly zero.
+
+    ``dist`` may have any number of dimensions; the returned float array
+    has the same shape.
+    """
+    dist = np.asarray(dist, dtype=np.float64)
+    weights = np.empty(dist.shape, dtype=np.float64)
+    weights.fill(np.inf)
+    nonzero = dist != 0.0
+    weights[nonzero] = 1.0 / dist[nonzero]
+    return weights
+
+
 def _get_weights(dist, weights):
     """Get the weights from an array of distances and a parameter ``weights``
 
@@ -53,7 +70,7 @@ def _get_weights(dist, weights):
         if weights in (None, 'uniform'):
             return None
         elif weights == 'distance':
-            return [1. / d for d in dist]
+            return [_dist_to_weight(d) for d in dist]
         elif callable(weights):
             return [weights(d) for d in dist]
         else:
@@ -63,7 +80,7 @@ def _get_weights(dist, weights):
         if weights in (None, 'uniform'):
             return None
         elif weights == 'distance':
-            return 1. / dist
+            return _dist_to_weight(dist)
         elif callable(weights):
             return weights(dist)
         else:
diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index 38460ac73d4cc6ed61e4db5662dd86f9b29d42ec..cac97b09349b790a29c8e26783b0a907488fffc2 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -25,6 +25,20 @@ ALGORITHMS = ('ball_tree', 'brute', 'kd_tree', 'auto')
 P = (1, 2, 3, 4, np.inf)
 
 
+def _weight_func(dist):
+    """Inverse-square weights, mapping zero distances to inf.
+
+    Stands in for ``lambda d: d ** -2``, which divides by zero when a
+    query point coincides with a training sample.
+    """
+    dist = np.asarray(dist, dtype=np.float64)
+    weights = np.empty(dist.shape, dtype=np.float64)
+    weights.fill(np.inf)
+    nonzero = dist != 0.0
+    weights[nonzero] = dist[nonzero] ** -2
+    return weights
+
+
 def test_warn_on_equidistant(n_samples=100, n_features=3, k=3):
     """test the production of a warning if equidistant points are discarded"""
     X = np.random.random(size=(n_samples, n_features))
@@ -159,7 +173,7 @@ def test_kneighbors_classifier(n_samples=40,
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = ((X ** 2).sum(axis=1) < .25).astype(np.int)
 
-    weight_func = lambda d: d ** -2
+    weight_func = _weight_func
 
     for algorithm in ALGORITHMS:
         for weights in ['uniform', 'distance', weight_func]:
@@ -182,7 +196,7 @@ def test_radius_neighbors_classifier(n_samples=40,
     X = 2 * rng.rand(n_samples, n_features) - 1
     y = ((X ** 2).sum(axis=1) < .25).astype(np.int)
 
-    weight_func = lambda d: d ** -2
+    weight_func = _weight_func
 
     for algorithm in ALGORITHMS:
         for weights in ['uniform', 'distance', weight_func]:
@@ -206,7 +220,7 @@ def test_radius_neighbors_classifier_when_no_neighbors():
     z1 = np.array([[1.01, 1.01], [2.01, 2.01]])  # no outliers
     z2 = np.array([[1.01, 1.01], [1.4, 1.4]])  # one outlier
 
-    weight_func = lambda d: d ** -2
+    weight_func = _weight_func
 
     for algorithm in ALGORITHMS:
         for weights in ['uniform', 'distance', weight_func]:
@@ -231,7 +245,7 @@ def test_radius_neighbors_classifier_outlier_labeling():
     correct_labels1 = np.array([1, 2])
     correct_labels2 = np.array([1, -1])
 
-    weight_func = lambda d: d ** -2
+    weight_func = _weight_func
 
     for algorithm in ALGORITHMS:
         for weights in ['uniform', 'distance', weight_func]:
@@ -244,6 +258,27 @@ def test_radius_neighbors_classifier_outlier_labeling():
             assert_array_equal(correct_labels2, clf.predict(z2))
 
 
+def test_radius_neighbors_classifier_zero_distance():
+    """ Test radius-based classifier, when distance to a sample is zero. """
+
+    X = np.array([[1.0, 1.0], [2.0, 2.0]])
+    y = np.array([1, 2])
+    radius = 0.1
+
+    z1 = np.array([[1.01, 1.01], [2.0, 2.0]])
+    correct_labels1 = np.array([1, 2])
+
+    weight_func = _weight_func
+
+    for algorithm in ALGORITHMS:
+        for weights in ['uniform', 'distance', weight_func]:
+            clf = neighbors.RadiusNeighborsClassifier(radius=radius,
+                                                      weights=weights,
+                                                      algorithm=algorithm)
+            clf.fit(X, y)
+            assert_array_equal(correct_labels1, clf.predict(z1))
+
+
 def test_kneighbors_classifier_sparse(n_samples=40,
                                       n_features=5,
                                       n_test_pts=10,
@@ -281,7 +316,7 @@ def test_kneighbors_regressor(n_samples=40,
 
     y_target = y[:n_test_pts]
 
-    weight_func = lambda d: d ** -2
+    weight_func = _weight_func
 
     for algorithm in ALGORITHMS:
         for weights in ['uniform', 'distance', weight_func]:
@@ -307,7 +342,7 @@ def test_radius_neighbors_regressor(n_samples=40,
 
     y_target = y[:n_test_pts]
 
-    weight_func = lambda d: d ** -2
+    weight_func = _weight_func
 
     for algorithm in ALGORITHMS:
         for weights in ['uniform', 'distance', weight_func]: