From 10c146f1640cf66cf4dbcf79552a2ecf5d50ea55 Mon Sep 17 00:00:00 2001 From: Gael varoquaux <gael.varoquaux@normalesup.org> Date: Mon, 19 Sep 2011 01:32:54 +0200 Subject: [PATCH] Cosmit: make in-place modifications explicit --- .../grid_search_text_feature_extraction.py | 1 - sklearn/cluster/k_means_.py | 21 ++++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/examples/grid_search_text_feature_extraction.py b/examples/grid_search_text_feature_extraction.py index c89381afdb..d17c9f250c 100644 --- a/examples/grid_search_text_feature_extraction.py +++ b/examples/grid_search_text_feature_extraction.py @@ -49,7 +49,6 @@ print __doc__ from pprint import pprint from time import time -import os import logging from sklearn.datasets import fetch_20newsgroups diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index d709657cab..896da3eabc 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -575,11 +575,11 @@ def _mini_batch_step_dense(X, batch_slice, centers, counts, x_squared_norms): The row slice of the mini batch. centers: array, shape (k, n_features) - The cluster centers + The cluster centers. This array is MODIFIED IN PLACE counts: array, shape (k, ) The vector in which we keep track of the numbers of elements in a - cluster + cluster. This array is MODIFIED IN PLACE x_squared_norms: array, shape (n_samples,) Squared euclidean norm of each data point. @@ -599,6 +599,7 @@ def _mini_batch_step_dense(X, batch_slice, centers, counts, x_squared_norms): centers[q] = (1. / (counts[q] + c)) * ( counts[q] * centers[q] + np.sum(X[center_mask], axis=0)) counts[q] += c + return counts, centers def _mini_batch_step_sparse(X, batch_slice, centers, counts, x_squared_norms): @@ -614,11 +615,11 @@ def _mini_batch_step_sparse(X, batch_slice, centers, counts, x_squared_norms): The row slice of the mini batch. centers: array, shape (k, n_features) - The cluster centers + The cluster centers. This array is MODIFIED IN PLACE counts: array, shape (k, ) The vector in which we keep track of the numbers of elements in a - cluster + cluster. This array is MODIFIED IN PLACE x_squared_norms: array, shape (n_samples,) The squared norms of each sample in `X`. @@ -628,6 +629,7 @@ def _mini_batch_step_sparse(X, batch_slice, centers, counts, x_squared_norms): _k_means._mini_batch_update_sparse(X.data, X.indices, X.indptr, batch_slice, centers, counts, cache) + return counts, centers class MiniBatchKMeans(KMeans): @@ -742,8 +744,10 @@ class MiniBatchKMeans(KMeans): for i, batch_slice in izip(n_iterations, cycle(batch_slices)): old_centers = self.cluster_centers_.copy() - _mini_batch_step(X_shuffled, batch_slice, self.cluster_centers_, - self.counts, x_squared_norms=x_squared_norms) + self.counts, self.cluster_centers_ = _mini_batch_step( + X_shuffled, batch_slice, + self.cluster_centers_, self.counts, + x_squared_norms=x_squared_norms) if np.sum((old_centers - self.cluster_centers_) ** 2) < tol: if self.verbose: @@ -798,8 +802,9 @@ class MiniBatchKMeans(KMeans): else: _mini_batch_step = _mini_batch_step_dense - _mini_batch_step(X, batch_slice, self.cluster_centers_, self.counts, - x_squared_norms=x_squared_norms) + self.counts, self.cluster_centers_ = _mini_batch_step(X, + batch_slice, self.cluster_centers_, self.counts, + x_squared_norms=x_squared_norms) self.inertia_, self.labels_ = _calculate_labels_inertia( X, self.cluster_centers_, x_squared_norms) -- GitLab