From 71953c4934863730cb600b9918e0fc9dcc5f156d Mon Sep 17 00:00:00 2001
From: Fabian Pedregosa <fabian.pedregosa@inria.fr>
Date: Mon, 19 Sep 2011 18:43:19 +0200
Subject: [PATCH] Safer assert_all_finite.

Check each element for Inf/NaN only when X.sum() is not finite. This
avoids the risk of false positives because of sum overflow while remaining
performant for the usual case where all values are finite.

This idea is taken from:

   https://github.com/tecki/numpy/commit/57167f7c02b02bfb49204233eaee0f289ad37c92
---
 sklearn/utils/__init__.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 37661851f5..96658c0e9a 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -2,18 +2,16 @@ import numpy as np
 import scipy.sparse as sp
 import warnings
 
-_FLOAT_CODES = np.typecodes['AllFloat']
-
-
 def assert_all_finite(X):
     """Throw a ValueError if X contains NaN or infinity.
     Input MUST be an np.ndarray instance or a scipy.sparse matrix."""
 
-    # O(n) time, O(1) solution. XXX: will fail if the sum over X is
-    # *extremely* large. A proper solution would be a C-level loop to check
-    # each element.
-    if X.dtype.char in _FLOAT_CODES and not np.isfinite(X.sum()):
-        raise ValueError("array contains NaN or infinity")
+    # First try an O(n) time, O(1) space solution for the common case that
+    # everything is finite; fall back to O(n) space np.isfinite to prevent
+    # false positives from overflow in the sum.
+    if X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum()) \
+      and not np.isfinite(X).all():
+            raise ValueError("array contains NaN or infinity")
 
 
 def safe_asanyarray(X, dtype=None, order=None):
-- 
GitLab