diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 3272f18b0a2f4961f944014df099e30a9b7f9d20..6ef69221846dde6c272d1da16831595d25784a97 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -172,7 +172,8 @@ class BaseForest(BaseEnsemble, SelectorMixin): oob_score=False, n_jobs=1, random_state=None, - verbose=0): + verbose=0, + pre_dispatch="2*n_jobs"): super(BaseForest, self).__init__( base_estimator=base_estimator, n_estimators=n_estimators, @@ -182,6 +183,7 @@ class BaseForest(BaseEnsemble, SelectorMixin): self.compute_importances = compute_importances self.oob_score = oob_score self.n_jobs = n_jobs + self.pre_dispatch = pre_dispatch self.random_state = check_random_state(random_state) self.feature_importances_ = None @@ -237,7 +239,8 @@ class BaseForest(BaseEnsemble, SelectorMixin): n_jobs, n_trees, _ = _partition_trees(self) # Parallel loop - all_trees = Parallel(n_jobs=n_jobs, verbose=self.verbose)( + all_trees = Parallel(n_jobs=n_jobs, verbose=self.verbose, + pre_dispatch=self.pre_dispatch)( delayed(_parallel_build_trees)( n_trees[i], self, @@ -302,7 +305,8 @@ class ForestClassifier(BaseForest, ClassifierMixin): oob_score=False, n_jobs=1, random_state=None, - verbose=0): + verbose=0, + pre_dispatch="2*n_jobs"): super(ForestClassifier, self).__init__( base_estimator, n_estimators=n_estimators, @@ -312,7 +316,8 @@ class ForestClassifier(BaseForest, ClassifierMixin): oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, - verbose=verbose) + verbose=verbose, + pre_dispatch=pre_dispatch) def predict(self, X): """Predict class for X. 
@@ -402,7 +407,8 @@ class ForestRegressor(BaseForest, RegressorMixin): oob_score=False, n_jobs=1, random_state=None, - verbose=0): + verbose=0, + pre_dispatch="2*n_jobs"): super(ForestRegressor, self).__init__( base_estimator, n_estimators=n_estimators, @@ -412,7 +418,8 @@ class ForestRegressor(BaseForest, RegressorMixin): oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, - verbose=verbose) + verbose=verbose, + pre_dispatch=pre_dispatch) def predict(self, X): """Predict regression target for X. @@ -527,6 +534,23 @@ class RandomForestClassifier(ForestClassifier): verbose : int, optional (default=0) Controlls the verbosity of the tree building process. + pre_dispatch : int or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. 
Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + Attributes ---------- `feature_importances_` : array, shape = [n_features] @@ -561,7 +585,8 @@ class RandomForestClassifier(ForestClassifier): oob_score=False, n_jobs=1, random_state=None, - verbose=0): + verbose=0, + pre_dispatch="2*n_jobs"): super(RandomForestClassifier, self).__init__( base_estimator=DecisionTreeClassifier(), n_estimators=n_estimators, @@ -573,7 +598,8 @@ class RandomForestClassifier(ForestClassifier): oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, - verbose=verbose) + verbose=verbose, + pre_dispatch=pre_dispatch) self.criterion = criterion self.max_depth = max_depth @@ -662,6 +688,23 @@ class RandomForestRegressor(ForestRegressor): verbose : int, optional (default=0) Controlls the verbosity of the tree building process. + pre_dispatch : int or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. 
Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + Attributes ---------- `feature_importances_` : array of shape = [n_features] @@ -696,7 +739,8 @@ class RandomForestRegressor(ForestRegressor): oob_score=False, n_jobs=1, random_state=None, - verbose=0): + verbose=0, + pre_dispatch="2*n_jobs"): super(RandomForestRegressor, self).__init__( base_estimator=DecisionTreeRegressor(), n_estimators=n_estimators, @@ -708,7 +752,8 @@ class RandomForestRegressor(ForestRegressor): oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, - verbose=verbose) + verbose=verbose, + pre_dispatch=pre_dispatch) self.criterion = criterion self.max_depth = max_depth @@ -798,6 +843,23 @@ class ExtraTreesClassifier(ForestClassifier): verbose : int, optional (default=0) Controlls the verbosity of the tree building process. + pre_dispatch : int or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. 
Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + Attributes ---------- `feature_importances_` : array of shape = [n_features] @@ -834,7 +896,8 @@ class ExtraTreesClassifier(ForestClassifier): oob_score=False, n_jobs=1, random_state=None, - verbose=0): + verbose=0, + pre_dispatch="2*n_jobs"): super(ExtraTreesClassifier, self).__init__( base_estimator=ExtraTreeClassifier(), n_estimators=n_estimators, @@ -846,7 +909,8 @@ class ExtraTreesClassifier(ForestClassifier): oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, - verbose=verbose) + verbose=verbose, + pre_dispatch=pre_dispatch) self.criterion = criterion self.max_depth = max_depth @@ -937,6 +1001,23 @@ class ExtraTreesRegressor(ForestRegressor): verbose : int, optional (default=0) Controlls the verbosity of the tree building process. + pre_dispatch : int or string, optional + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. 
Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A string, giving an expression as a function of n_jobs, + as in '2*n_jobs' + Attributes ---------- `feature_importances_` : array of shape = [n_features] @@ -971,7 +1052,8 @@ class ExtraTreesRegressor(ForestRegressor): oob_score=False, n_jobs=1, random_state=None, - verbose=0): + verbose=0, + pre_dispatch="2*n_jobs"): super(ExtraTreesRegressor, self).__init__( base_estimator=ExtraTreeRegressor(), n_estimators=n_estimators, @@ -983,7 +1065,8 @@ class ExtraTreesRegressor(ForestRegressor): oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, - verbose=verbose) + verbose=verbose, + pre_dispatch=pre_dispatch) self.criterion = criterion self.max_depth = max_depth