diff --git a/doc/faq.rst b/doc/faq.rst
index d5d15a1ed024325d096a4a77807e2e490e58991a..16101bc5c9ba7ed767350c7b4c9de56c11677828 100644
--- a/doc/faq.rst
+++ b/doc/faq.rst
@@ -127,6 +127,7 @@ together with scikit-learn tools. You can implement your favorite algorithm in
 a scikit-learn compatible way, upload it to github and let us know. We will
 list it under :ref:`related_projects`.
 
+.. _selectiveness:
 
 Why are you so selective on what algorithms you include in scikit-learn?
 ------------------------------------------------------------------------
@@ -313,7 +314,7 @@ not close your pull request or discontinue your work solely because of this
 reason.
 
 How do I set a ``random_state`` for an entire execution?
-----------------------------------------------------
+---------------------------------------------------------
 
 For testing and replicability, it is often important to have the entire execution
 controlled by a single seed for the pseudo-random number generator used in
diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 9f9e7ac19f87eaf528468cfeaaa336fbadfc5619..63e16932cf9af9316507128aeafed0277a4761e4 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -1101,7 +1101,7 @@ Here is a small example of usage of this function:::
 
 .. topic:: Example:
 
-  * See :ref:`sphx_glr_calibration_plot_calibration.py`
+  * See :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py`
     for an example of Brier score loss usage to perform probability
     calibration of classifiers.
 
diff --git a/examples/ensemble/plot_random_forest_regression_multioutput.py b/examples/ensemble/plot_random_forest_regression_multioutput.py
index 5849833fef4bfe566c669e60345cf5180fbdb107..2b795ab3a92b2279a46d1c1139fa5e34f78aaf24 100644
--- a/examples/ensemble/plot_random_forest_regression_multioutput.py
+++ b/examples/ensemble/plot_random_forest_regression_multioutput.py
@@ -4,10 +4,10 @@ Comparing random forests and the multi-output meta estimator
 ============================================================
 
 An example to compare multi-output regression with random forest and
-the :ref:`multioutput.MultiOutputRegressor <_multiclass>` meta-estimator.
+the :ref:`multioutput.MultiOutputRegressor <multiclass>` meta-estimator.
 
 This example illustrates the use of the
-:ref:`multioutput.MultiOutputRegressor <_multiclass>` meta-estimator
+:ref:`multioutput.MultiOutputRegressor <multiclass>` meta-estimator
 to perform multi-output regression. A random forest regressor is used,
 which supports multi-output regression natively, so the results can be
 compared.
diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py
index 05ea557b58bd3471bd16974d074dd59fef7a31c3..c96622c95f1ba2e905c059dec018a01842ac591f 100644
--- a/sklearn/ensemble/iforest.py
+++ b/sklearn/ensemble/iforest.py
@@ -64,6 +64,7 @@ class IsolationForest(BaseBagging):
 
     max_features : int or float, optional (default=1.0)
         The number of features to draw from X to train each base estimator.
+
             - If int, then draw `max_features` features.
             - If float, then draw `max_features * X.shape[1]` features.
 
diff --git a/sklearn/mixture/bayesian_mixture.py b/sklearn/mixture/bayesian_mixture.py
index 97aaacb4564c1d00c8f17524d639c8cf8b2e5bb0..d22ba73691ad9cf3784828ef77613bc6c2fa45e6 100644
--- a/sklearn/mixture/bayesian_mixture.py
+++ b/sklearn/mixture/bayesian_mixture.py
@@ -90,13 +90,14 @@ class BayesianGaussianMixture(BaseMixture):
         close to zero. The number of effective components is therefore smaller
         than n_components.
 
-    covariance_type : {'full', 'tied', 'diag', 'spherical'}, defaults to 'full'.
+    covariance_type : {'full', 'tied', 'diag', 'spherical'}, defaults to 'full'
         String describing the type of covariance parameters to use.
         Must be one of::
-            'full' (each component has its own general covariance matrix),
-            'tied' (all components share the same general covariance matrix),
-            'diag' (each component has its own diagonal covariance matrix),
-            'spherical' (each component has its own single variance).
+
+            'full' (each component has its own general covariance matrix),
+            'tied' (all components share the same general covariance matrix),
+            'diag' (each component has its own diagonal covariance matrix),
+            'spherical' (each component has its own single variance).
 
     tol : float, defaults to 1e-3.
         The convergence threshold. EM iterations will stop when the
@@ -118,14 +119,16 @@ class BayesianGaussianMixture(BaseMixture):
         The method used to initialize the weights, the means and the
         covariances.
         Must be one of::
-            'kmeans' : responsibilities are initialized using kmeans.
-            'random' : responsibilities are initialized randomly.
+
+            'kmeans' : responsibilities are initialized using kmeans.
+            'random' : responsibilities are initialized randomly.
 
     weight_concentration_prior_type : str, defaults to 'dirichlet_process'.
         String describing the type of the weight concentration prior.
         Must be one of::
-            'dirichlet_process' (using the Stick-breaking representation),
-            'dirichlet_distribution' (can favor more uniform weights).
+
+            'dirichlet_process' (using the Stick-breaking representation),
+            'dirichlet_distribution' (can favor more uniform weights).
 
     weight_concentration_prior : float | None, optional.
         The dirichlet concentration of each component on the weight
@@ -133,7 +136,7 @@ class BayesianGaussianMixture(BaseMixture):
         the center and will lead to more components being active, while a lower
         concentration parameter will lead to more mass at the edge of the
         mixture weights simplex. The value of the parameter must be greater
-        than 0. If it is None, it's set to `1. / n_components`.
+        than 0. If it is None, it's set to ``1. / n_components``.
 
     mean_precision_prior : float | None, optional.
         The precision prior on the mean distribution (Gaussian).
@@ -142,7 +145,7 @@ class BayesianGaussianMixture(BaseMixture):
         The value of the parameter must be greater than 0.
         If it is None, it's set to 1.
 
-    mean_prior : array-like, shape (`n_features`,), optional
+    mean_prior : array-like, shape (n_features,), optional
         The prior on the mean distribution (Gaussian).
         If it is None, it's set to the mean of X.
 
@@ -154,10 +157,11 @@ class BayesianGaussianMixture(BaseMixture):
         The prior on the covariance distribution (Wishart).
         If it is None, the empirical covariance prior is initialized using the
         covariance of X. The shape depends on `covariance_type`::
-            (`n_features`, `n_features`) if 'full',
-            (`n_features`, `n_features`) if 'tied',
-            (`n_features`) if 'diag',
-            float if 'spherical'
+
+            (n_features, n_features) if 'full',
+            (n_features, n_features) if 'tied',
+            (n_features) if 'diag',
+            float if 'spherical'
 
     random_state: RandomState or an int seed, defaults to None.
         A random number generator instance.
@@ -178,15 +182,16 @@ class BayesianGaussianMixture(BaseMixture):
     Attributes
     ----------
-    weights_ : array-like, shape (`n_components`,)
+    weights_ : array-like, shape (n_components,)
         The weights of each mixture component.
 
-    means_ : array-like, shape (`n_components`, `n_features`)
+    means_ : array-like, shape (n_components, n_features)
         The mean of each mixture component.
 
     covariances_ : array-like
         The covariance of each mixture component.
         The shape depends on `covariance_type`::
+
             (n_components,) if 'spherical',
             (n_features, n_features) if 'tied',
             (n_components, n_features) if 'diag',
@@ -199,7 +204,8 @@ class BayesianGaussianMixture(BaseMixture):
         equivalently parameterized by the precision matrices. Storing the
         precision matrices instead of the covariance matrices makes it more
         efficient to compute the log-likelihood of new samples at test time.
-        The shape depends on `covariance_type`::
+        The shape depends on ``covariance_type``::
+
             (n_components,) if 'spherical',
             (n_features, n_features) if 'tied',
             (n_components, n_features) if 'diag',
@@ -212,7 +218,8 @@ class BayesianGaussianMixture(BaseMixture):
         Gaussian can be equivalently parameterized by the precision matrices.
         Storing the precision matrices instead of the covariance matrices makes
         it more efficient to compute the log-likelihood of new samples at test
-        time. The shape depends on `covariance_type`::
+        time. The shape depends on ``covariance_type``::
+
             (n_components,) if 'spherical',
             (n_features, n_features) if 'tied',
             (n_components, n_features) if 'diag',
@@ -232,15 +239,17 @@ class BayesianGaussianMixture(BaseMixture):
     weight_concentration_prior_ : tuple or float
         The dirichlet concentration of each component on the weight
         distribution (Dirichlet). The type depends on
-        `weight_concentration_prior_type`::
+        ``weight_concentration_prior_type``::
+
             (float, float) if 'dirichlet_process' (Beta parameters),
             float if 'dirichlet_distribution' (Dirichlet parameters).
+
         The higher concentration puts more mass in
         the center and will lead to more components being active, while a lower
         concentration parameter will lead to more mass at the edge of the
         simplex.
 
-    weight_concentration_ : array-like, shape (`n_components`, )
+    weight_concentration_ : array-like, shape (n_components,)
         The dirichlet concentration of each component on the weight
         distribution (Dirichlet).
 
@@ -250,26 +259,27 @@ class BayesianGaussianMixture(BaseMixture):
         Smaller values concentrate the means of each cluster around
         `mean_prior`.
 
-    mean_precision_ : array-like, shape (`n_components`, )
+    mean_precision_ : array-like, shape (n_components,)
         The precision of each component on the mean distribution (Gaussian).
 
-    means_prior_ : array-like, shape (`n_features`,)
+    means_prior_ : array-like, shape (n_features,)
         The prior on the mean distribution (Gaussian).
 
     degrees_of_freedom_prior_ : float
         The prior of the number of degrees of freedom on the covariance
         distributions (Wishart).
 
-    degrees_of_freedom_ : array-like, shape (`n_components`,)
+    degrees_of_freedom_ : array-like, shape (n_components,)
         The number of degrees of freedom of each component in the model.
 
     covariance_prior_ : float or array-like
         The prior on the covariance distribution (Wishart).
        The shape depends on `covariance_type`::
-            (`n_features`, `n_features`) if 'full',
-            (`n_features`, `n_features`) if 'tied',
-            (`n_features`) if 'diag',
-            float if 'spherical'
+
+            (n_features, n_features) if 'full',
+            (n_features, n_features) if 'tied',
+            (n_features) if 'diag',
+            float if 'spherical'
 
     See Also
     --------
diff --git a/sklearn/mixture/gaussian_mixture.py b/sklearn/mixture/gaussian_mixture.py
index 57fa24e891836ecb572d880895abe0c10e21e62a..f4a182a7c95672c42899ae26e9b9ce9431d6a013 100644
--- a/sklearn/mixture/gaussian_mixture.py
+++ b/sklearn/mixture/gaussian_mixture.py
@@ -450,13 +450,14 @@ class GaussianMixture(BaseMixture):
         The number of mixture components.
 
     covariance_type : {'full', 'tied', 'diag', 'spherical'},
-        defaults to 'full'.
+            defaults to 'full'.
         String describing the type of covariance parameters to use.
         Must be one of::
-            'full' (each component has its own general covariance matrix),
-            'tied' (all components share the same general covariance matrix),
-            'diag' (each component has its own diagonal covariance matrix),
-            'spherical' (each component has its own single variance).
+
+            'full' (each component has its own general covariance matrix),
+            'tied' (all components share the same general covariance matrix),
+            'diag' (each component has its own diagonal covariance matrix),
+            'spherical' (each component has its own single variance).
 
     tol : float, defaults to 1e-3.
         The convergence threshold. EM iterations will stop when the
@@ -476,8 +477,9 @@ class GaussianMixture(BaseMixture):
         The method used to initialize the weights, the means and the
         precisions.
         Must be one of::
-            'kmeans' : responsibilities are initialized using kmeans.
-            'random' : responsibilities are initialized randomly.
+
+            'kmeans' : responsibilities are initialized using kmeans.
+            'random' : responsibilities are initialized randomly.
 
     weights_init : array-like, shape (n_components, ), optional
         The user-provided initial weights, defaults to None.
@@ -492,6 +494,7 @@ class GaussianMixture(BaseMixture):
         matrices), defaults to None.
         If it is None, precisions are initialized using the 'init_params' method.
         The shape depends on 'covariance_type'::
+
             (n_components,) if 'spherical',
             (n_features, n_features) if 'tied',
             (n_components, n_features) if 'diag',
@@ -525,6 +528,7 @@ class GaussianMixture(BaseMixture):
     covariances_ : array-like
         The covariance of each mixture component.
         The shape depends on `covariance_type`::
+
             (n_components,) if 'spherical',
             (n_features, n_features) if 'tied',
             (n_components, n_features) if 'diag',
@@ -538,6 +542,7 @@ class GaussianMixture(BaseMixture):
         precision matrices instead of the covariance matrices makes it more
         efficient to compute the log-likelihood of new samples at test time.
         The shape depends on `covariance_type`::
+
             (n_components,) if 'spherical',
             (n_features, n_features) if 'tied',
             (n_components, n_features) if 'diag',
@@ -551,6 +556,7 @@ class GaussianMixture(BaseMixture):
         Storing the precision matrices instead of the covariance matrices makes
         it more efficient to compute the log-likelihood of new samples at test
         time. The shape depends on `covariance_type`::
+
             (n_components,) if 'spherical',
             (n_features, n_features) if 'tied',
             (n_components, n_features) if 'diag',
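
The hunks above only reformat docstrings, but the options they document are easy to
check interactively. Below is a minimal sketch (an editor's illustration, not part of
this patch), assuming a working scikit-learn install exposing ``GaussianMixture`` and
``BayesianGaussianMixture`` from ``sklearn.mixture``; the toy blob data is invented for
the example::

    import numpy as np
    from sklearn.mixture import BayesianGaussianMixture, GaussianMixture

    # Toy data: two well-separated blobs in two features.
    rng = np.random.RandomState(0)
    X = np.vstack([rng.randn(100, 2), rng.randn(100, 2) + 5])

    # covariance_type controls the shape of the fitted covariances_ attribute,
    # exactly as the reformatted lists above read:
    #   'full' -> (n_components, n_features, n_features), 'tied' -> (n_features,
    #   n_features), 'diag' -> (n_components, n_features), 'spherical' -> (n_components,)
    gm = GaussianMixture(n_components=2, covariance_type='full',
                         init_params='kmeans', random_state=0).fit(X)
    print(gm.weights_.shape, gm.means_.shape, gm.covariances_.shape)
    # -> (2,) (2, 2) (2, 2, 2)

    # The variational variant adds the weight concentration prior documented above;
    # with 'dirichlet_process', superfluous components get weights close to zero,
    # so the number of effective components can be smaller than n_components.
    bgm = BayesianGaussianMixture(
        n_components=5,
        weight_concentration_prior_type='dirichlet_process',
        weight_concentration_prior=1. / 5,  # the documented None default: 1. / n_components
        covariance_type='full',
        random_state=0,
    ).fit(X)
    print(np.round(bgm.weights_, 3))  # most mass concentrates on ~2 components

Printing the attribute shapes for each of the four ``covariance_type`` values is a quick
way to confirm the shape tables that this patch reindents.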