diff --git a/doc/modules/linreg.rst b/doc/modules/linreg.rst
index ba1e3d14776beb1d6d51af691b192a5266e76153..8f68ab2c4aed72c0ad8ca70ffe054b639696f320 100644
--- a/doc/modules/linreg.rst
+++ b/doc/modules/linreg.rst
@@ -1,12 +1,54 @@
-=========================
-Generalized Linear Models
-=========================
+=================
+Linear Regression
+=================
 
-Description TODO
+Linear Regression
+=================
 
-.. .. automodule:: scikits.learn.machine.glm
-   :members:
+In this model, the target value is expected to be a linear combination
+of the input variables.
+
+.. math:: y(X, W) = w_0 + w_1 x_1 + ... + w_D x_D
+
+The parameter vector :math:`W` is estimated by least squares.
+
+.. what happens if there are duplicate rows ?
+
+Linear regression is done via instances of :class:`LinearRegression`.
+
+.. autoclass:: scikits.learn.linreg.LinearRegression
+   :members:
+
+>>> from scikits.learn import linreg
+>>>
+>>> clf = linreg.LinearRegression().fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
+>>> clf.coef_
+array([ 0.5,  0.5])
+
+Ridge Regression
+================
+
+Coefficient estimates for multiple linear regression models rely on
+the independence of the model terms. When terms are correlated and the
+columns of the design matrix :math:`X` have an approximate linear
+dependence, the matrix :math:`X^T X` becomes close to
+singular. As a result, the least-squares estimate:
+
+.. math:: \hat{\beta} = (X^T X)^{-1} X^T y
+
+becomes highly sensitive to random errors in the observed response
+:math:`y`, producing a large variance. This situation of
+*multicollinearity* can arise, for example, when data are collected
+without an experimental design.
+
+Ridge regression addresses the problem by estimating the regression
+coefficients as:
+
+.. math:: \hat{\beta} = (X^T X + \alpha I)^{-1} X^T y
+
+.. autoclass:: scikits.learn.linreg.RidgeRegression
+   :members:
 
 Formulations
 ============
diff --git a/scikits/learn/linreg/regression.py b/scikits/learn/linreg/regression.py
index 651db4f92f0c587e6285e316e30078ae36a13473..82d86e4c00b620f5438fa03afb58e53ace0f06e0 100644
--- a/scikits/learn/linreg/regression.py
+++ b/scikits/learn/linreg/regression.py
@@ -19,6 +19,11 @@ class LinearRegression(object):
     ----------
     This class takes no parameters
 
+    Members
+    -------
+    coef_ : array
+        Estimated coefficients for the linear regression problem.
+
-    This is just plain linear regression wrapped is a Predictor object.
+    This is just plain linear regression wrapped in a Predictor object.
     """
 
@@ -26,7 +31,7 @@ class LinearRegression(object):
         """
         Fit linear model
         """
-        self.w, self.residues, self.rank, self.singular = \
+        self.coef_, self.residues_, self.rank_, self.singular_ = \
                 scipy.linalg.lstsq(X, Y)
         return self
 
@@ -62,8 +67,7 @@ class RidgeRegression(object):
 
     See also
     --------
-    http://scikit-learn.sourceforge.net/doc/modules/glm.html
-
+    http://scikit-learn.sourceforge.net/doc/modules/linreg.html
     """
 
     def __init__(self, alpha=1.0):
@@ -75,16 +79,17 @@ class RidgeRegression(object):
 
         if nsamples > nfeatures:
             # w = inv(X^t X + alpha*Id) * X.T y
-            self.w = scipy.linalg.solve(np.dot(X.T,X) + self.alpha * np.eye(nfeatures),
+            self.coef_ = scipy.linalg.solve(np.dot(X.T,X) + self.alpha * np.eye(nfeatures),
                                         np.dot(X.T,y))
         else:
             # w = X.T * inv(X X^t + alpha*Id) y
-            self.w = np.dot(X.T,
+            self.coef_ = np.dot(X.T,
                     scipy.linalg.solve(np.dot(X, X.T) + self.alpha * np.eye(nsamples), y))
 
         return self
 
-    def predict(self, X):
-        """Predict using Linear Model
+    def predict(self, T):
         """
-        return np.dot(X,self.w)
+        Predict using Linear Model
+        """
+        return np.dot(T, self.coef_)
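
Note (not part of the patch): the multicollinearity claim added to
``doc/modules/linreg.rst`` can be checked numerically. The sketch below is
illustrative only, using plain NumPy/SciPy rather than ``scikits.learn``, and
all variable names are made up for the example. With two nearly identical
columns, ordinary least squares produces huge, unstable coefficients, while
the ridge estimate :math:`(X^T X + \alpha I)^{-1} X^T y` stays near
``[0.5, 0.5]``::

    import numpy as np
    import scipy.linalg

    rng = np.random.RandomState(42)
    x = rng.randn(50)
    # Two almost identical columns: X^T X is nearly singular.
    X = np.column_stack([x, x + 1e-8 * rng.randn(50)])
    y = x + 0.01 * rng.randn(50)

    # Plain least squares: tiny noise in y yields huge coefficients.
    w_ols, _, _, _ = scipy.linalg.lstsq(X, y)

    # Ridge with a small alpha keeps the coefficients near [0.5, 0.5].
    alpha = 1e-3
    w_ridge = scipy.linalg.solve(np.dot(X.T, X) + alpha * np.eye(2),
                                 np.dot(X.T, y))

    print(np.abs(w_ols).max())     # very large
    print(np.abs(w_ridge).max())   # about 0.5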
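
Note (not part of the patch): the two branches in ``RidgeRegression.fit``
rely on the matrix identity
:math:`(X^T X + \alpha I)^{-1} X^T y = X^T (X X^T + \alpha I)^{-1} y`,
so the method always solves the smaller of the two linear systems. A minimal
standalone sketch that checks both forms agree; it assumes only NumPy/SciPy,
and the function names are illustrative, not part of the library::

    import numpy as np
    import scipy.linalg

    def ridge_primal(X, y, alpha):
        # Solve (X^T X + alpha * I) w = X^T y; cheap when nsamples > nfeatures.
        nfeatures = X.shape[1]
        A = np.dot(X.T, X) + alpha * np.eye(nfeatures)
        return scipy.linalg.solve(A, np.dot(X.T, y))

    def ridge_dual(X, y, alpha):
        # w = X^T (X X^T + alpha * I)^{-1} y; cheap when nfeatures > nsamples.
        nsamples = X.shape[0]
        A = np.dot(X, X.T) + alpha * np.eye(nsamples)
        return np.dot(X.T, scipy.linalg.solve(A, y))

    rng = np.random.RandomState(0)
    X = rng.randn(5, 20)   # more features than samples
    y = rng.randn(5)

    w_primal = ridge_primal(X, y, alpha=1.0)
    w_dual = ridge_dual(X, y, alpha=1.0)
    print(np.allclose(w_primal, w_dual))   # True: the two forms coincide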