diff --git a/scikits/learn/datasets/samples_generator/nonlinear.py b/scikits/learn/datasets/samples_generator/nonlinear.py new file mode 100755 index 0000000000000000000000000000000000000000..db44141562a4e02d6b78ca571b8fabb04f81e6c0 --- /dev/null +++ b/scikits/learn/datasets/samples_generator/nonlinear.py @@ -0,0 +1,30 @@ +import numpy as np +import numpy.random as nr + +def friedman(nb_samples=100, nb_features=10,noise_std=1): + """ + Function creating simulated data with non linearities + (cf.Friedman 1993) + X = NR.normal(0,1) + Y = 10*sin(X[:,0]*X[:,1]) + 20*(X[:,2]-0.5)**2 + 10*X[:,3] + 5*X[:,4] + The number of features is at least 5. + + Parameters + ---------- + nb_samples : int + number of samples (defaut is 100). + nb_features : int + number of features (defaut is 10). + noise_std : float + std of the noise, which is added as noise_std*NR.normal(0,1) + Returns + ------- + X : numpy array of shape (nb_samples, nb_features) for input samples + Y : numpy array of shape (nb_samples) for labels + + """ + X = nr.normal(loc=0, scale=1, size=(nb_samples, nb_features)) + Y = 10*np.sin(X[:,0]*X[:,1]) + 20*(X[:,2]-0.5)**2 + 10*X[:,3] + 5*X[:,4] + Y += noise_std*nr.normal(loc=0,scale=1,size=(nb_samples)) + return X,Y +