diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py
index dfdb4f6b5d0af0dcdb88e77ecc1241c079223a08..41fd97dacb2f23aec37d6c6934b974388af444ba 100644
--- a/sklearn/datasets/__init__.py
+++ b/sklearn/datasets/__init__.py
@@ -10,6 +10,7 @@ from .base import load_files
 from .base import load_iris
 from .base import load_linnerud
 from .base import load_boston
+from .base import load_cities
 from .base import get_data_home
 from .base import clear_data_home
 from .base import load_sample_images
diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py
index 90b42fc02422046d9e418b3309c882bad19d0f77..a4b78ade3a18a8a1e7a27bbc886d4d17b7082d70 100644
--- a/sklearn/datasets/base.py
+++ b/sklearn/datasets/base.py
@@ -334,6 +334,32 @@ def load_diabetes():
     return Bunch(data=data, target=target)
 
 
+def load_cities():
+    """Load and return travelling distances between major French cities.
+
+    ==============   ===================
+    Samples total    18 cities
+    Dimensionality   18
+    Features         real, 0 <= x < 1300
+    ==============   ===================
+
+    Returns
+    -------
+    data : Bunch
+        Dictionary-like object, the interesting attributes are: 'data', the
+        18 x 18 distance matrix to learn, and 'header', the list of the 18
+        city names labelling its rows and columns.
+    """
+    csv_path = join(dirname(__file__), 'data', 'france_distances.csv')
+    # The first line of the file holds the comma-separated city names; strip
+    # it before splitting so the last label does not keep its newline.
+    with open(csv_path) as f:
+        header = f.readline().strip().split(',')
+    # The remaining lines hold the whitespace-separated distance rows.
+    data = np.loadtxt(csv_path, skiprows=1)
+    return Bunch(data=data, header=header)
+
+
 def load_linnerud():
     """Load and return the linnerud dataset (multivariate regression).
 
diff --git a/sklearn/datasets/data/france_distances.csv b/sklearn/datasets/data/france_distances.csv new file mode 100644 index 0000000000000000000000000000000000000000..31e8ec646fb3e3323f2ff57e057b0e57834853e1 --- /dev/null +++ b/sklearn/datasets/data/france_distances.csv @@ -0,0 +1,19 @@ +Poitiers,Tours,Lille,Dijon,Le Mans,Toulouse,Bordeaux,Paris,Clermont Ferrand,Limoges,Rennes,Nantes,Brest,Le Havre,Caen,Lyon,Marseille,Grenoble +0.000000000000000000e+00 1.050000000000000000e+02 5.550000000000000000e+02 5.210000000000000000e+02 2.020000000000000000e+02 4.130000000000000000e+02 2.270000000000000000e+02 3.400000000000000000e+02 3.250000000000000000e+02 1.300000000000000000e+02 2.850000000000000000e+02 2.170000000000000000e+02 4.750000000000000000e+02 4.400000000000000000e+02 3.620000000000000000e+02 5.080000000000000000e+02 7.760000000000000000e+02 5.970000000000000000e+02 +1.050000000000000000e+02 0.000000000000000000e+00 4.560000000000000000e+02 4.210000000000000000e+02 1.020000000000000000e+02 5.160000000000000000e+02 3.490000000000000000e+02 2.400000000000000000e+02 3.330000000000000000e+02 2.290000000000000000e+02 2.530000000000000000e+02 2.170000000000000000e+02 5.020000000000000000e+02 3.410000000000000000e+02 2.630000000000000000e+02 5.220000000000000000e+02 7.810000000000000000e+02 6.120000000000000000e+02 +5.550000000000000000e+02 4.560000000000000000e+02 0.000000000000000000e+00 5.020000000000000000e+02 4.250000000000000000e+02 8.960000000000000000e+02 8.000000000000000000e+02 2.210000000000000000e+02 6.400000000000000000e+02 6.100000000000000000e+02 5.690000000000000000e+02 6.020000000000000000e+02 7.560000000000000000e+02 3.170000000000000000e+02 3.880000000000000000e+02 6.820000000000000000e+02 9.910000000000000000e+02 7.920000000000000000e+02 +5.210000000000000000e+02 4.210000000000000000e+02 5.020000000000000000e+02 0.000000000000000000e+00 7.800000000000000000e+02 6.720000000000000000e+02 6.660000000000000000e+02 3.150000000000000000e+02 
3.220000000000000000e+02 4.340000000000000000e+02 6.180000000000000000e+02 6.390000000000000000e+02 8.610000000000000000e+02 5.080000000000000000e+02 5.450000000000000000e+02 1.950000000000000000e+02 5.070000000000000000e+02 3.050000000000000000e+02 +2.020000000000000000e+02 1.020000000000000000e+02 4.250000000000000000e+02 7.800000000000000000e+02 0.000000000000000000e+00 6.150000000000000000e+02 4.460000000000000000e+02 2.100000000000000000e+02 4.320000000000000000e+02 3.050000000000000000e+02 1.550000000000000000e+02 1.890000000000000000e+02 3.970000000000000000e+02 2.430000000000000000e+02 1.640000000000000000e+02 6.280000000000000000e+02 8.900000000000000000e+02 7.380000000000000000e+02 +4.130000000000000000e+02 5.160000000000000000e+02 8.960000000000000000e+02 6.720000000000000000e+02 6.150000000000000000e+02 0.000000000000000000e+00 2.460000000000000000e+02 6.790000000000000000e+02 3.750000000000000000e+02 2.910000000000000000e+02 7.000000000000000000e+02 5.860000000000000000e+02 8.820000000000000000e+02 8.480000000000000000e+02 7.720000000000000000e+02 5.390000000000000000e+02 4.050000000000000000e+02 5.310000000000000000e+02 +2.270000000000000000e+02 3.490000000000000000e+02 8.000000000000000000e+02 6.660000000000000000e+02 4.460000000000000000e+02 2.460000000000000000e+02 0.000000000000000000e+00 5.860000000000000000e+02 3.690000000000000000e+02 2.200000000000000000e+02 4.610000000000000000e+02 3.460000000000000000e+02 6.430000000000000000e+02 6.860000000000000000e+02 6.030000000000000000e+02 5.880000000000000000e+02 6.450000000000000000e+02 6.780000000000000000e+02 +3.400000000000000000e+02 2.400000000000000000e+02 2.210000000000000000e+02 3.150000000000000000e+02 2.100000000000000000e+02 6.790000000000000000e+02 5.860000000000000000e+02 0.000000000000000000e+00 4.240000000000000000e+02 3.940000000000000000e+02 3.500000000000000000e+02 3.860000000000000000e+02 5.930000000000000000e+02 2.040000000000000000e+02 2.410000000000000000e+02 
4.650000000000000000e+02 7.770000000000000000e+02 5.750000000000000000e+02 +3.250000000000000000e+02 3.330000000000000000e+02 6.400000000000000000e+02 3.220000000000000000e+02 4.320000000000000000e+02 3.750000000000000000e+02 3.690000000000000000e+02 4.240000000000000000e+02 0.000000000000000000e+00 2.220000000000000000e+02 5.840000000000000000e+02 5.360000000000000000e+02 8.220000000000000000e+02 5.930000000000000000e+02 5.890000000000000000e+02 2.060000000000000000e+02 4.780000000000000000e+02 2.970000000000000000e+02 +1.300000000000000000e+02 2.290000000000000000e+02 6.100000000000000000e+02 4.340000000000000000e+02 3.050000000000000000e+02 2.910000000000000000e+02 2.200000000000000000e+02 3.940000000000000000e+02 2.220000000000000000e+02 0.000000000000000000e+00 4.120000000000000000e+02 3.190000000000000000e+02 6.160000000000000000e+02 5.610000000000000000e+02 4.660000000000000000e+02 4.420000000000000000e+02 6.900000000000000000e+02 5.320000000000000000e+02 +2.850000000000000000e+02 2.530000000000000000e+02 5.690000000000000000e+02 6.180000000000000000e+02 1.550000000000000000e+02 7.000000000000000000e+02 4.610000000000000000e+02 3.500000000000000000e+02 5.840000000000000000e+02 4.120000000000000000e+02 0.000000000000000000e+00 1.070000000000000000e+02 2.440000000000000000e+02 2.760000000000000000e+02 1.790000000000000000e+02 7.680000000000000000e+02 1.077000000000000000e+03 8.780000000000000000e+02 +2.170000000000000000e+02 2.170000000000000000e+02 6.020000000000000000e+02 6.390000000000000000e+02 1.890000000000000000e+02 5.860000000000000000e+02 3.460000000000000000e+02 3.860000000000000000e+02 5.360000000000000000e+02 3.190000000000000000e+02 1.070000000000000000e+02 0.000000000000000000e+00 2.960000000000000000e+02 3.840000000000000000e+02 3.840000000000000000e+02 7.250000000000000000e+02 9.850000000000000000e+02 8.140000000000000000e+02 +4.750000000000000000e+02 5.020000000000000000e+02 7.560000000000000000e+02 8.610000000000000000e+02 
3.970000000000000000e+02 8.820000000000000000e+02 6.430000000000000000e+02 5.930000000000000000e+02 8.220000000000000000e+02 6.160000000000000000e+02 2.440000000000000000e+02 2.960000000000000000e+02 0.000000000000000000e+00 4.670000000000000000e+02 3.710000000000000000e+02 1.011000000000000000e+03 1.282000000000000000e+03 1.121000000000000000e+03 +4.400000000000000000e+02 3.410000000000000000e+02 3.170000000000000000e+02 5.080000000000000000e+02 2.430000000000000000e+02 8.480000000000000000e+02 6.860000000000000000e+02 2.040000000000000000e+02 5.930000000000000000e+02 5.610000000000000000e+02 2.760000000000000000e+02 3.840000000000000000e+02 4.670000000000000000e+02 0.000000000000000000e+00 9.500000000000000000e+01 6.580000000000000000e+02 9.670000000000000000e+02 7.680000000000000000e+02 +3.620000000000000000e+02 2.630000000000000000e+02 3.880000000000000000e+02 5.450000000000000000e+02 1.640000000000000000e+02 7.720000000000000000e+02 6.030000000000000000e+02 2.410000000000000000e+02 5.890000000000000000e+02 4.660000000000000000e+02 1.790000000000000000e+02 3.840000000000000000e+02 3.710000000000000000e+02 9.500000000000000000e+01 0.000000000000000000e+00 6.950000000000000000e+02 1.005000000000000000e+03 7.680000000000000000e+02 +5.080000000000000000e+02 5.220000000000000000e+02 6.820000000000000000e+02 1.950000000000000000e+02 6.280000000000000000e+02 5.390000000000000000e+02 5.880000000000000000e+02 4.650000000000000000e+02 2.060000000000000000e+02 4.420000000000000000e+02 7.680000000000000000e+02 7.250000000000000000e+02 1.011000000000000000e+03 6.580000000000000000e+02 6.950000000000000000e+02 0.000000000000000000e+00 3.150000000000000000e+02 1.120000000000000000e+02 +7.760000000000000000e+02 7.810000000000000000e+02 9.910000000000000000e+02 5.070000000000000000e+02 8.900000000000000000e+02 4.050000000000000000e+02 6.450000000000000000e+02 7.770000000000000000e+02 4.780000000000000000e+02 6.900000000000000000e+02 1.077000000000000000e+03 
9.850000000000000000e+02 1.282000000000000000e+03 9.670000000000000000e+02 1.005000000000000000e+03 3.150000000000000000e+02 0.000000000000000000e+00 2.730000000000000000e+02
+5.970000000000000000e+02 6.120000000000000000e+02 7.920000000000000000e+02 3.050000000000000000e+02 7.380000000000000000e+02 5.310000000000000000e+02 6.780000000000000000e+02 5.750000000000000000e+02 2.970000000000000000e+02 5.320000000000000000e+02 8.780000000000000000e+02 8.140000000000000000e+02 1.121000000000000000e+03 7.680000000000000000e+02 7.680000000000000000e+02 1.120000000000000000e+02 2.730000000000000000e+02 0.000000000000000000e+00
diff --git a/sklearn/manifold/nmds.py b/sklearn/manifold/mds.py
similarity index 93%
rename from sklearn/manifold/nmds.py
rename to sklearn/manifold/mds.py
index 1b30b5514ed8a470b7bc8f6e6d61920dffa93a89..98fdfb055aa291ff4b6ceeb8d94c9c9759354fe4 100644
--- a/sklearn/manifold/nmds.py
+++ b/sklearn/manifold/mds.py
@@ -184,16 +184,37 @@ class MDS(BaseEstimator):
     Parameters
     ----------
 
-
     Notes
     -----
     """
-    def __init__(self, p=2, init=None, max_iter=300, eps=1e-3):
+    # TODO
+    def __init__(self, p=2, metric=True, init=None, max_iter=300, eps=1e-3,
+                 verbose=0):
         self.p = p
+        # fit() reads metric and verbose back from the instance, so both
+        # must be stored here like the other constructor arguments.
+        self.metric = metric
         self.init = init
         self.max_iter = max_iter
         self.eps = eps
+        self.verbose = verbose
 
-    def fit():
+    def fit(self, X, y=None):
+        """Run smacof on X and store its result in ``self.X``.
+
+        The constructor arguments are forwarded to smacof unchanged; ``y``
+        is accepted but not used.
+
+        Returns
+        -------
+        self
+        """
+        self.X = smacof(X, metric=self.metric, p=self.p, init=self.init,
+                        max_iter=self.max_iter, verbose=self.verbose,
+                        eps=self.eps)
+        return self
+
+    def predict(self, X):
         """
         """
+        # TODO
diff --git a/sklearn/manifold/tests/test_nmds.py b/sklearn/manifold/tests/test_mds.py
similarity index 84%
rename from sklearn/manifold/tests/test_nmds.py
rename to sklearn/manifold/tests/test_mds.py
index 3072a3dacdede5ac96dfc7a14d53082c3ee38da0..04f6621032e851035a5365346efdff825b81e5e6 100644
--- a/sklearn/manifold/tests/test_nmds.py
+++ b/sklearn/manifold/tests/test_mds.py
@@ -2,13 +2,13 @@ import numpy as np
 from numpy.testing import assert_array_almost_equal
 from nose.tools import assert_raises
 
-from
sklearn.manifold import nmds +from sklearn.manifold import mds def test_pav(): distances = np.array([10., 8, 11, 5, 13, 11, 9, 14, 6, 16]) similarities = np.arange(10) - distances_fit = nmds.PAV(distances, similarities) + distances_fit = mds.PAV(distances, similarities) assert_array_almost_equal(distances_fit, np.array([8.5, 8.5, 8.5, 8.5, 10.6, 10.6, 10.6, 10.6, @@ -26,7 +26,7 @@ def test_smacof(): [.451, .252], [.016, -.238], [-.200, .524]]) - X = nmds.smacof(sim, init=Z, p=2, max_iter=1) + X = mds.smacof(sim, init=Z, p=2, max_iter=1) X_true = np.array([[-1.415, -2.471], [1.633, 1.107], [.249, -.067], @@ -41,14 +41,14 @@ def test_smacof_error(): [3, 2, 0, 1], [4, 2, 1, 0]]) - assert_raises(ValueError, nmds.smacof, sim) + assert_raises(ValueError, mds.smacof, sim) # Not squared similarity matrix: sim = np.array([[0, 5, 9, 4], [5, 0, 2, 2], [4, 2, 1, 0]]) - assert_raises(ValueError, nmds.smacof, sim) + assert_raises(ValueError, mds.smacof, sim) # init not None and not correct format: sim = np.array([[0, 5, 3, 4], @@ -59,4 +59,4 @@ def test_smacof_error(): Z = np.array([[-.266, -.539], [.016, -.238], [-.200, .524]]) - assert_raises(ValueError, nmds.smacof, sim, init=Z) + assert_raises(ValueError, mds.smacof, sim, init=Z)