Skip to content
Snippets Groups Projects
Commit 1382b264 authored by Mathieu Blondel's avatar Mathieu Blondel
Browse files

Use CCA as well in multilabel example.

parent 512c9116
No related branches found
No related tags found
No related merge requests found
# Authors: Vlad Niculae, Mathieu Blondel
# License: BSD
"""
=========================
Multilabel classification
......@@ -11,16 +13,16 @@ dataset is generated randomly based on the following process:
- pick the document length: k ~ Poisson(length)
- k times, choose a word: w ~ Multinomial(theta_c)
In the above process, rejection sampling is used to make sure that
n is never zero or more than 2, and that the document length
is never zero. Likewise, we reject classes which have already been chosen.
The documents that are assigned to both classes are plotted surrounded by
two colored circles.
In the above process, rejection sampling is used to make sure that n is never
more than 2, and that the document length is never zero. Likewise, we reject
classes which have already been chosen. The documents that are assigned to both
classes are plotted surrounded by two colored circles.
The classification is performed by projecting to the first two principal
components for visualisation purposes, followed by using the
:class:`sklearn.multiclass.OneVsRestClassifier` metaclassifier using two SVCs
with linear kernels to learn a discriminative model for each class.
components found by PCA and CCA for visualisation purposes, followed by using
the :class:`sklearn.multiclass.OneVsRestClassifier` metaclassifier using two
SVCs with linear kernels to learn a discriminative model for each class.
Note that PCA is an unsupervised algorithm, while CCA is supervised.
"""
# Show the module docstring when the example is run. The call form behaves
# the same under the Python 2 print statement (single parenthesised value)
# and the Python 3 print function.
print(__doc__)
......@@ -30,7 +32,9 @@ import matplotlib.pylab as pl
from sklearn.datasets import make_multilabel_classification
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelBinarizer
from sklearn.decomposition import PCA
from sklearn.pls import CCA
def plot_hyperplane(clf, min_x, max_x, linestyle, label):
......@@ -42,26 +46,24 @@ def plot_hyperplane(clf, min_x, max_x, linestyle, label):
pl.plot(xx, yy, linestyle, label=label)
pl.figure(figsize=(13, 6))
for subplot, allow_unlabeled, title in zip((1, 2),
(False, True),
('with unlabeled samples',
'without unlabeled samples')):
X, Y = make_multilabel_classification(n_classes=2, n_labels=1,
allow_unlabeled=allow_unlabeled,
random_state=42)
def plot_subfigure(X, Y, subplot, title, transform):
if transform == "pca":
X = PCA(n_components=2).fit_transform(X)
elif transform == "cca":
# Convert list of tuples to a class indicator matrix first
Y_indicator = LabelBinarizer().fit(Y).transform(Y)
X = CCA(n_components=2).fit(X, Y_indicator).transform(X)
else:
raise ValueError
min_x = np.min(X[:, 0])
max_x = np.max(X[:, 0])
classif = OneVsRestClassifier(SVC(kernel='linear'))
classif.fit(X, Y)
pl.subplot(1, 2, subplot)
pl.title('Multilabel classification\n(%s)' % title)
pl.xlabel('First principal component')
pl.ylabel('Second principal component')
pl.subplot(2, 2, subplot)
pl.title(title)
zero_class = np.where([0 in y for y in Y])
one_class = np.where([1 in y for y in Y])
......@@ -78,7 +80,28 @@ for subplot, allow_unlabeled, title in zip((1, 2),
'Boundary\nfor class 2')
pl.xticks(())
pl.yticks(())
pl.legend()
if subplot == 1:
pl.xlabel('First principal component')
pl.ylabel('Second principal component')
pl.legend(loc="upper right")
# Draw the four panels on a single figure: each generated dataset is shown
# once projected with CCA and once projected with PCA.
pl.figure(figsize=(13, 6))

# Generator settings shared by both datasets; only ``allow_unlabeled`` varies.
dataset_options = dict(n_classes=2, n_labels=1, random_state=42)

# Dataset in which samples are allowed to carry no label: panels 1 and 2.
X, Y = make_multilabel_classification(allow_unlabeled=True, **dataset_options)
for position, caption, projection in (
        (1, "With unlabeled samples + CCA", "cca"),
        (2, "With unlabeled samples + PCA", "pca")):
    plot_subfigure(X, Y, position, caption, projection)

# Dataset in which every sample carries at least one label: panels 3 and 4.
X, Y = make_multilabel_classification(allow_unlabeled=False, **dataset_options)
for position, caption, projection in (
        (3, "Without unlabeled samples + CCA", "cca"),
        (4, "Without unlabeled samples + PCA", "pca")):
    plot_subfigure(X, Y, position, caption, projection)

# Same margins and spacing as before, with each value named explicitly.
pl.subplots_adjust(left=.04, bottom=.07, right=.97, top=.90,
                   wspace=.09, hspace=.2)
pl.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment