Skip to content
Snippets Groups Projects
Commit 570d3c26 authored by Gael Varoquaux's avatar Gael Varoquaux
Browse files

ENH: Cosmetic improvements to the face example

parent 1b1bee0b
No related branches found
No related tags found
No related merge requests found
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
Faces recognition example using eigenfaces and SVMs Faces recognition example using eigenfaces and SVMs
=================================================== ===================================================
The dataset used in this example is a preprocessed excerpt of the "Labeled Faces The dataset used in this example is a preprocessed excerpt of the
in the Wild", aka LFW_: "Labeled Faces in the Wild", aka LFW_:
http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz (233MB) http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz (233MB)
...@@ -27,7 +27,6 @@ print __doc__ ...@@ -27,7 +27,6 @@ print __doc__
import os import os
from gzip import GzipFile from gzip import GzipFile
from collections import defaultdict
import numpy as np import numpy as np
import pylab as pl import pylab as pl
...@@ -71,52 +70,29 @@ face_filenames = [l.strip() for l in file(filenames_filename).readlines()] ...@@ -71,52 +70,29 @@ face_filenames = [l.strip() for l in file(filenames_filename).readlines()]
faces -= faces.mean(axis=1)[:, np.newaxis] faces -= faces.mean(axis=1)[:, np.newaxis]
################################################################################
# Count occurrences of each category
categories = [f.rsplit('_', 1)[0] for f in face_filenames]
counts = defaultdict(lambda: 0)
for cat in categories:
counts[cat] += 1
################################################################################ ################################################################################
# Index category names into integers suitable for scikit-learn # Index category names into integers suitable for scikit-learn
# TODO: factorize this out as a utility function in scikit-learn # Here we do a little dance to convert file names into integer indices
# (class indices in machine learning talk) that are suitable to be used
class Vocabulary(dict): # as a target for training a classifier. Note the use of an array with
# unique entries to store the relation between class index and name,
def __getitem__(self, k): # often called a 'Look Up Table' (LUT).
if k not in self: # Also, note the use of 'searchsorted' to convert an array into a set of
self[k] = len(self) # integers given a second array to use as a LUT.
return super(Vocabulary, self).__getitem__(k) categories = np.array([f.rsplit('_', 1)[0] for f in face_filenames])
def add(self, k):
self[k]
vocabulary = Vocabulary() # A unique integer per category
category_names = np.unique(categories)
for cat in counts.iterkeys(): # Turn the categories into their corresponding integer labels
vocabulary.add(cat) target = np.searchsorted(category_names, categories)
category_names = dict((v, k) for k, v in vocabulary.iteritems())
################################################################################
# Subsample the dataset to restrict to the most frequent categories # Subsample the dataset to restrict to the most frequent categories
selected_target = np.argsort(np.bincount(target))[-5:]
target = np.asarray([vocabulary[cat] for cat in categories]) # If you are using a numpy version >= 1.4, this can be done with 'np.in1d'
mask = np.array([item in selected_target for item in target])
top_categories = [(count, vocabulary[cat])
for cat, count in counts.iteritems()]
top_categories.sort(reverse=True)
labels = [i for c, i in top_categories[:5]]
kept = set(labels)
mask = np.asarray([i for i, t in enumerate(target) if t in kept])
X = faces[mask] X = faces[mask]
y = target[mask] y = target[mask]
...@@ -132,7 +108,6 @@ split = n_samples * 3 / 4 ...@@ -132,7 +108,6 @@ split = n_samples * 3 / 4
X_train, X_test = X[:split], X[split:] X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:] y_train, y_test = y[:split], y[split:]
################################################################################ ################################################################################
# Compute a PCA (eigenfaces) on the training set # Compute a PCA (eigenfaces) on the training set
n_components = 100 n_components = 100
...@@ -158,10 +133,10 @@ clf = SVC(C=100).fit(X_train_pca, y_train, class_weight="auto") ...@@ -158,10 +133,10 @@ clf = SVC(C=100).fit(X_train_pca, y_train, class_weight="auto")
# Quantitative evaluation of the model quality on the test set # Quantitative evaluation of the model quality on the test set
y_pred = clf.predict(X_test_pca) y_pred = clf.predict(X_test_pca)
print classification_report(y_test, y_pred, labels=labels, print classification_report(y_test, y_pred, labels=selected_target,
class_names=[category_names[l] for l in labels]) class_names=category_names[selected_target])
print confusion_matrix(y_test, y_pred, labels=labels) print confusion_matrix(y_test, y_pred, labels=selected_target)
################################################################################ ################################################################################
...@@ -170,14 +145,17 @@ print confusion_matrix(y_test, y_pred, labels=labels) ...@@ -170,14 +145,17 @@ print confusion_matrix(y_test, y_pred, labels=labels)
n_row = 3 n_row = 3
n_col = 4 n_col = 4
pl.figure(figsize=(2*n_col, 2.3*n_row))
pl.subplots_adjust(bottom=0, left=.01, right=.99, top=.95, hspace=.15)
for i in range(n_row * n_col): for i in range(n_row * n_col):
pl.subplot(n_row, n_col, i + 1) pl.subplot(n_row, n_col, i + 1)
pl.imshow(X_test[i].reshape((64, 64)), cmap=pl.cm.gray_r) pl.imshow(X_test[i].reshape((64, 64)), cmap=pl.cm.gray)
pl.title('pred: %s\ntrue: %s' % (category_names[y_pred[i]], pl.title('pred: %s\ntrue: %s' % (category_names[y_pred[i]],
category_names[y_test[i]])) category_names[y_test[i]]), size=12)
pl.xticks(())
pl.yticks(())
pl.show() pl.show()
# TODO: find a way to hide the x and y axis
# TODO: plot the top eigenfaces and the singular values absolute values # TODO: plot the top eigenfaces and the singular values absolute values
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment