Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
scikit-learn
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Ian Johnson
scikit-learn
Commits
570d3c26
Commit
570d3c26
authored
14 years ago
by
Gael Varoquaux
Browse files
Options
Downloads
Patches
Plain Diff
ENH: Cosmetic improvements to the face example
parent
1b1bee0b
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
examples/plot_face_recognition.py
+27
-49
27 additions, 49 deletions
examples/plot_face_recognition.py
with
27 additions
and
49 deletions
examples/plot_face_recognition.py
+
27
−
49
View file @
570d3c26
...
@@ -3,8 +3,8 @@
...
@@ -3,8 +3,8 @@
Faces recognition example using eigenfaces and SVMs
Faces recognition example using eigenfaces and SVMs
===================================================
===================================================
The dataset used in this example is a preprocessed excerpt of the
"
Labeled Faces
The dataset used in this example is a preprocessed excerpt of the
in the Wild
"
, aka LFW_:
"
Labeled Faces
in the Wild
"
, aka LFW_:
http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz (233MB)
http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz (233MB)
...
@@ -27,7 +27,6 @@ print __doc__
...
@@ -27,7 +27,6 @@ print __doc__
import
os
import
os
from
gzip
import
GzipFile
from
gzip
import
GzipFile
from
collections
import
defaultdict
import
numpy
as
np
import
numpy
as
np
import
pylab
as
pl
import
pylab
as
pl
...
@@ -71,52 +70,29 @@ face_filenames = [l.strip() for l in file(filenames_filename).readlines()]
...
@@ -71,52 +70,29 @@ face_filenames = [l.strip() for l in file(filenames_filename).readlines()]
faces
-=
faces
.
mean
(
axis
=
1
)[:,
np
.
newaxis
]
faces
-=
faces
.
mean
(
axis
=
1
)[:,
np
.
newaxis
]
################################################################################
# Count occurrences of each category
categories
=
[
f
.
rsplit
(
'
_
'
,
1
)[
0
]
for
f
in
face_filenames
]
counts
=
defaultdict
(
lambda
:
0
)
for
cat
in
categories
:
counts
[
cat
]
+=
1
################################################################################
################################################################################
# Index category names into integers suitable for scikit-learn
# Index category names into integers suitable for scikit-learn
# TODO: factorize this out as a utility function in scikit-learn
# Here we do a little dance to convert file names in integer indices
# (class indices in machine learning talk) that are suitable to be used
class
Vocabulary
(
dict
):
# as a target for training a classifier. Note the use of an array with
# unique entries to store the relation between class index and name,
def
__getitem__
(
self
,
k
):
# often called a 'Look Up Table' (LUT).
if
k
not
in
self
:
# Also, note the use of 'searchsorted' to convert an array in a set of
self
[
k
]
=
len
(
self
)
# integers given a second array to use as a LUT.
return
super
(
Vocabulary
,
self
).
__getitem__
(
k
)
categories
=
np
.
array
([
f
.
rsplit
(
'
_
'
,
1
)[
0
]
for
f
in
face_filenames
])
def
add
(
self
,
k
):
self
[
k
]
vocabulary
=
Vocabulary
()
# A unique integer per category
category_names
=
np
.
unique
(
categories
)
for
cat
in
counts
.
iterkeys
():
# Turn the categories in their corresponding integer label
vocabulary
.
add
(
cat
)
target
=
np
.
searchsorted
(
category_names
,
categories
)
category_names
=
dict
((
v
,
k
)
for
k
,
v
in
vocabulary
.
iteritems
())
################################################################################
# Subsample the dataset to restrict to the most frequent categories
# Subsample the dataset to restrict to the most frequent categories
selected_target
=
np
.
argsort
(
np
.
bincount
(
target
))[
-
5
:]
target
=
np
.
asarray
([
vocabulary
[
cat
]
for
cat
in
categories
])
# If you are using a numpy version >= 1.4, this can be done with 'np.in1d'
mask
=
np
.
array
([
item
in
selected_target
for
item
in
target
])
top_categories
=
[(
count
,
vocabulary
[
cat
])
for
cat
,
count
in
counts
.
iteritems
()]
top_categories
.
sort
(
reverse
=
True
)
labels
=
[
i
for
c
,
i
in
top_categories
[:
5
]]
kept
=
set
(
labels
)
mask
=
np
.
asarray
([
i
for
i
,
t
in
enumerate
(
target
)
if
t
in
kept
])
X
=
faces
[
mask
]
X
=
faces
[
mask
]
y
=
target
[
mask
]
y
=
target
[
mask
]
...
@@ -132,7 +108,6 @@ split = n_samples * 3 / 4
...
@@ -132,7 +108,6 @@ split = n_samples * 3 / 4
X_train
,
X_test
=
X
[:
split
],
X
[
split
:]
X_train
,
X_test
=
X
[:
split
],
X
[
split
:]
y_train
,
y_test
=
y
[:
split
],
y
[
split
:]
y_train
,
y_test
=
y
[:
split
],
y
[
split
:]
################################################################################
################################################################################
# Compute a PCA (eigenfaces) on the training set
# Compute a PCA (eigenfaces) on the training set
n_components
=
100
n_components
=
100
...
@@ -158,10 +133,10 @@ clf = SVC(C=100).fit(X_train_pca, y_train, class_weight="auto")
...
@@ -158,10 +133,10 @@ clf = SVC(C=100).fit(X_train_pca, y_train, class_weight="auto")
# Quantitative evaluation of the model quality on the test set
# Quantitative evaluation of the model quality on the test set
y_pred
=
clf
.
predict
(
X_test_pca
)
y_pred
=
clf
.
predict
(
X_test_pca
)
print
classification_report
(
y_test
,
y_pred
,
labels
=
labels
,
print
classification_report
(
y_test
,
y_pred
,
labels
=
selected_target
,
class_names
=
[
category_names
[
l
]
for
l
in
labels
])
class_names
=
category_names
[
selected_target
])
print
confusion_matrix
(
y_test
,
y_pred
,
labels
=
labels
)
print
confusion_matrix
(
y_test
,
y_pred
,
labels
=
selected_target
)
################################################################################
################################################################################
...
@@ -170,14 +145,17 @@ print confusion_matrix(y_test, y_pred, labels=labels)
...
@@ -170,14 +145,17 @@ print confusion_matrix(y_test, y_pred, labels=labels)
n_row
=
3
n_row
=
3
n_col
=
4
n_col
=
4
pl
.
figure
(
figsize
=
(
2
*
n_col
,
2.3
*
n_row
))
pl
.
subplots_adjust
(
bottom
=
0
,
left
=
.
01
,
right
=
.
99
,
top
=
.
95
,
hspace
=
.
15
)
for
i
in
range
(
n_row
*
n_col
):
for
i
in
range
(
n_row
*
n_col
):
pl
.
subplot
(
n_row
,
n_col
,
i
+
1
)
pl
.
subplot
(
n_row
,
n_col
,
i
+
1
)
pl
.
imshow
(
X_test
[
i
].
reshape
((
64
,
64
)),
cmap
=
pl
.
cm
.
gray
_r
)
pl
.
imshow
(
X_test
[
i
].
reshape
((
64
,
64
)),
cmap
=
pl
.
cm
.
gray
)
pl
.
title
(
'
pred: %s
\n
true: %s
'
%
(
category_names
[
y_pred
[
i
]],
pl
.
title
(
'
pred: %s
\n
true: %s
'
%
(
category_names
[
y_pred
[
i
]],
category_names
[
y_test
[
i
]]))
category_names
[
y_test
[
i
]]),
size
=
12
)
pl
.
xticks
(())
pl
.
yticks
(())
pl
.
show
()
pl
.
show
()
# TODO: find a way to hide the x and y axis
# TODO: plot the top eigenfaces and the singular values absolute values
# TODO: plot the top eigenfaces and the singular values absolute values
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment