scikit-learn commit cdeee0d4
Authored 14 years ago by Olivier Grisel
Parent: e61c7df1

OPTIM: fast_svd now has an auto transpose mode that switches to the fastest impl
Showing 2 changed files with 68 additions and 17 deletions:

  scikits/learn/utils/extmath.py            +30 −8
  scikits/learn/utils/tests/test_svd.py     +38 −9
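For orientation before the diff itself: fast_svd follows the randomized SVD scheme of Martinsson, Rokhlin and Tygert cited in the docstring below (random range finding, optional power iterations, then an exact SVD of the small projected matrix). The following is a minimal standalone sketch of that scheme written purely for illustration; it is not the library code, and the function name is made up here:

    import numpy as np
    from scipy import linalg

    def randomized_svd_sketch(M, k, p=5, q=0, rng=None):
        # illustrative re-implementation of the scheme, not the library code
        rng = np.random.RandomState(rng)
        # Gaussian test matrix with k + p columns (p = oversampling)
        r = rng.normal(size=(M.shape[1], k + p))
        Y = np.dot(M, r)
        # optional power iterations sharpen the spectrum on noisy problems
        for _ in range(q):
            Y = np.dot(M, np.dot(M.T, Y))
        # orthonormal basis Q approximating the range of M
        Q, _ = linalg.qr(Y, mode='economic')
        # project M into the small subspace and run an exact SVD there
        B = np.dot(Q.T, M)
        Uhat, s, V = linalg.svd(B, full_matrices=False)
        U = np.dot(Q, Uhat)
        return U[:, :k], s[:k], V[:k, :]

The commit below does not change this core scheme; it only adds the option of running it on M.T when that orientation is cheaper, and of transposing the factors back afterwards.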
scikits/learn/utils/extmath.py  (+30 −8)

@@ -94,8 +94,8 @@ def _sparsedot(a, b):
     else:
         return np.dot(a, b)


-def fast_svd(M, k, p=None, rng=0, q=0):
-    """Computes the k-truncated SVD using random projections
+def fast_svd(M, k, p=None, q=0, transpose='auto', rng=0):
+    """Computes the k-truncated randomized SVD

     Parameters
     ===========
@@ -113,6 +113,13 @@ def fast_svd(M, k, p=None, rng=0, q=0):
         Number of power iterations (can be used to deal with very noisy
         problems).

+    transpose: True, False or 'auto' (default)
+        Whether the algorithm should be applied to M.T instead of M. The
+        result should approximately be the same. The 'auto' mode will
+        trigger the transposition if M.shape[1] > M.shape[0] since this
+        implementation of randomized SVD tend to be a little faster in that
+        case).
+
     rng: RandomState or an int seed (0 by default)
         A random number generator instance to make behavior
@@ -136,15 +143,25 @@ def fast_svd(M, k, p=None, rng=0, q=0):
     A randomized algorithm for the decomposition of matrices
     Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert
     """
-    # lazy import of scipy sparse, because it is very slow.
-    from scipy import sparse
-    if p == None:
-        p = k
     if rng is None:
         rng = np.random.RandomState()
     elif isinstance(rng, int):
         rng = np.random.RandomState(rng)
+    # lazy import of scipy sparse, because it is very slow.
+    from scipy import sparse
+
+    if p == None:
+        p = k
+
+    n_samples, n_features = M.shape
+
+    if transpose == 'auto' and n_samples > n_features:
+        transpose = True
+
+    if transpose:
+        # this implementation is a bit faster with smaller shape[1]
+        M = M.T
+
     # generating random gaussian vectors r with shape: (M.shape[1], k + p)
     r = rng.normal(size=(M.shape[1], k + p))
@@ -168,5 +185,10 @@ def fast_svd(M, k, p=None, rng=0, q=0):
     Uhat, s, V = linalg.svd(B, full_matrices=False)
     del B
     U = np.dot(Q, Uhat)

-    return U[:, :k], s[:k], V[:k, :]
+    if transpose:
+        # transpose back the results according to the input convention
+        return V[:k, :].T, s[:k], U[:, :k].T
+    else:
+        return U[:, :k], s[:k], V[:k, :]
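A quick usage sketch of the new keyword (hedged: the import path follows the scikits.learn layout of this commit, and the data below is synthetic and purely illustrative):

    import numpy as np
    from scikits.learn.utils.extmath import fast_svd  # module path as in this diff

    # a fat design matrix: 100 samples, 500 features
    X = np.random.RandomState(42).normal(size=(100, 500))

    # with transpose='auto' (the default), fast_svd decides internally whether
    # the randomized projection is cheaper on X or on X.T, and transposes the
    # factors back so the output always follows the orientation of X
    U, s, V = fast_svd(X, 10, q=3, transpose='auto', rng=0)
    print(U.shape, s.shape, V.shape)   # (100, 10) (10,) (10, 500)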
scikits/learn/utils/tests/test_svd.py  (+38 −9)

@@ -36,11 +36,10 @@ def test_fast_svd_low_rank():
     # ensure that the singular values of both methods are equal up to the real
     # rank of the matrix
-    assert_almost_equal(s[:rank], sa[:rank])
+    assert_almost_equal(s[:k], sa)

     # check the singular vectors too (while not checking the sign)
-    assert_almost_equal(np.dot(U[:, :rank], V[:rank, :]),
-                        np.dot(Ua[:, :rank], Va[:rank, :]))
+    assert_almost_equal(np.dot(U[:, :k], V[:k, :]), np.dot(Ua, Va))

     # check the sparse matrix representation
     X = sparse.csr_matrix(X)
@@ -71,14 +70,14 @@ def test_fast_svd_low_rank_with_noise():
     _, sa, _ = fast_svd(X, k, q=0)

     # the approximation does not tolerate the noise:
-    assert np.abs(s[:rank] - sa[:rank]).max() > 0.1
+    assert np.abs(s[:k] - sa).max() > 0.05

     # compute the singular values of X using the fast approximate method with
     # iterated power method
-    _, sap, _ = fast_svd(X, k, q=3)
+    _, sap, _ = fast_svd(X, k, q=5)

     # the iterated power method is helping getting rid of the noise:
-    assert_almost_equal(s[:rank], sap[:rank], decimal=5)
+    assert_almost_equal(s[:k], sap, decimal=3)


 def test_fast_svd_infinite_rank():
@@ -102,14 +101,44 @@ def test_fast_svd_infinite_rank():
     _, sa, _ = fast_svd(X, k, q=0)

     # the approximation does not tolerate the noise:
-    assert np.abs(s[:rank] - sa[:rank]).max() > 0.1
+    assert np.abs(s[:k] - sa).max() > 0.1

     # compute the singular values of X using the fast approximate method with
     # iterated power method
-    _, sap, _ = fast_svd(X, k, q=7)
+    _, sap, _ = fast_svd(X, k, q=5)

     # the iterated power method is still managing to get most of the structure
     # at the requested rank
-    assert_almost_equal(s[:rank], sap[:rank], decimal=5)
+    assert_almost_equal(s[:k], sap, decimal=3)
+
+
+def test_fast_svd_transpose_consistency():
+    """Check that transposing the design matrix has limit impact"""
+    n_samples = 100
+    n_features = 500
+    rank = 4
+    k = 10
+
+    X = low_rank_fat_tail(n_samples, n_features, effective_rank=rank,
+                          tail_strength=0.5, seed=0)
+    assert_equal(X.shape, (n_samples, n_features))
+
+    U1, s1, V1 = fast_svd(X, k, q=3, transpose=False, rng=0)
+    U2, s2, V2 = fast_svd(X, k, q=3, transpose=True, rng=0)
+    U3, s3, V3 = fast_svd(X, k, q=3, transpose='auto', rng=0)
+    U4, s4, V4 = linalg.svd(X, full_matrices=False)
+
+    assert_almost_equal(s1, s4[:k], decimal=3)
+    assert_almost_equal(s2, s4[:k], decimal=3)
+    assert_almost_equal(s3, s4[:k], decimal=3)
+
+    assert_almost_equal(np.dot(U1, V1), np.dot(U4[:, :k], V4[:k, :]),
+                        decimal=2)
+    assert_almost_equal(np.dot(U2, V2), np.dot(U4[:, :k], V4[:k, :]),
+                        decimal=2)
+
+    # in this case 'auto' is equivalent to transpose
+    assert_almost_equal(s2, s3)
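The new test_fast_svd_transpose_consistency test relies on the fact that the SVD of M.T is the SVD of M with the left and right factors swapped and transposed, which is why the transposed code path can return equivalent results. A small standalone check of that identity with plain NumPy/SciPy, independent of fast_svd (the shapes mirror the test above):

    import numpy as np
    from scipy import linalg

    rng = np.random.RandomState(0)
    X = rng.normal(size=(100, 500))
    k = 10

    # exact SVDs of X and of X.T: singular values match, U and V swap roles
    U, s, Vt = linalg.svd(X, full_matrices=False)
    Ut, st, Vtt = linalg.svd(X.T, full_matrices=False)

    assert np.allclose(s[:k], st[:k])
    # the rank-k reconstructions agree once the transposed result is flipped back
    assert np.allclose(np.dot(U[:, :k], Vt[:k, :]),
                       np.dot(Ut[:, :k], Vtt[:k, :]).T)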