Commit d2b39db5 authored by Fabian Pedregosa

Model compaction.

From: fullung <fullung@cb17146a-f446-4be1-a4f7-bd7c5bb65646>

git-svn-id: https://scikit-learn.svn.sourceforge.net/svnroot/scikit-learn/trunk@203 22fbfee3-77ab-4535-9bad-27d1bd3bc7d8
parent 44fb893b
@@ -42,7 +42,7 @@ class LibSvmClassificationResults:
         For training data with nr_class classes, this function returns
         nr_class*(nr_class-1)/2 decision values in a dictionary for
         each item in the test dataset. The keys of the dictionary are
-        2-tuples, one for each combination of two class labels.
+        2-tuples, one for each permutation of two class labels.
         """
         n = self.nr_class * (self.nr_class - 1) / 2
         def p(v):
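The docstring above describes libsvm's pairwise decision values: each test item gets nr_class*(nr_class-1)/2 values, keyed by 2-tuples of class labels. A quick arithmetic check with three hypothetical labels (illustration only, not code from this commit):

# Illustration only: with 3 class labels the formula gives 3 decision
# values per test item, one for each 2-tuple key such as (0, 1).
from itertools import combinations

labels = [0, 1, 2]                     # hypothetical class labels
pairs = list(combinations(labels, 2))  # [(0, 1), (0, 2), (1, 2)]
assert len(pairs) == len(labels) * (len(labels) - 1) // 2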
@@ -33,7 +33,7 @@ class LibSvmOneClassResults:
         distribution, while a non-positive value indicates that it is
         not.
         """
-        return [self.predictor.predict_values(x, 1)[0] for x in dataset]
+        return [self.predictor.predict_values(x, 1) for x in dataset]

     def compact(self):
         self.predictor.compact()
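As the docstring notes, a one-class prediction is just a threshold on the decision value: strictly positive means the test vector is treated as part of the training distribution. A minimal sketch of that thresholding with hypothetical values (not library code):

# Hypothetical decision values; only strictly positive ones map to True,
# matching the assert_array_equal(p, [False, False, False, True]) and
# assertEqual(v > 0, p) checks in the one-class tests further down.
import numpy as N

values = N.array([-0.7, -0.1, 0.0, 0.4])
predictions = values > 0   # -> [False, False, False, True]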
-from ctypes import POINTER, c_double, addressof
+from ctypes import POINTER, c_double, addressof, byref
 from itertools import izip
 import numpy as N
 from dataset import svm_node_dot
@@ -44,6 +45,9 @@ class LibSvmPredictor:
         v = N.empty((n,), dtype=N.float64)
         vptr = v.ctypes.data_as(POINTER(c_double))
         libsvm.svm_predict_values(self.model, xptr, vptr)
-        return v
+        if n == 1:
+            return v[0]
+        else:
+            return v

     def predict_probability(self, x, n):
@@ -88,6 +92,7 @@ class LibSvmPythonPredictor:
         ids = [int(modelc.SV[i][0].value) for i in range(modelc.l)]
         support_vectors = [dataset[id] for id in ids]
         self.support_vectors = support_vectors
+        self.is_compact = False
         libsvm.svm_destroy_model(model)

     def predict(self, x):
@@ -107,7 +112,7 @@ class LibSvmPythonPredictor:
         else:
             return self.predict_values(x, 1)

-    def predict_values(self, x, n):
+    def _predict_values_sparse(self, x, n):
         if self.svm_type in [libsvm.C_SVC, libsvm.NU_SVC]:
             kvalue = N.empty((len(self.support_vectors),))
             for i, sv in enumerate(self.support_vectors):
@@ -121,12 +126,12 @@ class LibSvmPythonPredictor:
                     ci, cj = self.nSV[i], self.nSV[j]
                     coef1 = self.sv_coef[j - 1]
                     coef2 = self.sv_coef[i]
-                    sum = -self.rho[p]
+                    sum = 0.
                     for k in range(ci):
                         sum += coef1[si + k] * kvalue[si + k]
                     for k in range(cj):
                         sum += coef2[sj + k] * kvalue[sj + k]
-                    dec_values[p] = sum
+                    dec_values[p] = sum - self.rho[p]
                     p += 1
             return dec_values
         else:
@@ -135,8 +140,52 @@ class LibSvmPythonPredictor:
                 z += sv_coef * self.kernel(x, sv, svm_node_dot)
             return z

+    def _predict_values_compact(self, x, n):
+        if self.svm_type in [libsvm.C_SVC, libsvm.NU_SVC]:
+            kvalue = N.empty((len(self.support_vectors),))
+            for i, sv in enumerate(self.support_vectors):
+                kvalue[i] = self.kernel(x, sv, svm_node_dot)
+            return kvalue - self.rho
+        else:
+            sv = self.support_vectors[0]
+            return self.kernel(x, sv, svm_node_dot) - self.rho
+
+    def predict_values(self, x, n):
+        if self.is_compact:
+            return self._predict_values_compact(x, n)
+        else:
+            return self._predict_values_sparse(x, n)
+
     def predict_probability(self, x, n):
         raise NotImplementedError

+    def _compact_svs(self, svs, coefs):
+        maxlen = 0
+        for sv in svs:
+            maxlen = N.maximum(maxlen, sv['index'].max())
+        csv = N.zeros((maxlen + 1,), libsvm.svm_node_dtype)
+        csv['index'][:-1] = N.arange(1, maxlen + 1)
+        csv['index'][-1] = -1
+        for coef, sv in izip(coefs, svs):
+            idx = sv['index'][:-1] - 1
+            csv['value'][idx] += coef*sv['value'][:-1]
+        return csv
+
     def compact(self):
-        raise NotImplementedError
+        if self.svm_type in [libsvm.C_SVC, libsvm.NU_SVC]:
+            compact_support_vectors = []
+            for i in range(self.nr_class):
+                for j in range(i + 1, self.nr_class):
+                    si, sj = self.start[i], self.start[j]
+                    ci, cj = self.nSV[i], self.nSV[j]
+                    svi = self.support_vectors[si:si + ci]
+                    svj = self.support_vectors[sj:sj + cj]
+                    coef1 = self.sv_coef[j - 1][si:si + ci]
+                    coef2 = self.sv_coef[i][sj:sj + cj]
+                    csv = self._compact_svs(svi + svj, coef1 + coef2)
+                    compact_support_vectors.append(csv)
+            self.support_vectors = compact_support_vectors
+        else:
+            csv = self._compact_svs(self.support_vectors, self.sv_coef)
+            self.support_vectors = [csv]
+        self.is_compact = True
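compact() folds each group of support vectors into a single svm_node array by accumulating coef * value per feature index, so a compacted predictor needs one kernel evaluation per class pair (or just one, for one-class and regression) instead of one per support vector. The decision values stay unchanged only when the kernel is linear in its first argument, i.e. sum_i coef_i * K(sv_i, x) == K(sum_i coef_i * sv_i, x), which is why the new check_compact tests below all use LinearKernel; for non-linear kernels (e.g. RBF) the identity does not hold in general. A dense NumPy sketch of that identity with made-up numbers (the real code works on sparse svm_node records):

import numpy as N

svs = N.array([[1.0, 2.0], [0.5, -1.0], [3.0, 0.0]])  # hypothetical support vectors
coefs = N.array([0.25, -0.75, 0.5])                   # hypothetical sv_coef values
rho = 0.1
x = N.array([2.0, 1.0])

sparse_value = N.sum(coefs * N.dot(svs, x)) - rho     # one dot product per support vector
compact_sv = N.dot(coefs, svs)                        # what compaction precomputes once
compact_value = N.dot(compact_sv, x) - rho            # a single dot product per prediction
assert N.allclose(sparse_value, compact_value)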
@@ -139,7 +139,7 @@ class test_classification(NumpyTestCase):
         refx = N.vstack([x1, x2])
         trndata = LibSvmClassificationDataSet(zip(reflabels, refx))
         testdata = LibSvmTestDataSet(refx)
-        return trndata, trndata1, trndata2, testdata
+        return trndata, testdata, trndata1, trndata2

     def _make_kernels(self):
         def kernelf(x, y, dot):
@@ -158,7 +158,7 @@ class test_classification(NumpyTestCase):
         return kernels

     def check_all(self):
-        trndata, trndata1, trndata2, testdata = self._make_datasets()
+        trndata, testdata, trndata1, trndata2 = self._make_datasets()
         kernels = self._make_kernels()
         weights = [(0, 2.0), (1, 5.0), (2, 3.0)]
         for kernel in kernels:
@@ -226,5 +226,20 @@ class test_classification(NumpyTestCase):
             p = results.predict(testdata)
             assert_array_equal(p, refp)

+    def check_compact(self):
+        traindata, testdata = self._make_basic_datasets()
+        kernel = LinearKernel()
+        cost = 10.0
+        weights = [(1, 10.0)]
+        model = LibSvmCClassificationModel(kernel, cost, weights)
+        results = model.fit(traindata, LibSvmPythonPredictor)
+        refvs = results.predict_values(testdata)
+        results.compact()
+        vs = results.predict_values(testdata)
+        print vs
+        for refv, v in zip(refvs, vs):
+            for key, value in refv.iteritems():
+                self.assertEqual(value, v[key])
+
 if __name__ == '__main__':
     NumpyTest().run()
@@ -5,6 +5,7 @@ set_local_path('../..')
 from svm.dataset import LibSvmOneClassDataSet, LibSvmTestDataSet
 from svm.kernel import *
 from svm.oneclass import *
+from svm.predict import *
 restore_path()

 class test_oneclass(NumpyTestCase):
@@ -24,9 +25,8 @@ class test_oneclass(NumpyTestCase):
         return traindata, testdata

     def check_train(self):
-        ModelType = LibSvmOneClassModel
         traindata, testdata = self._make_basic_datasets()
-        model = ModelType(LinearKernel())
+        model = LibSvmOneClassModel(LinearKernel())
         results = model.fit(traindata)
         p = results.predict(testdata)
         assert_array_equal(p, [False, False, False, True])
@@ -56,5 +56,14 @@ class test_oneclass(NumpyTestCase):
         for p, v in zip(pred, values):
             self.assertEqual(v > 0, p)

+    def check_compact(self):
+        traindata, testdata = self._make_basic_datasets()
+        model = LibSvmOneClassModel(LinearKernel())
+        results = model.fit(traindata, LibSvmPythonPredictor)
+        refv = results.predict_values(testdata)
+        results.compact()
+        v = results.predict_values(testdata)
+        assert_array_equal(refv, v)
+
 if __name__ == '__main__':
     NumpyTest().run()
@@ -163,5 +163,15 @@ class test_regression(NumpyTestCase):
             p = results.predict(testdata)
             assert_array_almost_equal(refp, p)

+    def check_compact(self):
+        traindata, testdata = self._make_basic_datasets()
+        kernel = LinearKernel()
+        model = LibSvmEpsilonRegressionModel(LinearKernel())
+        results = model.fit(traindata, LibSvmPythonPredictor)
+        refp = results.predict(testdata)
+        results.compact()
+        p = results.predict(testdata)
+        assert_array_equal(refp, p)
+
 if __name__ == '__main__':
     NumpyTest().run()