Source code for sklearn_lvq.grlvq

# -*- coding: utf-8 -*-

# Author: Joris Jensen <jjensen@techfak.uni-bielefeld.de>
#
# License: BSD 3 clause

from __future__ import division

import numpy as np
from scipy.optimize import minimize

from sklearn.utils import validation

from .glvq import GlvqModel, _squared_euclidean


class GrlvqModel(GlvqModel):
    """Generalized Relevance Learning Vector Quantization

    Parameters
    ----------
    prototypes_per_class : int or list of int, optional (default=1)
        Number of prototypes per class. Use a list to specify different
        numbers per class.

    initial_prototypes : array-like, shape = [n_prototypes, n_features + 1],
     optional
        Prototypes to start with. If not given, initialization is done near
        the class means. The class label must be placed as the last entry of
        each prototype.

    initial_relevances : array-like, shape = [n_features], optional
        Relevances to start with. If not given, all relevances are equal.

    regularization : float, optional (default=0.0)
        Value between 0 and 1. Regularization is done by the log determinant
        of the relevance matrix. Without regularization, relevances may
        degenerate to zero.

    max_iter : int, optional (default=2500)
        The maximum number of iterations.

    gtol : float, optional (default=1e-5)
        Gradient norm must be less than gtol before successful termination
        of l-bfgs-b.

    beta : int, optional (default=2)
        Used inside phi: 1 / (1 + np.math.exp(-beta * x))

    C : array-like, shape = [2, 3], optional
        Weights for wrong classification of the form (y_real, y_pred,
        weight). By default all weights are one, so you only need to
        specify the weights not equal to one.

    display : boolean, optional (default=False)
        Print information about the bfgs steps.

    random_state : int, RandomState instance or None, optional
        If int, random_state is the seed used by the random number
        generator; if RandomState instance, random_state is the random
        number generator; if None, the random number generator is the
        RandomState instance used by `np.random`.

    Attributes
    ----------
    w_ : array-like, shape = [n_prototypes, n_features]
        Prototype vectors, where n_prototypes is the number of prototypes
        and n_features is the number of features.

    c_w_ : array-like, shape = [n_prototypes]
        Prototype classes

    classes_ : array-like, shape = [n_classes]
        Array containing labels.

    lambda_ : array-like, shape = [n_features]
        Relevances

    See also
    --------
    GlvqModel, GmlvqModel, LgmlvqModel
    """
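    # The cost minimized during training is sum_i phi(mu_i) with
    # mu_i = (d_correct - d_wrong) / (d_correct + d_wrong), where d_correct
    # and d_wrong are the relevance-weighted squared Euclidean distances of
    # sample i to the closest prototype of the correct class and of any
    # wrong class. Note that _optfun/_optgrad scale the inputs by lambda
    # before squaring (effectively weighting dimension j by lambda_j ** 2),
    # while _compute_distance weights squared differences by lambda_j.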
    def __init__(self, prototypes_per_class=1, initial_prototypes=None,
                 initial_relevances=None, regularization=0.0,
                 max_iter=2500, gtol=1e-5, beta=2, C=None, display=False,
                 random_state=None):
        super(GrlvqModel, self).__init__(prototypes_per_class,
                                         initial_prototypes, max_iter,
                                         gtol, beta, C, display,
                                         random_state)
        self.regularization = regularization
        self.initial_relevances = initial_relevances
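    # phi and phi_prime are provided by the GlvqModel base class; assuming
    # the squashing function documented above, they amount to
    #     phi(x)       = 1 / (1 + exp(-beta * x))
    #     phi_prime(x) = beta * exp(-beta * x) / (1 + exp(-beta * x)) ** 2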
    def _optgrad(self, variables, training_data, label_equals_prototype,
                 random_state, lr_relevances=0, lr_prototypes=1):
        n_data, n_dim = training_data.shape
        nb_prototypes = self.c_w_.shape[0]
        prototypes = variables.reshape(variables.size // n_dim,
                                       n_dim)[:nb_prototypes]
        lambd = variables[prototypes.size:]
        # Clip negative relevances to a small positive value.
        lambd[lambd < 0] = 0.0000001

        dist = _squared_euclidean(lambd * training_data,
                                  lambd * prototypes)
        d_wrong = dist.copy()
        d_wrong[label_equals_prototype] = np.inf
        distwrong = d_wrong.min(1)
        pidxwrong = d_wrong.argmin(1)

        d_correct = dist
        d_correct[np.invert(label_equals_prototype)] = np.inf
        distcorrect = d_correct.min(1)
        pidxcorrect = d_correct.argmin(1)

        distcorrectpluswrong = distcorrect + distwrong
        distcorrectminuswrong = distcorrect - distwrong
        mu = distcorrectminuswrong / distcorrectpluswrong
        mu = np.vectorize(self.phi_prime)(mu)

        g = np.zeros(prototypes.shape)
        # Shared factor from the derivative of mu w.r.t. the distances.
        distcorrectpluswrong = 4 / distcorrectpluswrong ** 2
        gw = np.zeros(lambd.size)

        for i in range(nb_prototypes):
            idxc = i == pidxcorrect
            idxw = i == pidxwrong

            dcd = mu[idxw] * distcorrect[idxw] * distcorrectpluswrong[idxw]
            dwd = mu[idxc] * distwrong[idxc] * distcorrectpluswrong[idxc]
            if lr_relevances > 0:
                difc = training_data[idxc] - prototypes[i]
                difw = training_data[idxw] - prototypes[i]
                gw -= dcd.dot(difw ** 2) - dwd.dot(difc ** 2)
                if lr_prototypes > 0:
                    g[i] = dcd.dot(difw) - dwd.dot(difc)
            elif lr_prototypes > 0:
                g[i] = dcd.dot(training_data[idxw]) - \
                       dwd.dot(training_data[idxc]) + \
                       (dwd.sum(0) - dcd.sum(0)) * prototypes[i]

        f3 = 0
        if self.regularization:
            f3 = np.diag(np.linalg.pinv(np.sqrt(np.diag(lambd))))
        if lr_relevances > 0:
            gw = 2 / n_data * lr_relevances * gw - self.regularization * f3
        if lr_prototypes > 0:
            g[:nb_prototypes] = 1 / n_data * lr_prototypes * \
                g[:nb_prototypes] * lambd
        g = np.append(g.ravel(), gw, axis=0)
        # Add small multiplicative noise to the gradient.
        g = g * (1 + 0.0001 * (random_state.rand(*g.shape) - 0.5))
        return g

    def _optfun(self, variables, training_data, label_equals_prototype):
        n_data, n_dim = training_data.shape
        nb_prototypes = self.c_w_.shape[0]
        prototypes = variables.reshape(variables.size // n_dim,
                                       n_dim)[:nb_prototypes]
        lambd = variables[prototypes.size:]

        dist = _squared_euclidean(lambd * training_data,
                                  lambd * prototypes)
        d_wrong = dist.copy()
        d_wrong[label_equals_prototype] = np.inf
        distwrong = d_wrong.min(1)

        d_correct = dist
        d_correct[np.invert(label_equals_prototype)] = np.inf
        distcorrect = d_correct.min(1)

        distcorrectpluswrong = distcorrect + distwrong
        distcorrectminuswrong = distcorrect - distwrong
        mu = distcorrectminuswrong / distcorrectpluswrong
        mu *= self.c_[label_equals_prototype.argmax(1), d_wrong.argmin(1)]

        return np.vectorize(self.phi)(mu).sum(0)

    def _optimize(self, x, y, random_state):
        if not isinstance(self.regularization,
                          float) or self.regularization < 0:
            raise ValueError("regularization must be a positive float")
        nb_prototypes, nb_features = self.w_.shape
        if self.initial_relevances is None:
            self.lambda_ = np.ones([nb_features])
        else:
            self.lambda_ = validation.column_or_1d(
                validation.check_array(self.initial_relevances,
                                       dtype='float', ensure_2d=False))
            if self.lambda_.size != nb_features:
                raise ValueError("length of initial relevances is wrong: "
                                 "features=%d, length=%d" % (
                                     nb_features, self.lambda_.size))
        self.lambda_ /= np.sum(self.lambda_)
        variables = np.append(self.w_.ravel(), self.lambda_, axis=0)
        label_equals_prototype = y[np.newaxis].T == self.c_w_
        method = 'l-bfgs-b'
        # Stage 1: optimize the prototypes with fixed relevances.
        res = minimize(
            fun=lambda vs: self._optfun(
                vs, x, label_equals_prototype=label_equals_prototype),
            jac=lambda vs: self._optgrad(
                vs, x, label_equals_prototype=label_equals_prototype,
                lr_prototypes=1, lr_relevances=0,
                random_state=random_state),
            method=method, x0=variables,
            options={'disp': self.display, 'gtol': self.gtol,
                     'maxiter': self.max_iter})
        n_iter = res.nit
        # Stage 2: optimize the relevances with fixed prototypes.
        res = minimize(
            fun=lambda vs: self._optfun(
                vs, x, label_equals_prototype=label_equals_prototype),
            jac=lambda vs: self._optgrad(
                vs, x, label_equals_prototype=label_equals_prototype,
                lr_prototypes=0, lr_relevances=1,
                random_state=random_state),
            method=method, x0=res.x,
            options={'disp': self.display, 'gtol': self.gtol,
                     'maxiter': self.max_iter})
        n_iter = max(n_iter, res.nit)
        # Stage 3: optimize prototypes and relevances jointly.
        res = minimize(
            fun=lambda vs: self._optfun(
                vs, x, label_equals_prototype=label_equals_prototype),
            jac=lambda vs: self._optgrad(
                vs, x, label_equals_prototype=label_equals_prototype,
                lr_prototypes=1, lr_relevances=1,
                random_state=random_state),
            method=method, x0=res.x,
            options={'disp': self.display, 'gtol': self.gtol,
                     'maxiter': self.max_iter})
        n_iter = max(n_iter, res.nit)
        self.w_ = res.x.reshape(res.x.size // nb_features,
                                nb_features)[:nb_prototypes]
        self.lambda_ = res.x[self.w_.size:]
        self.lambda_[self.lambda_ < 0] = 0.0000001
        self.lambda_ = self.lambda_ / self.lambda_.sum()
        self.n_iter_ = n_iter

    def _compute_distance(self, x, w=None, lambda_=None):
        if w is None:
            w = self.w_
        if lambda_ is None:
            lambda_ = self.lambda_
        nb_samples = x.shape[0]
        nb_prototypes = w.shape[0]
        distance = np.zeros([nb_prototypes, nb_samples])
        for i in range(nb_prototypes):
            delta = x - w[i]
            distance[i] = np.sum(delta ** 2 * lambda_, 1)
        return distance.T
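    # Worked example of the prediction-time distance (illustrative values):
    # with lambda_ = [0.7, 0.3], x = [1.0, 2.0] and prototype w = [0.0, 0.0],
    #     d(x, w) = 0.7 * (1 - 0) ** 2 + 0.3 * (2 - 0) ** 2 = 1.9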
    def project(self, x, dims, print_variance_covered=False):
        """Projects the input data x onto the `dims` most relevant
        dimensions, using the relevance vector of the trained model.

        Parameters
        ----------
        x : array-like, shape = [n, n_features]
            input data to project
        dims : int
            dimension to project to
        print_variance_covered : boolean
            flag to print the covered variance of the projection

        Returns
        -------
        C : array, shape = [n, dims]
            Returns the projected data.
        """
        idx = self.lambda_.argsort()[::-1]
        if print_variance_covered:
            print('variance covered by projection:',
                  self.lambda_[idx][:dims].sum() / self.lambda_.sum() * 100)
        # Keep the `dims` most relevant axes, each scaled by its relevance.
        return x.dot(np.diag(self.lambda_)[:, idx[:dims]])
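
# ----------------------------------------------------------------------
# Usage sketch (not part of the original module): fitting GrlvqModel on a
# synthetic dataset and inspecting the learned relevances. The dataset and
# all parameter values below are illustrative assumptions; `fit` and
# `score` are assumed to be inherited through GlvqModel and scikit-learn's
# classifier mixin. Because this module uses relative imports, run it as
# part of the package (e.g. `python -m sklearn_lvq.grlvq`) rather than as
# a standalone script.
if __name__ == '__main__':
    from sklearn.datasets import make_classification

    toy_x, toy_y = make_classification(n_samples=200, n_features=6,
                                       n_informative=2, n_redundant=0,
                                       random_state=0)
    model = GrlvqModel(prototypes_per_class=2, random_state=0)
    model.fit(toy_x, toy_y)
    print('training accuracy:', model.score(toy_x, toy_y))
    print('learned relevances:', model.lambda_)
    # Project onto the two most relevant dimensions.
    projected = model.project(toy_x, 2, print_variance_covered=True)
    print('projected shape:', projected.shape)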