0001 classdef sgvm_gaussian_kde < handle
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 properties
0018 dataset
0019 bw_method
0020 d
0021 n
0022 factor
0023 covariance
0024 invcov
0025 norm_factor
0026 end
0027 methods
0028 function self = sgvm_gaussian_kde(dataset, varargin)
0029 self.bw_method = sgvm_varargin_parse(varargin,'bw_method', 'scott');
0030
0031 self.dataset = dataset;
0032 [self.n, self.d] = size(dataset);
0033 self.set_bandwidth(self.bw_method)
0034 end
0035
0036 function set_bandwidth(self, bw_method)
0037 if strcmp(bw_method,'scott')
0038 self.factor = self.n^(-1/(self.d + 4));
0039 elseif strcmp(bw_method,'silverman')
0040 self.factor = (self.n*(self.d + 2)/4)^(-1/(self.d + 4));
0041 elseif isscalar(bw_method)
0042 self.factor = bw_method;
0043 else
0044 error('bw_method should be ''scott'', ''silverman'', or a scalar')
0045 end
0046 self.covariance = cov(self.dataset)*self.factor^2;
0047 self.invcov = inv(cov(self.dataset))/self.factor^2;
0048 self.norm_factor = sqrt(det(2*pi*self.covariance))*self.n;
0049 end
0050
0051 function z = resample(self, n)
0052 sigma = sgvm_mult_randn(n, self.covariance);
0053 idx = randi(self.n, n, 1);
0054 means = self.dataset(idx,:);
0055 z = means + sigma;
0056 end
0057
0058 function z = single_sample(self, actual_vals, subset)
0059 if nargin == 1
0060 actual_vals = false;
0061 elseif nargin == 2
0062 subset = 'all';
0063 end
0064 if actual_vals
0065 if strcmp('all', subset)
0066 I = randi(self.n);
0067 else
0068 I = subset(randi(length(subset)));
0069 end
0070 z = self.dataset(I,:);
0071 else
0072 z = self.resample(1);
0073 end
0074 end
0075
0076 function result = evaluate(self, points)
0077
0078 [m, d] = size(points);
0079 if d ~= self.d
0080 error('sgvm_gaussian_kde/evaluate: points array must have as many columns as the kde dimensions')
0081 end
0082
0083 result = zeros(m, 1);
0084 if m >= self.n
0085
0086 for k = 1:self.n
0087 df = repmat(self.dataset(k,:), m, 1) - points;
0088 tdiff = self.invcov*df.';
0089 energy = sum(df'.*tdiff, 1).'/2;
0090 result = result + exp(-energy);
0091 end
0092 else
0093
0094 for k = 1:m
0095 df = self.dataset - repmat(points(k,:), self.n, 1);
0096 tdiff = self.invcov*df.';
0097 energy = sum(df'.*tdiff, 1).'/2;
0098 result(k) = sum(exp(-energy), 1);
0099 end
0100 end
0101 result = result/self.norm_factor;
0102 end
0103 end
0104 end