Supplemental
Other Languages
When working on some of these models and algorithms, I had other code on hand in another language, or at the time simply wanted to try them in that language. There is not a whole lot here, but it may still be useful to some. Refer to the corresponding chapter of R code for context.
Python Demos
Linear Regression
Data Setup
import numpy as np
import pandas as pd

from scipy.stats import norm
from scipy.optimize import minimize

np.random.seed(123)  # ensures replication

# predictors and response
# increasing N will get estimated values closer to the known parameters
N = 1000  # sample size
k = 2     # number of desired predictors
X = np.matrix(np.random.normal(size = N * k)).reshape(N, k)
y = -.5 + .2*X[:, 0] + .1*X[:, 1] + np.random.normal(scale = .5, size = N).reshape(N, 1)

dfXy = pd.DataFrame(np.column_stack([X, y]), columns = ['X1', 'X2', 'y'])
Functions
A maximum likelihood approach.
def lm_ml(par, X, y):
    # par: parameters to be estimated
    # X: predictor matrix with intercept column
    # y: response

    # setup
    beta  = par[1:].reshape(X.shape[1], 1)  # coefficients
    sigma = par[0]                          # error sd
    # N = X.shape[0]

    # linear predictor
    LP = X * beta   # linear predictor
    mu = LP         # identity link in the glm sense

    # calculate likelihood
    L = norm.logpdf(y, loc = mu, scale = sigma)  # log likelihood
    # L = -.5*N*log(sigma2) - .5*(1/sigma2)*crossprod(y-mu)  # alternate log likelihood form

    L = -np.sum(L)  # minimize performs minimization, and we want to maximize the likelihood

    return(L)
An approach via least squares loss function.
def lm_ls(par, X, y):
    # par: parameters to be estimated
    # X: predictor matrix with intercept column
    # y: response

    # setup
    beta = par.reshape(3, 1)  # coefficients
    N = X.shape[0]
    p = X.shape[1]

    # linear predictor
    LP = X * beta   # linear predictor
    mu = LP         # identity link in the glm sense

    # squared error loss
    return(np.sum(np.square(y - mu)))
Estimation
X_mm = np.column_stack([np.repeat(1, N).reshape(N, 1), X])

# you may get warnings, they can be ignored
fit_ml = minimize(
    fun     = lm_ml,
    x0      = [1, 0, 0, 0],
    args    = (X_mm, y),
    bounds  = ((0, None), (None, None), (None, None), (None, None)),
    method  = 'L-BFGS-B',
    tol     = 1e-12,
    options = {'maxiter': 500}
)

# can use least_squares directly
# from scipy.optimize import least_squares
fit_ls = minimize(
    lm_ls,
    x0      = np.array([0, 0, 0]),
    args    = (X_mm, y),
    tol     = 1e-12,
    options = {'maxiter': 500}
)
Comparison
import statsmodels.formula.api as smf
model_sm_ols = smf.ols('y ~ X1 + X2', data = dfXy)
fit_sm_ols   = model_sm_ols.fit()

pd.DataFrame(
    [
        np.append(fit_ml.x[0]**2, fit_ml.x[1:]),
        np.append(fit_ls.fun/(N - X_mm.shape[1] - 1), fit_ls.x),
        np.append(fit_sm_ols.scale, fit_sm_ols.params)
    ],
    columns = ['sigma', 'Int', 'b_X1', 'b_X2'],
    index   = ['ML', 'OLS', 'SM']
)
sigma Int b_X1 b_X2
ML 0.240332 -0.494647 0.213036 0.0815
OLS 0.241297 -0.494647 0.213036 0.0815
SM 0.241055 -0.494647 0.213036 0.0815
Logistic Regression
Data Setup
import numpy as np
import pandas as pd
from scipy.stats import logistic, binom
from scipy.optimize import minimize
np.random.seed(123)  # ensures replication

# predictors and response
# increasing N will get estimated values closer to the known parameters
N = 2500  # sample size
k = 2     # number of desired predictors
X = np.matrix(np.random.normal(size = N * k)).reshape(N, k)
eta = -.5 + .2*X[:, 0] + .1*X[:, 1]

pi = 1/(1 + np.exp(-eta))

y = np.random.binomial(1, p = pi, size = (N, 1))

dfXy = pd.DataFrame(np.column_stack([X, y]), columns = ['X1', 'X2', 'y'])
Functions
A maximum likelihood approach.
def logreg_ml(par, X, y):
    # par: parameters to be estimated
    # X: predictor matrix with intercept column
    # y: response

    # setup
    beta = par.reshape(X.shape[1], 1)  # coefficients
    # N = X.shape[0]

    # linear predictor
    LP = np.dot(X, beta)       # linear predictor
    mu = 1/(1 + np.exp(-LP))   # logit link

    # calculate likelihood
    L = binom.logpmf(y, 1, mu)  # log likelihood
    # L = np.multiply(y, np.log(mu)) + np.multiply(1 - y, np.log(1 - mu))  # alternate log likelihood form

    L = -np.sum(L)  # minimize performs minimization, and we want to maximize the likelihood

    return(L)
Another approach via exponential loss function.
def logreg_exp(par, X, y):
    # Arguments
    # par: parameters to be estimated
    # X: predictor matrix with intercept column
    # y: target

    # setup
    beta = par.reshape(X.shape[1], 1)  # coefficients

    # linear predictor
    LP = np.dot(X, beta)  # linear predictor

    # calculate exponential loss function (convert y to -1:1 from 0:1)
    L = np.sum(np.exp(-np.multiply(np.where(y, 1, -1), LP) * .5))

    return(L)
Estimation
Setup for use with scipy's minimize.
X_mm = np.column_stack([np.repeat(1, N).reshape(N, 1), X])

fit_ml = minimize(
    fun     = logreg_ml,
    x0      = [0, 0, 0],
    args    = (X_mm, y),
    method  = 'BFGS',
    tol     = 1e-12,
    options = {'maxiter': 500}
)

fit_exp = minimize(
    fun     = logreg_exp,
    x0      = [0, 0, 0],
    args    = (X_mm, y),
    method  = 'BFGS',
    tol     = 1e-12,
    options = {'maxiter': 500}
)

pars_ml  = fit_ml.x
pars_exp = fit_exp.x
Comparison
import statsmodels.formula.api as smf
model_sm_glm = smf.logit('y ~ X1 + X2', data = dfXy)
fit_sm_glm   = model_sm_glm.fit()

pd.DataFrame(
    [
        pars_ml,
        pars_exp,
        fit_sm_glm.params
    ],
    columns = ['Int', 'b_X1', 'b_X2'],
    index   = ['fit_ml', 'fit_exp', 'SM']
)
Int b_X1 b_X2
fit_ml -0.505637 0.234567 0.054700
fit_exp -0.504802 0.231859 0.053134
SM -0.505637 0.234567 0.054700
Quantile Regression
See the R chapter for details.
Data Setup
import pandas as pd
import numpy as np
# originally from quantreg package
engel = pd.read_csv('data/engel.csv')

X = np.column_stack([
    np.ones((engel.shape[0], 1)),
    np.asarray(engel['income'])
])

y = np.asarray(engel['foodexp'])

def qreg(par, X, y, tau):
    lp   = np.dot(X, par)
    res  = y - lp
    loss = np.where(res < 0, -(1 - tau)*res, tau*res)

    return(np.sum(loss))
Estimation
We’ll estimate the median to start.
# from scipy.stats import logistic, binom
from scipy.optimize import minimize
minimize(
    fun     = qreg,
    x0      = [0, 0],
    args    = (X, y, .5),
    method  = 'BFGS',
    tol     = 1e-12,
    options = {'maxiter': 500}
)
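As a quick check, one could compare the result against statsmodels, which provides quantile regression through its formula interface. This is a hedged sketch rather than part of the original demo; the name fit_sm_qr is mine.

import statsmodels.formula.api as smf

# median (q = .5) regression on the engel data for comparison
fit_sm_qr = smf.quantreg('foodexp ~ income', engel).fit(q = .5)

fit_sm_qr.params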
Other quantiles
Now we will add additional quantiles to estimate.
import pandas as pd

# quantiles
qs  = [.05, .1, .25, .5, .75, .9, .95]
fit = []

for tau in qs:
    init = minimize(
        fun     = qreg,
        x0      = [0, 0],
        args    = (X, y, tau),
        method  = 'BFGS',
        tol     = 1e-12,
        options = {'maxiter': 500}
    )

    fit.append(
        pd.DataFrame(init.x.reshape(1, 2), columns = ['int', 'slope'])
    )

fit_qr = pd.concat(fit)

from plotnine import *

(
    ggplot(aes(x = 'income', y = 'foodexp'), data = engel) +
    geom_point() +
    geom_abline(
        aes(intercept = 'int', slope = 'slope', color = 'int'),
        data = fit_qr
    )
)
Stochastic Gradient Descent
Here we use the adagrad approach as in the R demo. The dataset shift and other variants are not demonstrated.
Data Setup
Create some data for a standard linear regression.
import numpy as np
np.random.seed(1234)

n  = 10000
x1 = np.random.normal(size = n)
x2 = np.random.normal(size = n)
y  = 1 + .5*x1 + .2*x2 + np.random.normal(size = n)

X = np.column_stack([np.ones(n), x1, x2])
Function
The estimating function using the adagrad approach.
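For reference, the update the function implements is the adagrad step, which accumulates squared gradients and scales the learning rate for each coefficient. This is just a sketch of what the code below does, with notation of my own choosing:

$$s_t = s_{t-1} + g_t^2, \qquad \beta_t = \beta_{t-1} - \frac{\eta}{\tau + \sqrt{s_t}}\, g_t$$

where $g_t$ is the gradient at the current observation, $\eta$ is the stepsize, and $\tau$ is stepsize_tau.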
def sgd(par, X, y, stepsize, stepsize_tau = 0, average = False):
    """
    Estimate a linear regression via stochastic gradient descent

    par: parameter estimates
    X: model matrix
    y: target variable
    stepsize: the learning rate
    stepsize_tau: if > 0, a check on the LR at early iterations
    average: an alternative method

    Returns a dictionary of the parameter estimates, the estimates for each iteration,
    the loss for each iteration, the root mean square error, and the fitted values
    using the final parameter estimates
    """

    beta    = par
    betamat = np.zeros_like(X)
    fits    = np.zeros_like(y)   # if you want these across iterations
    loss    = np.zeros_like(y)
    s       = 0

    for i in np.arange(X.shape[0]):
        Xi = X[i]
        yi = y[i]

        LP    = np.dot(Xi, beta)
        grad  = np.dot(Xi.T, LP - yi)
        s     = s + grad**2
        beta -= stepsize * grad/(stepsize_tau + np.sqrt(s))

        if average and i > 1:
            beta -= 1/i * (betamat[i - 1] - beta)

        betamat[i] = beta
        fits[i]    = LP
        loss[i]    = (LP - yi)**2

    LP       = np.dot(X, beta)
    lastloss = np.dot(LP - y, LP - y)

    return({
        'par': beta,           # final estimates
        'par_chain': betamat,  # all estimates
        'loss': loss,          # observation level loss
        'RMSE': np.sqrt(np.sum(lastloss)/X.shape[0]),
        'fitted': LP
    })
Set the starting values and estimate. For any particular data you might have to fiddle with the stepsize, perhaps choosing one based on cross-validation with old data; a rough sketch of such a comparison follows the fit output below.
init = np.zeros(3)

fit_sgd = sgd(
    init,
    X,
    y,
    stepsize     = .1,
    stepsize_tau = .5
)

fit_sgd['par'].round(4)
fit_sgd['RMSE'].round(4)
array([1.0172, 0.4942, 0.1937])
0.9978
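As a rough sketch of the stepsize fiddling mentioned above (not part of the original demo; the grid values and the names size and fit_try are illustrative), one could compare the RMSE across a few candidate values, ideally on held-out data:

# compare a few stepsizes by RMSE (cross-validation on old data would be preferable)
for size in [.001, .01, .1, .5]:
    fit_try = sgd(np.zeros(3), X, y, stepsize = size, stepsize_tau = .5)
    print(size, fit_try['RMSE'].round(4))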
Comparison
We’ll compare our results to standard linear regression.
import pandas as pd
import statsmodels.formula.api as smf
df = pd.DataFrame({'x1': x1, 'x2': x2, 'y': y})

lm_fit = smf.ols('y ~ x1 + x2', df).fit()

lm_fit.params.round(4)
lm_fit.scale.round(4)
Intercept 1.0037
x1 0.5032
x2 0.2000
dtype: float64
0.9955
Nelder-Mead
import copy
'''
Francois Chollet's Nelder-Mead in Python.
https://github.com/fchollet/nelder-mead/blob/master/nelder_mead.py
Pure Python/Numpy implementation of the Nelder-Mead algorithm.
Reference: https://en.wikipedia.org/wiki/Nelder%E2%80%93Mead_method
'''
def nelder_mead(
    f,
    x_start,
    step            = 0.1,
    no_improve_thr  = 10e-6,
    no_improv_break = 10,
    max_iter        = 0,
    alpha           = 1.,
    gamma           = 2.,
    rho             = 0.5,
    sigma           = 0.5
):
    '''
    @param f (function): function to optimize, must return a scalar score
        and operate over a numpy array of the same dimensions as x_start
    @param x_start (numpy array): initial position
    @param step (float): look-around radius in initial step
    @no_improv_thr, no_improv_break (float, int): break after no_improv_break iterations with
        an improvement lower than no_improv_thr
    @max_iter (int): always break after this number of iterations.
        Set it to 0 to loop indefinitely.
    @alpha, gamma, rho, sigma (floats): parameters of the algorithm
        (see Wikipedia page for reference)

    return: tuple (best parameter array, best score)
    '''

    # init
    dim = len(x_start)
    prev_best = f(x_start)
    no_improv = 0
    res = [[x_start, prev_best]]

    for i in range(dim):
        x = copy.copy(x_start)
        x[i] = x[i] + step
        score = f(x)
        res.append([x, score])

    # simplex iter
    iters = 0

    while 1:
        # order
        res.sort(key = lambda x: x[1])
        best = res[0][1]

        # break after max_iter
        if max_iter and iters >= max_iter:
            return res[0]

        iters += 1

        # break after no_improv_break iterations with no improvement
        if iters//10 == 0:
            print('...best so far:', best)

        if best < prev_best - no_improve_thr:
            no_improv = 0
            prev_best = best
        else:
            no_improv += 1

        if no_improv >= no_improv_break:
            return res[0]

        # centroid
        x0 = [0.] * dim
        for tup in res[:-1]:
            for i, c in enumerate(tup[0]):
                x0[i] += c / (len(res)-1)

        # reflection
        xr = x0 + alpha*(x0 - res[-1][0])
        rscore = f(xr)
        if res[0][1] <= rscore < res[-2][1]:
            del res[-1]
            res.append([xr, rscore])
            continue

        # expansion
        if rscore < res[0][1]:
            xe = x0 + gamma*(x0 - res[-1][0])
            escore = f(xe)
            if escore < rscore:
                del res[-1]
                res.append([xe, escore])
                continue
            else:
                del res[-1]
                res.append([xr, rscore])
                continue

        # contraction
        xc = x0 + rho*(x0 - res[-1][0])
        cscore = f(xc)
        if cscore < res[-1][1]:
            del res[-1]
            res.append([xc, cscore])
            continue

        # reduction
        x1 = res[0][0]
        nres = []
        for tup in res:
            redx = x1 + sigma*(tup[0] - x1)
            score = f(redx)
            nres.append([redx, score])
        res = nres


if __name__ == "__main__":
    # test
    import math
    import numpy as np

    def f(x):
        return math.sin(x[0]) * math.cos(x[1]) * (1. / (abs(x[2]) + 1))

    nelder_mead(f, np.array([0., 0., 0.]))
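Beyond the sine test above, here is a hedged sketch of using the function for something closer to the earlier demos, fitting a simple least squares regression. The data and the names X_nm, y_nm, and ls_obj are illustrative, not from the original.

import numpy as np

np.random.seed(123)

# simple data: intercept 1, slope .5
X_nm = np.column_stack([np.ones(100), np.random.normal(size = 100)])
y_nm = X_nm @ np.array([1., .5]) + np.random.normal(scale = .5, size = 100)

def ls_obj(par):
    # sum of squared errors for the candidate coefficients
    return np.sum((y_nm - X_nm @ par)**2)

# returns [best parameter array, best score]
nelder_mead(ls_obj, np.array([0., 0.]))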
HMM
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
From the wikipedia page with slight modification
https://en.wikipedia.org/wiki/Viterbi_algorithm#Example
"""
def viterbi(obs, states, start_p, trans_p, emit_p):
    V = [{}]

    for st in states:
        V[0][st] = {"prob": start_p[st] * emit_p[st][obs[0]], "prev": None}

    # Run Viterbi when t > 0
    for t in range(1, len(obs)):
        V.append({})

        for st in states:
            max_tr_prob = max(V[t-1][prev_st]["prob"]*trans_p[prev_st][st] for prev_st in states)

            for prev_st in states:
                if V[t-1][prev_st]["prob"] * trans_p[prev_st][st] == max_tr_prob:
                    max_prob = max_tr_prob * emit_p[st][obs[t]]
                    V[t][st] = {"prob": max_prob, "prev": prev_st}
                    break

    for line in dptable(V):
        print(line)

    opt = []

    # The highest probability
    max_prob = max(value["prob"] for value in V[-1].values())
    previous = None

    # Get most probable state and its backtrack
    for st, data in V[-1].items():
        if data["prob"] == max_prob:
            opt.append(st)
            previous = st
            break

    # Follow the backtrack till the first observation
    for t in range(len(V) - 2, -1, -1):
        opt.insert(0, V[t + 1][previous]["prev"])
        previous = V[t + 1][previous]["prev"]

    print('The steps of states are ' + ' '.join(opt) + ' with highest probability of %s' % max_prob)


def dptable(V):
    # Print a table of steps from dictionary
    yield " ".join(("%12d" % i) for i in range(len(V)))

    for state in V[0]:
        yield "%.7s: " % state + " ".join("%.7s" % ("%f" % v[state]["prob"]) for v in V)
# The function viterbi takes the following arguments: obs is the sequence of
# observations, e.g. ['normal', 'cold', 'dizzy']; states is the set of hidden
# states; start_p is the start probability; trans_p are the transition
# probabilities; and emit_p are the emission probabilities. For simplicity of
# code, we assume that the observation sequence obs is non-empty and that
# trans_p[i][j] and emit_p[i][j] is defined for all states i,j.
# In the running example, the forward/Viterbi algorithm is used as follows:
obs    = ('normal', 'cold', 'dizzy')
states = ('Healthy', 'Fever')

start_p = {'Healthy': 0.6, 'Fever': 0.4}

trans_p = {
    'Healthy' : {'Healthy': 0.7, 'Fever': 0.3},
    'Fever'   : {'Healthy': 0.4, 'Fever': 0.6}
}

emit_p = {
    'Healthy' : {'normal': 0.5, 'cold': 0.4, 'dizzy': 0.1},
    'Fever'   : {'normal': 0.1, 'cold': 0.3, 'dizzy': 0.6}
}
viterbi(obs,states,start_p,trans_p,emit_p)
Julia Demos
I haven’t played with Julia in a very long time, but briefly hacked the old code to get something that worked. As Julia has gone through notable changes, it’s doubtful these are very good as far as Julia programming standards go, though conceptually they may still provide some utility. Perhaps at some point I’ll reteach myself the basics and come back to these. In any case, the code did run in a Jupyter notebook.
Mixed Models
One-factor
#####################
### Main function ###
#####################
using LinearAlgebra
using Statistics
function one_factor_re_loglike(par::Vector)
    d, ni = size(y)

    mu        = par[1]
    sigma2_mu = par[2]
    sigma2    = par[3]

    Sigmai = sigma2*I(ni) + sigma2_mu*ones(ni, ni)
    l      = -(ni*d)/2*log(2*pi) - d/2*log(det(Sigmai))

    for i in 1:d
        yi = y[i,:]
        l  = l - .5(yi .- mu)' * (Sigmai\(yi .- mu))
    end

    l = -l[1]

    return l
end
###################
### Data set up ###
###################
y = [22.6 20.5 20.8
     22.6 21.2 20.5
     17.3 16.2 16.6
     21.4 23.7 23.2
     20.9 22.2 22.6
     14.5 10.5 12.3
     20.8 19.1 21.3
     17.4 18.6 18.6
     25.1 24.8 24.9
     14.9 16.3 16.6]
################################
### Starting values and test ###
################################
using Statistics
mu0        = mean(y)
sigma2_mu0 = var(mean(y, dims = 2))
sigma20    = mean(var(y, dims = 2))

theta0 = [mu0, sigma2_mu0, sigma20]
### test
one_factor_re_loglike(theta0)
###########
### Run ###
###########
using Optim
res = optimize(one_factor_re_loglike, theta0, LBFGS())
res
* Status: success
* Candidate solution
Final objective value: 6.226441e+01
* Found with
Algorithm: L-BFGS
* Convergence measures
|x - x'| = 2.93e-08 ≰ 0.0e+00
|x - x'|/|x'| = 1.49e-09 ≰ 0.0e+00
|f(x) - f(x')| = 1.42e-14 ≰ 0.0e+00
|f(x) - f(x')|/|f(x')| = 2.28e-16 ≰ 0.0e+00
|g(x)| = 1.17e-09 ≤ 1.0e-08
* Work counters
Seconds run: 1 (vs limit Inf)
Iterations: 7
f(x) calls: 22
∇f(x) calls: 22
Optim.minimizer(res)
3-element Array{Float64,1}:
19.599999980440952
12.193999992338886
1.1666666662195693
Optim.minimum(res)
62.30661224610756
Two-factor
using LinearAlgebra
using Statistics
function sfran2_loglike(par::Vector)
    n  = length(y)
    mu = par[1]

    sigma2_alpha = exp(par[2])
    sigma2_gamma = exp(par[3])
    sigma2       = exp(par[4])

    Sigma = sigma2*I(n) + sigma2_alpha*(Xalpha * Xalpha') + sigma2_gamma * (Xgamma * Xgamma')

    l = -n/2*log(2*pi) - sum(log.(diag(cholesky(Sigma).L))) - .5*(y .- mu)' * (Sigma\(y .- mu))

    l = -l[1]

    return l
end
##################
### Data setup ###
##################
y = [1.39, 1.29, 1.12, 1.16, 1.52, 1.62, 1.88, 1.87, 1.24, 1.18,
     .95, .96, .82, .92, 1.18, 1.20, 1.47, 1.41, 1.57, 1.65]

# random effect design matrices used by sfran2_loglike (mirroring the Matlab two-factor setup later in this chapter)
Xalpha = kron(I(5), ones(4, 1))
Xgamma = kron(I(10), ones(2, 1))
################################
### Starting values and test ###
################################
yhat = mean(reshape(y, 4, 5), dims = 1)

theta0 = [mean(y), log(var(yhat)), log(var(y)/3), log(var(y)/3)]
sfran2_loglike(theta0)
###########
### Run ###
###########
using Optim
res = optimize(sfran2_loglike, theta0, LBFGS())
res
* Status: success
* Candidate solution
Final objective value: -1.199315e+01
* Found with
Algorithm: L-BFGS
* Convergence measures
|x - x'| = 6.60e-09 ≰ 0.0e+00
|x - x'|/|x'| = 1.08e-09 ≰ 0.0e+00
|f(x) - f(x')| = 5.33e-15 ≰ 0.0e+00
|f(x) - f(x')|/|f(x')| = 4.44e-16 ≰ 0.0e+00
|g(x)| = 7.02e-10 ≤ 1.0e-08
* Work counters
Seconds run: 0 (vs limit Inf)
Iterations: 9
f(x) calls: 23
∇f(x) calls: 23
exp.(Optim.minimizer(res))
4-element Array{Float64,1}:
3.7434213772629223
0.053720000000540405
0.031790000003692476
0.002290000000530042
-2*Optim.minimum(res)
23.98630759443859
Matlab Demos
I don’t code in Matlab, nor have any particular desire to, so this is provided here just for reference because I happened to have the code.
Mixed Models
One-factor
% matlab from Statistical Modeling and Computation (2014 p 311). See the
% associated twofactorRE.R file for details.
function l = one_factor_re_loglike(mu, sigma2_mu, sigma2, y)
  [d ni] = size(y);
  Sigmai = sigma2*eye(ni) + sigma2_mu*ones(ni,ni);
  l = -(ni*d) / 2*log(2*pi) - d / 2*log(det(Sigmai));

  for i = 1:d
    yi = y(i, :)';
    l  = l - .5*(yi - mu)' * (Sigmai\(yi - mu));
  end
end
y = [22.6 20.5 20.8;
22.6 21.2 20.5;
17.3 16.2 16.6;
21.4 23.7 23.2;
20.9 22.2 22.6;
14.5 10.5 12.3;
20.8 19.1 21.3;
17.4 18.6 18.6;
25.1 24.8 24.9;
14.9 16.3 16.6];
f = @(theta) -one_factor_re_loglike(theta(1), theta(2), theta(3), y);
ybar = mean(y, 2);
theta0 = [mean(ybar) var(ybar) mean(var(y, 0, 2))];
thetahat = fminsearch(f, theta0);
Two-factor
% matlab from Statistical Modeling and Computation (2014 p 314). See the
% associated twofactorRE.R file for details.
function l = sfran2_loglike(mu, eta_alpha, eta_gamma, eta, y, Xalpha, Xgamma)
  sigma2_alpha = exp(eta_alpha);
  sigma2_gamma = exp(eta_gamma);
  sigma2       = exp(eta);

  n = length(y);

  Sigma = sigma2*speye(n) + sigma2_alpha * (Xalpha * Xalpha') + sigma2_gamma * (Xgamma*Xgamma');

  l = -n/2 * log(2*pi) - sum(log(diag(chol(Sigma)))) - .5*(y - mu)' * (Sigma\(y - mu));
end
y = [1.39 1.29 1.12 1.16 1.52 1.62 1.88 1.87 1.24 1.18 .95 .96 .82 .92 1.18 1.20 1.47 1.41 1.57 1.65]'; % column vector so the quadratic form conforms
Xalpha = kron(speye(5), ones(4,1));
Xgamma = kron(speye(10), ones(2,1));
f = @(theta) -sfran2_loglike(theta(1), theta(2), theta(3), theta(4), y, Xalpha, Xgamma);
yhat = mean(reshape(y, 4, 5));
theta0 = [mean(y) log(var(yhat)) log(var(y)/3) log(var(y)/3)];
thetahat = fminsearch(f, theta0)
Gaussian Processes
Any updates on the following can be found at the repo.
function S = gaussSample(arg1, arg2, arg3)
% Returns n samples (in the rows) from a multivariate Gaussian distribution
%
% Examples:
% S = gaussSample(mu, Sigma, 10)
% S = gaussSample(model, 100)
% S = gaussSample(struct('mu',[0], 'Sigma', eye(1)), 3)
% This file is from pmtk3.googlecode.com
switch nargin
case 3, mu = arg1; Sigma = arg2; n = arg3;
case 2, model = arg1; mu = model.mu; Sigma = model.Sigma; n = arg2;
case 1, model = arg1; mu = model.mu; Sigma = model.Sigma; n = 1;
otherwise
error('bad num args')
end
A = chol(Sigma, 'lower');
Z = randn(length(mu), n);
S = bsxfun(@plus, mu(:), A*Z)';
end
%% Visualize the effect of change the hyper-params for a 1d GP regression
% based on demo_gpr by Carl Rasmussen
%
%% Generate data
% This file is from pmtk3.googlecode.com
n = 20;
rand('state',18);
randn('state',20);
covfunc = {'covSum', {'covSEiso','covNoise'}};
loghyper = [log(1.0); log(1.0); log(0.1)];
x = 15*(rand(n,1)-0.5);
y = chol(feval(covfunc{:}, loghyper, x))'*randn(n,1); % Cholesky decomp.
xstar = linspace(-7.5, 7.5, 201)';
hyps = [log(1), log(1), log(0.1);...
log(0.3),log(1.08),log(0.00005);...
log(3),log(1.16),log(0.89)];
%% compute post pred and plot marginals
for i=1:size(hyps,1)
loghyper = hyps(i,:)';
[mu, S2] = gpr(loghyper, covfunc, x, y, xstar);
S2 = S2 - exp(2*loghyper(3)); % remove observation noise
figure;
f = [mu+2*sqrt(S2);flipdim(mu-2*sqrt(S2),1)];
fill([xstar; flipdim(xstar,1)], f, [7 7 7]/8, 'EdgeColor', [7 7 7]/8);
hold on
plot(xstar,mu,'k-','LineWidth',2);
plot(x, y, 'k+', 'MarkerSize', 17);
axis([-8 8 -3 3])
printPmtkFigure(sprintf('gprDemoChangeHparams%d', i));
end
%% Reproduce figure 2.2 from GP book
%
%%
% This file is from pmtk3.googlecode.com
setSeed(0);
L = 1;
xs = (-5:0.2:5)';
ns = length(xs);
keps = 1e-8;
muFn = @(x) 0*x(:).^2;
Kfn = @(x,z) 1*exp(-sq_dist(x'/L,z'/L)/2);
% plot sampled functions from the prior
figure; hold on
for i=1:3
model = struct('mu', muFn(xs), 'Sigma', Kfn(xs, xs) + 1e-15*eye(size(xs, 1)));
fs = gaussSample(model, 1);
plot(xs, fs, 'k-', 'linewidth', 2)
end
printPmtkFigure('gprDemoNoiseFreePrior')
% generate noise-less training data
Xtrain = [-4, -3, -2, -1, 1]';
ftrain = sin(Xtrain);
% compute posterior predictive
K = Kfn(Xtrain, Xtrain); % K
Ks = Kfn(Xtrain, xs); %K_*
Kss = Kfn(xs, xs) + keps*eye(length(xs)); % K_** (keps is essential!)
Ki = inv(K);
postMu = muFn(xs) + Ks'*Ki*(ftrain - muFn(Xtrain));
postCov = Kss - Ks'*Ki*Ks;
figure; hold on
% plot marginal posterior variance as gray band
mu = postMu(:);
S2 = diag(postCov);
f = [mu+2*sqrt(S2);flipdim(mu-2*sqrt(S2),1)];
fill([xs; flipdim(xs,1)], f, [7 7 7]/8, 'EdgeColor', [7 7 7]/8);
% plot samples from posterior predictive
for i=1:3
model = struct('mu', postMu(:)', 'Sigma', postCov);
fs = gaussSample(model, 1);
plot(xs, fs, 'k-', 'linewidth', 2)
h=plot(Xtrain, ftrain, 'kx', 'markersize', 12, 'linewidth', 3);
end
printPmtkFigure('gprDemoNoiseFreePost')