Markov Model
Here we demonstrate a Markov model. We start by showing how to create some data and estimate such a model via the markovchain package. You may want to play with it to get a better feel for how it works, as we will use it for comparison later.
library(tidyverse)
library(markovchain)
= matrix(c(.7, .3, .9, .1), nrow = 2, byrow = TRUE)
A
= new(
dtmcA 'markovchain',
transitionMatrix = A,
states = c('a', 'b'),
name = 'MarkovChain A'
)
dtmcA
MarkovChain A
A 2 - dimensional discrete Markov Chain defined by the following states:
a, b
The transition matrix (by rows) is defined as follows:
a b
a 0.7 0.3
b 0.9 0.1
plot(dtmcA)
transitionProbability(dtmcA, 'b', 'b')
[1] 0.1
= c(0, 1)
initialState = 4
steps = initialState * dtmcA^steps # using power operator
finalState finalState
a b
[1,] 0.7488 0.2512
steadyStates(dtmcA)
a b
[1,] 0.75 0.25
= sample(c('a', 'b'), 50, c(.7, .3), replace = TRUE)
observed_states
createSequenceMatrix(observed_states)
a b
a 24 11
b 11 3
markovchainFit(observed_states)
$estimate
MLE Fit
A 2 - dimensional discrete Markov Chain defined by the following states:
a, b
The transition matrix (by rows) is defined as follows:
a b
a 0.6857143 0.3142857
b 0.7857143 0.2142857
$standardError
a b
a 0.1399708 0.09476071
b 0.2369018 0.12371791
$confidenceLevel
[1] 0.95
$lowerEndpointMatrix
a b
a 0.4113764 0.1285581
b 0.3213953 0.0000000
$upperEndpointMatrix
a b
a 0.9600522 0.5000133
b 1.0000000 0.4567684
$logLikelihood
[1] -29.06116
Data Setup
Data Functions
A recursive function to take a matrix power.
<- function(M, N) {
mat_power if (N == 1) return(M)
%*% mat_power(M, N - 1)
M }
A function to create a sequence.
<- function(states, len, tmat) {
create_sequence # states: number of states
# len: length of sequence
# tmat: the transition matrix
= length(unique(states))
states_numeric = numeric(len)
out 1] = sample(states_numeric, 1, prob = colMeans(tmat)) # initial state
out[
for (i in 2:len){
= sample(states_numeric, 1, prob = tmat[out[i - 1], ])
out[i]
}
states[out] }
# example
= matrix(rep(2, 4), nrow = 2)
test_matrix test_matrix
[,1] [,2]
[1,] 2 2
[2,] 2 2
mat_power(test_matrix, 2)
[,1] [,2]
[1,] 8 8
[2,] 8 8
# transition matrix
= matrix(c(.7, .3, .4, .6), nrow = 2, byrow = TRUE)
A
mat_power(A, 10)
[,1] [,2]
[1,] 0.5714311 0.4285689
[2,] 0.5714252 0.4285748
Two states Demo
Note that a notably long sequence is needed to get close to recovering the true transition matrix.
= matrix(c(.7, .3, .9, .1), nrow = 2, byrow = TRUE)
A = create_sequence(c('a', 'b'), 500, tmat = A)
observed_states
createSequenceMatrix(observed_states)
a b
a 288 100
b 101 10
prop.table(createSequenceMatrix(observed_states), 1)
a b
a 0.7422680 0.25773196
b 0.9099099 0.09009009
= markovchainFit(observed_states)
fit fit
$estimate
MLE Fit
A 2 - dimensional discrete Markov Chain defined by the following states:
a, b
The transition matrix (by rows) is defined as follows:
a b
a 0.7422680 0.25773196
b 0.9099099 0.09009009
$standardError
a b
a 0.04373856 0.02577320
b 0.09053942 0.02848899
$confidenceLevel
[1] 0.95
$lowerEndpointMatrix
a b
a 0.6565420 0.20721741
b 0.7324559 0.03425269
$upperEndpointMatrix
a b
a 0.8279941 0.3082465
b 1.0000000 0.1459275
$logLikelihood
[1] -255.0253
# log likelihood
sum(createSequenceMatrix(observed_states) * log(fit$estimate@transitionMatrix))
[1] -255.0253
Three states demo
= matrix(
A c(.70, .20, .10,
20, .40, .40,
.05, .05, .90),
.nrow = 3,
byrow = TRUE
)
= create_sequence(c('a', 'b', 'c'), 500, tmat = A)
observed_states createSequenceMatrix(observed_states)
a b c
a 92 20 11
b 11 24 22
c 20 13 286
prop.table(createSequenceMatrix(observed_states), 1)
a b c
a 0.74796748 0.16260163 0.08943089
b 0.19298246 0.42105263 0.38596491
c 0.06269592 0.04075235 0.89655172
markovchainFit(observed_states)
$estimate
MLE Fit
A 3 - dimensional discrete Markov Chain defined by the following states:
a, b, c
The transition matrix (by rows) is defined as follows:
a b c
a 0.74796748 0.16260163 0.08943089
b 0.19298246 0.42105263 0.38596491
c 0.06269592 0.04075235 0.89655172
$standardError
a b c
a 0.07798100 0.03635883 0.02696443
b 0.05818640 0.08594701 0.08228800
c 0.01401923 0.01130267 0.05301421
$confidenceLevel
[1] 0.95
$lowerEndpointMatrix
a b c
a 0.59512750 0.09133962 0.03658157
b 0.07893918 0.25259956 0.22468337
c 0.03521872 0.01859952 0.79264575
$upperEndpointMatrix
a b c
a 0.90080746 0.23386364 0.1422802
b 0.30702573 0.58950571 0.5472465
c 0.09017313 0.06290518 1.0000000
$logLikelihood
[1] -277.6268
Function
Now we create a function to calculate the (negative) log likelihood.
<- function(par, x) {
markov_ll # par should be the c(A) of transition probabilities A
= length(unique(x))
nstates
# create transition matrix
= matrix(par, ncol = nstates)
par = t(apply(par, 1, function(x) x / sum(x)))
par
# create seq matrix
= table(x[-length(x)], x[-1])
seq_mat
# calculate log likelihood
= sum(seq_mat * log(par))
ll
-ll
}
= matrix(
A c(.70, .20, .10,
40, .20, .40,
.10, .15, .75),
.nrow = 3,
byrow = TRUE
)
= create_sequence(c('a', 'b', 'c'), 1000, tmat = A) observed_states
Estimation
Note that initial state values will be transformed to rowsum to one, so the specific initial values don’t matter (i.e. they don’t have to be probabilities). With the basic optim approach, sometimes log(0) will occur and produce a warning. Can be ignored, or use LFBGS
as demonstrated at the end.
= rep(1, 9)
initpar
= optim(
fit par = initpar,
fn = markov_ll,
x = observed_states,
method = 'BFGS',
control = list(reltol = 1e-12)
)
# get estimates on prob scale
= matrix(fit$par, ncol = 3)
est_mat = t(apply(est_mat, 1, function(x) x / sum(x))) est_mat
Comparison
Compare with markovchain package.
= markovchainFit(observed_states)
fit_compare
# compare log likelihood
c(-fit$value, fit_compare$logLikelihood)
[1] -815.6466 -815.6466
# compare estimated transition matrix
list(
`Estimated via optim` = est_mat,
`markovchain Package` = fit_compare$estimate@transitionMatrix,
`Analytical Solution` = prop.table(
table(observed_states[-length(observed_states)], observed_states[-1])
1)
, %>%
) ::map(round, 3) purrr
$`Estimated via optim`
[,1] [,2] [,3]
[1,] 0.674 0.242 0.084
[2,] 0.462 0.126 0.412
[3,] 0.106 0.151 0.743
$`markovchain Package`
a b c
a 0.674 0.242 0.084
b 0.462 0.126 0.412
c 0.106 0.151 0.743
$`Analytical Solution`
a b c
a 0.674 0.242 0.084
b 0.462 0.126 0.412
c 0.106 0.151 0.743
Visualize.
plot(
new(
'markovchain',
transitionMatrix = est_mat,
states = c('a', 'b', 'c'),
name = 'Estimated Markov Chain'
) )
If you don’t want warnings due to zeros use constraints (?constrOptim
).
= optim(
fit par = initpar,
fn = markov_ll,
x = observed_states,
method = 'L-BFGS',
lower = rep(1e-20, length(initpar)),
control = list(pgtol = 1e-12)
)
Source
Original code available at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/markov_model.R