Markov Model

Here we demonstrate a Markov model. We start by showing how to create some data and estimate such a model via the markovchain package. You may want to play with it to get a better feel for how it works, as we will use it for comparison later.

library(tidyverse)

library(markovchain)

A = matrix(c(.7, .3, .9, .1), nrow = 2, byrow = TRUE)

dtmcA = new(
  'markovchain',
  transitionMatrix = A,
  states = c('a', 'b'),
  name = 'MarkovChain A'
)

dtmcA
MarkovChain A 
 A  2 - dimensional discrete Markov Chain defined by the following states: 
 a, b 
 The transition matrix  (by rows)  is defined as follows: 
    a   b
a 0.7 0.3
b 0.9 0.1
plot(dtmcA)

transitionProbability(dtmcA, 'b', 'b')
[1] 0.1
initialState = c(0, 1)
steps = 4
finalState = initialState * dtmcA^steps # using power operator
finalState
          a      b
[1,] 0.7488 0.2512
steadyStates(dtmcA)
        a    b
[1,] 0.75 0.25
observed_states = sample(c('a', 'b'), 50, c(.7, .3), replace = TRUE)

createSequenceMatrix(observed_states)
   a  b
a 24 11
b 11  3
markovchainFit(observed_states)
$estimate
MLE Fit 
 A  2 - dimensional discrete Markov Chain defined by the following states: 
 a, b 
 The transition matrix  (by rows)  is defined as follows: 
          a         b
a 0.6857143 0.3142857
b 0.7857143 0.2142857


$standardError
          a          b
a 0.1399708 0.09476071
b 0.2369018 0.12371791

$confidenceLevel
[1] 0.95

$lowerEndpointMatrix
          a         b
a 0.4113764 0.1285581
b 0.3213953 0.0000000

$upperEndpointMatrix
          a         b
a 0.9600522 0.5000133
b 1.0000000 0.4567684

$logLikelihood
[1] -29.06116

Data Setup

Data Functions

A recursive function to take a matrix power.

mat_power <- function(M, N) {
  if (N == 1) return(M)
  
  M %*% mat_power(M, N - 1)
}

A function to create a sequence.

create_sequence  <- function(states, len, tmat) {
  # states: number of states
  # len: length of sequence
  # tmat: the transition matrix
  states_numeric = length(unique(states))
  out = numeric(len)
  out[1] = sample(states_numeric, 1, prob = colMeans(tmat)) # initial state
  
  for (i in 2:len){
    out[i] = sample(states_numeric, 1, prob = tmat[out[i - 1], ])
  }
  
  states[out]
}
# example
test_matrix = matrix(rep(2, 4), nrow = 2)
test_matrix
     [,1] [,2]
[1,]    2    2
[2,]    2    2
mat_power(test_matrix, 2)
     [,1] [,2]
[1,]    8    8
[2,]    8    8
# transition matrix
A = matrix(c(.7, .3, .4, .6), nrow = 2, byrow = TRUE)

mat_power(A, 10)
          [,1]      [,2]
[1,] 0.5714311 0.4285689
[2,] 0.5714252 0.4285748

Two states Demo

Note that a notably long sequence is needed to get close to recovering the true transition matrix.

A = matrix(c(.7, .3, .9, .1), nrow = 2, byrow = TRUE)
observed_states = create_sequence(c('a', 'b'), 500, tmat = A)

createSequenceMatrix(observed_states)
    a   b
a 288 100
b 101  10
prop.table(createSequenceMatrix(observed_states), 1)
          a          b
a 0.7422680 0.25773196
b 0.9099099 0.09009009
fit = markovchainFit(observed_states)
fit
$estimate
MLE Fit 
 A  2 - dimensional discrete Markov Chain defined by the following states: 
 a, b 
 The transition matrix  (by rows)  is defined as follows: 
          a          b
a 0.7422680 0.25773196
b 0.9099099 0.09009009


$standardError
           a          b
a 0.04373856 0.02577320
b 0.09053942 0.02848899

$confidenceLevel
[1] 0.95

$lowerEndpointMatrix
          a          b
a 0.6565420 0.20721741
b 0.7324559 0.03425269

$upperEndpointMatrix
          a         b
a 0.8279941 0.3082465
b 1.0000000 0.1459275

$logLikelihood
[1] -255.0253
# log likelihood
sum(createSequenceMatrix(observed_states) * log(fit$estimate@transitionMatrix))
[1] -255.0253

Three states demo

A = matrix(
  c(.70, .20, .10,
    .20, .40, .40,
    .05, .05, .90), 
  nrow  = 3, 
  byrow = TRUE
)

observed_states = create_sequence(c('a', 'b', 'c'), 500, tmat = A)
createSequenceMatrix(observed_states)
   a  b   c
a 92 20  11
b 11 24  22
c 20 13 286
prop.table(createSequenceMatrix(observed_states), 1)
           a          b          c
a 0.74796748 0.16260163 0.08943089
b 0.19298246 0.42105263 0.38596491
c 0.06269592 0.04075235 0.89655172
markovchainFit(observed_states)
$estimate
MLE Fit 
 A  3 - dimensional discrete Markov Chain defined by the following states: 
 a, b, c 
 The transition matrix  (by rows)  is defined as follows: 
           a          b          c
a 0.74796748 0.16260163 0.08943089
b 0.19298246 0.42105263 0.38596491
c 0.06269592 0.04075235 0.89655172


$standardError
           a          b          c
a 0.07798100 0.03635883 0.02696443
b 0.05818640 0.08594701 0.08228800
c 0.01401923 0.01130267 0.05301421

$confidenceLevel
[1] 0.95

$lowerEndpointMatrix
           a          b          c
a 0.59512750 0.09133962 0.03658157
b 0.07893918 0.25259956 0.22468337
c 0.03521872 0.01859952 0.79264575

$upperEndpointMatrix
           a          b         c
a 0.90080746 0.23386364 0.1422802
b 0.30702573 0.58950571 0.5472465
c 0.09017313 0.06290518 1.0000000

$logLikelihood
[1] -277.6268

Function

Now we create a function to calculate the (negative) log likelihood.

markov_ll <- function(par, x) {
  # par should be the c(A) of transition probabilities A
  nstates = length(unique(x))
  
  # create transition matrix
  par = matrix(par, ncol = nstates)
  par = t(apply(par, 1, function(x) x / sum(x)))
  
  # create seq matrix
  seq_mat = table(x[-length(x)], x[-1])
  
  # calculate log likelihood
  ll = sum(seq_mat * log(par))
  
  -ll
}
A = matrix(
  c(.70, .20, .10,
    .40, .20, .40,
    .10, .15, .75),
  nrow  = 3,
  byrow = TRUE
)

observed_states = create_sequence(c('a', 'b', 'c'), 1000, tmat = A)

Estimation

Note that initial state values will be transformed to rowsum to one, so the specific initial values don’t matter (i.e. they don’t have to be probabilities). With the basic optim approach, sometimes log(0) will occur and produce a warning. Can be ignored, or use LFBGS as demonstrated at the end.

initpar = rep(1, 9)

fit = optim(
  par = initpar,
  fn  = markov_ll,
  x   = observed_states,
  method  = 'BFGS',
  control = list(reltol = 1e-12)
)

# get estimates on prob scale
est_mat = matrix(fit$par, ncol = 3)
est_mat = t(apply(est_mat, 1, function(x)  x / sum(x)))

Comparison

Compare with markovchain package.

fit_compare = markovchainFit(observed_states)

# compare log likelihood
c(-fit$value, fit_compare$logLikelihood)
[1] -815.6466 -815.6466
# compare estimated transition matrix
list(
  `Estimated via optim` = est_mat,
  `markovchain Package` = fit_compare$estimate@transitionMatrix,
  `Analytical Solution` = prop.table(
    table(observed_states[-length(observed_states)], observed_states[-1])
    , 1)
) %>% 
  purrr::map(round, 3)
$`Estimated via optim`
      [,1]  [,2]  [,3]
[1,] 0.674 0.242 0.084
[2,] 0.462 0.126 0.412
[3,] 0.106 0.151 0.743

$`markovchain Package`
      a     b     c
a 0.674 0.242 0.084
b 0.462 0.126 0.412
c 0.106 0.151 0.743

$`Analytical Solution`
   
        a     b     c
  a 0.674 0.242 0.084
  b 0.462 0.126 0.412
  c 0.106 0.151 0.743

Visualize.

plot(
  new(
    'markovchain',
    transitionMatrix = est_mat,
    states = c('a', 'b', 'c'),
    name = 'Estimated Markov Chain'
  )
)

If you don’t want warnings due to zeros use constraints (?constrOptim).

fit = optim(
  par = initpar,
  fn  = markov_ll,
  x   = observed_states,
  method  = 'L-BFGS',
  lower   = rep(1e-20, length(initpar)),
  control = list(pgtol = 1e-12)
)