# Hidden Markov Model

This function duplicates hmm_viterbi.py, which comes from the Viterbi algorithm wikipedia page (at least as it was when I stumbled across it, see it in the supplemental section). This first function is just to provide R code that is similar, in case anyone is interested in a more direct comparison, but the original used lists of tuples and thus was very inefficient R-wise, and provided output that wasn’t succinct. The second function takes a vectorized approach and returns a matrix in a much more straightforward fashion. Both will provide the same result as the Python code. See The Markov Model chapter also.

## Data Setup

library(tidyverse)

obs = c('normal', 'cold', 'dizzy')  # observed state

states = c('Healthy', 'Fever')      # latent states

start_p = c('Healthy' = 0.6, 'Fever' = 0.4) # starting probabilities

# transition matrix
trans_p = list(
'Healthy' = c('Healthy' = 0.7, 'Fever' = 0.3),
'Fever'   = c('Healthy' = 0.4, 'Fever' = 0.6)
)

# emission matrix
emit_p = list(
'Healthy' = c('normal' = 0.5, 'cold' = 0.4, 'dizzy' = 0.1),
'Fever'   = c('normal' = 0.1, 'cold' = 0.3, 'dizzy' = 0.6)
)

## Function

This first function takes a Python-esque approach in the manner of the original, allowing for easier comparison.

viterbi <- function(obs, states, start_p, trans_p, emit_p) {
V = vector('list', length(obs))

for (st in seq_along(states)) {
V[[1]][[states[st]]] = list("prob" = start_p[st] * emit_p[[st]][obs[1]],
"prev" = NULL)
}

for (t in 2:length(obs)) {

for (st in seq_along(states)) {
max_tr_prob = numeric()

for (prev_st in states) {
max_tr_prob[prev_st] = V[[t-1]][[prev_st]][["prob"]] * trans_p[[prev_st]][[st]]
}

max_tr_prob = max(max_tr_prob)

for (prev_st in states) {
flag =  V[[t-1]][[prev_st]][["prob"]] * trans_p[[prev_st]][[st]] == max_tr_prob
if (flag) {
max_prob = max_tr_prob * emit_p[[st]][obs[t]]
V[[t]][[states[st]]] = list('prob' = max_prob, 'prev' = prev_st)
}
}

}

}

# I don't bother duplicating the text output code of the original
df_out = rbind(
Healthy = sapply(V, function(x) x$Healthy$prob),
Fever   = sapply(V, function(x) x$Fever$prob)
)

colnames(df_out) = obs
print(df_out)

m = paste0(
'The steps of states are: ',
paste(rownames(df_out)[apply(df_out, 2, which.max)], collapse = ' '),
paste('\nHighest probability: ', max(df_out[, ncol(df_out)]))
)

message(m)

V
}

This approach is much more R-like.

viterbi_2 <- function(obs, states, start_p, trans_mat, emit_mat) {
prob_mat = matrix(NA, nrow = length(states), ncol = length(obs))
colnames(prob_mat) = obs
rownames(prob_mat) = states

prob_mat[,1] = start_p * emit_mat[,1]

for (t in 2:length(obs)) {
prob_tran    = prob_mat[,t-1] * trans_mat
max_tr_prob  = apply(prob_tran, 2, max)
prob_mat[,t] = max_tr_prob * emit_mat[, obs[t]]
}

print(prob_mat)

m = paste0(
'The steps of states are: ',
paste(states[apply(prob_mat, 2, which.max)], collapse = ' '),
paste('\nHighest probability: ', max(prob_mat[, ncol(prob_mat)]))
)

message(m)
}

## Estimation

First we demo the initial function.

test = viterbi(
obs,
states,
start_p,
trans_p,
emit_p
)
        normal  cold   dizzy
Healthy   0.30 0.084 0.00588
Fever     0.04 0.027 0.01512
# test

set.seed(123)
obs = sample(obs, 6, replace = TRUE)

test = viterbi(
obs,
states,
start_p,
trans_p,
emit_p
)
        dizzy  dizzy    dizzy       cold        dizzy         cold
Healthy  0.06 0.0096 0.003456 0.00497664 0.0003483648 0.0003224863
Fever    0.24 0.0864 0.031104 0.00559872 0.0020155392 0.0003627971
# test

Now the vectorized approach.

set.seed(123)

obs = c('normal', 'cold', 'dizzy')
obs = sample(obs, 6, replace = T)

# need matrices now
emit_mat  = do.call(rbind, emit_p)
trans_mat = do.call(rbind, trans_p)

viterbi_2(
obs,
states,
start_p,
trans_mat,
emit_mat
)
        dizzy dizzy   dizzy      cold       dizzy         cold
Healthy  0.30 0.021 0.00216 0.0031104 0.000217728 0.0002015539
Fever    0.04 0.054 0.01944 0.0034992 0.001259712 0.0002267482

## Supplemental demo

states = c('Rainy', 'Sunny')

observations = c('walk', 'shop', 'clean')

start_probability = c('Rainy' = 0.6, 'Sunny' = 0.4)

transition_probability = rbind(
'Rainy' = c('Rainy' = 0.7, 'Sunny' = 0.3),
'Sunny' = c('Rainy' = 0.4, 'Sunny' = 0.6)
)

emission_probability = rbind(
'Rainy' = c('walk' = 0.1, 'shop' = 0.4, 'clean' = 0.5),
'Sunny' = c('walk' = 0.6, 'shop' = 0.3, 'clean' = 0.1)
)

viterbi_2(
observations,
states,
start_probability,
transition_probability,
emission_probability
)
      walk   shop    clean
Rainy 0.06 0.0384 0.013440
Sunny 0.24 0.0432 0.002592