Cox Survival

Some simple demonstrations of a standard Cox, Cox with time-varying covariates and a stratified Cox.

Standard Proportional Hazards

Data Setup

set.seed(12)

dur = 1:10
kittyblarg = rnorm(10)                                 # something happened to kitty!
kittyhappy = rep(0:1, times = 5)                       # is kitty happy?
kittydied  = sample(0:1, 10, replace = TRUE)           # kitty died! oh no!
d = data.frame(kittyblarg, kittyhappy, dur, kittydied)

# Inspect
d
   kittyblarg kittyhappy dur kittydied
1  -1.4805676          0   1         0
2   1.5771695          1   2         1
3  -0.9567445          0   3         0
4  -0.9200052          1   4         1
5  -1.9976421          0   5         1
6  -0.2722960          1   6         1
7  -0.3153487          0   7         0
8  -0.6282552          1   8         1
9  -0.1064639          0   9         0
10  0.4280148          1  10         1

Function

Create a the (partial) likelihood function to feed to optim.

cox_pl <- function(pars, preds, died, t) {
  # Arguments- 
  # pars: coefficients of interest
  # preds: predictor matrix
  # died: death
  # t: time
  
  b = pars
  X = as.matrix(preds[order(t), ])
  died2 = died[order(t)]
  
  LP = X%*%b                # Linear predictor
  
  # initialize log likelihood due to looping, not necessary
  ll = numeric(nrow(X))     
  rows = 1:nrow(preds)
  
  for (i in rows){
    riskset = ifelse(rows < i, FALSE, TRUE)                  # identify risk set
    ll[i] = died2[i]*(LP[i] - log(sum(exp(LP[riskset]))) )   # log likelihood
  }
  
  -sum(ll)
}

Estimation

Estimate with optim.

initial_values = c(0, 0)

fit = optim(
  par   = initial_values,
  fn    = cox_pl,
  preds = d[, c('kittyblarg', 'kittyhappy')],
  died  = d[, 'kittydied'],
  t     = dur,
  method  = "BFGS",
  hessian = T
)

fit
$par
[1] -0.5827125  1.3803731

$value
[1] 7.783878

$counts
function gradient 
      14        5 

$convergence
[1] 0

$message
NULL

$hessian
          [,1]      [,2]
[1,] 1.9913282 0.4735968
[2,] 0.4735968 0.7780126

Comparison

Extract results.

B  = fit$par
se = sqrt(diag(solve(fit$hessian)))
Z  = B/se

# create a summary table

result_tbl = data.frame(
  B, 
  exp = exp(B), 
  se, 
  Z,
  p = ifelse(Z > 0, pnorm(Z, lower = FALSE)*2, pnorm(Z, lower = TRUE)*2) 
)

Compare to survival package.

library(survival)

cox_model = coxph(Surv(dur, kittydied) ~ kittyblarg + kittyhappy)
B exp se Z p
coxph.kittyblarg -0.583 0.558 0.766 -0.760 0.447
coxph.kittyhappy 1.380 3.976 1.226 1.126 0.260
cox_pl.kittyblarg -0.583 0.558 0.766 -0.760 0.447
cox_pl.kittyhappy 1.380 3.976 1.226 1.126 0.260

Time-varying coefficients

Note that technically nothing new is going on here relative to the previous model. See the vignette for the survival package for further details.

Data Setup

In the following we’ll first create some noisy time points.

set.seed(123)

t1 = rep(NA, 20)
t2 = rep(NA, 20)

t1[seq(1, 20, by = 2)] = 1:10
t2[seq(1, 20, by = 2)] = t1[seq(1, 20, by = 2)] + 
  sample(1:5, 10, replace = TRUE) + 
  abs(rnorm(10))

t1[seq(2, 20, by = 2)] = t2[seq(1, 20, by = 2)]
t2[seq(2, 20, by = 2)] = t1[seq(2, 20, by = 2)] + sample(1:5) + abs(rnorm(10))

kitty = rep(1:10, e = 2)
kittyblarg = t2 + rnorm(20, sd = 5)
kittyhappy = rep(0:1, times = 5, e = 2)
die = 0:1
cens = c(0, 0)
kittydied = ifelse(runif(20)>=.5, die, cens)

d = data.frame(kitty, kittyblarg, kittyhappy, 
               t1, t2, kittydied)

# Inspect the Surv object if desired
# Surv(t1,t2, kittydied)

# Inspect the data
d
   kitty kittyblarg kittyhappy        t1        t2 kittydied
1      1  -1.870759          0  1.000000  4.686853         0
2      1   4.904677          0  4.686853  7.902718         0
3      2   4.798609          1  2.000000  5.445662         0
4      2  15.214255          1  5.445662 10.780575         0
5      3   5.467102          0  3.000000  6.224082         0
6      3   9.958737          0  6.224082  8.309781         1
7      4  -9.776800          1  4.000000  6.359814         0
8      4   4.586278          1  6.359814  8.445237         0
9      5   9.833514          0  5.000000  8.400771         0
10     5   7.368822          0  8.400771 13.471382         1
11     6  13.283435          1  6.000000 11.110683         0
12     6  18.256961          1 11.110683 14.256076         0
13     7  10.736186          0  7.000000 11.555841         0
14     7  23.935980          0 11.555841 17.721386         1
15     8   6.114988          1  8.000000 10.786913         0
16     8  14.573972          1 10.786913 12.605429         1
17     9  13.516008          0  9.000000 11.497850         0
18     9   9.750603          0 11.497850 14.182787         1
19    10   8.371929          1 10.000000 14.966617         0
20    10  19.430893          1 14.966617 19.286674         1

Function

cox_pl_tv <- function(pars, preds, died, t1, t2, data) {
  # Same arguments as before though will take a data object
  # plus variable names via string input. Also requires beginning
  # and end time point (t1, t2)
  dat = data[,c(preds, died, t1, t2)]
  dat = dat[order(dat$t2), ]
  b   = pars
  X   = as.matrix(dat[, preds])
  died2 = dat[, died]
  
  # linear predictor
  LP = X%*%b
  
  # log likelihood
  ll = numeric(nrow(X))
  rows = 1:nrow(dat)
  
  for (i in rows){
    st_i = dat$t2[i]
    
    # if they have already died/censored (row < i) or if the initial time is
    # greater than current end time (t1 > st_i),  they are not in the risk set,
    # else they are.
    riskset = ifelse(rows < i | dat$t1 > st_i, FALSE, TRUE)     
    
    ll[i] = died2[i]*(LP[i] - log(sum(exp(LP[riskset]))) )        
  }     
  
  -sum(ll)
}

Estimation

Estimate with optim.

initial_values = c(0, 0)

fit = optim(
  par   = initial_values,
  fn    = cox_pl_tv,
  preds = c('kittyblarg', 'kittyhappy'),
  died  = 'kittydied',
  data  = d,
  t1    = 't1',
  t2    = 't2',
  method  = "BFGS",
  hessian = TRUE
)

# fit

Comparison

Extract results.

B  = fit$par
se = sqrt(diag(solve(fit$hessian)))
Z  = B/se

result_tbl = data.frame(
  B, 
  exp = exp(B), 
  se, 
  Z, 
  p = ifelse(Z > 0, pnorm(Z, lower = FALSE) * 2, pnorm(Z, lower = TRUE) * 2)
)

Compare to survival package.

cox_model_tv = coxph(
  Surv(t1, t2, kittydied) ~ kittyblarg + kittyhappy,
  method = 'breslow',
  control = coxph.control(iter.max = 1000)
)

# cox_model_tv
# cox_model_tv$loglik[2]
B exp se Z p
coxph.kittyblarg -0.092 0.912 0.107 -0.853 0.394
coxph.kittyhappy -1.513 0.220 1.137 -1.331 0.183
cox_pl.kittyblarg -0.092 0.912 0.107 -0.853 0.394
cox_pl.kittyhappy -1.513 0.220 1.137 -1.331 0.183

Stratified Cox Model

Data Setup

data(ovarian, package = 'survival') 

Function

Requires cox_pl function above though one could extend to cox_pl_tv.

cox_pl_strat <- function(pars, preds, died, t, strata) {
  strat = as.factor(strata)
  d = data.frame(preds, died, t, strat)
  dlist = split(d, strata)
  
  neglls = map_dbl(
    dlist,
    function(x)
      cox_pl(
        pars  = pars,
        preds = x[, colnames(preds)],
        died  = x$died,
        t     = x$t
      )
  )
  
  sum(neglls)
}

Estimation

Estimate with optim.

initial_values = c(0, 0)

fit = optim(
  par   = initial_values,
  fn    = cox_pl_strat,
  preds = ovarian[, c('age', 'ecog.ps')],
  died  = ovarian$fustat,
  t     = ovarian$futime,
  strata  = ovarian$rx,
  method  = "BFGS",
  hessian = TRUE
)

# fit

Comparison

B  = fit$par
se = sqrt(diag(solve(fit$hessian)))
Z  = B/se

result_tbl = data.frame(
  B,
  exp = exp(B),
  se,
  Z,
  p = ifelse(Z > 0, pnorm(Z, lower = FALSE) * 2, pnorm(Z, lower = TRUE)*2)
)
  

cox_strata_model = coxph(
  Surv(futime, fustat) ~ age + ecog.ps + strata(rx), 
  data = ovarian
  )

# cox_strata_model
# cox_strata_model$loglik[2]
B exp se Z p
coxph.age 0.139 1.149 0.048 2.885 0.004
coxph.ecog.ps -0.097 0.908 0.630 -0.154 0.878
cox_pl.age 0.139 1.149 0.048 2.885 0.004
cox_pl.ecog.ps -0.097 0.908 0.630 -0.154 0.878