Cox Survival
Some simple demonstrations of a standard Cox, Cox with time-varying covariates and a stratified Cox.
Standard Proportional Hazards
Data Setup
set.seed(12)
= 1:10
dur = rnorm(10) # something happened to kitty!
kittyblarg = rep(0:1, times = 5) # is kitty happy?
kittyhappy = sample(0:1, 10, replace = TRUE) # kitty died! oh no!
kittydied = data.frame(kittyblarg, kittyhappy, dur, kittydied)
d
# Inspect
d
kittyblarg kittyhappy dur kittydied
1 -1.4805676 0 1 0
2 1.5771695 1 2 1
3 -0.9567445 0 3 0
4 -0.9200052 1 4 1
5 -1.9976421 0 5 1
6 -0.2722960 1 6 1
7 -0.3153487 0 7 0
8 -0.6282552 1 8 1
9 -0.1064639 0 9 0
10 0.4280148 1 10 1
Function
Create a the (partial) likelihood function to feed to optim.
<- function(pars, preds, died, t) {
cox_pl # Arguments-
# pars: coefficients of interest
# preds: predictor matrix
# died: death
# t: time
= pars
b = as.matrix(preds[order(t), ])
X = died[order(t)]
died2
= X%*%b # Linear predictor
LP
# initialize log likelihood due to looping, not necessary
= numeric(nrow(X))
ll = 1:nrow(preds)
rows
for (i in rows){
= ifelse(rows < i, FALSE, TRUE) # identify risk set
riskset = died2[i]*(LP[i] - log(sum(exp(LP[riskset]))) ) # log likelihood
ll[i]
}
-sum(ll)
}
Estimation
Estimate with optim.
= c(0, 0)
initial_values
= optim(
fit par = initial_values,
fn = cox_pl,
preds = d[, c('kittyblarg', 'kittyhappy')],
died = d[, 'kittydied'],
t = dur,
method = "BFGS",
hessian = T
)
fit
$par
[1] -0.5827125 1.3803731
$value
[1] 7.783878
$counts
function gradient
14 5
$convergence
[1] 0
$message
NULL
$hessian
[,1] [,2]
[1,] 1.9913282 0.4735968
[2,] 0.4735968 0.7780126
Comparison
Extract results.
= fit$par
B = sqrt(diag(solve(fit$hessian)))
se = B/se
Z
# create a summary table
= data.frame(
result_tbl
B, exp = exp(B),
se,
Z,p = ifelse(Z > 0, pnorm(Z, lower = FALSE)*2, pnorm(Z, lower = TRUE)*2)
)
Compare to survival package.
library(survival)
= coxph(Surv(dur, kittydied) ~ kittyblarg + kittyhappy) cox_model
B | exp | se | Z | p | |
---|---|---|---|---|---|
coxph.kittyblarg | -0.583 | 0.558 | 0.766 | -0.760 | 0.447 |
coxph.kittyhappy | 1.380 | 3.976 | 1.226 | 1.126 | 0.260 |
cox_pl.kittyblarg | -0.583 | 0.558 | 0.766 | -0.760 | 0.447 |
cox_pl.kittyhappy | 1.380 | 3.976 | 1.226 | 1.126 | 0.260 |
Time-varying coefficients
Note that technically nothing new is going on here relative to the previous model. See the vignette for the survival package for further details.
Data Setup
In the following we’ll first create some noisy time points.
set.seed(123)
= rep(NA, 20)
t1 = rep(NA, 20)
t2
seq(1, 20, by = 2)] = 1:10
t1[seq(1, 20, by = 2)] = t1[seq(1, 20, by = 2)] +
t2[sample(1:5, 10, replace = TRUE) +
abs(rnorm(10))
seq(2, 20, by = 2)] = t2[seq(1, 20, by = 2)]
t1[seq(2, 20, by = 2)] = t1[seq(2, 20, by = 2)] + sample(1:5) + abs(rnorm(10))
t2[
= rep(1:10, e = 2)
kitty = t2 + rnorm(20, sd = 5)
kittyblarg = rep(0:1, times = 5, e = 2)
kittyhappy = 0:1
die = c(0, 0)
cens = ifelse(runif(20)>=.5, die, cens)
kittydied
= data.frame(kitty, kittyblarg, kittyhappy,
d
t1, t2, kittydied)
# Inspect the Surv object if desired
# Surv(t1,t2, kittydied)
# Inspect the data
d
kitty kittyblarg kittyhappy t1 t2 kittydied
1 1 -1.870759 0 1.000000 4.686853 0
2 1 4.904677 0 4.686853 7.902718 0
3 2 4.798609 1 2.000000 5.445662 0
4 2 15.214255 1 5.445662 10.780575 0
5 3 5.467102 0 3.000000 6.224082 0
6 3 9.958737 0 6.224082 8.309781 1
7 4 -9.776800 1 4.000000 6.359814 0
8 4 4.586278 1 6.359814 8.445237 0
9 5 9.833514 0 5.000000 8.400771 0
10 5 7.368822 0 8.400771 13.471382 1
11 6 13.283435 1 6.000000 11.110683 0
12 6 18.256961 1 11.110683 14.256076 0
13 7 10.736186 0 7.000000 11.555841 0
14 7 23.935980 0 11.555841 17.721386 1
15 8 6.114988 1 8.000000 10.786913 0
16 8 14.573972 1 10.786913 12.605429 1
17 9 13.516008 0 9.000000 11.497850 0
18 9 9.750603 0 11.497850 14.182787 1
19 10 8.371929 1 10.000000 14.966617 0
20 10 19.430893 1 14.966617 19.286674 1
Function
<- function(pars, preds, died, t1, t2, data) {
cox_pl_tv # Same arguments as before though will take a data object
# plus variable names via string input. Also requires beginning
# and end time point (t1, t2)
= data[,c(preds, died, t1, t2)]
dat = dat[order(dat$t2), ]
dat = pars
b = as.matrix(dat[, preds])
X = dat[, died]
died2
# linear predictor
= X%*%b
LP
# log likelihood
= numeric(nrow(X))
ll = 1:nrow(dat)
rows
for (i in rows){
= dat$t2[i]
st_i
# if they have already died/censored (row < i) or if the initial time is
# greater than current end time (t1 > st_i), they are not in the risk set,
# else they are.
= ifelse(rows < i | dat$t1 > st_i, FALSE, TRUE)
riskset
= died2[i]*(LP[i] - log(sum(exp(LP[riskset]))) )
ll[i]
}
-sum(ll)
}
Estimation
Estimate with optim.
= c(0, 0)
initial_values
= optim(
fit par = initial_values,
fn = cox_pl_tv,
preds = c('kittyblarg', 'kittyhappy'),
died = 'kittydied',
data = d,
t1 = 't1',
t2 = 't2',
method = "BFGS",
hessian = TRUE
)
# fit
Comparison
Extract results.
= fit$par
B = sqrt(diag(solve(fit$hessian)))
se = B/se
Z
= data.frame(
result_tbl
B, exp = exp(B),
se,
Z, p = ifelse(Z > 0, pnorm(Z, lower = FALSE) * 2, pnorm(Z, lower = TRUE) * 2)
)
Compare to survival package.
= coxph(
cox_model_tv Surv(t1, t2, kittydied) ~ kittyblarg + kittyhappy,
method = 'breslow',
control = coxph.control(iter.max = 1000)
)
# cox_model_tv
# cox_model_tv$loglik[2]
B | exp | se | Z | p | |
---|---|---|---|---|---|
coxph.kittyblarg | -0.092 | 0.912 | 0.107 | -0.853 | 0.394 |
coxph.kittyhappy | -1.513 | 0.220 | 1.137 | -1.331 | 0.183 |
cox_pl.kittyblarg | -0.092 | 0.912 | 0.107 | -0.853 | 0.394 |
cox_pl.kittyhappy | -1.513 | 0.220 | 1.137 | -1.331 | 0.183 |
Stratified Cox Model
Data Setup
data(ovarian, package = 'survival')
Function
Requires cox_pl
function above though one could extend to cox_pl_tv
.
<- function(pars, preds, died, t, strata) {
cox_pl_strat = as.factor(strata)
strat = data.frame(preds, died, t, strat)
d = split(d, strata)
dlist
= map_dbl(
neglls
dlist,function(x)
cox_pl(
pars = pars,
preds = x[, colnames(preds)],
died = x$died,
t = x$t
)
)
sum(neglls)
}
Estimation
Estimate with optim.
= c(0, 0)
initial_values
= optim(
fit par = initial_values,
fn = cox_pl_strat,
preds = ovarian[, c('age', 'ecog.ps')],
died = ovarian$fustat,
t = ovarian$futime,
strata = ovarian$rx,
method = "BFGS",
hessian = TRUE
)
# fit
Comparison
= fit$par
B = sqrt(diag(solve(fit$hessian)))
se = B/se
Z
= data.frame(
result_tbl
B,exp = exp(B),
se,
Z,p = ifelse(Z > 0, pnorm(Z, lower = FALSE) * 2, pnorm(Z, lower = TRUE)*2)
)
= coxph(
cox_strata_model Surv(futime, fustat) ~ age + ecog.ps + strata(rx),
data = ovarian
)
# cox_strata_model
# cox_strata_model$loglik[2]
B | exp | se | Z | p | |
---|---|---|---|---|---|
coxph.age | 0.139 | 1.149 | 0.048 | 2.885 | 0.004 |
coxph.ecog.ps | -0.097 | 0.908 | 0.630 | -0.154 | 0.878 |
cox_pl.age | 0.139 | 1.149 | 0.048 | 2.885 | 0.004 |
cox_pl.ecog.ps | -0.097 | 0.908 | 0.630 | -0.154 | 0.878 |
Source
Original code available at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/survivalCox.R