Cox Survival
Some simple demonstrations of a standard Cox, Cox with time-varying covariates and a stratified Cox.
Standard Proportional Hazards
Data Setup
set.seed(12)
dur = 1:10
kittyblarg = rnorm(10) # something happened to kitty!
kittyhappy = rep(0:1, times = 5) # is kitty happy?
kittydied = sample(0:1, 10, replace = TRUE) # kitty died! oh no!
d = data.frame(kittyblarg, kittyhappy, dur, kittydied)
# Inspect
d kittyblarg kittyhappy dur kittydied
1 -1.4805676 0 1 0
2 1.5771695 1 2 1
3 -0.9567445 0 3 0
4 -0.9200052 1 4 1
5 -1.9976421 0 5 1
6 -0.2722960 1 6 1
7 -0.3153487 0 7 0
8 -0.6282552 1 8 1
9 -0.1064639 0 9 0
10 0.4280148 1 10 1
Function
Create a the (partial) likelihood function to feed to optim.
cox_pl <- function(pars, preds, died, t) {
# Arguments-
# pars: coefficients of interest
# preds: predictor matrix
# died: death
# t: time
b = pars
X = as.matrix(preds[order(t), ])
died2 = died[order(t)]
LP = X%*%b # Linear predictor
# initialize log likelihood due to looping, not necessary
ll = numeric(nrow(X))
rows = 1:nrow(preds)
for (i in rows){
riskset = ifelse(rows < i, FALSE, TRUE) # identify risk set
ll[i] = died2[i]*(LP[i] - log(sum(exp(LP[riskset]))) ) # log likelihood
}
-sum(ll)
}Estimation
Estimate with optim.
initial_values = c(0, 0)
fit = optim(
par = initial_values,
fn = cox_pl,
preds = d[, c('kittyblarg', 'kittyhappy')],
died = d[, 'kittydied'],
t = dur,
method = "BFGS",
hessian = T
)
fit$par
[1] -0.5827125 1.3803731
$value
[1] 7.783878
$counts
function gradient
14 5
$convergence
[1] 0
$message
NULL
$hessian
[,1] [,2]
[1,] 1.9913282 0.4735968
[2,] 0.4735968 0.7780126
Comparison
Extract results.
B = fit$par
se = sqrt(diag(solve(fit$hessian)))
Z = B/se
# create a summary table
result_tbl = data.frame(
B,
exp = exp(B),
se,
Z,
p = ifelse(Z > 0, pnorm(Z, lower = FALSE)*2, pnorm(Z, lower = TRUE)*2)
)Compare to survival package.
library(survival)
cox_model = coxph(Surv(dur, kittydied) ~ kittyblarg + kittyhappy)| B | exp | se | Z | p | |
|---|---|---|---|---|---|
| coxph.kittyblarg | -0.583 | 0.558 | 0.766 | -0.760 | 0.447 |
| coxph.kittyhappy | 1.380 | 3.976 | 1.226 | 1.126 | 0.260 |
| cox_pl.kittyblarg | -0.583 | 0.558 | 0.766 | -0.760 | 0.447 |
| cox_pl.kittyhappy | 1.380 | 3.976 | 1.226 | 1.126 | 0.260 |
Time-varying coefficients
Note that technically nothing new is going on here relative to the previous model. See the vignette for the survival package for further details.
Data Setup
In the following we’ll first create some noisy time points.
set.seed(123)
t1 = rep(NA, 20)
t2 = rep(NA, 20)
t1[seq(1, 20, by = 2)] = 1:10
t2[seq(1, 20, by = 2)] = t1[seq(1, 20, by = 2)] +
sample(1:5, 10, replace = TRUE) +
abs(rnorm(10))
t1[seq(2, 20, by = 2)] = t2[seq(1, 20, by = 2)]
t2[seq(2, 20, by = 2)] = t1[seq(2, 20, by = 2)] + sample(1:5) + abs(rnorm(10))
kitty = rep(1:10, e = 2)
kittyblarg = t2 + rnorm(20, sd = 5)
kittyhappy = rep(0:1, times = 5, e = 2)
die = 0:1
cens = c(0, 0)
kittydied = ifelse(runif(20)>=.5, die, cens)
d = data.frame(kitty, kittyblarg, kittyhappy,
t1, t2, kittydied)
# Inspect the Surv object if desired
# Surv(t1,t2, kittydied)
# Inspect the data
d kitty kittyblarg kittyhappy t1 t2 kittydied
1 1 -1.870759 0 1.000000 4.686853 0
2 1 4.904677 0 4.686853 7.902718 0
3 2 4.798609 1 2.000000 5.445662 0
4 2 15.214255 1 5.445662 10.780575 0
5 3 5.467102 0 3.000000 6.224082 0
6 3 9.958737 0 6.224082 8.309781 1
7 4 -9.776800 1 4.000000 6.359814 0
8 4 4.586278 1 6.359814 8.445237 0
9 5 9.833514 0 5.000000 8.400771 0
10 5 7.368822 0 8.400771 13.471382 1
11 6 13.283435 1 6.000000 11.110683 0
12 6 18.256961 1 11.110683 14.256076 0
13 7 10.736186 0 7.000000 11.555841 0
14 7 23.935980 0 11.555841 17.721386 1
15 8 6.114988 1 8.000000 10.786913 0
16 8 14.573972 1 10.786913 12.605429 1
17 9 13.516008 0 9.000000 11.497850 0
18 9 9.750603 0 11.497850 14.182787 1
19 10 8.371929 1 10.000000 14.966617 0
20 10 19.430893 1 14.966617 19.286674 1
Function
cox_pl_tv <- function(pars, preds, died, t1, t2, data) {
# Same arguments as before though will take a data object
# plus variable names via string input. Also requires beginning
# and end time point (t1, t2)
dat = data[,c(preds, died, t1, t2)]
dat = dat[order(dat$t2), ]
b = pars
X = as.matrix(dat[, preds])
died2 = dat[, died]
# linear predictor
LP = X%*%b
# log likelihood
ll = numeric(nrow(X))
rows = 1:nrow(dat)
for (i in rows){
st_i = dat$t2[i]
# if they have already died/censored (row < i) or if the initial time is
# greater than current end time (t1 > st_i), they are not in the risk set,
# else they are.
riskset = ifelse(rows < i | dat$t1 > st_i, FALSE, TRUE)
ll[i] = died2[i]*(LP[i] - log(sum(exp(LP[riskset]))) )
}
-sum(ll)
}Estimation
Estimate with optim.
initial_values = c(0, 0)
fit = optim(
par = initial_values,
fn = cox_pl_tv,
preds = c('kittyblarg', 'kittyhappy'),
died = 'kittydied',
data = d,
t1 = 't1',
t2 = 't2',
method = "BFGS",
hessian = TRUE
)
# fitComparison
Extract results.
B = fit$par
se = sqrt(diag(solve(fit$hessian)))
Z = B/se
result_tbl = data.frame(
B,
exp = exp(B),
se,
Z,
p = ifelse(Z > 0, pnorm(Z, lower = FALSE) * 2, pnorm(Z, lower = TRUE) * 2)
)Compare to survival package.
cox_model_tv = coxph(
Surv(t1, t2, kittydied) ~ kittyblarg + kittyhappy,
method = 'breslow',
control = coxph.control(iter.max = 1000)
)
# cox_model_tv
# cox_model_tv$loglik[2]| B | exp | se | Z | p | |
|---|---|---|---|---|---|
| coxph.kittyblarg | -0.092 | 0.912 | 0.107 | -0.853 | 0.394 |
| coxph.kittyhappy | -1.513 | 0.220 | 1.137 | -1.331 | 0.183 |
| cox_pl.kittyblarg | -0.092 | 0.912 | 0.107 | -0.853 | 0.394 |
| cox_pl.kittyhappy | -1.513 | 0.220 | 1.137 | -1.331 | 0.183 |
Stratified Cox Model
Data Setup
data(ovarian, package = 'survival') Function
Requires cox_pl function above though one could extend to cox_pl_tv.
cox_pl_strat <- function(pars, preds, died, t, strata) {
strat = as.factor(strata)
d = data.frame(preds, died, t, strat)
dlist = split(d, strata)
neglls = map_dbl(
dlist,
function(x)
cox_pl(
pars = pars,
preds = x[, colnames(preds)],
died = x$died,
t = x$t
)
)
sum(neglls)
}Estimation
Estimate with optim.
initial_values = c(0, 0)
fit = optim(
par = initial_values,
fn = cox_pl_strat,
preds = ovarian[, c('age', 'ecog.ps')],
died = ovarian$fustat,
t = ovarian$futime,
strata = ovarian$rx,
method = "BFGS",
hessian = TRUE
)
# fitComparison
B = fit$par
se = sqrt(diag(solve(fit$hessian)))
Z = B/se
result_tbl = data.frame(
B,
exp = exp(B),
se,
Z,
p = ifelse(Z > 0, pnorm(Z, lower = FALSE) * 2, pnorm(Z, lower = TRUE)*2)
)
cox_strata_model = coxph(
Surv(futime, fustat) ~ age + ecog.ps + strata(rx),
data = ovarian
)
# cox_strata_model
# cox_strata_model$loglik[2]| B | exp | se | Z | p | |
|---|---|---|---|---|---|
| coxph.age | 0.139 | 1.149 | 0.048 | 2.885 | 0.004 |
| coxph.ecog.ps | -0.097 | 0.908 | 0.630 | -0.154 | 0.878 |
| cox_pl.age | 0.139 | 1.149 | 0.048 | 2.885 | 0.004 |
| cox_pl.ecog.ps | -0.097 | 0.908 | 0.630 | -0.154 | 0.878 |
Source
Original code available at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/survivalCox.R