# Cox Survival

Some simple demonstrations of a standard Cox, Cox with time-varying covariates and a stratified Cox.

## Standard Proportional Hazards

### Data Setup

set.seed(12)

dur = 1:10
kittyblarg = rnorm(10)                                 # something happened to kitty!
kittyhappy = rep(0:1, times = 5)                       # is kitty happy?
kittydied  = sample(0:1, 10, replace = TRUE)           # kitty died! oh no!
d = data.frame(kittyblarg, kittyhappy, dur, kittydied)

# Inspect
d
   kittyblarg kittyhappy dur kittydied
1  -1.4805676          0   1         0
2   1.5771695          1   2         1
3  -0.9567445          0   3         0
4  -0.9200052          1   4         1
5  -1.9976421          0   5         1
6  -0.2722960          1   6         1
7  -0.3153487          0   7         0
8  -0.6282552          1   8         1
9  -0.1064639          0   9         0
10  0.4280148          1  10         1

### Function

Create a the (partial) likelihood function to feed to optim.

cox_pl <- function(pars, preds, died, t) {
# Arguments-
# pars: coefficients of interest
# preds: predictor matrix
# died: death
# t: time

b = pars
X = as.matrix(preds[order(t), ])
died2 = died[order(t)]

LP = X%*%b                # Linear predictor

# initialize log likelihood due to looping, not necessary
ll = numeric(nrow(X))
rows = 1:nrow(preds)

for (i in rows){
riskset = ifelse(rows < i, FALSE, TRUE)                  # identify risk set
ll[i] = died2[i]*(LP[i] - log(sum(exp(LP[riskset]))) )   # log likelihood
}

-sum(ll)
}

### Estimation

Estimate with optim.

initial_values = c(0, 0)

fit = optim(
par   = initial_values,
fn    = cox_pl,
preds = d[, c('kittyblarg', 'kittyhappy')],
died  = d[, 'kittydied'],
t     = dur,
method  = "BFGS",
hessian = T
)

fit
$par  -0.5827125 1.3803731$value
 7.783878

$counts function gradient 14 5$convergence
 0

$message NULL$hessian
[,1]      [,2]
[1,] 1.9913282 0.4735968
[2,] 0.4735968 0.7780126

### Comparison

Extract results.

B  = fit$par se = sqrt(diag(solve(fit$hessian)))
Z  = B/se

# create a summary table

result_tbl = data.frame(
B,
exp = exp(B),
se,
Z,
p = ifelse(Z > 0, pnorm(Z, lower = FALSE)*2, pnorm(Z, lower = TRUE)*2)
)

Compare to survival package.

library(survival)

cox_model = coxph(Surv(dur, kittydied) ~ kittyblarg + kittyhappy)
B exp se Z p
coxph.kittyblarg -0.583 0.558 0.766 -0.760 0.447
coxph.kittyhappy 1.380 3.976 1.226 1.126 0.260
cox_pl.kittyblarg -0.583 0.558 0.766 -0.760 0.447
cox_pl.kittyhappy 1.380 3.976 1.226 1.126 0.260

## Time-varying coefficients

Note that technically nothing new is going on here relative to the previous model. See the vignette for the survival package for further details.

### Data Setup

In the following we’ll first create some noisy time points.

set.seed(123)

t1 = rep(NA, 20)
t2 = rep(NA, 20)

t1[seq(1, 20, by = 2)] = 1:10
t2[seq(1, 20, by = 2)] = t1[seq(1, 20, by = 2)] +
sample(1:5, 10, replace = TRUE) +
abs(rnorm(10))

t1[seq(2, 20, by = 2)] = t2[seq(1, 20, by = 2)]
t2[seq(2, 20, by = 2)] = t1[seq(2, 20, by = 2)] + sample(1:5) + abs(rnorm(10))

kitty = rep(1:10, e = 2)
kittyblarg = t2 + rnorm(20, sd = 5)
kittyhappy = rep(0:1, times = 5, e = 2)
die = 0:1
cens = c(0, 0)
kittydied = ifelse(runif(20)>=.5, die, cens)

d = data.frame(kitty, kittyblarg, kittyhappy,
t1, t2, kittydied)

# Inspect the Surv object if desired
# Surv(t1,t2, kittydied)

# Inspect the data
d
   kitty kittyblarg kittyhappy        t1        t2 kittydied
1      1  -1.870759          0  1.000000  4.686853         0
2      1   4.904677          0  4.686853  7.902718         0
3      2   4.798609          1  2.000000  5.445662         0
4      2  15.214255          1  5.445662 10.780575         0
5      3   5.467102          0  3.000000  6.224082         0
6      3   9.958737          0  6.224082  8.309781         1
7      4  -9.776800          1  4.000000  6.359814         0
8      4   4.586278          1  6.359814  8.445237         0
9      5   9.833514          0  5.000000  8.400771         0
10     5   7.368822          0  8.400771 13.471382         1
11     6  13.283435          1  6.000000 11.110683         0
12     6  18.256961          1 11.110683 14.256076         0
13     7  10.736186          0  7.000000 11.555841         0
14     7  23.935980          0 11.555841 17.721386         1
15     8   6.114988          1  8.000000 10.786913         0
16     8  14.573972          1 10.786913 12.605429         1
17     9  13.516008          0  9.000000 11.497850         0
18     9   9.750603          0 11.497850 14.182787         1
19    10   8.371929          1 10.000000 14.966617         0
20    10  19.430893          1 14.966617 19.286674         1

### Function

cox_pl_tv <- function(pars, preds, died, t1, t2, data) {
# Same arguments as before though will take a data object
# plus variable names via string input. Also requires beginning
# and end time point (t1, t2)
dat = data[,c(preds, died, t1, t2)]
dat = dat[order(dat$t2), ] b = pars X = as.matrix(dat[, preds]) died2 = dat[, died] # linear predictor LP = X%*%b # log likelihood ll = numeric(nrow(X)) rows = 1:nrow(dat) for (i in rows){ st_i = dat$t2[i]

# if they have already died/censored (row < i) or if the initial time is
# greater than current end time (t1 > st_i),  they are not in the risk set,
# else they are.
riskset = ifelse(rows < i | dat$t1 > st_i, FALSE, TRUE) ll[i] = died2[i]*(LP[i] - log(sum(exp(LP[riskset]))) ) } -sum(ll) } ### Estimation Estimate with optim. initial_values = c(0, 0) fit = optim( par = initial_values, fn = cox_pl_tv, preds = c('kittyblarg', 'kittyhappy'), died = 'kittydied', data = d, t1 = 't1', t2 = 't2', method = "BFGS", hessian = TRUE ) # fit ### Comparison Extract results. B = fit$par
se = sqrt(diag(solve(fit$hessian))) Z = B/se result_tbl = data.frame( B, exp = exp(B), se, Z, p = ifelse(Z > 0, pnorm(Z, lower = FALSE) * 2, pnorm(Z, lower = TRUE) * 2) ) Compare to survival package. cox_model_tv = coxph( Surv(t1, t2, kittydied) ~ kittyblarg + kittyhappy, method = 'breslow', control = coxph.control(iter.max = 1000) ) # cox_model_tv # cox_model_tv$loglik
B exp se Z p
coxph.kittyblarg -0.092 0.912 0.107 -0.853 0.394
coxph.kittyhappy -1.513 0.220 1.137 -1.331 0.183
cox_pl.kittyblarg -0.092 0.912 0.107 -0.853 0.394
cox_pl.kittyhappy -1.513 0.220 1.137 -1.331 0.183

## Stratified Cox Model

### Data Setup

data(ovarian, package = 'survival') 

### Function

Requires cox_pl function above though one could extend to cox_pl_tv.

cox_pl_strat <- function(pars, preds, died, t, strata) {
strat = as.factor(strata)
d = data.frame(preds, died, t, strat)
dlist = split(d, strata)

neglls = map_dbl(
dlist,
function(x)
cox_pl(
pars  = pars,
preds = x[, colnames(preds)],
died  = x$died, t = x$t
)
)

sum(neglls)
}

### Estimation

Estimate with optim.

initial_values = c(0, 0)

fit = optim(
par   = initial_values,
fn    = cox_pl_strat,
preds = ovarian[, c('age', 'ecog.ps')],
died  = ovarian$fustat, t = ovarian$futime,
strata  = ovarian$rx, method = "BFGS", hessian = TRUE ) # fit ### Comparison B = fit$par
se = sqrt(diag(solve(fit$hessian))) Z = B/se result_tbl = data.frame( B, exp = exp(B), se, Z, p = ifelse(Z > 0, pnorm(Z, lower = FALSE) * 2, pnorm(Z, lower = TRUE)*2) ) cox_strata_model = coxph( Surv(futime, fustat) ~ age + ecog.ps + strata(rx), data = ovarian ) # cox_strata_model # cox_strata_model$loglik
B exp se Z p
coxph.age 0.139 1.149 0.048 2.885 0.004
coxph.ecog.ps -0.097 0.908 0.630 -0.154 0.878
cox_pl.age 0.139 1.149 0.048 2.885 0.004
cox_pl.ecog.ps -0.097 0.908 0.630 -0.154 0.878

## Source

Original code available at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/survivalCox.R