Probit & Bivariate Probit

Stata users seem to be the primary audience concerned with probit models, but I thought I’d play around with one even though I’ve never had reason to use it. Stata examples come from the UCLA ATS website and the Stata manual, so one can investigate the Stata result for comparison.

Standard Probit

The standard probit model is identical to the logistic model but using a different link function.

Function

probit_ll <- function(beta, X, y) {
  mu = X %*% beta
  
  # these produce identical results, but the second is the typical depiction
  ll = sum(dbinom(
    y,
    size = 1,
    prob = pnorm(mu),
    log = T
  ))
  
  # ll = sum(y * pnorm(mu, log = T) + (1 - y) * log(1 - pnorm(mu)))
  
  -ll
}

Examples

Example 1 detail available here.

library(tidyverse)

admit = haven::read_dta('https://stats.idre.ucla.edu/stat/stata/dae/binary.dta')

head(admit)
# A tibble: 6 x 4
  admit   gre   gpa  rank
  <dbl> <dbl> <dbl> <dbl>
1     0   380  3.61     3
2     1   660  3.67     3
3     1   800  4        1
4     1   640  3.19     4
5     0   520  2.93     4
6     1   760  3        2
X = model.matrix(admit~ gre + gpa + factor(rank), admit)
y = admit$admit

init = rep(0, ncol(X))

fit = optim(
  fn  = probit_ll,
  par = init,
  X   = X,
  y   = y,
  method = 'BFGS'
)

fit 
$par
[1] -2.2518260555  0.0007915268  0.5453357468 -0.4211611887 -0.8285356685 -0.9457812896

$value
[1] 229.6227

$counts
function gradient 
     140       11 

$convergence
[1] 0

$message
NULL

Example 2 from Stata manual on standard probit.

We have data on the make, weight, and mileage rating of 22 foreign and 52 domestic automobiles. We wish to fit a probit model explaining whether a car is foreign based on its weight and mileage."

auto = haven::read_dta('http://www.stata-press.com/data/r13/auto.dta')

head(auto)
# A tibble: 6 x 12
  make          price   mpg rep78 headroom trunk weight length  turn displacement gear_ratio      foreign
  <chr>         <dbl> <dbl> <dbl>    <dbl> <dbl>  <dbl>  <dbl> <dbl>        <dbl>      <dbl>    <dbl+lbl>
1 AMC Concord    4099    22     3      2.5    11   2930    186    40          121       3.58 0 [Domestic]
2 AMC Pacer      4749    17     3      3      11   3350    173    40          258       2.53 0 [Domestic]
3 AMC Spirit     3799    22    NA      3      12   2640    168    35          121       3.08 0 [Domestic]
4 Buick Century  4816    20     3      4.5    16   3250    196    40          196       2.93 0 [Domestic]
5 Buick Electra  7827    15     4      4      20   4080    222    43          350       2.41 0 [Domestic]
6 Buick LeSabre  5788    18     3      4      21   3670    218    43          231       2.73 0 [Domestic]
X = model.matrix(foreign~ weight + mpg, auto)
y = auto$foreign

init = rep(0, ncol(X))

fit = optim(
  fn  = probit_ll,
  par = init,
  X = X,
  y = y
)

fit
$par
[1]  8.277015097 -0.002335939 -0.103973147

$value
[1] 26.84419

$counts
function gradient 
     380       NA 

$convergence
[1] 0

$message
NULL

Bivariate Probit

For the bivariate model, we are dealing with two binary outcomes and their correlation.

Here is the main function.

bivariate_probit_ll <- function(pars, X, y1, y2) {
  rho = pars[1]
  mu1 = X %*% pars[2:(ncol(X) + 1)]
  mu2 = X %*% pars[(ncol(X) + 2):length(pars)]
  q1  = ifelse(y1 == 1, 1,-1)
  q2  = ifelse(y2 == 1, 1,-1)
  
  require(mnormt)
  eta1 = q1 * mu1
  eta2 = q2 * mu2
  
  ll = matrix(NA, nrow = nrow(X))
  
  for (i in 1:length(ll)) {
    corr = q1[i] * q2[i] * rho
    corr = matrix(c(1, corr, corr, 1), 2)
    ll[i] = log(
      pmnorm(
        x = c(eta1[i], eta2[i]),
        mean   = c(0, 0),
        varcov = corr
      )
    )
  }
  
  # the loop is probably clearer, and there is no difference in time, but here's
  # a oneliner ll = mapply(function(e1, e2, q1, q2) log(pmnorm(x = c(e1, e2),
  # varcov = matrix(c(1,q1*q2*rho,q1*q2*rho,1),2))), eta1, eta2, q1, q2)
  
  -sum(ll)
}

Example

From the Stata manual on bivariate probit:

We wish to model the bivariate outcomes of whether children attend private school and whether the head of the household voted for an increase in property tax based on the other covariates.

school = haven::read_dta('http://www.stata-press.com/data/r13/school.dta')

head(school)
# A tibble: 6 x 11
    obs pub12 pub34  pub5 private years school loginc logptax  vote logeduc
  <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl>  <dbl>  <dbl>   <dbl> <dbl>   <dbl>
1     1     0     1     0       0    10      1   9.77    7.05     1    7.21
2     2     0     1     0       0     8      0  10.0     7.05     0    7.61
3     3     1     0     0       0     4      0  10.0     7.05     0    8.28
4     4     0     1     0       0    13      0   9.43    6.40     0    6.82
5     5     0     1     0       0     3      1  10.0     7.28     1    7.69
6     6     1     0     0       0     5      0  10.5     7.05     0    6.91
X  = model.matrix(private ~ years + logptax + loginc, school)
y1 = school$private
y2 = school$vote

init = c(0, rep(0, ncol(X)*2))

# you'll probably get a warning or two, ignore; takes a couple seconds
fit = optim(
  fn  = bivariate_probit_ll,
  par = init,
  X   = X,
  y1  = y1,
  y2  = y2,
  method = 'BFGS'
)


loglik = fit$value
rho    = fit$par[1]
coefs_private = fit$par[2:(ncol(X) + 1)]
coefs_vote = fit$par[(ncol(X) + 2):length(init)]
names(coefs_private) = names(coefs_vote) = c('Int', 'years', 'logptax', 'loginc')

list(
  loglik  = loglik,
  rho     = rho,
  Private = coefs_private,
  Vote    = coefs_vote
)
$loglik
[1] 89.25407

$rho
[1] -0.2695802

$Private
        Int       years     logptax      loginc 
-4.14327955 -0.01193699 -0.11030513  0.37459892 

$Vote
        Int       years     logptax      loginc 
-0.52933721 -0.01686685 -1.28983223  0.99840976