Bayesian CFA

Data Setup

For an empirical data set we can use the big five data, originally from the psych package. For simplicity, I will only examine three of the five factors, and only the first three items of each. I have a version that has already reverse-scored the items that need it, but this is not necessary. We will restrict ourselves to complete data and a sample of 280 observations (~10%).

library(tidyverse)

data('big_five', package = 'noiris')
# data('bfi', package = 'psych')

big_five_no_miss = big_five %>% 
  select(matches('(^E|^O|^N)[1-3]')) %>% 
  drop_na()  %>% 
  slice_sample(n = 280)
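Since slice_sample draws rows at random, the exact subsample will vary from run to run; a set.seed call beforehand would make it reproducible. A quick sanity check of what we're left with:

# 280 complete rows on the 9 selected items
dim(big_five_no_miss)  # 280 x 9: E1-E3, N1-N3, O1-O3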

Model Code

The model code is quite verbose, and definitely not efficient, but hopefully it clarifies the 'latent linear model' underlying the observations.
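In equation form, the code below amounts to the following, where the first loading on each factor is fixed to 1 for identification and the remaining loadings are free:

$$
x_n = b + \Lambda f_n + \varepsilon_n, \qquad
f_n \sim \mathcal{N}(0,\, D\,\Phi\,D), \qquad
\varepsilon_n \sim \mathcal{N}(0,\, \mathrm{diag}(\sigma_x^2))
$$

Here $\Lambda$ is the $P \times K$ loading matrix, $\Phi$ is the factor correlation matrix, and $D = \mathrm{diag}(\sigma_{lv})$ holds the latent standard deviations.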

data {
  int<lower = 1> N;                              // sample size
  int<lower = 1> P;                              // number of variables
  int<lower = 1> K;                              // number of factors
  matrix[N,P]  X;                                // data matrix of order [N,P]
}

transformed data {
  int<lower = 1> L;
  L = P - K;                                     // Number of free loadings
}

parameters {
  vector[P] b;                                   // intercepts
  vector[L] lambda01;                            // initial factor loadings
  matrix[N, K] FS;                               // factor scores, matrix of order [N,K]
  corr_matrix[K] phi;                            // factor correlations
  vector<lower = 0, upper = 2>[K] sd_lv;         // std dev of the latent factors
  vector<lower = 0, upper = 2>[P] sd_x;          // std dev of the disturbances
  vector<lower = 0, upper = 5>[L] sd_lambda;     // hyper parameter for loading std dev
}

transformed parameters {
  vector[L] lambda;                              // factor loadings

  lambda = lambda01 .* sd_lambda;                // lambda as normal(0, sd_lambda)
}

model {
  matrix[N,P] mu;
  matrix[K,K] Ld;
  vector[K] muFactors;
  
  muFactors = rep_vector(0, K);                  // Factor means, set to zero
  Ld = diag_matrix(sd_lv) * cholesky_decompose(phi);

  for(n in 1:N) {
    mu[n,1] = b[1] + FS[n,1];                    // Extraversion
    mu[n,2] = b[2] + FS[n,1]*lambda[1];
    mu[n,3] = b[3] + FS[n,1]*lambda[2];

    mu[n,4] = b[4] + FS[n,2];                    // Neuroticism
    mu[n,5] = b[5] + FS[n,2]*lambda[3];
    mu[n,6] = b[6] + FS[n,2]*lambda[4];
    
    mu[n,7] = b[7] + FS[n,3];                    // Openness
    mu[n,8] = b[8] + FS[n,3]*lambda[5];
    mu[n,9] = b[9] + FS[n,3]*lambda[6];
  }

  // priors
  
  phi ~ lkj_corr(2.0);
  
  sd_x      ~ cauchy(0, 2.5);
  sd_lambda ~ cauchy(0, 2.5);
  sd_lv     ~ cauchy(0, 2.5);
  
  b        ~ normal(0, 10);
  lambda01 ~ normal(0, 1);
  
  // likelihood
  
  for(i in 1:N){   
    FS[i] ~ multi_normal_cholesky(muFactors, Ld);
    
    X[i]  ~ normal(mu[i], sd_x);
  }
  
}

Estimation

Note that this will likely take a while with the full data set, but you can bump the iterations down or decrease the sample size and get roughly the same estimates. Even with these defaults you may see warnings about divergent transitions or other issues that need addressing.
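The sampling call below assumes the Stan code has already been compiled into a model object named bayes_cfa. A minimal sketch, assuming the model code above is saved to a file called bayes_cfa.stan:

library(rstan)

# compile the Stan model once (this is the slow part);
# sampling() then reuses the compiled object
bayes_cfa = stan_model('bayes_cfa.stan')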

stan_data = 
  list(
    N = nrow(big_five_no_miss),
    P = ncol(big_five_no_miss),
    K = 3,
    X = as.matrix(big_five_no_miss)   # Stan expects a numeric matrix, not a data frame
  )

library(rstan)

fit_cfa = sampling(
  bayes_cfa,
  data    = stan_data,
  thin    = 4,
  cores   = 4,
  control = list(adapt_delta = .95, max_treedepth = 15)
)
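Given the note about possible divergences above, a quick check of the sampler diagnostics is worthwhile:

# report divergences, max treedepth hits, and E-BFMI for all chains
check_hmc_diagnostics(fit_cfa)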

Comparison

Here are the raw factor loadings and factor correlations.

print(
  fit_cfa,
  digits = 3,
  pars   = c('lambda', 'phi'),
  probs  = c(.025, .5, .975)
)
Inference for Stan model: 6522f4cb208793aee047a3160d67d5b3.
4 chains, each with iter=2000; warmup=1000; thin=4; 
post-warmup draws per chain=250, total post-warmup draws=1000.

            mean se_mean    sd   2.5%    50%  97.5% n_eff  Rhat
lambda[1]  1.452   0.022 0.270  1.020  1.422  2.088   147 1.044
lambda[2]  1.055   0.019 0.236  0.666  1.027  1.607   162 1.008
lambda[3]  0.930   0.005 0.074  0.798  0.928  1.093   240 1.000
lambda[4]  0.696   0.003 0.063  0.580  0.695  0.822   500 0.998
lambda[5]  0.783   0.011 0.195  0.425  0.774  1.207   340 1.004
lambda[6]  1.288   0.028 0.302  0.811  1.251  2.015   114 1.045
phi[1,1]   1.000     NaN 0.000  1.000  1.000  1.000   NaN   NaN
phi[1,2]  -0.238   0.004 0.082 -0.382 -0.241 -0.068   502 1.007
phi[1,3]   0.564   0.007 0.101  0.355  0.568  0.745   236 1.005
phi[2,1]  -0.238   0.004 0.082 -0.382 -0.241 -0.068   502 1.007
phi[2,2]   1.000   0.000 0.000  1.000  1.000  1.000   835 0.996
phi[2,3]  -0.163   0.003 0.085 -0.327 -0.163  0.003   696 0.999
phi[3,1]   0.564   0.007 0.101  0.355  0.568  0.745   236 1.005
phi[3,2]  -0.163   0.003 0.085 -0.327 -0.163  0.003   696 0.999
phi[3,3]   1.000   0.000 0.000  1.000  1.000  1.000   937 0.996

Samples were drawn using NUTS(diag_e) at Wed Nov 25 17:59:17 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

We can compare our results with those of the lavaan package, which uses standard maximum likelihood via the cfa function's default settings.

library(lavaan)

mod = "
  E =~ E1 + E2 + E3 
  N =~ N1 + N2 + N3 
  O =~ O1 + O2 + O3 
"

fit_lav = cfa(mod, data = big_five_no_miss)
# summary(fit_lav)  

The following shows how to extract the parameter estimates from the Stan fit and convert them to standardized form, and then how to get the corresponding estimates from the lavaan output.
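For reference, standardizing by the latent variable alone multiplies each raw loading by its factor's standard deviation, while the fully standardized version also divides by the observed variable's standard deviation:

$$
\lambda^{\text{std.lv}} = \lambda\,\sigma_F, \qquad
\lambda^{\text{std.all}} = \lambda\,\frac{\sigma_F}{\sigma_x}
$$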

# loadings (posterior means), inserting the fixed loading of 1 for each factor's first item
lambda = get_posterior_mean(fit_cfa, pars = 'lambda')[, 'mean-all chains']
lambda = c(1, lambda[1:2], 1, lambda[3:4], 1, lambda[5:6])

# standard deviations of factors and observed variables
sd_F = rep(get_posterior_mean(fit_cfa, pars = 'sd_lv')[, 'mean-all chains'], each = 3)
x_sd = apply(stan_data$X, 2, sd)

# standardize by the latent variable only, then by both latent and observed
lambda_std_lv  = sd_F * lambda
lambda_std_all = sd_F / x_sd * lambda

# get factor correlations
fit_cors = matrix(
  get_posterior_mean(fit_cfa, pars = 'phi')[, 'mean-all chains'], 3, 3,
  dimnames = list(c('E', 'N', 'O'), c('E', 'N', 'O'))
)

# lavaan estimates, including standardized solutions and latent correlations
lav_par  = parameterEstimates(fit_lav, standardized = TRUE)
lav_cors = lavInspect(fit_lav, 'cor.lv')

First we compare the loadings, both raw and standardized (standardized by the latent variable only, or by both the latent and observed variables).

lhs op rhs   est std.lv std.all std.nox lambda_est lambda_std_lv lambda_std_all
E   =~  E1 1.000  0.822   0.505   0.505      1.000         0.808          0.495
E   =~  E2 1.375  1.130   0.706   0.706      1.452         1.172          0.732
E   =~  E3 1.042  0.857   0.599   0.599      1.055         0.852          0.595
N   =~  N1 1.000  1.441   0.897   0.897      1.000         1.450          0.900
N   =~  N2 0.932  1.342   0.852   0.852      0.930         1.348          0.854
N   =~  N3 0.705  1.015   0.642   0.642      0.696         1.009          0.637
O   =~  O1 1.000  0.728   0.617   0.617      1.000         0.714          0.605
O   =~  O2 0.775  0.564   0.362   0.362      0.783         0.559          0.358
O   =~  O3 1.228  0.893   0.675   0.675      1.288         0.920          0.694
fit_cors
           E          N          O
E  1.0000000 -0.2377586  0.5638754
N -0.2377586  1.0000000 -0.1625852
O  0.5638754 -0.1625852  1.0000000
lav_cors
           E          N          O
E  1.0000000 -0.2420697  0.6042405
N -0.2420697  1.0000000 -0.1749015
O  0.6042405 -0.1749015  1.0000000