# Bayesian CFA

## Data Setup

For an empirical data set we can use the Big Five data from the psych package. For simplicity, I will only examine three of the five factors, and only the first three items of each. I have a version in which the relevant items have already been reverse-scored, but this is not necessary. We will restrict ourselves to complete data and a sample of 280 observations (roughly 10% of the full data).

```r
library(tidyverse)

data('big_five', package = 'noiris')
# data('bfi', package = 'psych')

big_five_no_miss = big_five %>%
  select(matches('(^E|^O|^N)[1-3]')) %>%
  drop_na() %>%
  slice_sample(n = 280)
```

## Model Code

The model code is quite verbose, and definitely not efficient, but hopefully it clarifies the ‘latent linear model’ underlying the observations.
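For reference, each observed variable is a linear function of its factor score, the standard CFA formulation, with the factors themselves multivariate normal:

$$
x_{ij} = b_j + \lambda_j f_{ik} + \epsilon_{ij}, \qquad
f_i \sim \mathcal{N}(\mathbf{0},\, D \Phi D), \quad
\epsilon_{ij} \sim \mathcal{N}(0,\, \sigma^2_{x_j})
$$

where $k$ indexes the factor that item $j$ loads on, $\Phi$ is the factor correlation matrix, and $D = \operatorname{diag}(\sigma_{lv})$ holds the factor standard deviations. The first loading for each factor is fixed to 1 for identification, as seen in the model block below.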

```stan
data {
  int<lower = 1> N;                              // sample size
  int<lower = 1> P;                              // number of variables
  int<lower = 1> K;                              // number of factors
  matrix[N, P] X;                                // data matrix of order [N, P]
}

transformed data {
  int<lower = 1> L;
  L = P - K;                                     // number of estimated loadings (one per factor is fixed to 1)
}

parameters {
  vector[P] b;                                   // intercepts
  vector[L] lambda01;                            // initial standard normal loadings
  matrix[N, K] FS;                               // factor scores, matrix of order [N, K]
  corr_matrix[K] phi;                            // factor correlations
  vector<lower = 0, upper = 2>[K] sd_lv;         // std dev of the latent factors
  vector<lower = 0, upper = 2>[P] sd_x;          // std dev of the disturbances
  vector<lower = 0, upper = 5>[L] sd_lambda;     // hyperparameter for loading std dev
}

transformed parameters {
  vector[L] lambda;                              // factor loadings

  lambda = lambda01 .* sd_lambda;                // implies lambda ~ normal(0, sd_lambda)
}

model {
  matrix[N, P] mu;
  matrix[K, K] Ld;
  vector[K] muFactors;

  muFactors = rep_vector(0, K);                  // factor means, set to zero
  Ld = diag_matrix(sd_lv) * cholesky_decompose(phi);

  for(n in 1:N) {
    mu[n, 1] = b[1] + FS[n, 1];                  // Extraversion; marker loading fixed to 1
    mu[n, 2] = b[2] + FS[n, 1] * lambda[1];
    mu[n, 3] = b[3] + FS[n, 1] * lambda[2];

    mu[n, 4] = b[4] + FS[n, 2];                  // Neuroticism
    mu[n, 5] = b[5] + FS[n, 2] * lambda[3];
    mu[n, 6] = b[6] + FS[n, 2] * lambda[4];

    mu[n, 7] = b[7] + FS[n, 3];                  // Openness
    mu[n, 8] = b[8] + FS[n, 3] * lambda[5];
    mu[n, 9] = b[9] + FS[n, 3] * lambda[6];
  }

  // priors
  phi ~ lkj_corr(2.0);

  sd_x      ~ cauchy(0, 2.5);
  sd_lambda ~ cauchy(0, 2.5);
  sd_lv     ~ cauchy(0, 2.5);

  b        ~ normal(0, 10);
  lambda01 ~ normal(0, 1);

  // likelihood
  for(i in 1:N) {
    FS[i] ~ multi_normal_cholesky(muFactors, Ld);
    X[i]  ~ normal(mu[i], sd_x);
  }
}
```

## Estimation

Note that this will likely take a while with the full data set, but you can reduce the number of iterations or the sample size and get roughly the same estimates. Even with these settings you may see warnings about divergent transitions or other issues to deal with.
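The `sampling` call further below assumes the model has already been compiled. A minimal sketch, assuming the Stan code above has been saved to a file called `bayes_cfa.stan` (the file name is my assumption):

```r
library(rstan)

# compile the Stan model shown above (file name assumed)
bayes_cfa = stan_model(file = 'bayes_cfa.stan')
```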

```r
stan_data = list(
  N = nrow(big_five_no_miss),
  P = ncol(big_five_no_miss),
  K = 3,
  X = as.matrix(big_five_no_miss)   # Stan expects a numeric matrix
)
```

```r
library(rstan)

fit_cfa = sampling(
  bayes_cfa,
  data    = stan_data,
  thin    = 4,
  cores   = 4,
  control = list(adapt_delta = .95, max_treedepth = 15)
)
```

## Comparison

```r
print(
  fit_cfa,
  digits = 3,
  pars   = c('lambda', 'phi'),
  probs  = c(.025, .5, .975)
)
```

```
Inference for Stan model: 6522f4cb208793aee047a3160d67d5b3.
4 chains, each with iter=2000; warmup=1000; thin=4;
post-warmup draws per chain=250, total post-warmup draws=1000.

            mean se_mean    sd   2.5%    50%  97.5% n_eff  Rhat
lambda[1]  1.452   0.022 0.270  1.020  1.422  2.088   147 1.044
lambda[2]  1.055   0.019 0.236  0.666  1.027  1.607   162 1.008
lambda[3]  0.930   0.005 0.074  0.798  0.928  1.093   240 1.000
lambda[4]  0.696   0.003 0.063  0.580  0.695  0.822   500 0.998
lambda[5]  0.783   0.011 0.195  0.425  0.774  1.207   340 1.004
lambda[6]  1.288   0.028 0.302  0.811  1.251  2.015   114 1.045
phi[1,1]   1.000     NaN 0.000  1.000  1.000  1.000   NaN   NaN
phi[1,2]  -0.238   0.004 0.082 -0.382 -0.241 -0.068   502 1.007
phi[1,3]   0.564   0.007 0.101  0.355  0.568  0.745   236 1.005
phi[2,1]  -0.238   0.004 0.082 -0.382 -0.241 -0.068   502 1.007
phi[2,2]   1.000   0.000 0.000  1.000  1.000  1.000   835 0.996
phi[2,3]  -0.163   0.003 0.085 -0.327 -0.163  0.003   696 0.999
phi[3,1]   0.564   0.007 0.101  0.355  0.568  0.745   236 1.005
phi[3,2]  -0.163   0.003 0.085 -0.327 -0.163  0.003   696 0.999
phi[3,3]   1.000   0.000 0.000  1.000  1.000  1.000   937 0.996

Samples were drawn using NUTS(diag_e) at Wed Nov 25 17:59:17 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
```
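Given the earlier note about possible divergence warnings, it is worth running rstan's built-in diagnostic checks before interpreting the estimates. A minimal sketch:

```r
# check for divergences, saturated treedepth, and low E-BFMI
check_hmc_diagnostics(fit_cfa)

# flag parameters with a worrisome Rhat (the phi diagonal is fixed, hence its NaN)
fit_summary = summary(fit_cfa)$summary
fit_summary[which(fit_summary[, 'Rhat'] > 1.05), c('n_eff', 'Rhat')]
```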

We can compare our results with those of the lavaan package, which uses standard maximum likelihood via the `cfa` function's default settings.

```r
library(lavaan)

mod = "
  E =~ E1 + E2 + E3
  N =~ N1 + N2 + N3
  O =~ O1 + O2 + O3
"

fit_lav = cfa(mod, data = big_five_no_miss)
# summary(fit_lav)
```

The following shows how to extract the parameter estimates from the Stan model and convert them to standardized form, followed by how to get the corresponding estimates from the lavaan output.

```r
# loadings; add back the marker loadings fixed to 1
lambda = get_posterior_mean(fit_cfa, pars = 'lambda')[, 'mean-all chains']
lambda = c(1, lambda[1:2], 1, lambda[3:4], 1, lambda[5:6])

# standard deviations of the factors and the observed variables
sd_F = rep(get_posterior_mean(fit_cfa, pars = 'sd_lv')[, 'mean-all chains'], each = 3)
x_sd = apply(stan_data$X, 2, sd)

# standardize
lambda_std_F   = sd_F * lambda
lambda_std_all = sd_F / x_sd * lambda

# factor correlations from each model
fit_cors = matrix(get_posterior_mean(fit_cfa, pars = 'phi')[, 'mean-all chains'], 3, 3)
lav_cors = lavInspect(fit_lav, 'cor.lv')

dimnames(fit_cors) = dimnames(lav_cors)

lav_par = parameterEstimates(fit_lav, standardized = TRUE)
```

First we compare the loadings, both raw and standardized, standardizing either the latent variable only (`std.lv`) or both the latent and observed variables (`std.all`).
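The standardized loadings are just the raw loadings rescaled by the relevant standard deviations, as in the code above:

$$
\lambda_j^{\text{std.lv}} = \lambda_j \, \sigma_F, \qquad
\lambda_j^{\text{std.all}} = \lambda_j \, \frac{\sigma_F}{\sigma_{x_j}}
$$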

| lhs | op | rhs |   est | std.lv | std.all | std.nox | lambda_est | lambda_std_lv | lambda_std_all |
|:----|:---|:----|------:|-------:|--------:|--------:|-----------:|--------------:|---------------:|
| E   | =~ | E1  | 1.000 |  0.822 |   0.505 |   0.505 |      1.000 |         0.808 |          0.495 |
| E   | =~ | E2  | 1.375 |  1.130 |   0.706 |   0.706 |      1.452 |         1.172 |          0.732 |
| E   | =~ | E3  | 1.042 |  0.857 |   0.599 |   0.599 |      1.055 |         0.852 |          0.595 |
| N   | =~ | N1  | 1.000 |  1.441 |   0.897 |   0.897 |      1.000 |         1.450 |          0.900 |
| N   | =~ | N2  | 0.932 |  1.342 |   0.852 |   0.852 |      0.930 |         1.348 |          0.854 |
| N   | =~ | N3  | 0.705 |  1.015 |   0.642 |   0.642 |      0.696 |         1.009 |          0.637 |
| O   | =~ | O1  | 1.000 |  0.728 |   0.617 |   0.617 |      1.000 |         0.714 |          0.605 |
| O   | =~ | O2  | 0.775 |  0.564 |   0.362 |   0.362 |      0.783 |         0.559 |          0.358 |
| O   | =~ | O3  | 1.228 |  0.893 |   0.675 |   0.675 |      1.288 |         0.920 |          0.694 |
Next we compare the factor correlations from the two models.

```r
fit_cors
```

```
           E          N          O
E  1.0000000 -0.2377586  0.5638754
N -0.2377586  1.0000000 -0.1625852
O  0.5638754 -0.1625852  1.0000000
```

```r
lav_cors
```

```
           E          N          O
E  1.0000000 -0.2420697  0.6042405
N -0.2420697  1.0000000 -0.1749015
O  0.6042405 -0.1749015  1.0000000
```
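One could also compare the estimated factor scores themselves. A minimal sketch; note that rstan returns matrix parameters in column-major order, so the reshaping below recovers the original [N, K] layout:

```r
# posterior means of the factor scores, reshaped to N x K
fs_stan = matrix(
  get_posterior_mean(fit_cfa, pars = 'FS')[, 'mean-all chains'],
  ncol = 3
)

# lavaan's estimated factor scores
fs_lav = lavPredict(fit_lav)

# correlation between corresponding columns (E, N, O)
diag(cor(fs_stan, fs_lav))
```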