Bayesian CFA
Data Setup
For an empirical data set we can use the big five data from the psych package. For simplicity, I will only examine three of the five factors, and only the first 3 items of each. I have a version that has already reverse-scored the items that need be, but this is not necessary. We we will restrict ourselves to complete data and only a sample of 280 observations (~10%).
library(tidyverse)
data('big_five', package = 'noiris')
# data('bfi', package = 'psych')
= big_five %>%
big_five_no_miss select(matches('(^E|^O|^N)[1-3]')) %>%
drop_na() %>%
slice_sample(n = 280)
Model Code
The model code is quite verbose, and definitely not efficient, but hopefully clarifies this ‘latent linear model’ underlying the observations.
data {
int<lower = 1> N; // sample size
int<lower = 1> P; // number of variables
int<lower = 1> K; // number of factors
matrix[N,P] X; // data matrix of order [N,P]
}
transformed data {
int<lower = 1> L;
L = P - K; // Number of free loadings
}
parameters {
vector[P] b; // intercepts
vector[L] lambda01; // initial factor loadings
matrix[N, K] FS; // factor scores, matrix of order [N,K]
corr_matrix[K] phi; // factor correlations
vector<lower = 0, upper = 2>[K] sd_lv; // std dev of the latent factors
vector<lower = 0, upper = 2>[P] sd_x; // std dev of the disturbances
vector<lower = 0, upper = 5>[L] sd_lambda; // hyper parameter for loading std dev
}
transformed parameters {
vector[L] lambda; // factor loadings
lambda = lambda01 .* sd_lambda; // lambda as normal(0, sd_lambda)
}
model {
matrix[N,P] mu;
matrix[K,K] Ld;
vector[K] muFactors;
muFactors = rep_vector(0, K); // Factor means, set to zero
Ld = diag_matrix(sd_lv) * cholesky_decompose(phi);
for(n in 1:N) {
mu[n,1] = b[1] + FS[n,1]; // Extraversion
mu[n,2] = b[2] + FS[n,1]*lambda[1];
mu[n,3] = b[3] + FS[n,1]*lambda[2];
mu[n,4] = b[4] + FS[n,2]; // Neuroticism
mu[n,5] = b[5] + FS[n,2]*lambda[3];
mu[n,6] = b[6] + FS[n,2]*lambda[4];
mu[n,7] = b[7] + FS[n,3]; // Openness
mu[n,8] = b[8] + FS[n,3]*lambda[5];
mu[n,9] = b[9] + FS[n,3]*lambda[6];
}
// priors
phi ~ lkj_corr(2.0);
sd_x ~ cauchy(0, 2.5);
sd_lambda ~ cauchy(0, 2.5);
sd_lv ~ cauchy(0, 2.5);
b ~ normal(0, 10);
lambda01 ~ normal(0, 1);
// likelihood
for(i in 1:N){
FS[i] ~ multi_normal_cholesky(muFactors, Ld);
X[i] ~ normal(mu[i], sd_x);
}
}
Estimation
Note that this will likely take a while with the full data set, but you can bump the iterations down or decrease the sample size and get roughly the same estimates. With these defaults you might get some divergent warnings or other issues to possibly deal with.
=
stan_data list(
N = nrow(big_five_no_miss),
P = ncol(big_five_no_miss),
K = 3,
X = big_five_no_miss
)
library(rstan)
= sampling(
fit_cfa
bayes_cfa,data = stan_data,
thin = 4,
cores = 4,
control = list(adapt_delta = .95, max_treedepth = 15)
)
Comparison
Here are the raw factor loadings and factor correlations.
print(
fit_cfa,digits = 3,
par = c('lambda', 'phi'),
probs = c(.025, .5, 0.975)
)
Inference for Stan model: 6522f4cb208793aee047a3160d67d5b3.
4 chains, each with iter=2000; warmup=1000; thin=4;
post-warmup draws per chain=250, total post-warmup draws=1000.
mean se_mean sd 2.5% 50% 97.5% n_eff Rhat
lambda[1] 1.452 0.022 0.270 1.020 1.422 2.088 147 1.044
lambda[2] 1.055 0.019 0.236 0.666 1.027 1.607 162 1.008
lambda[3] 0.930 0.005 0.074 0.798 0.928 1.093 240 1.000
lambda[4] 0.696 0.003 0.063 0.580 0.695 0.822 500 0.998
lambda[5] 0.783 0.011 0.195 0.425 0.774 1.207 340 1.004
lambda[6] 1.288 0.028 0.302 0.811 1.251 2.015 114 1.045
phi[1,1] 1.000 NaN 0.000 1.000 1.000 1.000 NaN NaN
phi[1,2] -0.238 0.004 0.082 -0.382 -0.241 -0.068 502 1.007
phi[1,3] 0.564 0.007 0.101 0.355 0.568 0.745 236 1.005
phi[2,1] -0.238 0.004 0.082 -0.382 -0.241 -0.068 502 1.007
phi[2,2] 1.000 0.000 0.000 1.000 1.000 1.000 835 0.996
phi[2,3] -0.163 0.003 0.085 -0.327 -0.163 0.003 696 0.999
phi[3,1] 0.564 0.007 0.101 0.355 0.568 0.745 236 1.005
phi[3,2] -0.163 0.003 0.085 -0.327 -0.163 0.003 696 0.999
phi[3,3] 1.000 0.000 0.000 1.000 1.000 1.000 937 0.996
Samples were drawn using NUTS(diag_e) at Wed Nov 25 17:59:17 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
We can compare our results with those of the lavaan package, which uses standard maximum likelihood via the cfa function default settings.
library(lavaan)
= "
mod E =~ E1 + E2 + E3
N =~ N1 + N2 + N3
O =~ O1 + O2 + O3
"
= cfa(mod, data = big_five_no_miss)
fit_lav # summary(fit_lav)
The following shows how to extract the parameter estimates and convert them to standardized form, followed by how to get the parameter estimates from the lavaan output.
# loadings
= get_posterior_mean(fit_cfa, par = 'lambda')[,'mean-all chains']
lambda = c(1, lambda[1:2], 1, lambda[3:4], 1, lambda[5:6])
lambda
# standard deviations of factors and observed
= rep(get_posterior_mean(fit_cfa, par = 'sd_lv')[,'mean-all chains'], e = 3)
sd_F = apply(stan_data$X, 2, sd)
x_sd
# standardize
= sd_F*lambda
lambda_std_F = sd_F/x_sd*lambda
lambda_std_all
# get factor correlations
= matrix(get_posterior_mean(fit_cfa, par = 'phi')[, 5], 3, 3)
fit_cors
= parameterEstimates(fit_lav, standardized = TRUE) lav_par
First we compare the loadings, both raw and standardized (either standardize the latent variable only or the latent and observed variables).
lhs | op | rhs | est | std.lv | std.all | std.nox | lambda_est | lambda_std_lv | lambda_std_all |
---|---|---|---|---|---|---|---|---|---|
E | =~ | E1 | 1.000 | 0.822 | 0.505 | 0.505 | 1.000 | 0.808 | 0.495 |
E | =~ | E2 | 1.375 | 1.130 | 0.706 | 0.706 | 1.452 | 1.172 | 0.732 |
E | =~ | E3 | 1.042 | 0.857 | 0.599 | 0.599 | 1.055 | 0.852 | 0.595 |
N | =~ | N1 | 1.000 | 1.441 | 0.897 | 0.897 | 1.000 | 1.450 | 0.900 |
N | =~ | N2 | 0.932 | 1.342 | 0.852 | 0.852 | 0.930 | 1.348 | 0.854 |
N | =~ | N3 | 0.705 | 1.015 | 0.642 | 0.642 | 0.696 | 1.009 | 0.637 |
O | =~ | O1 | 1.000 | 0.728 | 0.617 | 0.617 | 1.000 | 0.714 | 0.605 |
O | =~ | O2 | 0.775 | 0.564 | 0.362 | 0.362 | 0.783 | 0.559 | 0.358 |
O | =~ | O3 | 1.228 | 0.893 | 0.675 | 0.675 | 1.288 | 0.920 | 0.694 |
fit_cors
E N O
E 1.0000000 -0.2377586 0.5638754
N -0.2377586 1.0000000 -0.1625852
O 0.5638754 -0.1625852 1.0000000
lav_cors
E N O
E 1.0000000 -0.2420697 0.6042405
N -0.2420697 1.0000000 -0.1749015
O 0.6042405 -0.1749015 1.0000000
Source
Original code available at: https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/Bayesian/StanBugsJags/cfa