Confirmatory Factor Analysis

This mostly follows Bollen (1989) for maximum likelihood estimation of a confirmatory factor analysis. In the following example we will examine a situation where there are two underlying (correlated) latent variables for 8 observed responses. The code as is will only work with this toy data set. Setup uses the psych and mvtnorm packages, and results are checked against the lavaan package.

Data Setup

For the data we will simulate observed variables with specific loadings on two latent constructs (factors).

library(tidyverse)

set.seed(123)

# loading matrix
lambda = matrix(
  c(1.0, 0.5, 0.8, 0.6, 0.0, 0.0, 0.0, 0.0,
    0.0, 0.0, 0.0, 0.0, 1.0, 0.7, 0.6, 0.8), 
  nrow = 2, 
  byrow = TRUE
)

# correlation of factors
phi = matrix(c(1, .25, .25, 1), nrow = 2, byrow = TRUE)  

# factors and some noise
factors = mvtnorm::rmvnorm(1000, mean = rep(0, 2), sigma = phi, "chol")
e = mvtnorm::rmvnorm(1000, sigma = diag(8))

# observed responses
y = 0 + factors%*%lambda + e

# Examine
#dim(y)
psych::describe(y)

   vars    n  mean   sd median trimmed  mad   min  max range  skew kurtosis   se
X1    1 1000  0.05 1.44   0.05    0.05 1.42 -5.13 4.51  9.63  0.00     0.01 0.05
X2    2 1000  0.00 1.08  -0.01    0.00 1.04 -3.34 3.25  6.59  0.00    -0.06 0.03
X3    3 1000  0.01 1.27   0.05    0.01 1.17 -5.33 4.09  9.42 -0.06     0.32 0.04
X4    4 1000  0.00 1.14  -0.03   -0.01 1.13 -3.85 3.98  7.83  0.10     0.16 0.04
X5    5 1000  0.04 1.43   0.10    0.05 1.39 -4.43 5.21  9.63 -0.02     0.07 0.05
X6    6 1000 -0.02 1.22  -0.01   -0.02 1.27 -3.35 4.68  8.03  0.04    -0.10 0.04
X7    7 1000  0.02 1.14   0.02    0.01 1.10 -3.08 3.66  6.74  0.11     0.00 0.04
X8    8 1000  0.01 1.29   0.02    0.01 1.24 -3.68 4.50  8.18 -0.02     0.10 0.04

# round(cor(y), 2)

# see the factor structure
psych::cor.plot(cor(y))

# example exploratory fa
#psych::fa(y, nfactors=2, rotate="oblimin")

Functions

We will have two separate estimation functions, one for the covariance matrix, and another for the correlation matrix.

# measurement model, covariance approach

# trace function, strangely absent from base R
tr <- function(mat) {
  sum(diag(mat), na.rm = TRUE)
}

cfa_cov <- function (parms, data) {
  # Arguments- 
  # parms: initial values (named)
  # data: raw data
  
  # Extract parameters by name
  
  l1 = c(1, parms[grep('l1', names(parms))])      # loadings for factor 1
  l2 = c(1, parms[grep('l2', names(parms))])      # loadings for factor 2
  
  cov0 = parms[grep('cov', names(parms))]         # factor covariance, variances
  
  # Covariance matrix
  S = cov(data)*((nrow(data)-1)/nrow(data))       # ML covariance div by N rather than N-1, the multiplier adjusts
  
  # loading estimates
  lambda = cbind(
    c(l1, rep(0,length(l2))),
    c(rep(0,length(l1)), l2)
  )
  
  # disturbances
  dist_init = parms[grep('dist', names(parms))]    
  disturbs  = diag(dist_init)
  
  # factor correlation
  phi_init = matrix(c(cov0[1], cov0[2], cov0[2], cov0[3]), 2, 2)  #factor cov/correlation matrix
  
  # other calculations and log likelihood
  sigtheta = lambda%*%phi_init%*%t(lambda) + disturbs
  
  # in Bollen p + q (but for the purposes of this just p) = tr(data)
  pq = dim(data)[2] 
  
  # a reduced version; Bollen 1989 p.107
  # ll = -(log(det(sigtheta)) + tr(S%*%solve(sigtheta)) - log(det(S)) - pq) 
  
  # this should be the same as Mplus H0 log likelihood
  ll = ( (-nrow(data)*pq/2) * log(2*pi) ) - 
    (nrow(data)/2) * ( log(det(sigtheta)) + tr(S%*%solve(sigtheta)) )
  
  -ll
}

We can use the correlation matrix for standardized results. Lines correspond to those in cfa_cov.

cfa_cor <- function (parms, data) {
  
  l1 = parms[grep('l1', names(parms))]      # loadings for factor 1
  l2 = parms[grep('l2', names(parms))]      # loadings for factor 2
  cor0 = parms[grep('cor', names(parms))]   # factor correlation
  
  S = cor(data)
  
  lambda = cbind(
    c(l1, rep(0,length(l2))),
    c(rep(0,length(l1)), l2)
  )
  
  dist_init = parms[grep('dist', names(parms))]
  disturbs  = diag(dist_init)
  
  phi_init = matrix(c(1, cor0, cor0, 1), ncol=2)
  
  sigtheta = lambda%*%phi_init%*%t(lambda) + disturbs
  pq = dim(data)[2]
  
  #ll = ( log(det(sigtheta)) + tr(S%*%solve(sigtheta)) - log(det(S)) - pq )
  
  ll = ( (-nrow(data)*pq/2) * log(2*pi) ) - 
    (nrow(data)/2) * ( log(det(sigtheta)) + tr(S%*%solve(sigtheta)) )
  
  -ll
}

Estimation

Corresponding to the functions, we will get results based on the covariance and correlation matrix respectively.

Raw

Set initial values.

par_init_cov = c(rep(1, 6), rep(.05, 8), rep(.5, 3)) 
names(par_init_cov) = rep(c('l1','l2', 'dist', 'cov'), c(3, 3, 8, 3))

Estimate and extract.

fit_cov = optim(
  par  = par_init_cov,
  fn   = cfa_cov,
  data = y,
  method  = "L-BFGS-B",
  lower   = 0
) 

loadings_cov = data.frame(
  f1 = c(1, fit_cov$par[1:3], rep(0, 4)),
  f2 = c(rep(0, 4), 1, fit_cov$par[4:6])
)

disturbances_cov = fit_cov$par[7:14]

Standardized

par_init_cor = c(rep(1, 8), rep(.05, 8), 0) #for cor
names(par_init_cor) = rep(c('l1', 'l2', 'dist', 'cor'), c(4, 4, 8, 1))

fit_cor = optim(
  par  = par_init_cor,
  fn   = cfa_cor,
  data = y,
  method = "L-BFGS-B",
  lower  = 0,
  upper  = 1
)

loadings_cor = matrix(
  c(fit_cor$par[1:4], rep(0, 4), rep(0, 4), fit_cor$par[5:8]), 
  ncol = 2
)

disturbances_cor = fit_cor$par[9:16]

Comparison

Gather results for summarizing.

results = list(
  raw = list(
    loadings = round(data.frame(loadings_cov, Variances = disturbances_cov), 3),
    cov.fact = round(matrix(c(fit_cov$par[c(15, 16, 16, 17)]), ncol = 2) , 3)
  ),
  
  standardized = list(
    loadings = round(
      data.frame(
        loadings_cor,
        Variances = disturbances_cor,
        Rsq = (1 - disturbances_cor)
      ), 3),
    cor.fact = round(matrix(c(1, fit_cor$par[c(17, 17)], 1), ncol = 2), 3)
  ),
  
  # note inclusion of intercepts for total number of par
  fit_lav = data.frame(
    ll  = fit_cov$value,
    AIC = 2*fit_cov$value + 2 * (length(par_init_cov) + ncol(y)),
    BIC = 2*fit_cov$value + log(nrow(y)) * (length(par_init_cov) + ncol(y))
  )  
)

results

$raw
$raw$loadings
     f1    f2 Variances
1 1.000 0.000     1.073
2 0.459 0.000     0.955
3 0.836 0.000     0.908
4 0.570 0.000     0.961
5 0.000 1.000     1.047
6 0.000 0.739     0.941
7 0.000 0.575     0.972
8 0.000 0.803     1.034

$raw$cov.fact
      [,1]  [,2]
[1,] 1.006 0.185
[2,] 0.185 0.989


$standardized
$standardized$loadings
     X1    X2 Variances   Rsq
1 0.696 0.000     0.516 0.484
2 0.426 0.000     0.819 0.181
3 0.661 0.000     0.563 0.437
4 0.504 0.000     0.746 0.254
5 0.000 0.697     0.514 0.486
6 0.000 0.604     0.636 0.364
7 0.000 0.502     0.748 0.252
8 0.000 0.618     0.618 0.382

$standardized$cor.fact
      [,1]  [,2]
[1,] 1.000 0.186
[2,] 0.186 1.000


$fit_lav
        ll      AIC      BIC
1 12497.68 25045.37 25168.06

Compare with lavaan.

library(lavaan)

y = data.frame(y)

model = '
  F1  =~ X1 + X2 + X3 + X4
  F2  =~ X5 + X6 + X7 + X8
'

fit_lav = cfa(
  model,
  data  = y,
  mimic = 'Mplus',
  estimator = 'ML'
)

fit_lav_std = cfa(
  model,
  data  = y,
  mimic = 'Mplus',
  estimator = 'ML',
  std.lv = TRUE,
  std.ov = TRUE
) 

# note that lavaan does not count the intercepts among the free params for
# AIC/BIC by default, (can get its result via -2 * as.numeric(lls) + k *
# attr(lls, "df")), but the mimic='Mplus' should have them correspond to optim's
# results

summary(fit_lav,
        fit.measures = TRUE,
        standardized = TRUE)

lavaan 0.6-7 ended normally after 30 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of free parameters                         25
                                                      
  Number of observations                          1000
  Number of missing patterns                         1
                                                      
Model Test User Model:
                                                      
  Test statistic                                25.586
  Degrees of freedom                                19
  P-value (Chi-square)                           0.142

Model Test Baseline Model:

  Test statistic                              1229.322
  Degrees of freedom                                28
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.995
  Tucker-Lewis Index (TLI)                       0.992

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)             -12497.683
  Loglikelihood unrestricted model (H1)     -12484.890
                                                      
  Akaike (AIC)                               25045.366
  Bayesian (BIC)                             25168.060
  Sample-size adjusted Bayesian (BIC)        25088.658

Root Mean Square Error of Approximation:

  RMSEA                                          0.019
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.035
  P-value RMSEA <= 0.05                          1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.018

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  F1 =~                                                                 
    X1                1.000                               1.003    0.696
    X2                0.459    0.046   10.043    0.000    0.460    0.426
    X3                0.836    0.066   12.590    0.000    0.839    0.661
    X4                0.570    0.050   11.450    0.000    0.572    0.504
  F2 =~                                                                 
    X5                1.000                               0.994    0.697
    X6                0.739    0.056   13.239    0.000    0.735    0.604
    X7                0.575    0.048   12.071    0.000    0.572    0.502
    X8                0.803    0.060   13.386    0.000    0.799    0.618

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  F1 ~~                                                                 
    F2                0.185    0.046    4.014    0.000    0.186    0.186

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .X1                0.054    0.046    1.173    0.241    0.054    0.037
   .X2               -0.004    0.034   -0.104    0.917   -0.004   -0.003
   .X3                0.007    0.040    0.182    0.855    0.007    0.006
   .X4                0.004    0.036    0.113    0.910    0.004    0.004
   .X5                0.044    0.045    0.964    0.335    0.044    0.031
   .X6               -0.019    0.038   -0.504    0.614   -0.019   -0.016
   .X7                0.024    0.036    0.674    0.500    0.024    0.021
   .X8                0.010    0.041    0.247    0.805    0.010    0.008
    F1                0.000                               0.000    0.000
    F2                0.000                               0.000    0.000

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .X1                1.073    0.087   12.356    0.000    1.073    0.516
   .X2                0.955    0.047   20.128    0.000    0.955    0.819
   .X3                0.908    0.065   13.887    0.000    0.908    0.563
   .X4                0.961    0.051   18.885    0.000    0.961    0.746
   .X5                1.048    0.079   13.336    0.000    1.048    0.514
   .X6                0.941    0.056   16.867    0.000    0.941    0.636
   .X7                0.972    0.050   19.255    0.000    0.972    0.748
   .X8                1.034    0.063   16.411    0.000    1.034    0.618
    F1                1.006    0.108    9.351    0.000    1.000    1.000
    F2                0.989    0.100    9.853    0.000    1.000    1.000

Mplus

If you have access to Mplus you can use Mplus Automation to prepare the data. The following code is in Mplus syntax and will produce the above model.

library(MplusAutomation)

prepareMplusData(data.frame(y), "factsim.dat")

MODEL:
 F1 BY X1-X4;
 F2 BY X5-X8;

results:
 STDYX;

Source

Original code available at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/cfa.R