#Developed 12/1/2025 - 12/21/2025
#still have problems in estimating factor indeterminacy
#code follows the equations for the Spearman method of CFA from Dhaene and Rosseel (SEM, 2024, 31, 1-13)
#Closed form estimation of confirmatory factor analysis
#D and R discuss multiple approaches but recommend the Spearman algorithm as best
#follows a paper by Guttman
#calls several helper functions defined elsewhere (isCorrelation, cor.wt, tetrachoric, polychoric,
#   mixedCor, YuleCor, make.keys, invMatSqrt, fa.stats)

CFA <- function(r, keys=NULL, cor="cor", use="pairwise", n.obs=NA, orthog=FALSE,
       weight=NULL, correct=0, method="regression", missing=FALSE, impute="none") {
 cl <- match.call()
 if(is.list(keys)) {
    select <- sub("-","",unlist(keys))   #to speed up scoring one set of scales from many items
    r <- r[,select] }
 if(!isCorrelation(r)) {n.obs <- nrow(r)
    switch(cor,
       cor = {if(!is.null(weight)) {r <- cor.wt(r,w=weight)$r} else {r <- cor(r,use=use)} },
       cov = {if(!is.null(weight)) {r <- cor.wt(r,w=weight,cor=FALSE)$r} else {r <- cov(r,use=use)}
              covar <- TRUE},   #fixed 10/30/25
       wtd = {r <- cor.wt(r,w=weight)$r},
       spearman = {r <- cor(r,use=use,method="spearman")},
       kendall = {r <- cor(r,use=use,method="kendall")},
       tet = {r <- tetrachoric(r,correct=correct,weight=weight)$rho},
       poly = {r <- polychoric(r,correct=correct,weight=weight)$rho},
       tetrachoric = {r <- tetrachoric(r,correct=correct,weight=weight)$rho},
       polychoric = {r <- polychoric(r,correct=correct,weight=weight)$rho},
       mixed = {r <- mixedCor(r,use=use,correct=correct)$rho},
       Yuleb = {r <- YuleCor(r,bonett=TRUE)$rho},
       YuleQ = {r <- YuleCor(r,1)$rho},
       YuleY = {r <- YuleCor(r,.5)$rho}
       )
    R <- S <- r
    original.data <- r     #save these for scores
    } else {R <- S <- r
       if(is.na(n.obs)) n.obs <- 100
       original.data <- NULL}
 diagS <- diag(S)    #save this for later
 if(is.null(keys)) {X <- matrix(rep(1,ncol(S)),ncol=1)} else {
    if(is.list(keys)) {X <- make.keys(S,keys)} else {
       if(is.matrix(keys)) {X <- keys} else {X <- lavParse(keys)}   #just a matrix of 1, 0, -1
       }   #converts the keys variables to a 1, 0 matrix (if not already in that form)
    }
 #flip correlations to go along with the directions of the keys
 flip <- X %*% rep(1,ncol(X))   #combine all keys into one to allow us to select the overall model
 flipd <- diag(nrow=ncol(S), ncol=ncol(S))
 diag(flipd) <- flip
 S <- flipd %*% S %*% t(flipd)   #negatively keyed variables are flipped
 mask <- abs(X %*% t(X))
 #S <- mask * S    #select a block diagonal form
 #need to find the communalities for each factor using the Spearman method
 nfact <- ncol(X)
 nvar <- nrow(X)
 dof <- nvar * (nvar-1)/2 - nvar - (!orthog) * nfact*(nfact-1)/2   #number of correlations - number of loadings estimated - number of factor correlations (if oblique)
 keys <- X
 X <- abs(X)
 H2 <- NULL
 for (i in 1:nfact) {   #this is the brute force way of doing it, look more carefully at D and R
    H2 <- c(H2,Spearman.h2(S[X[,i]>0,X[,i]>0]))}   #H2 is the vector of 1 factor communalities
 diag(S) <- H2    #put the communalities on the diagonal
 L0 <- t(X) %*% S           #DR equation 4
 P0 <- t(X) %*% S %*% X     #equation 5
 if(ncol(P0) > 1) {
    D <- diag(diag(P0))     #variances
    L <- t(L0) %*% invMatSqrt(D)                  #divide by sd, eq 6, factor structure
    P <- invMatSqrt(D) %*% P0 %*% invMatSqrt(D)   #converts P0 to correlations, eq 7
    if(orthog) {P <- diag(1,nfact)}
    #Lambda <- L %*% solve(P)   #not simple structure --- better to use (L * X) %*% P, eq 8
    Lambda <- (L * X) %*% P     #not equation 8, but produces the Pattern
    flipf <- matrix(flip,ncol=nfact,nrow=nrow(Lambda))
    Lambda <- Lambda * flipf    #get the signs right
    L <- L * flipf
    colnames(Lambda) <- colnames(X)   # paste0("CF",1:nfact)
    rownames(Lambda) <- colnames(R)
    colnames(P) <- rownames(P) <- colnames(Lambda)
    #Ls <- Lambda * abs(keys)   #forced simple structure
    L <- L * X    #force a simple structure
    } else {
    D <- diag(1)
    L <- Lambda <- as.matrix(sqrt(diag(S)))
    rownames(L) <- colnames(R)
    if(is.null(colnames(L))) colnames(L) <- "CF1"
    P <- NULL
    Ls <- NULL
    Phi <- NULL}
 Lambda <- as.matrix(Lambda)
 #scores <- factor.scores(x=original.data,f=L,Phi=P,method=method, missing=missing,impute=impute)
 scores <- NA
 stats <- fa.stats(r=R,f=L,phi=P,n.obs=n.obs, dof=dof)
 result <- list(loadings=L, Phi=P, Pattern=Lambda, communalities=H2, dof=dof, stats=stats,
       rms=stats$rms, RMSEA=stats$RMSEA[1], chi=stats$chi, BIC=stats$BIC,
       STATISTIC=stats$STATISTIC, Call=cl)
 result$scores <- scores
 class(result) <- c("psych", "CFA")
 return(result)
}

sumLower <- function(R) {
   return(sum(R[lower.tri(R)]))}

Spearman.h2 <- function(R) {   #from Harman chapter 7
   diag(R) <- 0
   sumr <- colSums(R)
   sumr2 <- colSums(R^2)
   h2 <- (sumr^2 - sumr2)/(2*(sumLower(R) - sumr))
   return(h2)}

##########
#convert lavaan cfa instructions into matrix form
#12/11/25 to help CFA
lavParse <- function(model) {
   short <- gsub(" ", "", model)   #drop all blanks
   #first, break by lines
   lines <- strsplit(short,"\n")   #break into a new line for each factor
   fact <- strsplit(lines[[1]],"=~")   #break into factor names and variables
   fnames <- 1:length(fact)
   vnames <- NULL
   for (i in 1:length(fact)) {fnames[i] <- fact[[i]][1]
      vect <- strsplit(fact[[i]][2],"\\+")
      for (j in 1:length(vect[[1]])) {vnames <- c(vnames,vect[[1]][j])}
      }
   v.mat <- matrix(0,ncol=length(fact),nrow=length(vnames))
   colnames(v.mat) <- fnames
   rownames(v.mat) <- vnames
   #now put the 1s in
   for (i in 1:length(fact)) {fnames[i] <- fact[[i]][1]
      vect <- strsplit(fact[[i]][2],"\\+")
      for (j in 1:length(vect[[1]])) {v.mat[vect[[1]][j],i] <- 1}
      }
   return(v.mat)}

#####
if(FALSE){   #test set from Harman Table 7.1, p. 116
har5 <- structure(c(1, 0.485, 0.4, 0.397, 0.295, 0.485, 1, 0.397, 0.397,
    0.247, 0.4, 0.397, 1, 0.335, 0.275, 0.397, 0.397, 0.335, 1, 0.195,
    0.295, 0.247, 0.275, 0.195, 1), dim = c(5L, 5L), dimnames = list(
    c("V1", "V2", "V3", "V4", "V5"), c("V1", "V2", "V3", "V4", "V5")))

#this works for multiple factors
#test 4 factor model
}
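
#A minimal usage sketch (not one of the package tests): it assumes the helper
#functions called above (isCorrelation, make.keys, invMatSqrt, fa.stats, etc.)
#are available, e.g. from the psych package, and that the har5 matrix in the
#test block above has been evaluated.  The factor name "g" and the particular
#calls are illustrative choices, not taken from Dhaene and Rosseel.
if(FALSE){
#one general factor: with keys = NULL every variable is keyed on a single factor,
#so the loadings are just the square roots of the Spearman one-factor communalities
f1 <- CFA(har5)        #n.obs defaults to 100 when a correlation matrix is supplied
f1$loadings            #the factor loadings
f1$communalities       #the Spearman.h2 estimates
round(har5 - f1$loadings %*% t(f1$loadings), 3)   #off-diagonal residuals of the one factor model

#lavParse converts lavaan-style model syntax (one factor per line) into the
#0/1 keys matrix that CFA expects
lavParse("g =~ V1 + V2 + V3 + V4 + V5")
}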