#first make psych and psychTools active
library(psych)
library(psychTools)
Some functions are derived from basic principles. Other functions are merely modifications (improvements or extensions) of pre-existing functions.
setCor.diagram

The lmCor function mimics lm for doing regressions. lmCor works from correlation matrices as well. lmCor also draws a figure (using setCor.diagram) showing the path models. lm does not do this. Can we modify setCor.diagram to do this?

lm and lmCor

We use the attitude data set built into R.
From the help page:
From a survey of the clerical employees of a large financial organization, the data are aggregated from the questionnaires of the approximately 35 employees for each of 30 (randomly selected) departments. The numbers give the percent proportion of favourable responses to seven questions in each department.
describe(attitude)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## rating 1 30 64.63 12.17 65.5 65.21 10.38 40 85 45 -0.36 -0.77 2.22
## complaints 2 30 66.60 13.31 65.0 67.08 14.83 37 90 53 -0.22 -0.68 2.43
## privileges 3 30 53.13 12.24 51.5 52.75 10.38 30 83 53 0.38 -0.41 2.23
## learning 4 30 56.37 11.74 56.5 56.58 14.83 34 75 41 -0.05 -1.22 2.14
## raises 5 30 64.63 10.40 63.5 64.50 11.12 43 88 45 0.20 -0.60 1.90
## critical 6 30 74.77 9.89 77.5 75.83 7.41 49 92 43 -0.87 0.17 1.81
## advance 7 30 42.93 10.29 41.0 41.83 8.90 25 72 47 0.85 0.47 1.88
mod.lm <- lm(rating ~ complaints + privileges + raises, data=attitude)
summary(mod.lm)
##
## Call:
## lm(formula = rating ~ complaints + privileges + raises, data = attitude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.3676 -6.0893 -0.0922 6.2167 9.9845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.82308 8.76216 1.463 0.155
## complaints 0.73790 0.14685 5.025 3.15e-05 ***
## privileges -0.05805 0.13264 -0.438 0.665
## raises 0.08898 0.17427 0.511 0.614
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.201 on 26 degrees of freedom
## Multiple R-squared: 0.6862, Adjusted R-squared: 0.65
## F-statistic: 18.95 on 3 and 26 DF, p-value: 1.007e-06
Then do the same using lmCor. Although written originally to work just from correlation matrices, lmCor was extended to work from the raw data.
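As an aside (this example is an addition, based on the lmCor help page), lmCor can still be given a correlation matrix directly; n.obs supplies the sample size needed for the standard errors:
R.att <- cor(attitude)   #the correlations of all seven attitude variables
mod.r <- lmCor(rating ~ complaints + privileges + raises, data=R.att, n.obs=30)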
mod.1 <- lmCor(rating ~ complaints + privileges + raises, data=attitude)
mod.1
## Call: lmCor(y = rating ~ complaints + privileges + raises, data = attitude)
##
## Multiple Regression from raw data
##
## DV = rating
## slope se t p lower.ci upper.ci VIF Vy.x
## (Intercept) 0.00 0.11 0.00 1.0e+00 -0.23 0.23 1.00 0.00
## complaints 0.81 0.16 5.02 3.1e-05 0.48 1.14 2.14 0.67
## privileges -0.06 0.13 -0.44 6.7e-01 -0.33 0.22 1.47 -0.02
## raises 0.08 0.15 0.51 6.1e-01 -0.23 0.38 1.84 0.04
##
## Residual Standard Error = 0.59 with 26 degrees of freedom
##
## Multiple Regression
## R R2 Ruw R2uw Shrunken R2 SE of R2 overall F df1 df2 p
## rating 0.83 0.69 0.73 0.53 0.65 0.08 18.95 3 26 1.01e-06
Why are these different? lmCor by default standardizes the variables before doing the analysis. We can turn this off by saying std=FALSE.
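A quick way to see this (an added check, not in the original text): standardize the variables first and lm reproduces the slopes that lmCor reports by default.
z.attitude <- data.frame(scale(attitude))   #convert every variable to z scores
mod.z <- lm(rating ~ complaints + privileges + raises, data=z.attitude)
round(coef(mod.z),2)   #should match the standardized slopes of mod.1 (.81, -.06, .08)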
mod.2 <- lmCor(rating ~ complaints + privileges + raises, data=attitude,std=FALSE)
mod.2
## Call: lmCor(y = rating ~ complaints + privileges + raises, data = attitude,
## std = FALSE)
##
## Multiple Regression from raw data
##
## DV = rating
## slope se t p lower.ci upper.ci VIF Vy.x
## (Intercept) 12.82 8.76 1.46 1.6e-01 -5.19 30.83 1.00 0.00
## complaints 0.74 0.15 5.02 3.1e-05 0.44 1.04 2.14 0.67
## privileges -0.06 0.13 -0.44 6.7e-01 -0.33 0.21 1.47 -0.02
## raises 0.09 0.17 0.51 6.1e-01 -0.27 0.45 1.84 0.04
##
## Residual Standard Error = 7.2 with 26 degrees of freedom
##
## Multiple Regression
## R R2 Ruw R2uw Shrunken R2 SE of R2 overall F df1 df2 p
## rating 0.83 0.69 0.68 0.46 0.65 0.08 18.95 3 26 1.01e-06
class(mod.lm)
## [1] "lm"
class(mod.2)
## [1] "psych" "setCor"
names(mod.lm)
## [1] "coefficients" "residuals" "effects" "rank" "fitted.values" "assign"
## [7] "qr" "df.residual" "xlevels" "call" "terms" "model"
names(mod.2)
## [1] "coefficients" "se" "t" "Probability" "ci" "R" "R2"
## [8] "shrunkenR2" "seR2" "F" "probF" "df" "SE.resid" "Rset"
## [15] "Rset.shrunk" "Rset.F" "Rsetu" "Rsetv" "T" "cancor" "cancor2"
## [22] "Chisq" "raw" "residual" "ruw" "Ruw" "x.matrix" "y.matrix"
## [29] "VIF" "z" "data" "std" "Vaxy" "Call"
mod.lm$coefficients
## (Intercept) complaints privileges raises
## 12.82308020 0.73789721 -0.05805493 0.08897810
mod.2$coefficients
## rating
## (Intercept) 12.82308020
## complaints 0.73789721
## privileges -0.05805493
## raises 0.08897810
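A quick check (an addition to the original text) confirms that the unstandardized lmCor solution reproduces the lm coefficients exactly:
all.equal(as.vector(mod.lm$coefficients), as.vector(mod.2$coefficients))   #should be TRUE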
lmCor calls setCor.diagram. Let's look at that function to see how it works and figure out how to use it to draw a diagram from the lm output.
setCor.diagram
## function (sc, main = "Regression model", digits = 2, show = FALSE,
## cex = 1, l.cex = 1, ...)
## {
## if (missing(l.cex))
## l.cex <- cex
## beta <- round(sc$coefficients, digits)
## if (rownames(beta)[1] %in% c("(Intercept)", "intercept*",
## "(Intercept)*")) {
## intercept <- TRUE
## }
## else {
## intercept <- FALSE
## }
## x.matrix <- round(sc$x.matrix, digits)
## y.matrix <- round(sc$y.matrix, digits)
## y.resid <- round(sc$resid, digits)
## x.names <- rownames(beta)
## if (intercept) {
## x.matrix <- x.matrix[-intercept, -intercept]
## x.names <- x.names[-intercept]
## beta <- beta[-intercept, , drop = FALSE]
## }
## y.names <- colnames(beta)
## nx <- length(x.names)
## ny <- length(y.names)
## top <- max(nx, ny)
## xlim = c(-nx/3, 10)
## ylim = c(0, top)
## top <- max(nx, ny)
## x <- list()
## y <- list()
## var.list <- arrow.list <- curve.list <- self.list <- list()
## x.scale <- top/(nx + 1)
## y.scale <- top/(ny + 1)
## plot(NA, xlim = xlim, ylim = ylim, main = main, axes = FALSE,
## xlab = "", ylab = "")
## for (i in 1:nx) {
## x[[i]] <- dia.rect(3, top - i * x.scale, x.names[i],
## cex = cex, draw = TRUE, ...)
## var.list <- c(var.list, x.names[i], x[[i]])
## }
## for (j in 1:ny) {
## y[[j]] <- dia.rect(7, top - j * y.scale, y.names[j],
## cex = cex, draw = TRUE, ...)
## var.list <- c(var.list, y.names[j], y[[j]])
## }
## for (i in 1:nx) {
## for (j in 1:ny) {
## d.arrow <- dia.arrow(x[[i]]$right, y[[j]]$left, labels = beta[i,
## j], adj = 4 - j, cex = l.cex, draw = FALSE, ...)
## arrow.list <- c(arrow.list, d.arrow)
## }
## }
## if (nx > 1) {
## for (i in 2:nx) {
## for (k in 1:(i - 1)) {
## dca <- dia.curved.arrow(x[[i]]$left, x[[k]]$left,
## x.matrix[i, k], scale = -(abs(i - k)), both = TRUE,
## dir = "u", cex = l.cex, draw = FALSE, ...)
## curve.list <- c(curve.list, dca)
## }
## }
## }
## if (ny > 1) {
## for (i in 2:ny) {
## for (k in 1:(i - 1)) {
## dca <- dia.curved.arrow(y[[i]]$right, y[[k]]$right,
## y.resid[i, k], scale = (abs(i - k)), dir = "u",
## cex = l.cex, draw = FALSE, ...)
## curve.list <- c(curve.list, dca)
## }
## }
## }
## for (i in 1:ny) {
## d.self <- dia.self(y[[i]], side = 3, scale = 0.2, draw = FALSE,
## ...)
## self.list <- c(self.list, d.self)
## }
## if (show) {
## text((10 - nx/3)/2, 0, paste("Unweighted matrix correlation = ",
## round(sc$Ruw, digits)))
## }
## multi.rect(var.list, ...)
## multi.arrow(arrow.list, ...)
## multi.curved.arrow(curve.list, ...)
## multi.self(self.list, ...)
## }
## <bytecode: 0x11e1685c0>
## <environment: namespace:psych>
Let's examine some of the parameters being passed to the function.
mod.2$x.matrix
## (Intercept) complaints privileges raises
## (Intercept) 1 0.00000 0.00000 0.00000
## complaints 0 177.28276 90.95172 92.64138
## privileges 0 90.95172 149.70575 56.67126
## raises 0 92.64138 56.67126 108.10230
mod.2$y.matrix
## [1] 148.1713
mod.2$resid
## [,1]
## rating 46.49466
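These numbers are easier to interpret if we compare them to the ordinary covariances (a hedged check added here): without the intercept row and column, x.matrix is just the covariance matrix of the predictors.
round(cov(attitude[c("complaints","privileges","raises")]),2)   #should match the x.matrix values above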
To make these numbers slightly more understandable, let's use the standardized regression (mod.1).
mod.1$x.matrix
## (Intercept) complaints privileges raises
## (Intercept) 1 0.0000000 0.0000000 0.0000000
## complaints 0 1.0000000 0.5582882 0.6691975
## privileges 0 0.5582882 1.0000000 0.4454779
## raises 0 0.6691975 0.4454779 1.0000000
mod.1$y.matrix
## [1] 1
mod.1$resid
## [,1]
## rating 0.31379
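For the standardized model the pieces are familiar (another added check): x.matrix holds the predictor correlations, and the residual variance is just 1 - R2.
round(cor(attitude[c("complaints","privileges","raises")]),2)   #the correlations shown in x.matrix
round(1 - mod.1$R2, 2)   #should be about 0.31, matching mod.1$resid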
We have to choose whether we are doing lmCor or lm analyses. Then, if lm, we need to find the covariance matrix from the data rather than have it passed to us.
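The function below makes that choice by counting the number of classes of the input object. A more explicit (hedged, alternative) test would use inherits, which we saw above with class():
from.lm <- inherits(mod.lm, "lm")        #TRUE for lm output
from.lmCor <- inherits(mod.2, "setCor")  #TRUE for lmCor/setCor output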
lmDiagram <- function(sc,main="Regression model",digits=2,show=FALSE,cex=1,l.cex=1,...) {
if(missing(l.cex)) l.cex <- cex
beta <- round(sc$coefficients,digits)
#two cases
#the normal case, the model comes from lmCor
#the 2nd case, the model comes from lm
if(length(class(sc)) ==1) {lm<- TRUE} else { lm <- FALSE}
if(!lm) {#case 1
if(rownames(beta)[1] %in% c("(Intercept)", "intercept*")) {intercept <- TRUE} else {intercept <- FALSE}
x.matrix <- round(sc$x.matrix,digits)
y.matrix <- round(sc$y.matrix,digits)
y.resid <- round(sc$resid,digits)
x.names <- rownames(beta)
if(intercept){ x.matrix <- x.matrix[-intercept,-intercept]  #drop the intercept row and column
x.names <- x.names[-intercept]
beta <- beta[-intercept,,drop=FALSE]  #only drop the intercept row when it is present
}
y.names <- colnames(beta)
} else { x.matrix <- round(cov(sc$model[-1]), digits=digits) #case 2 from lm
x.names <- colnames(sc$model)[-1]
beta <- as.matrix(beta[-1]) #make the vector into a matrix, drop the intercept
y.names <- colnames(sc$model)[1]
}
nx <- length(x.names)
ny <- length(y.names)
top <- max(nx,ny)
xlim=c(-nx/3,10)
ylim=c(0,top)
top <- max(nx,ny)
x <- list()
y <- list()
x.scale <- top/(nx+1)
y.scale <- top/(ny+1)
plot(NA,xlim=xlim,ylim=ylim,main=main,axes=FALSE,xlab="",ylab="")
for(i in 1:nx) {x[[i]] <- dia.rect(3,top-i*x.scale,x.names[i],cex=cex,...) }
for (j in 1:ny) {y[[j]] <- dia.rect(7,top-j*y.scale,y.names[j],cex=cex,...) }
for(i in 1:nx) {
for (j in 1:ny) {
dia.arrow(x[[i]]$right,y[[j]]$left,labels = beta[i,j],adj=4-j,cex=l.cex,...)
}
}
if(nx >1) {
for (i in 2:nx) {
for (k in 1:(i-1)) {dia.curved.arrow(x[[i]]$left,x[[k]]$left,x.matrix[i,k],scale=-(abs(i-k)),both=TRUE,dir="u",cex = l.cex,...)}
#dia.curve(x[[i]]$left,x[[k]]$left,x.matrix[i,k],scale=-(abs(i-k))) }
} }
if(ny>1) {for (i in 2:ny) {
for (k in 1:(i-1)) {dia.curved.arrow(y[[i]]$right,y[[k]]$right,y.resid[i,k],scale=(abs(i-k)),dir="u",cex=l.cex, ...)}
}}
for(i in 1:ny) {dia.self(y[[i]],side=3,scale=.2,... )}
if(show) {text((10-nx/3)/2,0,paste("Unweighted matrix correlation = ",round(sc$Ruw,digits)))}
}
Now try it on mod.2 (the traditional lmCor output) and then on mod.lm (the lm output).
lmDiagram(mod.2, main="Regression from lmCor")
lmDiagram(mod.lm, main="Regression from lm")
The original setCor.diagram function had a few tricks to make the graphics faster. Rather than drawing each shape and line as it is created, we do not show them until the end. We pass parameters to the various diagram functions saying not to draw, but rather to collect the graphics objects and draw them all at the end. Although drawing element by element is not a problem for small models, it gets slow for larger ones.
lmDiagram <- setCor.diagram <- function(sc,main="Regression model",digits=2,show=FALSE,cex=1,l.cex=1,...) {
if(missing(l.cex)) l.cex <- cex
beta <- round(sc$coefficients,digits)
#two cases
#the normal case, the model comes from lmCor
#the 2nd case, the model comes from lm
if(length(class(sc)) ==1) {lm<- TRUE} else { lm <- FALSE}
if(!lm) {#case 1
if(rownames(beta)[1] %in% c("(Intercept)", "intercept*")) {intercept <- TRUE} else {intercept <- FALSE}
x.matrix <- round(sc$x.matrix,digits)
y.matrix <- round(sc$y.matrix,digits)
y.resid <- round(sc$resid,digits)
x.names <- rownames(beta)
if(intercept){ x.matrix <- x.matrix[-intercept,-intercept]  #drop the intercept row and column
x.names <- x.names[-intercept]
beta <- beta[-intercept,,drop=FALSE]  #only drop the intercept row when it is present
}
y.names <- colnames(beta)
} else { x.matrix <- round(cov(sc$model[-1]), digits=digits) #case 2 from lm
x.names <- colnames(sc$model)[-1]
beta <- as.matrix(beta[-1]) #make the vector into a matrix
y.names <- colnames(sc$model)[1]
}
nx <- length(x.names)
ny <- length(y.names)
top <- max(nx,ny)
xlim=c(-nx/3,10)
ylim=c(0,top)
top <- max(nx,ny)
x <- list()
y <- list()
var.list <- arrow.list <- curve.list <- self.list<- list()
x.scale <- top/(nx+1)
y.scale <- top/(ny+1)
plot(NA,xlim=xlim,ylim=ylim,main=main,axes=FALSE,xlab="",ylab="")
for(i in 1:nx) {x[[i]] <- dia.rect(3,top-i*x.scale,x.names[i],cex=cex,draw=TRUE,...)
var.list <- c(var.list,x.names[i],x[[i]])}
for (j in 1:ny) {y[[j]] <- dia.rect(7,top-j*y.scale,y.names[j],cex=cex,draw=TRUE,...)
var.list <- c(var.list,y.names[j],y[[j]]) }
for(i in 1:nx) {
for (j in 1:ny) {
d.arrow <- dia.arrow(x[[i]]$right,y[[j]]$left,labels = beta[i,j],adj=4-j,cex=l.cex,draw=FALSE,...)
arrow.list <- c(arrow.list,d.arrow)
}
}
if(nx >1) {
for (i in 2:nx) {
for (k in 1:(i-1)) {dca <- dia.curved.arrow(x[[i]]$left,x[[k]]$left,x.matrix[i,k],scale=-(abs(i-k)),both=TRUE,dir="u",cex = l.cex,draw=FALSE,...)
curve.list <- c(curve.list,dca)} #dia.curve(x[[i]]$left,x[[k]]$left,x.matrix[i,k],scale=-(abs(i-k))) }
} }
if(ny >1) {for (i in 2:ny) {
for (k in 1:(i-1)) {dca <- dia.curved.arrow(y[[i]]$right,y[[k]]$right,y.resid[i,k],scale=(abs(i-k)),dir="u",cex=l.cex,draw=FALSE, ...)
curve.list <- c(curve.list,dca)}
}}
for(i in 1:ny) {d.self <- dia.self(y[[i]],side=3,scale=.2,draw=FALSE,... )
self.list <- c(self.list,d.self)}
if(show) {text((10-nx/3)/2,0,paste("Unweighted matrix correlation = ",round(sc$Ruw,digits)))}
#now draw all the arrows at once
multi.rect(var.list,...)
multi.arrow(arrow.list,...)
multi.curved.arrow(curve.list,...)
multi.self(self.list,...)
}
lmDiagram(mod.2, main="Regression from lmCor")
lmDiagram(mod.lm, main="Regression from lm")
diagram(mod.1) #the call to the diagram function knows where to go
This concept is used in the diagram function which, depending upon the object passed, will call one of several completely different functions.
"diagram" <- function(fit,...) {
# figure out which psych function was called and then call the appropriate diagram
value <- NULL
omega <- bassAck <- extend <- extension <- lavaan <- fa.reg <- NULL #weird requirement to avoid being global
#if(length(class(fit)) == 1) {if (inherits(fit,"lavaan")) fn <- "lavaan" }
#if(length(class(fit)) > 1) {
names <- cs(lavaan,fa,principal,iclust,omega,lavaan,bassAck, extension, extend, fa.reg, esem, setCor)
value <- inherits(fit,names,which=TRUE) # value <- class(x)[2]
if(any(value > 1) ) { value <- names[which(value > 0)]} else {value <- "other"}
# } else {value <- "other"}
switch(value, #replaced a series of if statements to switch 12/23/18
fa = {fa.diagram(fit,...) },
principal = {fa.diagram(fit,...) },
iclust = {iclust.diagram(fit,...)},
omega = {omega.diagram(fit,...)},
lavaan = {lavaan.diagram(fit,...)},
bassAck = {bassAckward.diagram(fit,...)} ,
extend = {extension.diagram(fit ,...)},
extension ={extension.diagram(fit,...)},
fa.reg ={extension.diagram(fit,...)},
esem = {esem.diagram(fit,...)},
setCor = {setCor.diagram(fit,...)}
)
}
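As a final (hedged) sketch that is not part of psych, the same dispatch idea could be extended so that a wrapper also recognizes lm objects and sends them to our lmDiagram:
diagram2 <- function(fit,...) {
  if(inherits(fit,"lm")) {lmDiagram(fit,...)   #lm objects go to our new function
  } else {diagram(fit,...)}                    #everything else goes to the psych dispatcher
}
diagram2(mod.lm, main="Regression from lm")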
Let's try a few:
f5 <- fa(bfi[1:25],5)
## Loading required namespace: GPArotation
ic <- iclust(bfi[1:25])
om <- omega(ability,4, plot=FALSE)
#now draw these 3 output
diagram(f5)
diagram(ic)
diagram(om)