#first make psych and psych tools active
library(psych)
library(psychTools)

Modifying (Improving) a function

Some functions are derived from basic principles. Other functions are merely modifications (improvements - extensions) of pre existing functions.

Consider setCor.diagram.

The lmCor function mimics lm for doing regressions. lmCor works from correlation matrices as well. lmCor also draws a figure (using setCor.diagram') showing the path models.lmdoes not do this. Can we modifysetCor.diagram’ to do this?

First compare the output of lm and lmCor.

We use the attitude data set built into R.

From the help page:

From a survey of the clerical employees of a large financial organization, the data are aggregated from the questionnaires of the approximately 35 employees for each of 30 (randomly selected) departments. The numbers give the percent proportion of favourable responses to seven questions in each department.

describe(attitude)
##            vars  n  mean    sd median trimmed   mad min max range  skew kurtosis   se
## rating        1 30 64.63 12.17   65.5   65.21 10.38  40  85    45 -0.36    -0.77 2.22
## complaints    2 30 66.60 13.31   65.0   67.08 14.83  37  90    53 -0.22    -0.68 2.43
## privileges    3 30 53.13 12.24   51.5   52.75 10.38  30  83    53  0.38    -0.41 2.23
## learning      4 30 56.37 11.74   56.5   56.58 14.83  34  75    41 -0.05    -1.22 2.14
## raises        5 30 64.63 10.40   63.5   64.50 11.12  43  88    45  0.20    -0.60 1.90
## critical      6 30 74.77  9.89   77.5   75.83  7.41  49  92    43 -0.87     0.17 1.81
## advance       7 30 42.93 10.29   41.0   41.83  8.90  25  72    47  0.85     0.47 1.88
mod.lm <- lm(rating ~ complaints + privileges + raises, data=attitude)
summary(mod.lm)
## 
## Call:
## lm(formula = rating ~ complaints + privileges + raises, data = attitude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.3676  -6.0893  -0.0922   6.2167   9.9845 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.82308    8.76216   1.463    0.155    
## complaints   0.73790    0.14685   5.025 3.15e-05 ***
## privileges  -0.05805    0.13264  -0.438    0.665    
## raises       0.08898    0.17427   0.511    0.614    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.201 on 26 degrees of freedom
## Multiple R-squared:  0.6862, Adjusted R-squared:   0.65 
## F-statistic: 18.95 on 3 and 26 DF,  p-value: 1.007e-06

Then do the same using lmCor. Although written originally to just work from correlation matrices, lmCor was extended to work from the raw data.

mod.1 <- lmCor(rating ~ complaints + privileges + raises, data=attitude)

mod.1
## Call: lmCor(y = rating ~ complaints + privileges + raises, data = attitude)
## 
## Multiple Regression from raw data 
## 
##  DV =  rating 
##             slope   se     t       p lower.ci upper.ci  VIF  Vy.x
## (Intercept)  0.00 0.11  0.00 1.0e+00    -0.23     0.23 1.00  0.00
## complaints   0.81 0.16  5.02 3.1e-05     0.48     1.14 2.14  0.67
## privileges  -0.06 0.13 -0.44 6.7e-01    -0.33     0.22 1.47 -0.02
## raises       0.08 0.15  0.51 6.1e-01    -0.23     0.38 1.84  0.04
## 
## Residual Standard Error =  0.59  with  26  degrees of freedom
## 
##  Multiple Regression
##           R   R2  Ruw R2uw Shrunken R2 SE of R2 overall F df1 df2        p
## rating 0.83 0.69 0.73 0.53        0.65     0.08     18.95   3  26 1.01e-06

Why are these different? lmCor by default standardizes the variables before doing the analysis. We can turn this off by saying std=FALSE.

mod.2 <- lmCor(rating ~ complaints + privileges + raises, data=attitude,std=FALSE)

mod.2
## Call: lmCor(y = rating ~ complaints + privileges + raises, data = attitude, 
##     std = FALSE)
## 
## Multiple Regression from raw data 
## 
##  DV =  rating 
##             slope   se     t       p lower.ci upper.ci  VIF  Vy.x
## (Intercept) 12.82 8.76  1.46 1.6e-01    -5.19    30.83 1.00  0.00
## complaints   0.74 0.15  5.02 3.1e-05     0.44     1.04 2.14  0.67
## privileges  -0.06 0.13 -0.44 6.7e-01    -0.33     0.21 1.47 -0.02
## raises       0.09 0.17  0.51 6.1e-01    -0.27     0.45 1.84  0.04
## 
## Residual Standard Error =  7.2  with  26  degrees of freedom
## 
##  Multiple Regression
##           R   R2  Ruw R2uw Shrunken R2 SE of R2 overall F df1 df2        p
## rating 0.83 0.69 0.68 0.46        0.65     0.08     18.95   3  26 1.01e-06

examine the names and classes of mod2 and mod.lm

class(mod.lm)
## [1] "lm"
class(mod.2)
## [1] "psych"  "setCor"
names(mod.lm)
##  [1] "coefficients"  "residuals"     "effects"       "rank"          "fitted.values" "assign"       
##  [7] "qr"            "df.residual"   "xlevels"       "call"          "terms"         "model"
names(mod.2)
##  [1] "coefficients" "se"           "t"            "Probability"  "ci"           "R"            "R2"          
##  [8] "shrunkenR2"   "seR2"         "F"            "probF"        "df"           "SE.resid"     "Rset"        
## [15] "Rset.shrunk"  "Rset.F"       "Rsetu"        "Rsetv"        "T"            "cancor"       "cancor2"     
## [22] "Chisq"        "raw"          "residual"     "ruw"          "Ruw"          "x.matrix"     "y.matrix"    
## [29] "VIF"          "z"            "data"         "std"          "Vaxy"         "Call"

compare some of these elements

mod.lm$coefficients
## (Intercept)  complaints  privileges      raises 
## 12.82308020  0.73789721 -0.05805493  0.08897810
mod.2$coefficients
##                  rating
## (Intercept) 12.82308020
## complaints   0.73789721
## privileges  -0.05805493
## raises       0.08897810

lmCor calls setCor.diagram

Lets look at that function to see how it works and figure out how to use it to do diagram from the lm output.

setCor.diagram
## function (sc, main = "Regression model", digits = 2, show = FALSE, 
##     cex = 1, l.cex = 1, ...) 
## {
##     if (missing(l.cex)) 
##         l.cex <- cex
##     beta <- round(sc$coefficients, digits)
##     if (rownames(beta)[1] %in% c("(Intercept)", "intercept*", 
##         "(Intercept)*")) {
##         intercept <- TRUE
##     }
##     else {
##         intercept <- FALSE
##     }
##     x.matrix <- round(sc$x.matrix, digits)
##     y.matrix <- round(sc$y.matrix, digits)
##     y.resid <- round(sc$resid, digits)
##     x.names <- rownames(beta)
##     if (intercept) {
##         x.matrix <- x.matrix[-intercept, -intercept]
##         x.names <- x.names[-intercept]
##         beta <- beta[-intercept, , drop = FALSE]
##     }
##     y.names <- colnames(beta)
##     nx <- length(x.names)
##     ny <- length(y.names)
##     top <- max(nx, ny)
##     xlim = c(-nx/3, 10)
##     ylim = c(0, top)
##     top <- max(nx, ny)
##     x <- list()
##     y <- list()
##     var.list <- arrow.list <- curve.list <- self.list <- list()
##     x.scale <- top/(nx + 1)
##     y.scale <- top/(ny + 1)
##     plot(NA, xlim = xlim, ylim = ylim, main = main, axes = FALSE, 
##         xlab = "", ylab = "")
##     for (i in 1:nx) {
##         x[[i]] <- dia.rect(3, top - i * x.scale, x.names[i], 
##             cex = cex, draw = TRUE, ...)
##         var.list <- c(var.list, x.names[i], x[[i]])
##     }
##     for (j in 1:ny) {
##         y[[j]] <- dia.rect(7, top - j * y.scale, y.names[j], 
##             cex = cex, draw = TRUE, ...)
##         var.list <- c(var.list, y.names[j], y[[j]])
##     }
##     for (i in 1:nx) {
##         for (j in 1:ny) {
##             d.arrow <- dia.arrow(x[[i]]$right, y[[j]]$left, labels = beta[i, 
##                 j], adj = 4 - j, cex = l.cex, draw = FALSE, ...)
##             arrow.list <- c(arrow.list, d.arrow)
##         }
##     }
##     if (nx > 1) {
##         for (i in 2:nx) {
##             for (k in 1:(i - 1)) {
##                 dca <- dia.curved.arrow(x[[i]]$left, x[[k]]$left, 
##                   x.matrix[i, k], scale = -(abs(i - k)), both = TRUE, 
##                   dir = "u", cex = l.cex, draw = FALSE, ...)
##                 curve.list <- c(curve.list, dca)
##             }
##         }
##     }
##     if (ny > 1) {
##         for (i in 2:ny) {
##             for (k in 1:(i - 1)) {
##                 dca <- dia.curved.arrow(y[[i]]$right, y[[k]]$right, 
##                   y.resid[i, k], scale = (abs(i - k)), dir = "u", 
##                   cex = l.cex, draw = FALSE, ...)
##                 curve.list <- c(curve.list, dca)
##             }
##         }
##     }
##     for (i in 1:ny) {
##         d.self <- dia.self(y[[i]], side = 3, scale = 0.2, draw = FALSE, 
##             ...)
##         self.list <- c(self.list, d.self)
##     }
##     if (show) {
##         text((10 - nx/3)/2, 0, paste("Unweighted matrix correlation  = ", 
##             round(sc$Ruw, digits)))
##     }
##     multi.rect(var.list, ...)
##     multi.arrow(arrow.list, ...)
##     multi.curved.arrow(curve.list, ...)
##     multi.self(self.list, ...)
## }
## <bytecode: 0x11e1685c0>
## <environment: namespace:psych>

Lets examine some of the parameters being passed to the function

mod.2$x.matrix
##             (Intercept) complaints privileges    raises
## (Intercept)           1    0.00000    0.00000   0.00000
## complaints            0  177.28276   90.95172  92.64138
## privileges            0   90.95172  149.70575  56.67126
## raises                0   92.64138   56.67126 108.10230
mod.2$y.matrix
## [1] 148.1713
mod.2$resid
##            [,1]
## rating 46.49466

To make these numbers slightly more understandable, lets use the standardized regression (mod.1).

mod.1$x.matrix
##             (Intercept) complaints privileges    raises
## (Intercept)           1  0.0000000  0.0000000 0.0000000
## complaints            0  1.0000000  0.5582882 0.6691975
## privileges            0  0.5582882  1.0000000 0.4454779
## raises                0  0.6691975  0.4454779 1.0000000
mod.1$y.matrix
## [1] 1
mod.1$resid
##           [,1]
## rating 0.31379

A revised setCor.diagram function

We have to choose whether we are doing lmCor or lm analyses.

Then, if lm we need to find the covariance matrix from the data, rather than have it passed to us.

lmDiagram <- function(sc,main="Regression model",digits=2,show=FALSE,cex=1,l.cex=1,...) { 
if(missing(l.cex)) l.cex <- cex  

beta <- round(sc$coefficients,digits)
#two cases
#the normal case, the model comes from lmCor
#the 2nd case, the model comes from lm
if(length(class(sc)) ==1) {lm<- TRUE} else { lm <- FALSE}
if(!lm) {#case 1
if(rownames(beta)[1] %in% c("(Intercept)", "intercept*")) {intercept <- TRUE} else {intercept <- FALSE}
   
x.matrix <- round(sc$x.matrix,digits)

y.matrix <- round(sc$y.matrix,digits)
y.resid <- round(sc$resid,digits)
x.names <- rownames(beta)

if(intercept){ x.matrix <- x.matrix[-intercept,-intercept]
 x.names <- x.names[-intercept] 
 }
beta <- beta[-intercept,,drop=FALSE]
y.names <- colnames(beta)
} else { x.matrix <- round(cov(sc$model[-1]), digits=digits)  #case 2  from lm
       x.names <- colnames(sc$model)[-1]
       beta <- as.matrix(beta[-1]) #make the vector into a matrix, drop the intercept
      y.names <-  colnames(sc$model)[1]
      }
      
nx <- length(x.names)
ny <- length(y.names) 
top <- max(nx,ny)
xlim=c(-nx/3,10)
ylim=c(0,top)
top <- max(nx,ny)
x <- list()
y <- list()
x.scale <- top/(nx+1)
y.scale <- top/(ny+1)
plot(NA,xlim=xlim,ylim=ylim,main=main,axes=FALSE,xlab="",ylab="")
for(i in 1:nx) {x[[i]] <- dia.rect(3,top-i*x.scale,x.names[i],cex=cex,...) }
 
for (j in 1:ny) {y[[j]] <- dia.rect(7,top-j*y.scale,y.names[j],cex=cex,...) }
for(i in 1:nx) {
  for (j in 1:ny) {
   dia.arrow(x[[i]]$right,y[[j]]$left,labels = beta[i,j],adj=4-j,cex=l.cex,...)
   }
} 
if(nx >1) {
  for (i in 2:nx) {
  for (k in 1:(i-1)) {dia.curved.arrow(x[[i]]$left,x[[k]]$left,x.matrix[i,k],scale=-(abs(i-k)),both=TRUE,dir="u",cex = l.cex,...)}
                      #dia.curve(x[[i]]$left,x[[k]]$left,x.matrix[i,k],scale=-(abs(i-k)))  } 
  } }
  
  if(ny>1) {for (i in 2:ny) {
  for (k in 1:(i-1)) {dia.curved.arrow(y[[i]]$right,y[[k]]$right,y.resid[i,k],scale=(abs(i-k)),dir="u",cex=l.cex, ...)} 
  }}
  for(i in 1:ny) {dia.self(y[[i]],side=3,scale=.2,... )}
 if(show) {text((10-nx/3)/2,0,paste("Unweighted matrix correlation  = ",round(sc$Ruw,digits)))}
}

Now try it on mod.2 (the traditional lmCor output) and then on mod.lm (the lm output)

lmDiagram(mod.2, main="Regression from lmCor")

lmDiagram(mod.lm, main="Regression from lm")

Code to speed up the graphics

The original setCor.diagram function had a few tricks to make the graphics faster. Rather than draw each shape and line separately we can not show show them until the last.

We pass parameters to the various diagram functions saying not to draw, but rather collect the graphics to the end. Although not a problem for small problems, it gets tedious for larger problems.

lmDiagram <- setCor.diagram <- function(sc,main="Regression model",digits=2,show=FALSE,cex=1,l.cex=1,...) { 
if(missing(l.cex)) l.cex <- cex  

beta <- round(sc$coefficients,digits)
#two cases
#the normal case, the model comes from lmCor
#the 2nd case, the model comes from lm
if(length(class(sc)) ==1) {lm<- TRUE} else { lm <- FALSE}
if(!lm) {#case 1
if(rownames(beta)[1] %in% c("(Intercept)", "intercept*")) {intercept <- TRUE} else {intercept <- FALSE}
   
x.matrix <- round(sc$x.matrix,digits)

y.matrix <- round(sc$y.matrix,digits)
y.resid <- round(sc$resid,digits)
x.names <- rownames(beta)

if(intercept){ x.matrix <- x.matrix[-intercept,-intercept]
 x.names <- x.names[-intercept] 
 }
beta <- beta[-intercept,,drop=FALSE]
y.names <- colnames(beta)
} else { x.matrix <- round(cov(sc$model[-1]), digits=digits)  #case 2  from lmkl
       x.names <- colnames(sc$model)[-1]
       beta <- as.matrix(beta[-1]) #make the vector into a matrix
      y.names <-  colnames(sc$model)[1]
      }
      
nx <- length(x.names)
ny <- length(y.names) 
top <- max(nx,ny)
xlim=c(-nx/3,10)
ylim=c(0,top)
top <- max(nx,ny)
x <- list()
y <- list()
var.list <- arrow.list <-  curve.list <- self.list<- list()
x.scale <- top/(nx+1)
y.scale <- top/(ny+1)
plot(NA,xlim=xlim,ylim=ylim,main=main,axes=FALSE,xlab="",ylab="")
for(i in 1:nx) {x[[i]] <- dia.rect(3,top-i*x.scale,x.names[i],cex=cex,draw=TRUE,...) 
               var.list <- c(var.list,x.names[i],x[[i]])}
 
for (j in 1:ny) {y[[j]] <- dia.rect(7,top-j*y.scale,y.names[j],cex=cex,draw=TRUE,...)
                  var.list <- c(var.list,y.names[j],y[[j]]) }
for(i in 1:nx) {
  for (j in 1:ny) {
  
  d.arrow <-  dia.arrow(x[[i]]$right,y[[j]]$left,labels = beta[i,j],adj=4-j,cex=l.cex,draw=FALSE,...)
   arrow.list <- c(arrow.list,d.arrow)
   }
} 
if(nx >1) {
  for (i in 2:nx) {
  for (k in 1:(i-1)) {dca <- dia.curved.arrow(x[[i]]$left,x[[k]]$left,x.matrix[i,k],scale=-(abs(i-k)),both=TRUE,dir="u",cex = l.cex,draw=FALSE,...)
                     curve.list <- c(curve.list,dca)} #dia.curve(x[[i]]$left,x[[k]]$left,x.matrix[i,k],scale=-(abs(i-k)))  } 
  } }
  
  if(ny >1) {for (i in 2:ny) {
  for (k in 1:(i-1)) {dca <- dia.curved.arrow(y[[i]]$right,y[[k]]$right,y.resid[i,k],scale=(abs(i-k)),dir="u",cex=l.cex,draw=FALSE, ...)
     curve.list <- c(curve.list,dca)} 
  }}
  for(i in 1:ny) {d.self <- dia.self(y[[i]],side=3,scale=.2,draw=FALSE,... )
          self.list <- c(self.list,d.self)}
 if(show) {text((10-nx/3)/2,0,paste("Unweighted matrix correlation  = ",round(sc$Ruw,digits)))}
 
 
 #now draw all the arrows at once
 multi.rect(var.list,...)
 multi.arrow(arrow.list,...)
 multi.curved.arrow(curve.list,...)
 multi.self(self.list,...)
}

test this one as well

lmDiagram(mod.2, main="Regression from lmCor")

lmDiagram(mod.lm, main="Regression from lm")

diagram(mod.1)  #the call to the diagram function knows where to go

Functions can redirect to other functions depending on the call

This concept is used in the diagram function which depending upon the object passed, will call one of some completely different functions.

"diagram" <- function(fit,...) {
# figure out which psych function was called and then call the appropriate diagram
value <- NULL
omega <- bassAck <- extend <- extension <-  lavaan <- fa.reg <-  NULL    #weird requirement to avoid being global
#if(length(class(fit)) == 1) {if (inherits(fit,"lavaan")) fn <- "lavaan" } 


 #if(length(class(fit)) > 1)  {
    names <- cs(lavaan,fa,principal,iclust,omega,lavaan,bassAck, extension, extend, fa.reg, esem, setCor)
    value <- inherits(fit,names,which=TRUE)  # value <- class(x)[2]
    if(any(value > 1) ) { value <- names[which(value > 0)]} else {value <- "other"}
    
#     } else {value <- "other"}

switch(value,  #replaced a series of if statements to switch  12/23/18
fa  = {fa.diagram(fit,...) },
principal = {fa.diagram(fit,...) },
iclust = {iclust.diagram(fit,...)},
omega = {omega.diagram(fit,...)},
lavaan =  {lavaan.diagram(fit,...)}, 
bassAck = {bassAckward.diagram(fit,...)} ,
extend = {extension.diagram(fit ,...)},
extension ={extension.diagram(fit,...)},
fa.reg ={extension.diagram(fit,...)},
esem = {esem.diagram(fit,...)},
setCor = {setCor.diagram(fit,...)}
)
}

Lets try a few:

f5 <- fa(bfi[1:25],5)
## Loading required namespace: GPArotation
ic <- iclust(bfi[1:25])

om <- omega(ability,4, plot=FALSE)
#now draw these 3 output
diagram(f5)

diagram(ic)

diagram(om)