library(psych) #make it active
library(psychTools)
sessionInfo() #psych should be 2.4.4
## R version 4.4.0 beta (2024-04-12 r86412)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.4.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Chicago
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] psychTools_2.4.4 psych_2.4.4
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-164 cli_3.6.1 knitr_1.43 rlang_1.1.1 xfun_0.39
## [6] jsonlite_1.8.5 rtf_0.4-14.1 htmltools_0.5.8.1 sass_0.4.9 rmarkdown_2.26
## [11] grid_4.4.0 evaluate_0.21 jquerylib_0.1.4 fastmap_1.1.1 yaml_2.3.7
## [16] lifecycle_1.0.3 compiler_4.4.0 rstudioapi_0.16.0 R.oo_1.26.0 lattice_0.22-6
## [21] digest_0.6.31 R6_2.5.1 foreign_0.8-86 mnormt_2.1.1 parallel_4.4.0
## [26] bslib_0.7.0 R.methodsS3_1.8.2 tools_4.4.0 cachem_1.0.8
# Linear Regression from Raw data
A more complete demonstration of regression using the lm and lmCor functions. We also use the partial.r, lowerCor, corPlot, and lowerUpper functions.
First, get a simulated data set, describe it, and show the correlations as a heat map, using a slightly more pleasing color gradient. (Taken from the help page for corPlot.)
datafilename="http://personality-project.org/r/datasets/psychometrics.prob2.txt"
grad <- read.table(datafilename,header=TRUE) #read the data file
describe(grad) #describing the data first is important
## vars n mean sd median trimmed mad min max range skew kurtosis se
## ID 1 1000 500.50 288.82 500.50 500.50 370.65 1.0 1000.00 999.00 0.00 -1.20 9.13
## GREV 2 1000 499.77 106.11 497.50 498.74 106.01 138.0 873.00 735.00 0.09 -0.07 3.36
## GREQ 3 1000 500.53 103.85 498.00 498.51 105.26 191.0 914.00 723.00 0.22 0.08 3.28
## GREA 4 1000 498.13 100.45 495.00 498.67 99.33 207.0 848.00 641.00 -0.02 -0.06 3.18
## Ach 5 1000 49.93 9.84 50.00 49.88 10.38 16.0 79.00 63.00 0.00 0.02 0.31
## Anx 6 1000 50.32 9.91 50.00 50.43 10.38 14.0 78.00 64.00 -0.14 0.14 0.31
## Prelim 7 1000 10.03 1.06 10.00 10.02 1.48 7.0 13.00 6.00 -0.02 -0.01 0.03
## GPA 8 1000 4.00 0.50 4.02 4.01 0.53 2.5 5.38 2.88 -0.07 -0.29 0.02
## MA 9 1000 3.00 0.49 3.00 3.00 0.44 1.4 4.50 3.10 -0.07 -0.09 0.02
gr <- colorRampPalette(c("#B52127", "white", "#2171B5"))
corPlot(grad,numbers=TRUE,gr=gr,main="The (simulated) grad data set")
The correlation reflects the linear relationship between two variables. For pairs of standardized variables, for any value of x, we expect \[\hat{y} = r_{xy} x\].
The squared correlation is an index of how much of the variance in one variable is associated with variability in the other variable:
\[ \sigma_{\hat{y}}^2 = r_{xy}^2 \sigma_y^2 \] And the unexplained residual variance will be
\[ \sigma_y^2 - \sigma_{\hat{y}}^2 = (1 - r_{xy}^2) \sigma_y^2\]
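As a minimal check of these formulas (a small sketch added here, using the grad data read in above), the slope for standardized variables is just r, and the residual variance is (1 - r^2) times the variance of y.
z.x <- scale(grad$GREA)                     #standardize the predictor
z.y <- scale(grad$GPA)                      #standardize the criterion
coef(lm(z.y ~ z.x))[2]                      #the slope for standardized variables
cor(grad$GREA, grad$GPA)                    #is just r(xy)
var(resid(lm(z.y ~ z.x)))                   #the residual variance ...
(1 - cor(grad$GREA, grad$GPA)^2) * var(z.y) #... matches (1 - r^2) sigma_y^2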
The total relationship between the variables reflects some overlap of effects. We can find the partial correlation between variables by using the partial.r function. Just as a correlation is the ratio of the covariance to the square root of the product of the variances, the partial r is the reduced covariance divided by the square root of the product of the partial (residual) variances:
\[r_{xy} = \frac{cov_{xy}}{\sqrt{\sigma_x^2 \sigma_y^2}}\]
The partial r is:
\[r_{xy.z} = \frac{r_{xy} - r_{xz}r_{yz}}{\sqrt{(1-r_{xz}^2) (1-r_{yz}^2)}}\].
This is logically the same as the correlation of x with z removed and y with z removed. That is, the correlation of the residual x with the residual y.
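To see that this is so, here is a small illustration (added to the handout): the partial correlation of GREV and GPA with GREA removed should match the correlation of the two sets of residuals.
r.xy.z <- partial.r(grad, c("GREV","GPA"), "GREA") #partial correlations of GREV and GPA, removing GREA
res.x <- resid(lm(GREV ~ GREA, data=grad))         #GREV with GREA removed
res.y <- resid(lm(GPA ~ GREA, data=grad))          #GPA with GREA removed
cor(res.x, res.y)                                  #should match the off diagonal element of r.xy.z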
The partial.r function will also do this for the entire matrix at once.
rp <- partial.r(grad) #find the partial correlations
lowerMat(rp) #show them in a table
## ID GREV GREQ GREA Ach Anx Prelm GPA MA
## ID 1.00
## GREV -0.01 1.00
## GREQ 0.01 0.45 1.00
## GREA -0.01 0.39 0.28 1.00
## Ach -0.01 -0.22 -0.14 0.36 1.00
## Anx -0.02 0.15 0.09 -0.26 -0.34 1.00
## Prelim 0.03 0.08 0.02 0.22 0.08 0.00 1.00
## GPA -0.01 0.12 0.03 0.13 0.09 -0.04 0.15 1.00
## MA -0.01 0.05 0.01 0.15 0.06 -0.04 0.11 0.06 1.00
corPlot(rp,numbers=TRUE,main="Partial Correlations") #show them graphically
We can compare two correlation matrices using the lowerUpper function in combination with corPlot. We rotate the x axis labels to be vertical using the xlas option.
r <- lowerCor(grad) #find the correlations, save the results
## ID GREV GREQ GREA Ach Anx Prelm GPA MA
## ID 1.00
## GREV -0.01 1.00
## GREQ 0.00 0.73 1.00
## GREA -0.01 0.64 0.60 1.00
## Ach 0.00 0.01 0.01 0.45 1.00
## Anx -0.01 0.01 0.01 -0.39 -0.56 1.00
## Prelim 0.02 0.43 0.38 0.57 0.30 -0.23 1.00
## GPA 0.00 0.42 0.37 0.52 0.28 -0.22 0.42 1.00
## MA -0.01 0.32 0.29 0.45 0.26 -0.22 0.36 0.31 1.00
upper <- rp #this is the matrix of partial correlations
both <- lowerUpper(lower=r,upper = upper) #put them together
corPlot(both,numbers=TRUE,main="Correlations and partial correlations")
both <- lowerUpper(lower=r,upper = upper, diff=TRUE) #now plot the differences
corPlot(both,numbers=TRUE,main="Differences between correlations and partial r",xlas=3)
What predicts performance on the graduate GPA? From the correlation matrix, we see that all three GRE scores do a good job. But they are highly correlated with each other, which is why the partial correlations were noticeably smaller. But still, what is the unique effect of each predictor?
mod1 <- lm(GPA ~ GREV + GREQ + GREA, data=grad)
summary(mod1)
##
## Call:
## lm(formula = GPA ~ GREV + GREQ + GREA, data = grad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.26681 -0.30376 0.00734 0.30505 1.30217
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.5953021 0.0759443 34.174 < 2e-16 ***
## GREV 0.0006661 0.0002004 3.324 0.000921 ***
## GREQ 0.0000775 0.0001958 0.396 0.692329
## GREA 0.0020803 0.0001806 11.519 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4265 on 996 degrees of freedom
## Multiple R-squared: 0.2799, Adjusted R-squared: 0.2777
## F-statistic: 129 on 3 and 996 DF, p-value: < 2.2e-16
As is true of most R functions, lm returns much more than is displayed. We can see this using either the str or the names command.
names(mod1) #show the names of the objects. Some of these have subobjects
## [1] "coefficients" "residuals" "effects" "rank" "fitted.values" "assign"
## [7] "qr" "df.residual" "xlevels" "call" "terms" "model"
We can examine the multiple correlation by looking at the original variables, the fitted (predicted) values, as well as the residuals (what is left over after we fit the data).
modelfits <- data.frame(c(grad[2:4],grad[8]),fitted=mod1$fitted.values,residuals=mod1$residuals)
pairs.panels(modelfits,main="Predictors, criterion, fits and residuals")
Note how the residuals are uncorrelated with the predictors and with the fitted values. This is as it should be, for if there were any linear information left over, we would have included it in our prediction.
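A quick numeric check (added here) makes the same point: the correlations of the residuals with the predictors and the fitted values are essentially zero.
round(cor(modelfits$residuals, modelfits[,c("GREV","GREQ","GREA","fitted")]), 6) #essentially 0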
A number of diagnostic graphs can be made from the basic lm fit.
par(mfrow=c(2,2))
plot(mod1)
par(mfrow=c(1,1))
We can also show relationships in terms of path models. Directed arrows are read as “causes”, curved arrows as “correlated”. The commands for the lmCor function parallel those of the lm function.
mod2 <- lmCor(GPA ~ GREV + GREQ + GREA, data=grad)
summary(mod2)
##
## Multiple Regression from raw data
## lmCor(y = GPA ~ GREV + GREQ + GREA, data = grad)
##
## Multiple Regression from matrix input
##
## Beta weights
## GPA
## (Intercept) 0.000
## GREV 0.141
## GREQ 0.016
## GREA 0.416
##
## Multiple R
## GPA
## 0.53
##
## Multiple R2
## GPA
## 0.28
##
## Cohen's set correlation R2
## [1] 0.28
##
## Squared Canonical Correlations
## NULL
These coefficients are very different from the lm coefficients. This is because lmCor, by default, reports the standardized coefficients. Do it again, with the std=FALSE option.
mod3 <- lmCor(GPA ~ GREV + GREQ + GREA, data=grad, std=FALSE)
summary(mod3,digits=8) #these are the unstandardized coefficients. In the figure, they appear to be 0
##
## Multiple Regression from raw data
## lmCor(y = GPA ~ GREV + GREQ + GREA, data = grad, std = FALSE)
##
## Multiple Regression from matrix input
##
## Beta weights
## GPA
## (Intercept) 2.5953021e+00
## GREV 6.6613502e-04
## GREQ 7.7497817e-05
## GREA 2.0802453e-03
##
## Multiple R
## GPA
## 0.52903926
##
## Multiple R2
## GPA
## 0.27988253
##
## Cohen's set correlation R2
## [1] -5714.5786
##
## Squared Canonical Correlations
## NULL
summary(mod1) #compare to the lm solution
##
## Call:
## lm(formula = GPA ~ GREV + GREQ + GREA, data = grad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.26681 -0.30376 0.00734 0.30505 1.30217
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.5953021 0.0759443 34.174 < 2e-16 ***
## GREV 0.0006661 0.0002004 3.324 0.000921 ***
## GREQ 0.0000775 0.0001958 0.396 0.692329
## GREA 0.0020803 0.0001806 11.519 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4265 on 996 degrees of freedom
## Multiple R-squared: 0.2799, Adjusted R-squared: 0.2777
## F-statistic: 129 on 3 and 996 DF, p-value: < 2.2e-16
Because the predictors (GRE scores) have so much more variance than the criterion (GPA), we could standardize the data first and then do the linear model. Compare that solution to the original lmCor output.
z.grad <- data.frame(scale(grad)) #scale converts to z scores
mod4 <- lm(GPA ~ GREV + GREQ + GREA, data=z.grad)
summary(mod4) #compare p value to mod1 and the coefficients to mod2
##
## Call:
## lm(formula = GPA ~ GREV + GREQ + GREA, data = z.grad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.52416 -0.60526 0.01462 0.60783 2.59463
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.832e-15 2.688e-02 0.000 1.000000
## GREV 1.408e-01 4.238e-02 3.324 0.000921 ***
## GREQ 1.604e-02 4.051e-02 0.396 0.692329
## GREA 4.163e-01 3.615e-02 11.519 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8499 on 996 degrees of freedom
## Multiple R-squared: 0.2799, Adjusted R-squared: 0.2777
## F-statistic: 129 on 3 and 996 DF, p-value: < 2.2e-16
We sometimes think that the effect of one variable is mediated by another variable. The classic discussion of mediation is in terms of the total effect of the predictor on the criterion (the c path), the path from the predictor to the mediator (the a path), the path from the mediator to the criterion (the b path), and the effect through the mediator (the a * b, or indirect, path). The direct effect c' is thus the difference between the total and indirect effects (c' = c - ab). This may be done using the mediate function.
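Before fitting the full model, here is a minimal sketch (an addition, using the standardized z.grad data created above) with one predictor (GREV) and one mediator (GREA), showing that c = c' + ab holds exactly for ordinary regression.
c.total <- coef(lm(GPA ~ GREV, data=z.grad))["GREV"]  #c, the total effect
a.path <- coef(lm(GREA ~ GREV, data=z.grad))["GREV"]  #a, predictor to mediator
fit.both <- lm(GPA ~ GREV + GREA, data=z.grad)
b.path <- coef(fit.both)["GREA"]                      #b, mediator to criterion
c.prime <- coef(fit.both)["GREV"]                     #c', the direct effect
c.total - (c.prime + a.path * b.path)                 #essentially zero: c = c' + ab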
For our example data set, we can develop a simple model in which the effect of GREV and GREQ is mediated by GREA. We test this model, specifying the mediating variable in parentheses.
mod5 <- mediate(GPA ~ GREV + GREQ +(GREA), data=grad, std=TRUE)
summary(mod5)
## Call: mediate(y = GPA ~ GREV + GREQ + (GREA), data = grad, std = TRUE)
##
## Direct effect estimates (traditional regression) (c') X + M on Y
## GPA se t df Prob
## Intercept 0.00 0.03 0.00 996 1.00e+00
## GREV 0.14 0.04 3.32 996 9.21e-04
## GREQ 0.02 0.04 0.40 996 6.92e-01
## GREA 0.42 0.04 11.52 996 6.57e-29
##
## R = 0.53 R2 = 0.28 F = 129.04 on 3 and 996 DF p-value: 1.3e-70
##
## Total effect estimates (c) (X on Y)
## GPA se t df Prob
## Intercept 0.00 0.03 0.00 997 1.00e+00
## GREV 0.32 0.04 7.76 997 2.10e-14
## GREQ 0.13 0.04 3.13 997 1.82e-03
##
## 'a' effect estimates (X on M)
## GREA se t df Prob
## Intercept 0.00 0.02 0.0 997 1.00e+00
## GREV 0.44 0.03 12.8 997 7.52e-35
## GREQ 0.28 0.03 8.0 997 3.44e-15
##
## 'b' effect estimates (M on Y controlling for X)
## GPA se t df Prob
## GREA 0.42 0.04 11.52 996 6.57e-29
##
## 'ab' effect estimates (through all mediators)
## GPA boot sd lower upper
## GREV 0.18 0.18 0.02 0.15 0.22
## GREQ 0.11 0.11 0.02 0.15 0.22
Since mediation is just a regression with some rearranged terms, it is useful to compare this figure to the one from lmCor:
par(mfrow=c(1,2)) #make a two panel graph
mod2 <- lmCor(GPA ~ GREV + GREQ + GREA, data=grad)
mod <- mediate(GPA ~ GREV + GREQ +(GREA), data=grad, std=TRUE)
par(mfrow=c(1,1)) #set it back again
The primary difference of mediation analysis from regular regression is that it allows us to check the confidence intervals of the direct versus the total effects. This difference, because it reflects the difference of the product terms (c - ab), is estimated by the bootstrap.
Normal theory allows us to predict how much variation in an estimate we should expect from different random samples. However, sometimes the sampling distribution is not known. We can approximate it by forming new samples drawn from our original sample. (That is, think of the original sample as the population, and then sample from that population.)
When we do the bootstrap, each subject in the original sample is sampled with replacement. This means that some people will be sampled more than once, and some people will not be sampled at all. As a result, the expected proportion of unique individuals in each bootstrap sample will be 63.2% of the original sample. This is because \[1-(1-1/x)^x \] converges to \[1 - 1/e \approx .632\] as x gets larger. (Think about a sample of size x. The chance of being sampled on any one draw is 1/x, so the chance of not being chosen on that draw is 1 - 1/x. Someone is not chosen at all only if they are not chosen on each of the x draws.)
We can show this by using the curve function.
curve(1-(1-1/x)^x,1,20)
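We can also check the 63.2% figure by simulation (a small addition, with an arbitrary seed for reproducibility).
set.seed(42)  #an arbitrary seed so the simulation is reproducible
n <- 1000
frac <- replicate(500, length(unique(sample(n, replace=TRUE)))/n) #fraction of unique cases per bootstrap sample
mean(frac)   #roughly .632
1 - exp(-1)  #the limiting value, 1 - 1/e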
Moderation is a fancy term for an interaction. Z is said to moderate the effect of X upon Y if the X effect depends upon the level of Z. This is just the interaction term in an ANOVA or in a linear model. Remember that we need to zero center the data before using the product term in the lm function. We can do this directly using lmCor or mediate; in either case, the data are automatically zero centered and then the product term is found. We use the sat.act data set.
mod6 <- lmCor(ACT ~ SATV * SATQ, data= sat.act)
mod6
## Call: lmCor(y = ACT ~ SATV * SATQ, data = sat.act)
##
## Multiple Regression from raw data
##
## DV = ACT
## slope se t p lower.ci upper.ci VIF Vy.x
## (Intercept) 0.00 0.03 0.00 1.0e+00 -0.06 0.06 1.00 0.00
## SATV 0.34 0.04 8.61 4.7e-17 0.26 0.41 1.79 0.19
## SATQ 0.41 0.04 10.53 3.6e-24 0.33 0.48 1.77 0.24
## SATV*SATQ 0.10 0.03 3.17 1.6e-03 0.04 0.16 1.20 -0.02
##
## Residual Standard Error = 0.77 with 696 degrees of freedom
##
## Multiple Regression
## R R2 Ruw R2uw Shrunken R2 SE of R2 overall F df1 df2 p
## ACT 0.64 0.41 0.55 0.3 0.41 0.03 161.53 3 696 1.86e-79
Compare this to what happens if we do the regression on the z scores (centered and standardized) from sat.act.
z.sat.act <- data.frame(scale(sat.act)) #center and standardize
mod7 <- lm (ACT ~ SATV * SATQ, data= z.sat.act)
summary(mod7)
##
## Call:
## lm(formula = ACT ~ SATV * SATQ, data = z.sat.act)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7825 -0.4591 0.0148 0.4295 3.0815
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04824 0.03326 -1.450 0.14748
## SATV 0.33523 0.03914 8.565 < 2e-16 ***
## SATQ 0.40764 0.03905 10.438 < 2e-16 ***
## SATV:SATQ 0.07525 0.02405 3.129 0.00183 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.771 on 683 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.4099, Adjusted R-squared: 0.4073
## F-statistic: 158.2 on 3 and 683 DF, p-value: < 2.2e-16
mod8 <- lmCor(ACT ~ SATV * SATQ, data= z.sat.act )
mod8
## Call: lmCor(y = ACT ~ SATV * SATQ, data = z.sat.act)
##
## Multiple Regression from raw data
##
## DV = ACT
## slope se t p lower.ci upper.ci VIF Vy.x
## (Intercept) 0.00 0.03 0.00 1.0e+00 -0.06 0.06 1.00 0.00
## SATV 0.34 0.04 8.61 4.7e-17 0.26 0.41 1.79 0.19
## SATQ 0.41 0.04 10.53 3.6e-24 0.33 0.48 1.77 0.24
## SATV*SATQ 0.10 0.03 3.17 1.6e-03 0.04 0.16 1.20 -0.02
##
## Residual Standard Error = 0.77 with 696 degrees of freedom
##
## Multiple Regression
## R R2 Ruw R2uw Shrunken R2 SE of R2 overall F df1 df2 p
## ACT 0.64 0.41 0.55 0.3 0.41 0.03 161.53 3 696 1.86e-79
The difference in the slopes of the regressions between models 6, 7, and 8 reflects what happens when we standardize the product terms, as well as the way the missing data are treated.
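To see the role of the product term (a hedged sketch, not part of the original handout), we can drop the incomplete cases, standardize the variables, form the product by hand, and standardize it as well before calling lm. Any remaining small differences from the lmCor slopes reflect how the missing SATQ scores are handled.
sat2 <- na.omit(sat.act[c("ACT","SATV","SATQ")])          #drop the incomplete cases
sat2 <- data.frame(scale(sat2))                           #standardize the three variables
sat2$SATVxSATQ <- as.vector(scale(sat2$SATV * sat2$SATQ)) #standardize the product term too
mod.std <- lm(ACT ~ SATV + SATQ + SATVxSATQ, data=sat2)   #mod.std is just a label for this sketch
summary(mod.std)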
The lmCor function calls the helper function matReg to do the basic work. We can examine how this works by just asking for the code.
matReg takes a y variable, an x variable, a covariance matrix, and the number of observations. These are all found within the lmCor function.
matReg #show the function
## function (x, y, C, m = NULL, z = NULL, n.obs = 0, means = NULL,
## std = FALSE, raw = TRUE, part = FALSE)
## {
## if (is.null(n.obs))
## n.obs <- 0
## numx <- length(x)
## numz <- length(z)
## numy <- length(y)
## if ("Intercept" %in% colnames(C)) {
## df <- n.obs - numx - numz
## }
## else {
## df <- n.obs - 1 - numx - numz
## }
## Cr <- cov2cor(C)
## if (!is.null(z)) {
## numz <- length(z)
## zm <- C[z, z, drop = FALSE]
## za <- C[x, z, drop = FALSE]
## zb <- C[y, z, drop = FALSE]
## zmi <- solve(zm)
## x.matrix <- C[x, x, drop = FALSE] - za %*% zmi %*% t(za)
## if (!part)
## y.matrix <- C[y, y, drop = FALSE] - zb %*% zmi %*%
## t(zb)
## xy.matrix <- C[x, y, drop = FALSE] - za %*% zmi %*% t(zb)
## C <- cbind(rbind(y.matrix, xy.matrix), rbind(t(xy.matrix),
## x.matrix))
## }
## if (numx == 1) {
## beta <- solve(C[x, x, drop = FALSE], (C[x, y, drop = FALSE]))
## colnames(beta) <- y
## }
## else {
## beta <- solve(C[x, x], (C[x, y]))
## }
## if (!is.matrix(beta)) {
## beta <- matrix(beta, nrow = length(beta))
## }
## if (is.character(x)) {
## rownames(beta) <- x
## }
## else {
## rownames(beta) <- colnames(C)[x]
## }
## if (is.character(y)) {
## colnames(beta) <- y
## }
## else {
## colnames(beta) <- colnames(C)[y]
## }
## x.inv <- solve(C[x, x])
## yhat <- t(C[x, y, drop = FALSE]) %*% x.inv %*% C[x, y, drop = FALSE]
## resid <- C[y, y] - yhat
## if (!std) {
## Residual.se <- sqrt(diag(resid/df))
## se <- MSE <- diag(resid)/(df)
## if (length(y) > 1) {
## SST <- diag(C[y, y] - means[y]^2 * n.obs)
## }
## else {
## SST <- (C[y, y] - means[y]^2 * n.obs)
## }
## R2 <- (SST - diag(resid))/SST
## se.beta <- list()
## for (i in 1:length(y)) {
## se.beta[[i]] <- sqrt(MSE[i] * diag(x.inv))
## }
## se <- matrix(unlist(se.beta), ncol = numy)
## if (length(y) > 1) {
## SST <- diag(C[y, y] - means[y]^2 * n.obs)
## }
## else {
## SST <- (C[y, y] - means[y]^2 * n.obs)
## }
## R2 <- (SST - diag(resid))/SST
## }
## else {
## R2 <- colSums(beta * C[x, y])/diag(C[y, y, drop = FALSE])
## uniq <- 1 - (1 - 1/diag(solve(Cr[x, x, drop = FALSE])))
## if (n.obs > 2) {
## se <- (sqrt((1 - R2)/(df)) %*% t(sqrt(1/uniq)))
## se <- t(se * sqrt(diag(C[y, y, drop = FALSE])) %*%
## t(sqrt(1/diag(C[x, x, drop = FALSE]))))
## colnames(se) <- colnames(beta)
## }
## else {
## se <- NA
## }
## Residual.se <- sqrt((1 - R2)/df * (n.obs - 1))
## }
## if (!any(is.na(se))) {
## tvalue <- beta/se
## prob <- -2 * expm1(pt(abs(tvalue), df, log.p = TRUE))
## }
## else {
## tvalue <- prob <- df <- NA
## }
## result <- list(beta = beta, se = se, t = tvalue, df = df,
## prob = prob, R2 = R2, SE.resid = Residual.se)
## return(result)
## }
## <bytecode: 0x11e1e5bb0>
## <environment: namespace:psych>
We can also show the code by examining the text file that creates it. This is available as a link on the syllabus at html://personality-project.org/courses/350/matReg.r
Another example of code is the lmCor.diagram function. It calls a number of diagram functions (part of the psych package). You may call these primitives yourself if you want to create your own path diagram.
The annotated code is available at html://personality-project.org/courses/350/lmCor.r. This is easier to read than asking for the code directly. Compare the two ways of doing this.
lmCor.diagram #show the actual code
## function (sc, main = "Regression model", digits = 2, show = FALSE,
## cex = 1, l.cex = 1, ...)
## {
## if (missing(l.cex))
## l.cex <- cex
## beta <- round(sc$coefficients, digits)
## if (length(class(sc)) == 1) {
## lm <- TRUE
## }
## else {
## lm <- FALSE
## }
## if (!lm) {
## if (rownames(beta)[1] %in% c("(Intercept)", "intercept*")) {
## intercept <- TRUE
## }
## else {
## intercept <- FALSE
## }
## x.matrix <- round(sc$x.matrix, digits)
## y.matrix <- round(sc$y.matrix, digits)
## y.resid <- round(sc$resid, digits)
## x.names <- rownames(beta)
## if (intercept) {
## x.matrix <- x.matrix[-intercept, -intercept]
## x.names <- x.names[-intercept]
## beta <- beta[-intercept, , drop = FALSE]
## }
## y.names <- colnames(beta)
## }
## else {
## x.matrix <- round(cov(sc$model[-1]), digits = digits)
## x.names <- colnames(sc$model)[-1]
## beta <- as.matrix(beta[-1])
## y.names <- colnames(sc$model)[1]
## }
## nx <- length(x.names)
## ny <- length(y.names)
## top <- max(nx, ny)
## xlim = c(-nx/3, 10)
## ylim = c(0, top)
## top <- max(nx, ny)
## x <- list()
## y <- list()
## var.list <- arrow.list <- curve.list <- self.list <- list()
## x.scale <- top/(nx + 1)
## y.scale <- top/(ny + 1)
## plot(NA, xlim = xlim, ylim = ylim, main = main, axes = FALSE,
## xlab = "", ylab = "")
## for (i in 1:nx) {
## x[[i]] <- dia.rect(3, top - i * x.scale, x.names[i],
## cex = cex, draw = TRUE, ...)
## var.list <- c(var.list, x.names[i], x[[i]])
## }
## for (j in 1:ny) {
## y[[j]] <- dia.rect(7, top - j * y.scale, y.names[j],
## cex = cex, draw = TRUE, ...)
## var.list <- c(var.list, y.names[j], y[[j]])
## }
## for (i in 1:nx) {
## for (j in 1:ny) {
## d.arrow <- dia.arrow(x[[i]]$right, y[[j]]$left, labels = beta[i,
## j], adj = 4 - j, cex = l.cex, draw = FALSE, ...)
## arrow.list <- c(arrow.list, d.arrow)
## }
## }
## if (nx > 1) {
## for (i in 2:nx) {
## for (k in 1:(i - 1)) {
## dca <- dia.curved.arrow(x[[i]]$left, x[[k]]$left,
## x.matrix[i, k], scale = -(abs(i - k)), both = TRUE,
## dir = "u", cex = l.cex, draw = FALSE, ...)
## curve.list <- c(curve.list, dca)
## }
## }
## }
## if (ny > 1) {
## for (i in 2:ny) {
## for (k in 1:(i - 1)) {
## dca <- dia.curved.arrow(y[[i]]$right, y[[k]]$right,
## y.resid[i, k], scale = (abs(i - k)), dir = "u",
## cex = l.cex, draw = FALSE, ...)
## curve.list <- c(curve.list, dca)
## }
## }
## }
## for (i in 1:ny) {
## d.self <- dia.self(y[[i]], side = 3, scale = 0.2, draw = FALSE,
## ...)
## self.list <- c(self.list, d.self)
## }
## if (show) {
## text((10 - nx/3)/2, 0, paste("Unweighted matrix correlation = ",
## round(sc$Ruw, digits)))
## }
## multi.rect(var.list, ...)
## multi.arrow(arrow.list, ...)
## multi.curved.arrow(curve.list, ...)
## multi.self(self.list, ...)
## }
## <bytecode: 0x13c6856c0>
## <environment: namespace:psych>
Both of these functions can be examined by stepping through them using the debug option.
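For example (a minimal sketch, using an arbitrary model just for illustration):
debug(lmCor)                                #flag the function for debugging
temp <- lmCor(GPA ~ GREV + GREA, data=grad) #the function now runs one line at a time
undebug(lmCor)                              #turn debugging back off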