To get the most recent version of the psych package, use the installer in R. The functions listed below were written before the psych package was released.
For the adventurous, the development version of the psych package may be found at http://personality-project.org/r/src. But it is better to just install the released version from CRAN.
A collection of useful functions to do psychometrics and data analysis. To load all of these, use the source command:
source("http://personality-project.org/r/useful.r") #the procedures listed below
source("http://personality-project.org/r/vss.r") #the Very Simple Structure package
#define a function to calculate coefficient alpha
alpha.scale=function (x,y) #find coefficient alpha given a scale and a data.frame of the items in the scale
{
n=length(y) #number of variables
Vi=sum(diag(var(y,na.rm=TRUE))) #sum of item variance
Vt=var(x,na.rm=TRUE) #total test variance
((Vt-Vi)/Vt)*(n/(n-1))} #alpha
#general descriptive statistics
describe <- function (x, digits = 2,na.rm=TRUE) #basic stats after dropping non-numeric data
{ #first, define a local function
valid <- function(x) {
return(sum(!is.na(x)))
}
if (is.vector(x) ) #do it for vectors or
{
stats = matrix(rep(NA,6),ncol=6) #create a temporary array
stats[1, 1] <- mean(x, na.rm=na.rm )
stats[1, 2] <- median(x,na.rm=na.rm )
stats[1, 3] <- min(x, na.rm=na.rm )
stats[1, 4] <- max(x, na.rm=na.rm )
stats[1, 5] <- skew(x,na.rm=na.rm )
stats[1, 6] <- valid(x )
len <- 1
}
else {len = dim(x)[2] #do it for matrices or data.frames
stats = matrix(rep(NA,len*6),ncol=6) #create a temporary array
for (i in 1:len) {
if (is.numeric(x[,i])) { #just do this for numeric data
stats[i, 1] <- mean(x[,i], na.rm=na.rm )
stats[i, 2] <- median(x[,i],na.rm=na.rm )
stats[i, 3] <- min(x[,i], na.rm=na.rm )
stats[i, 4] <- max(x[,i], na.rm=na.rm )
stats[i, 5] <- skew(x[,i],na.rm=na.rm )
stats[i, 6] <- valid(x[,i] )
}
}
}
temp <- data.frame(n = stats[,6],mean = stats[,1], sd = sd(x,na.rm=TRUE), median = stats[,
2],min= stats[,3],max=stats[,4], range=stats[,4]-stats[,3],skew = stats[, 5])
answer <- round(data.frame(V=seq(1:len),temp, se = temp$sd/sqrt(temp$n)), digits)
return(answer)
}
# find basic summary statistics by groups #adapted from "Kickstarting R"
summ.stats <- function (x,y) { #data are x, grouping variable is y
means <- tapply(x, y, mean, na.rm=TRUE)
sds <- tapply(x,y, sd, na.rm = TRUE)
valid <- function (x) {return(sum(!is.na(x)) )}
ns <- tapply(x,y, valid )
se=sds/sqrt(ns)
answer <-data.frame(mean=means,sd=sds,se=se,n=ns)
}
#error bars in a two dimensional plot
error.crosses <-function (x,y,z) # x and y are data frame with z points
{for (i in 1:z)
{xcen <- x$mean[i]
ycen <- y$mean[i]
xse <- x$se[i]
yse <- y$se[i]
arrows(xcen-xse,ycen,xcen+xse,ycen,length=.2, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
arrows(xcen,ycen-yse,xcen,ycen+yse,length=.2, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
}
}
#examples
# plot(mPA,mNA,pch=symb[condit],cex=4.5,col=colors[condit],bg=colors[condit],main="Postive vs. Negative Affect",xlim=c(-1,1.5),ylim=c(-1,1.5),xlab="Positive Affect",ylab="Negative Affect")
#error.crosses(paf.stats,naf.stats,4) #put in x and y error bars!
skew
skew= function (x, na.rm = FALSE)
{
if (na.rm) x <- x[!is.na(x)] #remove missing values
sum((x - mean(x))^3)/(length(x) * sd(x)^3) #calculate skew
}
Put scatter plots below diagonal, histograms on the diagonal, correlations above the diagonal. The scatter plots can be best fitted with a loess smooth (smooth=TRUE is the default), and the correlations can be scaled (scale=TRUE) by the size of the correlation.
These functions are adapted (stolen?) from the help.cor examples.
#some fancy graphics -- adapted from help.cor
panel.cor.scale <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y,use="pairwise"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex * abs(r))
}
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y,use="pairwise"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex )
}
panel.hist <- function(x, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...)
}
pairs.panels <- function (x,y,smooth=TRUE,scale=FALSE)
{if (smooth ){
if (scale) {
pairs(x,diag.panel=panel.hist,upper.panel=panel.cor.scale,lower.panel=panel.smooth)
}
else {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor,lower.panel=panel.smooth)
} #else {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor,lower.panel=panel.smooth)
}
else #smooth is not true
{ if (scale) {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor.scale)
} else {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor) }
} #end of else (smooth)
} #end of function
multi.hist <- function(x) {nvar <- dim(x)[2] #number of variables
nsize=trunc(sqrt(nvar))+1 #size of graphic
old.par <- par(no.readonly = TRUE) # all par settings which can be changed
par(mfrow=c(nsize,nsize)) #set new graphic parameters
for (i in 1:nvar) {
name=names(x)[i] #get the names for the variables
hist(x[,i],main=name,xlab=name) } #draw the histograms for each variable
on.exit(par(old.par)) #set the graphic parameters back to the original
}
#function to replace upper triangle of matrix with unattenuated correlations, and the diagonal with reliabilities
#input is a correlation matrix and a vector of reliabilities
correct.cor <- function(x,y) { n=dim(x)[1]
{ x[1,1] <- y[1,1]
for (i in 2:n) {
x[i,i] <- y[1,i] #put reliabilities on the diagonal
k=i-1
for (j in 1:k) {
x[j,i] <- x[j,i]/sqrt(y[1,i]*y[1,j]) } #fix the upper triangular part of the matrix
}
return(x) }}
#difference of dependent (paired) correlations (following Steiger,J., 1980, Tests for comparing elements of a correlation matrix. Psychological Bulletin, 87, 245-251)
paired.r <- function(xy,xz,yz,n) {
diff <- xy-xz
determin=1-xy*xy - xz*xz - yz*yz + 2*xy*xz*yz
av=(xy+xz)/2
cube= (1-yz)*(1-yz)*(1-yz)
t2 = diff * sqrt((n-1)*(1+yz)/(((2*(n-1)/(n-3))*determin+av*av*cube)))
return(t2)
}
fisherz <- function(rho) {0.5*log((1+rho)/(1-rho)) } #converts r to z
#quick demonstration of a random walk process
#to show range of variability over trials and replications
#the "confidence lines" represent 2 sd of theoretical sum
random.walk <- function(length=200,n=20,effect=0) {
num <- seq(1:length)
colors <- rainbow(n) #select colors
x <- cumsum(rnorm(length,effect))/num
plot(x,ylim = c(-2.5,2.5),typ="b",col=colors[1],main="Sample means as function of sample size",ylab="sample mean",xlab="sample size")
for (i in 2:n) {
x <- cumsum(rnorm(length,effect))/num #find the next line
points(x,col=colors[i],typ="b") #draw it
}
curve(2/sqrt(x),1,length,101,add=TRUE) #upper confidence region
curve(-2/sqrt(x),1,length,101,add=TRUE) #lower confidence region
text(length/2,2,"effect size is " )
text(length/2, 1.8,eval(effect))
}