Make the ‘psych’ and ‘psychTools’ packages active
library(psych)
library(psychTools)
sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.2.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Chicago
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] psychTools_2.4.3 psych_2.4.3
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.31 R6_2.5.1 fastmap_1.1.1 xfun_0.41 lattice_0.22-5
## [6] cachem_1.0.7 parallel_4.3.3 knitr_1.45 foreign_0.8-86 htmltools_0.5.5
## [11] rmarkdown_2.21 cli_3.6.1 R.methodsS3_1.8.2 grid_4.3.3 sass_0.4.5
## [16] jquerylib_0.1.4 mnormt_2.1.1 compiler_4.3.3 R.oo_1.25.0 rstudioapi_0.14
## [21] tools_4.3.3 rtf_0.4-14.1 nlme_3.1-164 evaluate_0.20 bslib_0.4.2
## [26] yaml_2.3.7 rlang_1.1.0 jsonlite_1.8.4
We can get the ‘Galton’ data set by just calling it by name. It is a built in data set.
It is important to find the dimensions of the data and perhaps to describe the data.
dim(galton) #what are the dimensions
## [1] 928 2
names(galton) #what are the variable names
## [1] "parent" "child"
describe(galton) #basic descriptives
## vars n mean sd median trimmed mad min max range skew kurtosis se
## parent 1 928 68.31 1.79 68.5 68.32 1.48 64.0 73.0 9 -0.04 0.05 0.06
## child 2 928 68.09 2.52 68.2 68.12 2.97 61.7 73.7 12 -0.09 -0.35 0.08
First form the table using ‘table’, then sort it using the ‘order’ function
galton.tab <- table(galton)
galton.tab #this table is ordered from short parents to tall parents
## child
## parent 61.7 62.2 63.2 64.2 65.2 66.2 67.2 68.2 69.2 70.2 71.2 72.2 73.2 73.7
## 64 1 0 2 4 1 2 2 1 1 0 0 0 0 0
## 64.5 1 1 4 4 1 5 5 0 2 0 0 0 0 0
## 65.5 1 0 9 5 7 11 11 7 7 5 2 1 0 0
## 66.5 0 3 3 5 2 17 17 14 13 4 0 0 0 0
## 67.5 0 3 5 14 15 36 38 28 38 19 11 4 0 0
## 68.5 1 0 7 11 16 25 31 34 48 21 18 4 3 0
## 69.5 0 0 1 16 4 17 27 20 33 25 20 11 4 5
## 70.5 1 0 1 0 1 1 3 12 18 14 7 4 3 3
## 71.5 0 0 0 0 1 3 4 3 5 10 4 9 2 2
## 72.5 0 0 0 0 0 0 0 1 2 1 2 7 2 4
## 73 0 0 0 0 0 0 0 0 0 0 0 1 3 0
rownames(galton.tab)
## [1] "64" "64.5" "65.5" "66.5" "67.5" "68.5" "69.5" "70.5" "71.5" "72.5" "73"
rank(rownames(galton.tab)) #string commands together
## [1] 1 2 3 4 5 6 7 8 9 10 11
ord <- order(rank(rownames(galton.tab)),decreasing=TRUE)
ord
## [1] 11 10 9 8 7 6 5 4 3 2 1
galton.tab[ord,] #this table is now orderd from tall parents to short parents
## child
## parent 61.7 62.2 63.2 64.2 65.2 66.2 67.2 68.2 69.2 70.2 71.2 72.2 73.2 73.7
## 73 0 0 0 0 0 0 0 0 0 0 0 1 3 0
## 72.5 0 0 0 0 0 0 0 1 2 1 2 7 2 4
## 71.5 0 0 0 0 1 3 4 3 5 10 4 9 2 2
## 70.5 1 0 1 0 1 1 3 12 18 14 7 4 3 3
## 69.5 0 0 1 16 4 17 27 20 33 25 20 11 4 5
## 68.5 1 0 7 11 16 25 31 34 48 21 18 4 3 0
## 67.5 0 3 5 14 15 36 38 28 38 19 11 4 0 0
## 66.5 0 3 3 5 2 17 17 14 13 4 0 0 0 0
## 65.5 1 0 9 5 7 11 11 7 7 5 2 1 0 0
## 64.5 1 1 4 4 1 5 5 0 2 0 0 0 0 0
## 64 1 0 2 4 1 2 2 1 1 0 0 0 0 0
plot(galton)
That plot is not very helpful, because it does not show how many people are at each point.
Let use the ‘jitter’ command.
First set the random seed to a set value so the figures will agree
set.seed(42) #cite Adams, 1979
plot(galton,pch=20,col="blue") #show the original data points
points(jitter(galton[,1]),jitter(galton[,2])) #add a little jitter
#note we used 'points' to add to the plot
set.seed(42) #cite Adams, 1979
plot(galton,pch=20,col="blue") #show the original data points
points(jitter(galton[,1],2),jitter(galton[,2],2)) #add a little more jitter
set.seed(42) #cite Adams, 1979
plot(galton,pch=20,col="blue") #show the original data points
points(jitter(galton[,1],5),jitter(galton[,2],5)) #add a little more jitter
error.bars.by(child ~ parent,data=galton,eyes=FALSE,v.labels=63:73,main="Galton's Height data")
scatterHist(child ~ parent,data=galton ) #normal formula imput
scatterHist(jitter(galton$child,5) ,jitter(galton$parent,5),ylab="Child",xlab="Parent",main="Galton height data") #but if the variables jittered, you need this alternative style
#Yet one more display – the ‘pairs.panels’ function
pairs.panels(galton)
#but "jiggle" aka 'jitter" the points
pairs.panels(galton,jiggle=TRUE)
# default values may be specified
small <- function(data=NULL,sample.size=20, n.iter=1000) {
nsub <- nrow(data) #this figures out the sampe size dynamically
result <- rep(NA,n.iter) #create this vector
#use a for loop to repeat the code inside the { }
for(i in 1:n.iter) { #repeat some code
samp <- sample(nsub,sample.size,replace=TRUE) #boot strap resampling
result[i] <- cor(data[samp,])[1,2]
#find the correlation for this sample and save it
} #end of the loop
return(result) #return the value we find
} #end of function
Use this function to generate some data
test <- small(galton) #this uses the default valuex describe(test)
hist(test,breaks=21) #draw a histogram of the results
## now do it for a bunch of cases
samp20 <- small(galton,20)
samp40 <- small(galton,40)
samp80 <- small(galton,80)
samp160 <- small(galton,160)
samp320 <- small(galton,320)
samp640<- small(galton,640)
sample.df <- data.frame(samp20 ,samp40, samp80, samp160,
samp320,samp640)
describe(sample.df)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## samp20 1 1000 0.44 0.20 0.47 0.45 0.19 -0.50 0.89 1.39 -0.64 0.54 0.01
## samp40 2 1000 0.46 0.13 0.47 0.46 0.13 -0.02 0.80 0.82 -0.46 0.28 0.00
## samp80 3 1000 0.45 0.09 0.46 0.46 0.09 0.12 0.72 0.60 -0.39 0.11 0.00
## samp160 4 1000 0.46 0.07 0.46 0.46 0.07 0.23 0.65 0.42 -0.11 -0.12 0.00
## samp320 5 1000 0.46 0.04 0.46 0.46 0.04 0.30 0.60 0.30 -0.11 0.00 0.00
## samp640 6 1000 0.46 0.03 0.46 0.46 0.03 0.37 0.57 0.20 -0.01 -0.07 0.00
We show them several different ways and slowly make the figure better.
violin(sample.df)
Add in error bars to the violin plot
violin(sample.df)
error.bars(sample.df,add=TRUE)
Just show the error bars. Note that the current version of `psych’ just
shows 3 colors by default. This has been fixed in the most recent
version. (Comimg soon)
error.bars(sample.df) #this only has 3 colors
error.bars(sample.df, col=rainbow(6)) #six colors
#combine the violin and error bars
violin(sample.df, main="Density of boot strap resamples from Galton")
error.bars(sample.df, col=rainbow(ncol(sample.df)),add=TRUE) #six colors