#install.packages("psych",repos="http://personality-project.org/r",type="source")
library(psych)
library(psychTools)
sessionInfo() #psych and psychTools should be 2.0.9
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] psychTools_2.0.9 psych_2.0.10
##
## loaded via a namespace (and not attached):
## [1] lattice_0.20-41 digest_0.6.25 grid_4.0.2 nlme_3.1-148 magrittr_1.5 evaluate_0.14
## [7] rlang_0.4.7 stringi_1.4.6 rmarkdown_2.3 tools_4.0.2 foreign_0.8-80 stringr_1.4.0
## [13] xfun_0.16 yaml_2.2.1 parallel_4.0.2 compiler_4.0.2 mnormt_2.0.1 tmvnsim_1.0-2
## [19] htmltools_0.5.0 knitr_1.29
#Various R commands for data manipulation and display
These are examples taken from the help file for the sai
and msqR
data sets. sai
for the State Anxiety Inventory msqR
for the Motivational State Questionnaire.
The standard experimental study at the Personality, Motivation and Cognition (PMC) laboratory (Revelle and Anderson, 1997) was to administer a number of personality trait and state measures (e.g. the epi, msq, msqR and sai) to participants before some experimental manipulation of arousal/effort/anxiety. Following the manipulation (with a 30 minute delay if giving caffeine/placebo), some performance task was given, followed once again by measures of state arousal/effort/anxiety.
Here are the item level data on the sai
(state anxiety) and the tai
(trait anxiety). Scores on these scales may be found using the scoring keys. The affect data set includes pre and post scores for two studies (flat and maps) which manipulated state by using four types of movies.
In addition to being useful for studies of motivational state, these studies provide examples of test-retest and alternate form reliabilities. Given that 10 items overlap with the msqR data, they also allow for a comparison of immediate duplication of items with 30 minute delays.
Studies CART, FAST, SHED, RAFT, and SHOP were either control groups, or did not experimentally vary arousal/effort/anxiety.
AGES, CITY, EMIT, RIM, SALT, and XRAY were caffeine manipulations between time 1 and 2 (RIM and VALE were repeated day 1 and day 2)
FIAT, FLAT, MAPS, MIXX, and THRU were 1 day studies with film manipulation between time 1 and time 2.
SAM1 and SAM2 were the first and second day of a two day study. The STAI was given once per day. MSQ not MSQR was given.
VALE and PAT were two day studies with the STAI given pre and post on both days
RIM was a two day study with the STAI and MSQ given once per day.
Usually, time of day 1 = 8:50-9am am, and 2 = 7:30 pm, however, in rob, with paid subjects, the times were 0530 and 22:30.
##The table
command tells you about the data
data(sai) #actually not necessary, because the data are 'lazy loaded'
dim(sai) #how many subjects (row) and how many columns (variables)
## [1] 5378 23
colnames(sai) #and what are the variables?
## [1] "study" "time" "id" "calm" "secure" "tense"
## [7] "regretful" "at.ease" "upset" "worrying" "rested" "anxious"
## [13] "comfortable" "confident" "nervous" "jittery" "high.strung" "relaxed"
## [19] "content" "worried" "rattled" "joyful" "pleasant"
table(sai$study) #what are the names of the studies in this file
##
## AGES Cart CITY EMIT Fast FIAT FILM FLAT GRAY HOME IMPS
## 136 126 157 71 188 140 285 510 107 134 102
## ITEM Maps MITE MIXX PAT PATS RAFT RIM ROB SALT SAM
## 49 160 49 71 130 132 40 684 97 208 648
## SHED SHOP SWAM.one SWAM.two VALE XRAY
## 116 196 94 54 294 400
table(sai$study,sai$time) #cross tabulate the studies by time adminstered.
##
## 1 2 3 4
## AGES 68 68 0 0
## Cart 63 63 0 0
## CITY 157 0 0 0
## EMIT 71 0 0 0
## Fast 94 94 0 0
## FIAT 70 70 0 0
## FILM 95 95 95 0
## FLAT 170 170 170 0
## GRAY 107 0 0 0
## HOME 67 67 0 0
## IMPS 102 0 0 0
## ITEM 49 0 0 0
## Maps 160 0 0 0
## MITE 49 0 0 0
## MIXX 71 0 0 0
## PAT 65 65 0 0
## PATS 132 0 0 0
## RAFT 40 0 0 0
## RIM 342 0 342 0
## ROB 51 0 46 0
## SALT 104 104 0 0
## SAM 324 0 324 0
## SHED 58 58 0 0
## SHOP 98 98 0 0
## SWAM.one 94 0 0 0
## SWAM.two 54 0 0 0
## VALE 77 77 70 70
## XRAY 200 200 0 0
##We can choose various studies using the subset
and %in%
commands
control <- subset(sai,sai$study %in% c("Cart", "Fast", "SHED", "RAFT", "SHOP"))
table(control$study,control$time) #it still knows the names of the studies, even though no one is there
##
## 1 2
## AGES 0 0
## Cart 63 63
## CITY 0 0
## EMIT 0 0
## Fast 94 94
## FIAT 0 0
## FILM 0 0
## FLAT 0 0
## GRAY 0 0
## HOME 0 0
## IMPS 0 0
## ITEM 0 0
## Maps 0 0
## MITE 0 0
## MIXX 0 0
## PAT 0 0
## PATS 0 0
## RAFT 40 0
## RIM 0 0
## ROB 0 0
## SALT 0 0
## SAM 0 0
## SHED 58 58
## SHOP 98 98
## SWAM.one 0 0
## SWAM.two 0 0
## VALE 0 0
## XRAY 0 0
dim(control)
## [1] 666 23
#pre and post drug studies
drug <- subset(sai,sai$study %in% c("AGES", "CITY","EMIT", "SALT", "VALE", "XRAY"))
#pre and post film studies
film <- subset(sai,sai$study %in% c("FIAT","FLAT", "MAPS", "MIXX"))
##Lets explore the %in%
command
Z <- X %in% Y returns all the elements in X that are in Y X %in% Y returns the logical values (TRUE, FALSE) for each element in X
table(sai$study) #shows the counts in each study
##
## AGES Cart CITY EMIT Fast FIAT FILM FLAT GRAY HOME IMPS
## 136 126 157 71 188 140 285 510 107 134 102
## ITEM Maps MITE MIXX PAT PATS RAFT RIM ROB SALT SAM
## 49 160 49 71 130 132 40 684 97 208 648
## SHED SHOP SWAM.one SWAM.two VALE XRAY
## 116 196 94 54 294 400
studies <- names(table(sai$study)) #gets the names of the studies
studies %in% c("Cart", "Fast", "SHED", "RAFT", "SHOP")
## [1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [17] FALSE TRUE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
#versus
c("Cart", "Fast", "SHED", "RAFT", "SHOP") %in% studies
## [1] TRUE TRUE TRUE TRUE TRUE
select <- studies %in% c("Cart", "Fast", "SHED", "RAFT", "SHOP")
select
## [1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [17] FALSE TRUE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
We will show a corPlot
of the sai
data several different ways. The first is the most naive way, by variable order
R <- corPlot(sai[4:23]) #draw the corPlot and return the correlation matrix
The previous graph does not show all of the variables on the x axis. The xlas
command makes the labels on the xaxis vertical or horizontal (default).
R <- corPlot(sai[4:23],xlas=3) #draw the corPlot and return the correlation matrix
Do a factor analysis of the data, take out 2 factors, and then sort the variables by the loadings on these two factors. We can do this using the fa
and mat.sort
functions.
f2 <- fa(R,2) #extract two factors
## Loading required namespace: GPArotation
f2 #show the factor analysis output
## Factor Analysis using method = minres
## Call: fa(r = R, nfactors = 2)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## calm 0.55 -0.33 0.53 0.47 1.6
## secure 0.74 -0.06 0.58 0.42 1.0
## tense -0.19 0.71 0.62 0.38 1.1
## regretful -0.23 0.31 0.19 0.81 1.8
## at.ease 0.68 -0.24 0.62 0.38 1.2
## upset -0.34 0.37 0.33 0.67 2.0
## worrying -0.23 0.36 0.24 0.76 1.7
## rested 0.60 0.08 0.34 0.66 1.0
## anxious 0.02 0.75 0.56 0.44 1.0
## comfortable 0.76 -0.05 0.60 0.40 1.0
## confident 0.72 0.12 0.47 0.53 1.1
## nervous -0.06 0.75 0.59 0.41 1.0
## jittery 0.06 0.77 0.57 0.43 1.0
## high.strung 0.06 0.76 0.55 0.45 1.0
## relaxed 0.61 -0.29 0.58 0.42 1.4
## content 0.83 0.02 0.67 0.33 1.0
## worried -0.24 0.45 0.33 0.67 1.5
## rattled 0.12 0.74 0.51 0.49 1.1
## joyful 0.74 0.23 0.49 0.51 1.2
## pleasant 0.84 0.06 0.68 0.32 1.0
##
## MR1 MR2
## SS loadings 5.60 4.44
## Proportion Var 0.28 0.22
## Cumulative Var 0.28 0.50
## Proportion Explained 0.56 0.44
## Cumulative Proportion 0.56 1.00
##
## With factor correlations of
## MR1 MR2
## MR1 1.00 -0.32
## MR2 -0.32 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 190 and the objective function was 11.68
## The degrees of freedom for the model are 151 and the objective function was 1.85
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.08
##
## Fit based upon off diagonal values = 0.97
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.96 0.95
## Multiple R square of scores with factors 0.93 0.90
## Minimum correlation of possible factor scores 0.86 0.81
fa.diagram(f2) #show the fa output as a path diagram
plot(f2, labels = colnames(R)) #show it as a spatial plot
Rs <- matSort(R,f2) #sort the matrix by the factor loadings
corPlot(Rs,main="SAI sorted by factor loadings")
corPlot(Rs, xlas=2,main = "SAI sorted by factor loadings") #set x names to be vertical
#What are the scores for these anxiety items? Can we form scales?
We use scoreItems
to form scales as well as scoreOverlap
to examine the structure.
We first get (create) a keys.list
to show which items go on which scales.
sai.keys <- list(sai = c("tense","regretful" , "upset", "worrying", "anxious", "nervous" ,
"jittery" , "high.strung", "worried" , "rattled","-calm",
"-secure","-at.ease","-rested","-comfortable", "-confident" ,"-relaxed" , "-content" ,
"-joyful", "-pleasant" ) ,
sai.p = c("calm","at.ease","rested","comfortable", "confident", "secure" ,"relaxed" ,
"content" , "joyful", "pleasant" ),
sai.n = c( "tense" , "anxious", "nervous" , "jittery" , "rattled", "high.strung",
"upset", "worrying","worried","regretful" )
)
#show the keys
sai.keys
## $sai
## [1] "tense" "regretful" "upset" "worrying" "anxious" "nervous"
## [7] "jittery" "high.strung" "worried" "rattled" "-calm" "-secure"
## [13] "-at.ease" "-rested" "-comfortable" "-confident" "-relaxed" "-content"
## [19] "-joyful" "-pleasant"
##
## $sai.p
## [1] "calm" "at.ease" "rested" "comfortable" "confident" "secure"
## [7] "relaxed" "content" "joyful" "pleasant"
##
## $sai.n
## [1] "tense" "anxious" "nervous" "jittery" "rattled" "high.strung"
## [7] "upset" "worrying" "worried" "regretful"
The keys are used for specifying which items go on which scales. Note that the sai.p and sai.n are the subsets of the total and represent positive and negative items. They will correlate with the total partly because of item overlap. We can adjust for this by using the scoreOverlap
function on the correlation matrix. Although we can adjust the correlations, we can not adjust the scores, and thus there is no scores object returned from scoreOverlap
.
sai.scores <- scoreItems(sai.keys,sai)
sai.scores #this produces a fair amount of output
## Call: scoreItems(keys = sai.keys, items = sai)
##
## (Unstandardized) Alpha:
## sai sai.p sai.n
## alpha 0.91 0.92 0.88
##
## Standard errors of unstandardized Alpha:
## sai sai.p sai.n
## ASE 0.0027 0.0037 0.0045
##
## Average item correlation:
## sai sai.p sai.n
## average.r 0.34 0.52 0.41
##
## Median item correlation:
## sai sai.p sai.n
## 0.33 0.54 0.46
##
## Guttman 6* reliability:
## sai sai.p sai.n
## Lambda.6 0.94 0.92 0.9
##
## Signal/Noise based upon av.r :
## sai sai.p sai.n
## Signal/Noise 10 11 7.1
##
## Scale intercorrelations corrected for attenuation
## raw correlations below the diagonal, alpha on the diagonal
## corrected correlations above the diagonal:
## sai sai.p sai.n
## sai 0.91 -0.97 0.90
## sai.p -0.88 0.92 -0.48
## sai.n 0.81 -0.43 0.88
##
## In order to see the item by scale loadings and frequency counts of the data
## print with the short option = FALSE
summary(sai.scores) #just show the correlation matrix of the scales
## Call: scoreItems(keys = sai.keys, items = sai)
##
## Scale intercorrelations corrected for attenuation
## raw correlations below the diagonal, (unstandardized) alpha on the diagonal
## corrected correlations above the diagonal:
## sai sai.p sai.n
## sai 0.91 -0.97 0.90
## sai.p -0.88 0.92 -0.48
## sai.n 0.81 -0.43 0.88
sai.overlap <- scoreOverlap(sai.keys,sai)
sai.overlap #show the full output
## Call: scoreOverlap(keys = sai.keys, r = sai)
##
## (Standardized) Alpha:
## sai sai.p sai.n
## 0.91 0.92 0.88
##
## (Standardized) G6*:
## sai sai.p sai.n
## 0.86 0.93 0.90
##
## Average item correlation:
## sai sai.p sai.n
## 0.34 0.52 0.42
##
## Median item correlation:
## sai sai.p sai.n
## 0.33 0.54 0.45
##
## Number of items:
## sai sai.p sai.n
## 20 10 10
##
## Signal to Noise ratio based upon average r and n
## sai sai.p sai.n
## 10.4 11.0 7.2
##
## Scale intercorrelations corrected for item overlap and attenuation
## adjusted for overlap correlations below the diagonal, alpha on the diagonal
## corrected correlations above the diagonal:
## sai sai.p sai.n
## sai 0.91 -0.87 0.85
## sai.p -0.80 0.92 -0.49
## sai.n 0.76 -0.44 0.88
##
## In order to see the item by scale loadings and frequency counts of the data
## print with the short option = FALSE
summary(sai.scores)
## Call: scoreItems(keys = sai.keys, items = sai)
##
## Scale intercorrelations corrected for attenuation
## raw correlations below the diagonal, (unstandardized) alpha on the diagonal
## corrected correlations above the diagonal:
## sai sai.p sai.n
## sai 0.91 -0.97 0.90
## sai.p -0.88 0.92 -0.48
## sai.n 0.81 -0.43 0.88
summary(sai.overlap) #compare these two, why do they differ?
## Call: scoreOverlap(keys = sai.keys, r = sai)
##
## Scale intercorrelations adjusted for item overlap
## Scale intercorrelations corrected for attenuation
## raw correlations (corrected for overlap) below the diagonal, (standardized) alpha on the diagonal
## corrected (for overlap and reliability) correlations above the diagonal:
## sai sai.p sai.n
## sai 0.91 -0.87 0.85
## sai.p -0.80 0.92 -0.49
## sai.n 0.76 -0.44 0.88
#Use the scores we just found and compare them over time
The sai.scores$scores
object has the scores, but we need to combine with the experimental data
dim(sai.scores$scores)
## [1] 5378 3
sai.df <- data.frame(sai[1:3],sai.scores$scores)
describe(sai.df)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## study* 1 5378 15.94 8.19 19.00 16.13 10.38 1 28.00 27.00 -0.21 -1.26 0.11
## time 2 5378 1.66 0.83 1.00 1.56 0.00 1 4.00 3.00 0.85 -0.64 0.01
## id 3 5372 133.11 313.41 63.00 75.69 57.82 1 4184.00 4183.00 6.77 56.65 4.28
## sai 4 5378 2.02 0.51 1.95 1.99 0.52 1 3.95 2.95 0.52 0.09 0.01
## sai.p 5 5378 2.45 0.67 2.40 2.44 0.74 1 4.00 3.00 0.10 -0.51 0.01
## sai.n 6 5378 1.48 0.53 1.30 1.40 0.44 1 4.00 3.00 1.40 1.82 0.01