#install.packages("psych",repos="http://personality-project.org/r",type="source")
library(psych)
library(psychTools)
sessionInfo() #psych and psychTools should be 2.0.9

## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] psychTools_2.0.9 psych_2.0.10    
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-41 digest_0.6.25   grid_4.0.2      nlme_3.1-148    magrittr_1.5    evaluate_0.14  
##  [7] rlang_0.4.7     stringi_1.4.6   rmarkdown_2.3   tools_4.0.2     foreign_0.8-80  stringr_1.4.0  
## [13] xfun_0.16       yaml_2.2.1      parallel_4.0.2  compiler_4.0.2  mnormt_2.0.1    tmvnsim_1.0-2  
## [19] htmltools_0.5.0 knitr_1.29

#Various R commands for data manipulation and display

These are examples taken from the help file for the sai and msqR data sets. sai for the State Anxiety Inventory msqR for the Motivational State Questionnaire.

These data were collected at the PMC lab:

The standard experimental study at the Personality, Motivation and Cognition (PMC) laboratory (Revelle and Anderson, 1997) was to administer a number of personality trait and state measures (e.g. the epi, msq, msqR and sai) to participants before some experimental manipulation of arousal/effort/anxiety. Following the manipulation (with a 30 minute delay if giving caffeine/placebo), some performance task was given, followed once again by measures of state arousal/effort/anxiety.

Here are the item level data on the sai (state anxiety) and the tai (trait anxiety). Scores on these scales may be found using the scoring keys. The affect data set includes pre and post scores for two studies (flat and maps) which manipulated state by using four types of movies.

In addition to being useful for studies of motivational state, these studies provide examples of test-retest and alternate form reliabilities. Given that 10 items overlap with the msqR data, they also allow for a comparison of immediate duplication of items with 30 minute delays.

Studies CART, FAST, SHED, RAFT, and SHOP were either control groups, or did not experimentally vary arousal/effort/anxiety.

AGES, CITY, EMIT, RIM, SALT, and XRAY were caffeine manipulations between time 1 and 2 (RIM and VALE were repeated day 1 and day 2)

FIAT, FLAT, MAPS, MIXX, and THRU were 1 day studies with film manipulation between time 1 and time 2.

SAM1 and SAM2 were the first and second day of a two day study. The STAI was given once per day. MSQ not MSQR was given.

VALE and PAT were two day studies with the STAI given pre and post on both days

RIM was a two day study with the STAI and MSQ given once per day.

Usually, time of day 1 = 8:50-9am am, and 2 = 7:30 pm, however, in rob, with paid subjects, the times were 0530 and 22:30.

##The table command tells you about the data

data(sai)  #actually not necessary, because the data are 'lazy loaded' 
dim(sai)  #how many subjects (row) and how many columns (variables)

## [1] 5378   23

colnames(sai) #and what are the variables?

##  [1] "study"       "time"        "id"          "calm"        "secure"      "tense"      
##  [7] "regretful"   "at.ease"     "upset"       "worrying"    "rested"      "anxious"    
## [13] "comfortable" "confident"   "nervous"     "jittery"     "high.strung" "relaxed"    
## [19] "content"     "worried"     "rattled"     "joyful"      "pleasant"

table(sai$study) #what are the names of the studies in this file

## 
##     AGES     Cart     CITY     EMIT     Fast     FIAT     FILM     FLAT     GRAY     HOME     IMPS 
##      136      126      157       71      188      140      285      510      107      134      102 
##     ITEM     Maps     MITE     MIXX      PAT     PATS     RAFT      RIM      ROB     SALT      SAM 
##       49      160       49       71      130      132       40      684       97      208      648 
##     SHED     SHOP SWAM.one SWAM.two     VALE     XRAY 
##      116      196       94       54      294      400

table(sai$study,sai$time) #cross tabulate the studies by time adminstered.

##           
##              1   2   3   4
##   AGES      68  68   0   0
##   Cart      63  63   0   0
##   CITY     157   0   0   0
##   EMIT      71   0   0   0
##   Fast      94  94   0   0
##   FIAT      70  70   0   0
##   FILM      95  95  95   0
##   FLAT     170 170 170   0
##   GRAY     107   0   0   0
##   HOME      67  67   0   0
##   IMPS     102   0   0   0
##   ITEM      49   0   0   0
##   Maps     160   0   0   0
##   MITE      49   0   0   0
##   MIXX      71   0   0   0
##   PAT       65  65   0   0
##   PATS     132   0   0   0
##   RAFT      40   0   0   0
##   RIM      342   0 342   0
##   ROB       51   0  46   0
##   SALT     104 104   0   0
##   SAM      324   0 324   0
##   SHED      58  58   0   0
##   SHOP      98  98   0   0
##   SWAM.one  94   0   0   0
##   SWAM.two  54   0   0   0
##   VALE      77  77  70  70
##   XRAY     200 200   0   0

##We can choose various studies using the subset and %in% commands

control <- subset(sai,sai$study %in% c("Cart", "Fast", "SHED",  "RAFT", "SHOP")) 
table(control$study,control$time)  #it still knows the names of the studies, even though no one is there

##           
##             1  2
##   AGES      0  0
##   Cart     63 63
##   CITY      0  0
##   EMIT      0  0
##   Fast     94 94
##   FIAT      0  0
##   FILM      0  0
##   FLAT      0  0
##   GRAY      0  0
##   HOME      0  0
##   IMPS      0  0
##   ITEM      0  0
##   Maps      0  0
##   MITE      0  0
##   MIXX      0  0
##   PAT       0  0
##   PATS      0  0
##   RAFT     40  0
##   RIM       0  0
##   ROB       0  0
##   SALT      0  0
##   SAM       0  0
##   SHED     58 58
##   SHOP     98 98
##   SWAM.one  0  0
##   SWAM.two  0  0
##   VALE      0  0
##   XRAY      0  0

dim(control)

## [1] 666  23

#pre and post drug studies
drug <- subset(sai,sai$study %in% c("AGES", "CITY","EMIT", "SALT", "VALE", "XRAY")) 
#pre and post film studies
film <- subset(sai,sai$study %in% c("FIAT","FLAT", "MAPS", "MIXX"))

##Lets explore the %in% command

Z <- X %in% Y returns all the elements in X that are in Y X %in% Y returns the logical values (TRUE, FALSE) for each element in X

 table(sai$study) #shows the counts in each study

## 
##     AGES     Cart     CITY     EMIT     Fast     FIAT     FILM     FLAT     GRAY     HOME     IMPS 
##      136      126      157       71      188      140      285      510      107      134      102 
##     ITEM     Maps     MITE     MIXX      PAT     PATS     RAFT      RIM      ROB     SALT      SAM 
##       49      160       49       71      130      132       40      684       97      208      648 
##     SHED     SHOP SWAM.one SWAM.two     VALE     XRAY 
##      116      196       94       54      294      400

 studies <- names(table(sai$study)) #gets the names of the studies
 studies %in% c("Cart", "Fast", "SHED",  "RAFT", "SHOP")

##  [1] FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [17] FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE

 #versus
 c("Cart", "Fast", "SHED",  "RAFT", "SHOP") %in% studies

## [1] TRUE TRUE TRUE TRUE TRUE

 select <- studies %in% c("Cart", "Fast", "SHED",  "RAFT", "SHOP")
 select

##  [1] FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [17] FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE

Various data manipulation and displays

We will show a corPlot of the sai data several different ways. The first is the most naive way, by variable order

R <- corPlot(sai[4:23]) #draw the corPlot and return the correlation matrix

use a graphical parameter to make that graph better

The previous graph does not show all of the variables on the x axis. The xlas command makes the labels on the xaxis vertical or horizontal (default).

R <- corPlot(sai[4:23],xlas=3) #draw the corPlot and return the correlation matrix

Do a factor analysis of the data, take out 2 factors, and then sort the variables by the loadings on these two factors. We can do this using the fa and mat.sort functions.

f2 <- fa(R,2) #extract two factors

## Loading required namespace: GPArotation

f2 #show the factor analysis output

## Factor Analysis using method =  minres
## Call: fa(r = R, nfactors = 2)
## Standardized loadings (pattern matrix) based upon correlation matrix
##               MR1   MR2   h2   u2 com
## calm         0.55 -0.33 0.53 0.47 1.6
## secure       0.74 -0.06 0.58 0.42 1.0
## tense       -0.19  0.71 0.62 0.38 1.1
## regretful   -0.23  0.31 0.19 0.81 1.8
## at.ease      0.68 -0.24 0.62 0.38 1.2
## upset       -0.34  0.37 0.33 0.67 2.0
## worrying    -0.23  0.36 0.24 0.76 1.7
## rested       0.60  0.08 0.34 0.66 1.0
## anxious      0.02  0.75 0.56 0.44 1.0
## comfortable  0.76 -0.05 0.60 0.40 1.0
## confident    0.72  0.12 0.47 0.53 1.1
## nervous     -0.06  0.75 0.59 0.41 1.0
## jittery      0.06  0.77 0.57 0.43 1.0
## high.strung  0.06  0.76 0.55 0.45 1.0
## relaxed      0.61 -0.29 0.58 0.42 1.4
## content      0.83  0.02 0.67 0.33 1.0
## worried     -0.24  0.45 0.33 0.67 1.5
## rattled      0.12  0.74 0.51 0.49 1.1
## joyful       0.74  0.23 0.49 0.51 1.2
## pleasant     0.84  0.06 0.68 0.32 1.0
## 
##                        MR1  MR2
## SS loadings           5.60 4.44
## Proportion Var        0.28 0.22
## Cumulative Var        0.28 0.50
## Proportion Explained  0.56 0.44
## Cumulative Proportion 0.56 1.00
## 
##  With factor correlations of 
##       MR1   MR2
## MR1  1.00 -0.32
## MR2 -0.32  1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  190  and the objective function was  11.68
## The degrees of freedom for the model are 151  and the objective function was  1.85 
## 
## The root mean square of the residuals (RMSR) is  0.07 
## The df corrected root mean square of the residuals is  0.08 
## 
## Fit based upon off diagonal values = 0.97
## Measures of factor score adequacy             
##                                                    MR1  MR2
## Correlation of (regression) scores with factors   0.96 0.95
## Multiple R square of scores with factors          0.93 0.90
## Minimum correlation of possible factor scores     0.86 0.81

fa.diagram(f2)  #show the fa output as a path diagram

plot(f2, labels = colnames(R)) #show it as a spatial plot

Rs <- matSort(R,f2) #sort the matrix by the factor loadings
corPlot(Rs,main="SAI sorted by factor loadings")

corPlot(Rs, xlas=2,main = "SAI sorted by factor loadings") #set x names to be vertical

#What are the scores for these anxiety items? Can we form scales?

We use scoreItems to form scales as well as scoreOverlap to examine the structure.

We first get (create) a keys.list to show which items go on which scales.

sai.keys <- list(sai = c("tense","regretful" , "upset", "worrying", "anxious", "nervous" ,  
"jittery" , "high.strung", "worried" , "rattled","-calm", 
"-secure","-at.ease","-rested","-comfortable", "-confident" ,"-relaxed" , "-content" , 
"-joyful", "-pleasant"  ) ,
sai.p = c("calm","at.ease","rested","comfortable", "confident", "secure" ,"relaxed" ,     
       "content" , "joyful", "pleasant" ),  
sai.n = c( "tense" , "anxious", "nervous" , "jittery" , "rattled",     "high.strung",  
         "upset", "worrying","worried","regretful" )
) 
#show the keys
sai.keys

## $sai
##  [1] "tense"        "regretful"    "upset"        "worrying"     "anxious"      "nervous"     
##  [7] "jittery"      "high.strung"  "worried"      "rattled"      "-calm"        "-secure"     
## [13] "-at.ease"     "-rested"      "-comfortable" "-confident"   "-relaxed"     "-content"    
## [19] "-joyful"      "-pleasant"   
## 
## $sai.p
##  [1] "calm"        "at.ease"     "rested"      "comfortable" "confident"   "secure"     
##  [7] "relaxed"     "content"     "joyful"      "pleasant"   
## 
## $sai.n
##  [1] "tense"       "anxious"     "nervous"     "jittery"     "rattled"     "high.strung"
##  [7] "upset"       "worrying"    "worried"     "regretful"

The keys are used for specifying which items go on which scales. Note that the sai.p and sai.n are the subsets of the total and represent positive and negative items. They will correlate with the total partly because of item overlap. We can adjust for this by using the scoreOverlap function on the correlation matrix. Although we can adjust the correlations, we can not adjust the scores, and thus there is no scores object returned from scoreOverlap.

sai.scores <- scoreItems(sai.keys,sai) 
sai.scores  #this produces a fair amount of output

## Call: scoreItems(keys = sai.keys, items = sai)
## 
## (Unstandardized) Alpha:
##        sai sai.p sai.n
## alpha 0.91  0.92  0.88
## 
## Standard errors of unstandardized Alpha:
##          sai  sai.p  sai.n
## ASE   0.0027 0.0037 0.0045
## 
## Average item correlation:
##            sai sai.p sai.n
## average.r 0.34  0.52  0.41
## 
## Median item correlation:
##   sai sai.p sai.n 
##  0.33  0.54  0.46 
## 
##  Guttman 6* reliability: 
##           sai sai.p sai.n
## Lambda.6 0.94  0.92   0.9
## 
## Signal/Noise based upon av.r : 
##              sai sai.p sai.n
## Signal/Noise  10    11   7.1
## 
## Scale intercorrelations corrected for attenuation 
##  raw correlations below the diagonal, alpha on the diagonal 
##  corrected correlations above the diagonal:
##         sai sai.p sai.n
## sai    0.91 -0.97  0.90
## sai.p -0.88  0.92 -0.48
## sai.n  0.81 -0.43  0.88
## 
##  In order to see the item by scale loadings and frequency counts of the data
##  print with the short option = FALSE

summary(sai.scores) #just show the correlation matrix of the scales

## Call: scoreItems(keys = sai.keys, items = sai)
## 
## Scale intercorrelations corrected for attenuation 
##  raw correlations below the diagonal, (unstandardized) alpha on the diagonal 
##  corrected correlations above the diagonal:
##         sai sai.p sai.n
## sai    0.91 -0.97  0.90
## sai.p -0.88  0.92 -0.48
## sai.n  0.81 -0.43  0.88

sai.overlap <- scoreOverlap(sai.keys,sai)
sai.overlap  #show the full output

## Call: scoreOverlap(keys = sai.keys, r = sai)
## 
## (Standardized) Alpha:
##   sai sai.p sai.n 
##  0.91  0.92  0.88 
## 
## (Standardized) G6*:
##   sai sai.p sai.n 
##  0.86  0.93  0.90 
## 
## Average item correlation:
##   sai sai.p sai.n 
##  0.34  0.52  0.42 
## 
## Median item correlation:
##   sai sai.p sai.n 
##  0.33  0.54  0.45 
## 
## Number of items:
##   sai sai.p sai.n 
##    20    10    10 
## 
## Signal to Noise ratio based upon average r and n 
##   sai sai.p sai.n 
##  10.4  11.0   7.2 
## 
## Scale intercorrelations corrected for item overlap and attenuation 
##  adjusted for overlap correlations below the diagonal, alpha on the diagonal 
##  corrected correlations above the diagonal:
##         sai sai.p sai.n
## sai    0.91 -0.87  0.85
## sai.p -0.80  0.92 -0.49
## sai.n  0.76 -0.44  0.88
## 
##  In order to see the item by scale loadings and frequency counts of the data
##  print with the short option = FALSE

summary(sai.scores)

## Call: scoreItems(keys = sai.keys, items = sai)
## 
## Scale intercorrelations corrected for attenuation 
##  raw correlations below the diagonal, (unstandardized) alpha on the diagonal 
##  corrected correlations above the diagonal:
##         sai sai.p sai.n
## sai    0.91 -0.97  0.90
## sai.p -0.88  0.92 -0.48
## sai.n  0.81 -0.43  0.88

summary(sai.overlap) #compare these two, why do they differ?

## Call: scoreOverlap(keys = sai.keys, r = sai)
## 
## Scale intercorrelations adjusted for item overlap
## Scale intercorrelations corrected for attenuation 
##  raw correlations (corrected for overlap) below the diagonal, (standardized) alpha on the diagonal 
##  corrected (for overlap and reliability) correlations above the diagonal:
##         sai sai.p sai.n
## sai    0.91 -0.87  0.85
## sai.p -0.80  0.92 -0.49
## sai.n  0.76 -0.44  0.88

#Use the scores we just found and compare them over time

The sai.scores$scores object has the scores, but we need to combine with the experimental data

dim(sai.scores$scores)

## [1] 5378    3

sai.df <- data.frame(sai[1:3],sai.scores$scores)
describe(sai.df)

##        vars    n   mean     sd median trimmed   mad min     max   range  skew kurtosis   se
## study*    1 5378  15.94   8.19  19.00   16.13 10.38   1   28.00   27.00 -0.21    -1.26 0.11
## time      2 5378   1.66   0.83   1.00    1.56  0.00   1    4.00    3.00  0.85    -0.64 0.01
## id        3 5372 133.11 313.41  63.00   75.69 57.82   1 4184.00 4183.00  6.77    56.65 4.28
## sai       4 5378   2.02   0.51   1.95    1.99  0.52   1    3.95    2.95  0.52     0.09 0.01
## sai.p     5 5378   2.45   0.67   2.40    2.44  0.74   1    4.00    3.00  0.10    -0.51 0.01
## sai.n     6 5378   1.48   0.53   1.30    1.40  0.44   1    4.00    3.00  1.40     1.82 0.01

350 Week 8 data manipulation

William Revelle

10/27/2020

These data were collected at the PMC lab:

Various data manipulation and displays

use a graphical parameter to make that graph better