#install.packages("psych",repos="http://personality-project.org/r", type="source")
library(psych) #make it active
library(psychTools)
sessionInfo() #psych should be 2.2.5 or higher
## R version 4.2.0 (2022-04-22)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.3.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] psychTools_2.2.5 psych_2.2.5
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.13 knitr_1.39 magrittr_2.0.3 mnormt_2.0.2 lattice_0.20-45 R6_2.5.1
## [7] rlang_1.0.2 fastmap_1.1.0 stringr_1.4.0 tools_4.2.0 parallel_4.2.0 grid_4.2.0
## [13] tmvnsim_1.0-2 nlme_3.1-157 xfun_0.30 cli_3.3.0 jquerylib_0.1.4 htmltools_0.5.2
## [19] yaml_2.3.5 digest_0.6.29 sass_0.4.0 evaluate_0.15 rmarkdown_2.11 stringi_1.7.6
## [25] compiler_4.2.0 bslib_0.3.1 jsonlite_1.8.0 foreign_0.8-82
One of the more exciting changes in the way that we conduct psychological research is the introduction of intensive longitudinal modeling using Experience Sampling Methodology (ESM), which allows us to collect state data over time. Although originally done with various diary procedures, ESM studies are now typically run with smartphones and apps. At the same time, the procedures for evaluating longitudinal data have seen a computational revolution as multilevel modeling approaches have been introduced.
Here we go through the R procedures for doing these analyses.
See the accompanying slides at http://personality-project.org/courses/350/350.dynamics and the accompanying article at http://personality-project.org/revelle/publications/rw.tutorial.19.pdf .
It is convenient, when testing new procedures, to simulate
data. We do this using the sim.multi
function.
We set the random seed to a specific number in order to produce replicable results. If the seed is not specified, a different pseudo random sequence will be generated for each simulation.
The simulation is set to create data for four subjects with two measures of each of two factors (V1 and V2 for the first factor, V3 and V4 for the second).
Items are assumed to have loadings of .6 on one factor and 0 on the other. The factor intercorrelations at the overall level vary randomly around 0, but differ for each subject, with factor intercorrelations of -.7, 0, 0, and .7 respectively.
Although normally we would not round the results of the simulation, we do so to create the data in Tables~\(\ref{tab:fat}\) and ~\(\ref{tab:wide}\). In addition, the normal output of the sim.multi function is already in wide form. We convert it to Fat form and then back again as demonstrations of the reshape and dfOrder functions.
library(psych) #activate the psych package
#create the data
set.seed(42)
x <- sim.multi(n.obs=4,nvar=4,nfact=2,days=6,ntrials=6,plot=TRUE,
phi.i=c(-.7,0,0,.7),loading=.6)
raw <- round(x[3:8])
raw[1:4] <- raw[1:4] + 6
#make a 'Fat' version
XFat <- reshape(raw,idvar="id",timevar="time",times=1:4,direction="wide")
#show it
XFat
## id V1.24 V2.24 V3.24 V4.24 V1.48 V2.48 V3.48 V4.48 V1.72 V2.72 V3.72 V4.72 V1.96 V2.96 V3.96
## 1 1 7 10 4 3 8 7 6 4 8 8 5 2 5 5 5
## 7 2 6 6 6 5 7 8 5 5 7 7 6 7 7 7 6
## 13 3 5 6 4 4 6 5 5 4 5 6 6 7 6 6 9
## 19 4 4 5 4 4 6 4 4 5 5 7 5 6 3 4 5
## V4.96 V1.120 V2.120 V3.120 V4.120 V1.144 V2.144 V3.144 V4.144
## 1 6 8 8 5 5 11 9 1 2
## 7 6 7 7 4 4 8 7 6 6
## 13 6 4 4 6 7 5 4 7 7
## 19 5 5 4 4 3 5 4 4 5
#now make it wide
XWide <- reshape(XFat,idvar="id",varying=2:25,direction="long")
Xwide <- dfOrder(XWide,"id")
Xwide #show the data in typical wide format
## id time V1 V2 V3 V4
## 1.24 1 24 7 10 4 3
## 1.48 1 48 8 7 6 4
## 1.72 1 72 8 8 5 2
## 1.96 1 96 5 5 5 6
## 1.120 1 120 8 8 5 5
## 1.144 1 144 11 9 1 2
## 2.24 2 24 6 6 6 5
## 2.48 2 48 7 8 5 5
## 2.72 2 72 7 7 6 7
## 2.96 2 96 7 7 6 6
## 2.120 2 120 7 7 4 4
## 2.144 2 144 8 7 6 6
## 3.24 3 24 5 6 4 4
## 3.48 3 48 6 5 5 4
## 3.72 3 72 5 6 6 7
## 3.96 3 96 6 6 9 6
## 3.120 3 120 4 4 6 7
## 3.144 3 144 5 4 7 7
## 4.24 4 24 4 5 4 4
## 4.48 4 48 6 4 4 5
## 4.72 4 72 5 7 5 6
## 4.96 4 96 3 4 5 5
## 4.120 4 120 5 4 4 3
## 4.144 4 144 5 4 4 5
#note that there are 4 subjects, with 6 observations per subject on each of 4 variables
#add in the trait information
traits <- data.frame(id = 1:4,extraversion =c(5,10,15,20),
neuroticism =c(10,5, 15,10))
traits #this would be some data collected separately
## id extraversion neuroticism
## 1 1 5 10
## 2 2 10 5
## 3 3 15 15
## 4 4 20 10
Xwide.traits <- merge(Xwide,traits, by ="id") #this merges two data.frames using the id
Xwide.traits #show the merged data
## id time V1 V2 V3 V4 extraversion neuroticism
## 1 1 24 7 10 4 3 5 10
## 2 1 48 8 7 6 4 5 10
## 3 1 72 8 8 5 2 5 10
## 4 1 96 5 5 5 6 5 10
## 5 1 120 8 8 5 5 5 10
## 6 1 144 11 9 1 2 5 10
## 7 2 24 6 6 6 5 10 5
## 8 2 48 7 8 5 5 10 5
## 9 2 72 7 7 6 7 10 5
## 10 2 96 7 7 6 6 10 5
## 11 2 120 7 7 4 4 10 5
## 12 2 144 8 7 6 6 10 5
## 13 3 24 5 6 4 4 15 15
## 14 3 48 6 5 5 4 15 15
## 15 3 72 5 6 6 7 15 15
## 16 3 96 6 6 9 6 15 15
## 17 3 120 4 4 6 7 15 15
## 18 3 144 5 4 7 7 15 15
## 19 4 24 4 5 4 4 20 10
## 20 4 48 6 4 4 5 20 10
## 21 4 72 5 7 5 6 20 10
## 22 4 96 3 4 5 5 20 10
## 23 4 120 5 4 4 3 20 10
## 24 4 144 5 4 4 5 20 10
Basic descriptive statistics may be found by using the appropriate functions in the psych package. We find overall descriptives (describe), descriptives by group (describeBy), and then a variety of multilevel statistics including the pooled within-person correlations, the between-person correlations, and the individual within-individual correlations using the statsBy function. We plot the data using the mlPlot function.
help(describe) #get help on the input commands for the describe function.
#first the overall descriptives
describe(Xwide.traits)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## id 1 24 2.50 1.14 2.5 2.50 1.48 1 4 3 0.00 -1.49 0.23
## time 2 24 84.00 41.87 84.0 84.00 53.37 24 144 120 0.00 -1.41 8.55
## V1 3 24 6.17 1.74 6.0 6.10 1.48 3 11 8 0.61 0.44 0.35
## V2 4 24 6.17 1.74 6.0 6.05 1.48 4 10 6 0.28 -0.91 0.35
## V3 5 24 5.08 1.47 5.0 5.05 1.48 1 9 8 -0.06 1.81 0.30
## V4 6 24 4.92 1.50 5.0 5.00 1.48 2 7 5 -0.31 -0.89 0.31
## extraversion 7 24 12.50 5.71 12.5 12.50 7.41 5 20 15 0.00 -1.49 1.17
## neuroticism 8 24 10.00 3.61 10.0 10.00 3.71 5 15 10 0.00 -1.16 0.74
#do it again, but by subjects
describeBy(Xwide.traits, group="id")
##
## Descriptive statistics by group
## id: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## id 1 6 1.00 0.00 1.0 1.00 0.00 1 1 0 NaN NaN 0.00
## time 2 6 84.00 44.90 84.0 84.00 53.37 24 144 120 0.00 -1.80 18.33
## V1 3 6 7.83 1.94 8.0 7.83 0.74 5 11 6 0.19 -1.06 0.79
## V2 4 6 7.83 1.72 8.0 7.83 1.48 5 10 5 -0.38 -1.32 0.70
## V3 5 6 4.33 1.75 5.0 4.33 0.74 1 6 5 -0.98 -0.66 0.71
## V4 6 6 3.67 1.63 3.5 3.67 2.22 2 6 4 0.21 -1.86 0.67
## extraversion 7 6 5.00 0.00 5.0 5.00 0.00 5 5 0 NaN NaN 0.00
## neuroticism 8 6 10.00 0.00 10.0 10.00 0.00 10 10 0 NaN NaN 0.00
## ---------------------------------------------------------------------------
## id: 2
## vars n mean sd median trimmed mad min max range skew kurtosis se
## id 1 6 2.0 0.00 2.0 2.0 0.00 2 2 0 NaN NaN 0.00
## time 2 6 84.0 44.90 84.0 84.0 53.37 24 144 120 0.00 -1.80 18.33
## V1 3 6 7.0 0.63 7.0 7.0 0.00 6 8 2 0.00 -0.92 0.26
## V2 4 6 7.0 0.63 7.0 7.0 0.00 6 8 2 0.00 -0.92 0.26
## V3 5 6 5.5 0.84 6.0 5.5 0.00 4 6 2 -0.85 -1.17 0.34
## V4 6 6 5.5 1.05 5.5 5.5 0.74 4 7 3 0.00 -1.57 0.43
## extraversion 7 6 10.0 0.00 10.0 10.0 0.00 10 10 0 NaN NaN 0.00
## neuroticism 8 6 5.0 0.00 5.0 5.0 0.00 5 5 0 NaN NaN 0.00
## ---------------------------------------------------------------------------
## id: 3
## vars n mean sd median trimmed mad min max range skew kurtosis se
## id 1 6 3.00 0.00 3.0 3.00 0.00 3 3 0 NaN NaN 0.00
## time 2 6 84.00 44.90 84.0 84.00 53.37 24 144 120 0.00 -1.80 18.33
## V1 3 6 5.17 0.75 5.0 5.17 0.74 4 6 2 -0.17 -1.54 0.31
## V2 4 6 5.17 0.98 5.5 5.17 0.74 4 6 2 -0.25 -2.08 0.40
## V3 5 6 6.17 1.72 6.0 6.17 1.48 4 9 5 0.38 -1.32 0.70
## V4 6 6 5.83 1.47 6.5 5.83 0.74 4 7 3 -0.39 -2.00 0.60
## extraversion 7 6 15.00 0.00 15.0 15.00 0.00 15 15 0 NaN NaN 0.00
## neuroticism 8 6 15.00 0.00 15.0 15.00 0.00 15 15 0 NaN NaN 0.00
## ---------------------------------------------------------------------------
## id: 4
## vars n mean sd median trimmed mad min max range skew kurtosis se
## id 1 6 4.00 0.00 4 4.00 0.00 4 4 0 NaN NaN 0.00
## time 2 6 84.00 44.90 84 84.00 53.37 24 144 120 0.00 -1.80 18.33
## V1 3 6 4.67 1.03 5 4.67 0.74 3 6 3 -0.37 -1.37 0.42
## V2 4 6 4.67 1.21 4 4.67 0.00 4 7 3 1.08 -0.64 0.49
## V3 5 6 4.33 0.52 4 4.33 0.00 4 5 1 0.54 -1.96 0.21
## V4 6 6 4.67 1.03 5 4.67 0.74 3 6 3 -0.37 -1.37 0.42
## extraversion 7 6 20.00 0.00 20 20.00 0.00 20 20 0 NaN NaN 0.00
## neuroticism 8 6 10.00 0.00 10 10.00 0.00 10 10 0 NaN NaN 0.00
#now find the within and between individual correlations
sb <- statsBy(Xwide.traits, group="id",cors=TRUE)
## Warning in cor(x[-gr], use = use, method = method): the standard deviation is zero
## Warning in cor(x[-gr], use = use, method = method): the standard deviation is zero
## Warning in cor(x[-gr], use = use, method = method): the standard deviation is zero
## Warning in cor(x[-gr], use = use, method = method): the standard deviation is zero
## Warning in cor(new.data[, 1:nvar], use = "pairwise", method = method): the standard deviation is
## zero
## Warning in cor(diffs, use = use, method = method): the standard deviation is zero
## Warning in cov2cor(xvals$rwg): diag(.) had 0 or NA entries; non-finite result is doubtful
## Warning in cor(new.data[, 1:(nvar)], new.data[, (nvar + 1):ncol(new.data)], : the standard deviation
## is zero
## Warning in cor(new.data[, (nvar + 1):ncol(new.data)], diffs, use = "pairwise", : the standard
## deviation is zero
#show them
lowerMat(sb$rwg) #top part of Table 6
## tm.wg V1.wg V2.wg V3.wg V4.wg extr. nrtc.
## time.wg 1.00
## V1.wg 0.24 1.00
## V2.wg -0.27 0.45 1.00
## V3.wg 0.00 -0.35 -0.22 1.00
## V4.wg 0.24 -0.40 -0.31 0.57 1.00
## extraversion.wg NA NA NA NA NA NA
## neuroticism.wg NA NA NA NA NA NA NA
lowerMat(sb$rbg) #lower part of Table 6
## tm.bg V1.bg V2.bg V3.bg V4.bg extr. nrtc.
## time.bg NA
## V1.bg NA 1.00
## V2.bg NA 1.00 1.00
## V3.bg NA -0.21 -0.21 1.00
## V4.bg NA -0.49 -0.49 0.90 1.00
## extraversion.bg NA -0.98 -0.98 0.09 0.44 1.00
## neuroticism.bg NA -0.50 -0.50 0.30 0.14 0.32 1.00
round(sb$within,2) #Table 7
## time-V1 time-V2 time-V3 time-V4 time-extrv time-nrtcs V1-V2 V1-V3 V1-V4 V1-extrv V1-nrtcs V2-V3
## 1 0.47 -0.16 -0.55 0.07 NA NA 0.59 -0.69 -0.72 NA NA -0.51
## 2 0.85 0.17 -0.19 0.05 NA NA 0.50 0.00 0.30 NA NA -0.38
## 3 -0.36 -0.71 0.65 0.84 NA NA 0.50 0.28 -0.51 NA NA -0.02
## 4 0.00 -0.35 0.00 -0.10 NA NA 0.05 -0.50 0.06 NA NA 0.53
## V2-V4 V2-extrv V2-nrtcs V3-V4 V3-extrv V3-nrtcs V4-extrv V4-nrtcs extrv-nrtcs
## 1 -0.73 NA NA 0.54 NA NA NA NA NA
## 2 0.00 NA NA 0.80 NA NA NA NA NA
## 3 -0.39 NA NA 0.57 NA NA NA NA NA
## 4 0.53 NA NA 0.62 NA NA NA NA NA
#
#plot the data
mlPlot(Xwide.traits,grp="id",items =3:6)
Reliability is the question of how much variance in a measure is not error. When examining multilevel data, the question is a bit harder, because we can think about several different reliabilities: within subjects, across subjects, and across time.
Traditional reliability measures decompose between-person test variance (\(\sigma^2_x\)) into reliable and unreliable (error) variance (\(\sigma^2_e\)).
\[\begin{equation} \rho_{xx} = 1 - \frac{\sigma^2_e}{\sigma^2_x} \end{equation}\]
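As a quick numerical illustration of this decomposition (an added sketch, not part of the original analysis), if we simulate observed scores as true score plus error with a known error variance, the reliability can be computed directly:
set.seed(1)
true <- rnorm(1000, sd = 1) #true scores with variance 1
error <- rnorm(1000, sd = .5) #error with variance .25
observed <- true + error #observed = true + error
1 - var(error)/var(observed) #approximately 1/1.25 = .8
cor(observed, true)^2 #the same quantity, expressed as a squared correlation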
The problem is then how to find \(\sigma^2_e\). Solutions include test-retest correlations and estimates based upon the internal structure of the test. Generalizability theory was developed to solve the problem of multiple sources of variance for each score. This is the situation for multilevel data: when subjects have scores on multiple items and multiple time points, we are interested in several different indices of reliability. The generalizability approach is to estimate the variance components using standard ANOVA procedures to find the appropriate Mean Squares for subjects, time, items, etc., and then convert these to variance components based upon the expected values of the MS (Table~\(\ref{tab:anova}\) top part).
Taking advantage of the power of R to integrate the output of one function into another, we combine aov, lme, and lmer into one function (multilevel.reliability, or mlr) which can take wide data, transform it into the long format needed for aov etc., do the analyses, and then find the reliabilities based upon these components and the formulae given by Shrout and Lane. Thus the command to find multilevel reliability for a set of variables is just one line of code rather than the complex expressions necessary in SPSS or SAS.
An excellent tutorial on this is found in a chapter by Pat Shrout and Sean Lane. We use their toy example for the analysis.
A convenient way to store and transmit small data sets is the dput function, which puts the data into a readable and compact form. We can read it directly back into R.
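For instance (a small added illustration using the trait data frame created earlier), we can write an object out as text and read it back with dget:
tmp <- tempfile(fileext = ".R") #a temporary file to hold the text version
dput(traits, file = tmp) #write the small trait data.frame as readable text
dget(tmp) #read it back in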
shrout <- structure(list(Person = c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), Time = c(1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L), Item1 = c(2L, 3L, 6L, 3L, 7L, 3L, 5L, 6L, 3L, 8L, 4L,
4L, 7L, 5L, 6L, 1L, 5L, 8L, 8L, 6L), Item2 = c(3L, 4L, 6L, 4L,
8L, 3L, 7L, 7L, 5L, 8L, 2L, 6L, 8L, 6L, 7L, 3L, 9L, 9L, 7L, 8L
), Item3 = c(6L, 4L, 5L, 3L, 7L, 4L, 7L, 8L, 9L, 9L, 5L, 7L,
9L, 7L, 8L, 4L, 7L, 9L, 9L, 6L)), .Names = c("Person", "Time",
"Item1", "Item2", "Item3"), class = "data.frame", row.names = c(NA,
-20L))
headTail(shrout) #first and last 4 rows of the data frame in normal wide format
## Person Time Item1 Item2 Item3
## 1 1 1 2 3 6
## 2 2 1 3 4 4
## 3 3 1 6 6 5
## 4 4 1 3 4 3
## ... ... ... ... ... ...
## 17 2 4 5 9 7
## 18 3 4 8 9 9
## 19 4 4 8 7 9
## 20 5 4 6 8 6
#or just show the entire data set
shrout
## Person Time Item1 Item2 Item3
## 1 1 1 2 3 6
## 2 2 1 3 4 4
## 3 3 1 6 6 5
## 4 4 1 3 4 3
## 5 5 1 7 8 7
## 6 1 2 3 3 4
## 7 2 2 5 7 7
## 8 3 2 6 7 8
## 9 4 2 3 5 9
## 10 5 2 8 8 9
## 11 1 3 4 2 5
## 12 2 3 4 6 7
## 13 3 3 7 8 9
## 14 4 3 5 6 7
## 15 5 3 6 7 8
## 16 1 4 1 3 4
## 17 2 4 5 9 7
## 18 3 4 8 9 9
## 19 4 4 8 7 9
## 20 5 4 6 8 6
We use the multilevel.reliability function (from psych) to analyze the data. Various reliability (or generalizability) coefficients may be found from these variance components. For instance, the reliability of individual differences over k fixed time points and m multiple items is \[\begin{equation}
R_{kF} = \frac{\sigma^2_{id} + (\sigma^2_{id \times items}/m)}{\sigma^2_{id}
+ (\sigma^2_{id \times items}/m) + \sigma^2_{error}/(k m)}
\label{eq:Rkf}
\end{equation}\] Equation~\(\ref{eq:Rkf}\) is just one (#6) of the six generalizability coefficients reported by the function.
mg <- multilevel.reliability(shrout,grp="Person",Time="Time",items=
c("Item1","Item2","Item3"),plot=TRUE)
mg
##
## Multilevel Generalizability analysis
## Call: multilevel.reliability(x = shrout, grp = "Person", Time = "Time",
## items = c("Item1", "Item2", "Item3"), plot = TRUE)
##
## The data had 5 observations taken over 4 time intervals for 3 items.
##
## Alternative estimates of reliability based upon Generalizability theory
##
## RkF = 0.97 Reliability of average of all ratings across all items and times (Fixed time effects)
## R1R = 0.6 Generalizability of a single time point across all items (Random time effects)
## RkR = 0.85 Generalizability of average time points across all items (Random time effects)
## Rc = 0.74 Generalizability of change (fixed time points, fixed items)
## RkRn = 0.85 Generalizability of between person differences averaged over time (time nested within people)
## Rcn = 0.65 Generalizability of within person variations averaged over items (time nested within people)
##
## These reliabilities are derived from the components of variance estimated by ANOVA
## variance Percent
## ID 2.34 0.44
## Time 0.38 0.07
## Items 0.61 0.11
## ID x time 0.92 0.17
## ID x items 0.12 0.02
## time x items 0.05 0.01
## Residual 0.96 0.18
## Total 5.38 1.00
##
## The nested components of variance estimated from lme are:
## variance Percent
## id 2.3 0.45
## id(time) 1.1 0.21
## residual 1.7 0.34
## total 5.1 1.00
##
## To see the ANOVA and alpha by subject, use the short = FALSE option.
## To see the summaries of the ICCs by subject and time, use all=TRUE
## To see specific objects select from the following list:
## ANOVA s.lmer s.lme alpha summary.by.person summary.by.time ICC.by.person ICC.by.time lmer long Call
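As a check on Equation~\(\ref{eq:Rkf}\), we can compute RkF by hand from the ANOVA variance components shown above (this added computation uses ID = 2.34, ID x items = 0.12, and Residual = 0.96, with m = 3 items and k = 4 time points):
sigma2.id <- 2.34 #ID variance component
sigma2.id.items <- 0.12 #ID x items variance component
sigma2.error <- 0.96 #residual variance component
m <- 3 #number of items
k <- 4 #number of time points
(sigma2.id + sigma2.id.items/m)/(sigma2.id + sigma2.id.items/m + sigma2.error/(k*m))
#0.97, matching the RkF value reported above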
As is true of most R functions, there is a lot more there than meets the eye:
names(mg) #list the various objects included in the mg object
## [1] "n.obs" "n.time" "n.items" "components"
## [5] "RkF" "R1R" "RkR" "Rc"
## [9] "RkRn" "Rcn" "ANOVA" "s.lmer"
## [13] "s.lme" "alpha" "summary.by.person" "summary.by.time"
## [17] "ICC.by.person" "ICC.by.time" "lmer" "long"
## [21] "Call"
headTail(mg$long) #show the first and last rows of the long object
## id time values items
## 1 1 1 2 Item1
## 2 1 2 3 Item1
## 3 1 3 4 Item1
## 4 1 4 1 Item1
## ... <NA> <NA> ... <NA>
## 57 5 1 7 Item3
## 58 5 2 9 Item3
## 59 5 3 8 Item3
## 60 5 4 6 Item3
One purpose of studying multi-level data is to explore individual differences in changes over time. People can differ in their affect as a function of the situation, their scores can increase or decrease over time, and they can show diurnal variation in their moods or their performance. To find the slope over time, we simply apply a within-subject regression, and to examine the phase and fit of diurnal variation, we use circular statistics via the cosinor function. Later, we discuss how to generate and analyze simulated data with a variety of data structures, particularly growth and decay over time, and diurnal variation.
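As a minimal sketch of the within-subject slope idea (added here, using the simulated Xwide data created earlier rather than real ESM data), we can fit a separate regression of V1 on time for each subject:
#within-subject slopes of V1 on time, one ordinary regression per id
slopes <- sapply(split(Xwide, Xwide$id),
    function(d) coef(lm(V1 ~ time, data = d))["time"])
round(slopes, 3)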
In a study of 10 participants diagnosed with generalized anxiety disorder, Fisher (2015) collected 28 items for at least 60 days per participant. In an impressive demonstration of how different people are, he examined the dynamic factor structure of each person. The purpose of the study was to encourage the use of personalized care for clinical psychology. Most importantly, for our purposes, the data set he analyzed was made available at Aaron Fisher’s website https://www.dynamicpsychlab.com for easy download and subsequent analysis. (He seems to have moved the data from there, but a copy of his data is available at http://personality-project.org/courses/350/Fisher_2015_data .)
As discussed in the Appendix to our tutorial, the 10 data files available may be merged into one file and we can examine both the reliability of scales made up of subsets of items, as well as the correlational pattern of these scales. It is important to note that the original paper goes far beyond what we report here, and indeed, the analyses that follow are independent of the main thrust of Fisher’s paper.
Of the 28 items, we focus on just eight: four measuring general positive affect (happy, content, relaxed, and positive), and four measuring tension and negative affect (angry, afraid, sad, and lonely). We see that the ratings suggest a clearly reliable separation between individuals for both positive and negative affects (Table~\(\ref{tab:fisher:reliability}\)). Scoring each subject for these two moods at all time points may be done using the scoreItems function.
First, we create a little function to do our work for us. This is partially tailored for the Fisher data set, but the concept of concatenating files remains the same for any such arrangement.
We can examine the file structure outside of R by using our browser.
We use a few R commands such as getwd and setwd to adjust our working directory from the default location. The function loops over all the subjects. It dynamically calculates how many subjects there are by looking at the length of the names parameter. With a bit of effort, we could even make this automatic.
"combine.data" <- function(dir=NULL,names,filename=NULL) {
new <- NULL
n <- length(names)
old.dir <- getwd() #save the current working directory
for (subject in 1:n) { #repeat n times, once for each subject
if(is.null(filename)) {setwd(dir)} else {dir <- filename} #set the working directory to where the files are
#this is specific to this particular data structure
x <- read.file(f=paste0(dir,"/P",names[subject],"/pre",names[subject],".csv"))
nx <- nrow(x)
#add id and time to this data frame
temp <- data.frame(id=names[subject],time=1:nx,x)
new <- rbind(new,temp) #combine with prior data.frames to make a longer object
} #end of the subject loop
setwd(old.dir) #set the working directory back to the original
return(new)} #end the function by returning the data
We do a little bit of work to get the files from Fisher’s server to ours (not shown) and then run our function. The function uses the read.file function from psychTools. When we run the function, it echoes the name of each file read. Note that Fisher used a missing value of 999. We use the scrub function to replace all 999s with NA.
#now use this function to read in data from a set of files (downloaded from Fisher's web page) and
# combine into one data.frame
names <- c("002","007","009","010","011","013","022","023","030","065") #hard coded from his file names
filename="http://personality-project.org/courses/350/Fisher_2015_Data" #specify where the data are
new <- combine.data(dir=NULL, names=names,filename=filename) #use the function we just created for this problem
## Data from the .csv file http://personality-project.org/courses/350/Fisher_2015_Data/P002/pre002.csv has been loaded.
## Data from the .csv file http://personality-project.org/courses/350/Fisher_2015_Data/P007/pre007.csv has been loaded.
## Data from the .csv file http://personality-project.org/courses/350/Fisher_2015_Data/P009/pre009.csv has been loaded.
## Data from the .csv file http://personality-project.org/courses/350/Fisher_2015_Data/P010/pre010.csv has been loaded.
## Data from the .csv file http://personality-project.org/courses/350/Fisher_2015_Data/P011/pre011.csv has been loaded.
## Data from the .csv file http://personality-project.org/courses/350/Fisher_2015_Data/P013/pre013.csv has been loaded.
## Data from the .csv file http://personality-project.org/courses/350/Fisher_2015_Data/P022/pre022.csv has been loaded.
## Data from the .csv file http://personality-project.org/courses/350/Fisher_2015_Data/P023/pre023.csv has been loaded.
## Data from the .csv file http://personality-project.org/courses/350/Fisher_2015_Data/P030/pre030.csv has been loaded.
## Data from the .csv file http://personality-project.org/courses/350/Fisher_2015_Data/P065/pre065.csv has been loaded.
describe(new) #note how missing values were coded as 999
## vars n mean sd median trimmed mad min max range skew kurtosis se
## id* 1 792 5.27 2.90 5.0 5.21 2.97 1 10 9 0.17 -1.25 0.10
## time 2 792 41.34 25.36 40.0 40.15 29.65 1 118 117 0.42 -0.31 0.90
## happy 3 788 156.44 316.53 38.0 70.51 26.69 3 999 996 2.27 3.20 11.28
## sad 4 788 148.20 319.72 29.0 61.04 31.13 0 999 999 2.27 3.19 11.39
## angry 5 788 148.61 319.41 30.0 61.44 28.17 1 999 998 2.27 3.20 11.38
## content 6 788 154.26 317.46 33.0 67.99 26.69 3 999 996 2.27 3.19 11.31
## afraid 7 788 152.49 318.28 36.0 66.33 37.06 0 999 999 2.26 3.18 11.34
## lonely 8 788 151.39 318.66 36.0 65.07 37.06 0 999 999 2.27 3.18 11.35
## relaxed 9 788 152.97 317.78 30.0 66.36 19.27 2 999 997 2.27 3.20 11.32
## tired 10 788 167.59 312.62 55.0 84.41 35.58 1 999 998 2.26 3.18 11.14
## anxious 11 788 170.52 311.44 62.5 87.98 30.39 0 999 999 2.27 3.19 11.09
## positive 12 788 158.27 315.89 41.0 72.74 31.13 3 999 996 2.27 3.20 11.25
## percent 13 788 170.42 315.02 57.0 87.27 32.62 4 999 995 2.23 3.03 11.22
## interfere 14 788 168.30 315.96 52.5 84.92 34.84 0 999 999 2.23 3.03 11.26
## upset 15 788 170.76 313.10 56.0 87.49 32.62 6 999 993 2.25 3.11 11.15
## wcontent 16 788 171.13 314.70 56.5 87.92 30.39 3 999 996 2.23 3.04 11.21
## tension 17 788 170.10 313.37 60.0 86.97 31.13 4 999 995 2.25 3.11 11.16
## difficult 18 788 172.06 314.41 58.0 89.25 31.13 2 999 997 2.23 3.03 11.20
## control 19 788 171.82 314.50 59.0 88.88 31.13 3 999 996 2.23 3.03 11.20
## concentrate 20 788 164.91 318.81 46.0 80.71 29.65 4 999 995 2.22 2.97 11.36
## mustens 21 788 167.25 318.19 53.0 83.88 40.03 0 999 999 2.21 2.95 11.34
## fatigue 22 788 171.17 316.65 52.0 88.30 35.58 4 999 995 2.21 2.95 11.28
## irritable 23 788 166.52 318.18 47.0 82.59 26.69 2 999 997 2.22 2.97 11.33
## sleep 24 788 169.42 317.51 50.0 86.41 40.03 3 999 996 2.21 2.94 11.31
## restless 25 788 171.13 316.51 53.0 88.02 32.62 3 999 996 2.22 2.96 11.28
## avoid 26 788 166.69 318.65 45.0 83.20 37.06 0 999 999 2.21 2.93 11.35
## prepare 27 788 165.72 318.94 44.0 82.09 37.06 0 999 999 2.21 2.94 11.36
## procrast 28 788 168.61 317.98 45.5 85.53 39.29 3 999 996 2.20 2.93 11.33
## reassur 29 788 156.43 321.83 35.0 70.45 22.24 3 999 996 2.23 2.98 11.46
fisher <- scrub(new,2:29,isvalue=999) #change 999 to NA
dim(fisher) #show the number of rows and columns
## [1] 792 29
describe(fisher) #to see what is there and to check that we recoded everything correctly.
## vars n mean sd median trimmed mad min max range skew kurtosis se
## id* 1 792 5.27 2.90 5 5.21 2.97 1 10 9 0.17 -1.25 0.10
## time 2 792 41.34 25.36 40 40.15 29.65 1 118 117 0.42 -0.31 0.90
## happy 3 691 38.16 21.69 33 36.37 20.76 3 97 94 0.67 -0.49 0.82
## sad 4 691 28.77 23.42 21 26.02 22.24 0 100 100 0.83 -0.17 0.89
## angry 5 691 29.23 20.72 25 26.99 22.24 1 100 99 0.89 0.38 0.79
## content 6 691 35.68 23.52 28 33.15 20.76 3 100 97 0.81 -0.45 0.89
## afraid 7 691 33.66 25.77 29 31.01 29.65 0 100 100 0.67 -0.53 0.98
## lonely 8 691 32.40 25.35 28 29.80 31.13 0 100 100 0.65 -0.49 0.96
## relaxed 9 691 34.21 20.89 28 31.85 14.83 2 93 91 0.97 0.11 0.79
## tired 10 691 50.88 25.55 50 51.03 31.13 1 100 99 -0.03 -1.07 0.97
## anxious 11 691 54.22 24.37 57 55.20 26.69 0 100 100 -0.31 -0.84 0.93
## positive 12 691 40.25 22.31 38 38.73 25.20 3 100 97 0.51 -0.71 0.85
## percent 13 689 51.36 23.13 50 50.98 29.65 4 100 96 0.12 -0.96 0.88
## interfere 14 689 48.94 25.18 45 48.21 29.65 0 99 99 0.22 -1.18 0.96
## upset 15 690 53.13 23.36 50 52.87 29.65 6 100 94 0.09 -1.05 0.89
## wcontent 16 689 52.18 22.35 50 51.67 25.20 3 100 97 0.16 -0.83 0.85
## tension 17 690 52.37 23.67 51 52.77 31.13 4 100 96 -0.11 -1.20 0.90
## difficult 18 689 53.24 23.17 51 53.16 28.17 2 100 98 0.00 -0.94 0.88
## control 19 689 52.96 23.19 51 52.73 28.17 3 100 97 0.05 -0.96 0.88
## concentrate 20 688 43.67 21.09 42 43.01 25.20 4 100 96 0.29 -0.86 0.80
## mustens 21 688 46.36 25.44 45 45.70 35.58 0 100 100 0.13 -1.28 0.97
## fatigue 22 688 50.84 24.62 49 50.30 30.39 4 100 96 0.19 -1.08 0.94
## irritable 23 688 45.52 20.91 42 44.64 20.76 2 100 98 0.36 -0.48 0.80
## sleep 24 688 48.84 27.40 42 47.76 31.13 3 100 97 0.38 -1.16 1.04
## restless 25 688 50.80 22.36 49 50.43 26.69 3 100 97 0.13 -0.95 0.85
## avoid 26 688 45.71 28.71 38 43.64 29.65 0 100 100 0.58 -0.96 1.09
## prepare 27 688 44.60 27.71 36 42.93 29.65 0 100 100 0.48 -1.13 1.06
## procrast 28 688 47.91 29.49 39 46.30 32.62 3 100 97 0.44 -1.22 1.12
## reassur 29 688 33.96 17.08 30 32.84 17.79 3 94 91 0.59 -0.12 0.65
The data set contains 28 different mood words. For the purpose of the demonstration, we want to find the reliability of the four positive affect terms (happy, content, relaxed, and positive) as well as the four negative affect terms (angry, afraid, sad, and lonely) (Table~\(\ref{tab:fisher:reliability}\)). We then find scores for all subjects at all time periods by using the scoreItems function. We combine these scores with the id and time information from the original file, and then plot them with the mlPlot function. Finally, to examine the within-subject correlations we use the statsBy function with the cors=TRUE option (Table~\(\ref{tab:fisher:alpha}\)).
Before we do the multilevel analysis, we can treat the data as a simple data set. Let’s find the correlations of the positive and negative affect terms. This shows the standard pattern of relative independence between positive and negative affect. But this is an overall effect; what happens within subjects?
positive <- cs(happy,content,relaxed, positive)
negative <- cs(angry,afraid, sad, lonely)
pana <- c(positive,negative)
R <- lowerCor(fisher[pana])
## happy cntnt relxd postv angry afrad sad lonly
## happy 1.00
## content 0.80 1.00
## relaxed 0.67 0.74 1.00
## positive 0.84 0.79 0.70 1.00
## angry -0.19 -0.15 -0.12 -0.15 1.00
## afraid -0.36 -0.37 -0.28 -0.34 0.65 1.00
## sad -0.34 -0.32 -0.23 -0.31 0.67 0.75 1.00
## lonely -0.24 -0.21 -0.12 -0.21 0.64 0.74 0.80 1.00
corPlot(R,numbers=TRUE)
Now let’s find the simple reliabilities of these two sets of items. We use the scoreItems function.
pana.scores <- scoreItems(keys=list(positive=positive,negative=negative), fisher)
## Number of categories should be increased in order to count frequencies.
pana.scores
## Call: scoreItems(keys = list(positive = positive, negative = negative),
## items = fisher)
##
## (Unstandardized) Alpha:
## positive negative
## alpha 0.93 0.91
##
## Standard errors of unstandardized Alpha:
## positive negative
## ASE 0.019 0.02
##
## Average item correlation:
## positive negative
## average.r 0.76 0.71
##
## Median item correlation:
## positive negative
## 0.76 0.70
##
## Guttman 6* reliability:
## positive negative
## Lambda.6 0.91 0.89
##
## Signal/Noise based upon av.r :
## positive negative
## Signal/Noise 12 9.7
##
## Scale intercorrelations corrected for attenuation
## raw correlations below the diagonal, alpha on the diagonal
## corrected correlations above the diagonal:
## positive negative
## positive 0.93 -0.33
## negative -0.30 0.91
##
## In order to see the item by scale loadings and frequency counts of the data
## print with the short option = FALSE
Looking at the correlations between the scales, we see that there is a slight negative correlation between positive and negative affect for the pooled data. Let’s show that using pairs.panels on the scores.
pairs.panels(pana.scores$scores)
The mlr function (aka multilevel.reliability) takes advantage of two very powerful packages for doing mixed effects models: lme4 and nlme. These need to be installed before you can run mlr; they are loaded automatically when mlr starts up.
We also make use of the lattice graphics functions, which are more powerful than simple core-R graphics but not as powerful as ggplot.
#find the multilevel reliabilities
pos.fisher.rel <- mlr(fisher,"id","time",items = c("happy","content",
"relaxed", "positive"), aov=FALSE,lmer=TRUE)
## Loading required namespace: lme4
## boundary (singular) fit: see ?isSingular
neg.fisher.rel <- mlr(fisher,"id" ,"time",items = c("angry","afraid",
"sad","lonely" ),aov= FALSE,lmer=TRUE)
## boundary (singular) fit: see ?isSingular
#organize the alpha's for each subject
alpha.df <- data.frame(positive = pos.fisher.rel$alpha[,c(1,3)],
negative = neg.fisher.rel$alpha[,c(1,3)])
select <-c("happy","content", "relaxed", "positive" ,
"angry","afraid","sad","lonely" )
affect.keys <- list(positive = c("happy","content", "relaxed", "positive"),
negative = c("angry","afraid","sad","lonely" ) )
affect.scores <- scoreItems(keys= affect.keys, items = fisher[select], min=0,
max=100)
## Number of categories should be increased in order to count frequencies.
affect.df <-cbind(fisher[1:2], affect.scores$score)
affect.df <- char2numeric(affect.df,flag=FALSE) #something is breaking statsBy
#by making everything numeric, we seem to solve this problem
#but we need to set flag to FALSE
mlPlot(affect.df, grp = "id",time="time", items= 3:4, typ="p", pch=c(20,21))
describe(affect.df) #for debugging purposes
## vars n mean sd median trimmed mad min max range skew kurtosis se
## id 1 792 5.27 2.90 5.00 5.21 2.97 1.00 10.00 9.0 0.17 -1.25 0.10
## time 2 792 41.34 25.36 40.00 40.15 29.65 1.00 118.00 117.0 0.42 -0.31 0.90
## positive 3 792 36.40 18.76 31.75 34.39 15.57 3.75 93.25 89.5 0.94 0.20 0.67
## negative 4 792 30.34 19.79 25.75 28.41 21.50 1.50 98.50 97.0 0.75 -0.07 0.70
stats.affect <- statsBy(affect.df,group="id",cors=TRUE)
stats.affect$within
## time-postv time-negtv postv-negtv
## 1 -0.38267572 -0.18262620 -0.36463597
## 2 -0.47970186 0.52654566 -0.60484009
## 3 0.04923507 -0.03452747 0.35141267
## 4 -0.28772283 -0.36429214 -0.27646868
## 5 0.42555939 0.12569681 -0.28286020
## 6 0.02500525 -0.21866366 -0.05130851
## 7 -0.03767051 -0.32317756 -0.23794901
## 8 -0.01260779 -0.22267681 -0.15609861
## 9 0.29436633 0.11297704 -0.43407796
## 10 0.08948664 -0.47978968 -0.74120395
# combine these with the alphas
ar <- autoR(affect.df[2:4],group= affect.df["id"])
alpha.df <- cbind(alpha.df,stats.affect$within,ar$autoR[,2:3])
alpha.df #show the results
## positive.Raw.alpha positive.av.r negative.Raw.alpha negative.av.r time-postv time-negtv
## ID002 0.3039073 0.1268344 0.5669880 0.3998452 -0.38267572 -0.18262620
## ID007 0.8216652 0.5388541 0.5296732 0.2190096 -0.47970186 0.52654566
## ID009 0.7465151 0.4270949 0.7971364 0.4924038 0.04923507 -0.03452747
## ID010 0.8675420 0.6263801 0.6443405 0.3207922 -0.28772283 -0.36429214
## ID011 0.8790106 0.6481144 0.5384805 0.2928347 0.42555939 0.12569681
## ID013 0.6095157 0.2715539 0.7320391 0.4047183 0.02500525 -0.21866366
## ID022 0.8362709 0.5532570 0.3925371 0.1394942 -0.03767051 -0.32317756
## ID023 0.6301238 0.2942481 0.4509687 0.1842294 -0.01260779 -0.22267681
## ID030 0.6594051 0.3448179 0.4945881 0.1718966 0.29436633 0.11297704
## ID065 0.8875579 0.6775638 0.8313027 0.5526434 0.08948664 -0.47978968
## postv-negtv positive negative
## ID002 -0.36463597 0.2678815 -0.07390722
## ID007 -0.60484009 0.3828602 0.43389815
## ID009 0.35141267 0.1002674 0.08562170
## ID010 -0.27646868 0.4049834 0.36046893
## ID011 -0.28286020 0.4602309 0.22524774
## ID013 -0.05130851 -0.1297267 0.20941283
## ID022 -0.23794901 0.2869868 0.17152494
## ID023 -0.15609861 0.3805284 0.05043851
## ID030 -0.43407796 0.1456173 0.39018901
## ID065 -0.74120395 0.2337070 0.29448525
A very powerful tool for learning how to analyze data and for testing how well various models work is to create simulated data sets. The sim.multi function allows for creating arbitrarily large data sets with a variety of complex data structures. The basic generator allows one to define a factor model with 1 … f factors with 1 … j items per factor, for 1 … i subjects and 1 … k time points. It is possible to define factor loadings globally, or individually for each subject. Factors (and their corresponding items) can increase, remain stable, or decrease over time, and can show diurnal variation with different phase relationships. Diurnal variation is simulated as a sinusoidal curve varying over 24 hours with a peak (phase angle) at different times of day.
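To make the sinusoidal model concrete, the following small sketch (added for illustration; it shows the general cosinor idea rather than the internals of the cosinor function) generates an hourly signal that peaks at 15:00 and recovers the phase by regressing on the cosine and sine of time of day:
set.seed(42)
hours <- seq(0, 143) #six days of hourly observations
signal <- cos(2*pi*(hours - 15)/24) + rnorm(144, sd = .3) #peak at hour 15, plus noise
fit <- lm(signal ~ cos(2*pi*hours/24) + sin(2*pi*hours/24))
b <- coef(fit)
phase <- (atan2(b[3], b[2])*24/(2*pi)) %% 24 #estimated hour of the peak
round(phase, 1) #should be close to 15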
The default for sim.multi
is for four subjects over 6
days on two variables:
sim.multi() #just use the default values
A more complicated example of 16 such subjects is seen in Figure~\(\ref{fig:diurnal}\) and various summary statistics are given in Table~\(\ref{tab:diurnal}\). Sixteen subjects were simulated with two measures taken 8 times a day for six days. The scores for some subjects decreased, while for others they increased over the 6 days. People differ in the strength (amplitude) and phase of their diurnal rhythms.
We create 16 subjects, with two factors, each measured by a single variable.
We simulate data collected 8 times/day over 6 days. We set the random
seed to a fixed number for a reproducible example. After generating the
data, we apply the cosinor
function to estimate the diurnal
nature of the signal, statsBy
to find the within individual
correlations, and autoR
to find the auto correlations of
the data. We combine the various outputs into one data.frame to display
in Table~\(\ref{tab:diurnal}\).
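It may also help to recall what autoR is estimating: conceptually, a lag-1 autocorrelation is the correlation of a series with itself shifted by one observation. The following small sketch (added for illustration, not the autoR code itself) shows this on a simulated autoregressive series:
set.seed(1)
y <- as.numeric(arima.sim(model = list(ar = .6), n = 200)) #a series with autocorrelation about .6
cor(y[-length(y)], y[-1]) #lag-1 autocorrelation, close to .6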
set.seed(17)
x <- sim.multi(n.obs=16,nvar=2,nfact=2,ntrials=48,days=6,
sin.i =c(1,0,0,1),cos.i=c(1,.5,0,-.5), sigma.i=c(0,0,0,0),
sigma=1,phi.i=c(0,0,.5,.5),beta.i=c(-1.,0,1,0,-.5,0,.5,0))
co <- cosinor(x$time,x[3:6],code="id")
sb <- statsBy(x,group="id",cors=TRUE)
## Warning in cor(new.data[, 1:nvar], use = "pairwise", method = method): the standard deviation is
## zero
## Warning in cor(new.data[, 1:(nvar)], new.data[, (nvar + 1):ncol(new.data)], : the standard deviation
## is zero
aR <-autoR(x,group="id")
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
## Warning in cor(x[1:(n.obs - lag), ], x[(lag + 1):n.obs, ], use = use): the standard deviation is
## zero
sim.df <- data.frame(co[,c(1,2,4,5)],sb$within[,c(8,9,10)],aR$autoR[,3:4])
sim.df #show it
## V1.phase V2.phase V1.fit V2.fit V1.V2 V1.time V2.time V1 V2
## 1 3.371217 0.3859637 0.73411761 0.6354381 0.32726685 -0.6425817 -0.04390623 0.7761724 0.2268002
## 2 19.205095 7.5412800 0.11539895 0.8363729 -0.19827353 0.8302723 -0.19502655 0.6952954 0.5318643
## 3 2.610029 2.4029765 0.83677255 0.8180621 0.80692479 -0.4436398 -0.30676658 0.6343323 0.5094875
## 4 6.915088 8.4383109 0.48478928 0.8468689 0.45724665 0.5147136 0.04754537 0.4638642 0.5083457
## 5 3.370070 21.9799811 0.79409539 0.6527427 0.03876169 -0.5500244 0.12451251 0.7054045 0.1946996
## 6 15.068900 7.3340413 0.08813653 0.8331447 -0.14074057 0.8184301 -0.20232975 0.6612120 0.5680461
## 7 2.907157 1.3445995 0.89202092 0.8435910 0.78773451 -0.4138914 -0.22555866 0.7028118 0.5361936
## 8 7.466012 8.2666158 0.36078097 0.8383743 0.43183787 0.7037733 0.11029588 0.5991324 0.5626786
## 9 3.582658 0.4441065 0.72870673 0.5235346 0.36451416 -0.6477390 -0.12098387 0.7353693 0.1239370
## 10 20.279698 7.0951689 0.11196722 0.8999847 -0.06584164 0.9044455 -0.08733102 0.8096398 0.6396230
## 11 2.661557 1.9848138 0.88108627 0.8117997 0.82231666 -0.3838924 -0.28905002 0.6696703 0.5411513
## 12 7.451444 8.2412482 0.49205793 0.8799029 0.50313249 0.5476031 0.08936505 0.4516068 0.6218569
## 13 3.268059 22.7373443 0.70393279 0.5198812 0.07155341 -0.6424034 0.05987705 0.7332833 0.2407432
## 14 22.885790 7.3526793 0.14795875 0.8404139 -0.12621945 0.8339148 -0.21133045 0.6536067 0.5716417
## 15 2.612928 1.8371720 0.86933154 0.8183257 0.79270643 -0.3396361 -0.30421584 0.6116318 0.5908583
## 16 8.467780 7.2311714 0.43773459 0.8185171 0.51983867 0.4909024 0.06636563 0.2401676 0.5193529
#to find the pooled data, we do the same analyses,
# but without specifying the group
cos <- cosinor(x$time,x[3:6]) #pooled results
aR <-autoR(x) #pooled results
rs <- cor(x[3:5])
rs <- rs[lower.tri(rs)] #just use the relevant ones
pooled <- c(cos[1:2,1:2],rs,aR$autoR[3:4])
#combine the pooled data with the individual data
sim.df <- rbind(sim.df,pooled) #rbind adds rows of data frames together
#see also cbind to add columns together
round(sim.df,2) #print it more neatly
## V1.phase V2.phase V1.fit V2.fit V1.V2 V1.time V2.time V1 V2
## 1 3.37 0.39 0.73 0.64 0.33 -0.64 -0.04 0.78 0.23
## 2 19.21 7.54 0.12 0.84 -0.20 0.83 -0.20 0.70 0.53
## 3 2.61 2.40 0.84 0.82 0.81 -0.44 -0.31 0.63 0.51
## 4 6.92 8.44 0.48 0.85 0.46 0.51 0.05 0.46 0.51
## 5 3.37 21.98 0.79 0.65 0.04 -0.55 0.12 0.71 0.19
## 6 15.07 7.33 0.09 0.83 -0.14 0.82 -0.20 0.66 0.57
## 7 2.91 1.34 0.89 0.84 0.79 -0.41 -0.23 0.70 0.54
## 8 7.47 8.27 0.36 0.84 0.43 0.70 0.11 0.60 0.56
## 9 3.58 0.44 0.73 0.52 0.36 -0.65 -0.12 0.74 0.12
## 10 20.28 7.10 0.11 0.90 -0.07 0.90 -0.09 0.81 0.64
## 11 2.66 1.98 0.88 0.81 0.82 -0.38 -0.29 0.67 0.54
## 12 7.45 8.24 0.49 0.88 0.50 0.55 0.09 0.45 0.62
## 13 3.27 22.74 0.70 0.52 0.07 -0.64 0.06 0.73 0.24
## 14 22.89 7.35 0.15 0.84 -0.13 0.83 -0.21 0.65 0.57
## 15 2.61 1.84 0.87 0.82 0.79 -0.34 -0.30 0.61 0.59
## 16 8.47 7.23 0.44 0.82 0.52 0.49 0.07 0.24 0.52
## 17 3.37 5.23 0.28 0.37 0.36 -0.03 -0.07 0.89 0.73
df2latex(sim.df) #if you have LaTex, this makes a table for you.
## % df2latex % sim.df
## \begin{table}[htpb]\caption{df2latex}
## \begin{center}
## \begin{scriptsize}
## \begin{tabular} {l r r r r r r r r r }
## \multicolumn{ 9 }{l}{ A table from the psych package in R } \cr
## \hline Variable & {V1.ph} & {V2.ph} & {V1.ft} & {V2.ft} & {V1.V2} & {V1.tm} & {V2.tm} & {V1} & {V2}\cr
## \hline
## 1 & 3.37 & 0.39 & 0.73 & 0.64 & 0.33 & -0.64 & -0.04 & 0.78 & 0.23 \cr
## 2 & 19.21 & 7.54 & 0.12 & 0.84 & -0.20 & 0.83 & -0.20 & 0.70 & 0.53 \cr
## 3 & 2.61 & 2.40 & 0.84 & 0.82 & 0.81 & -0.44 & -0.31 & 0.63 & 0.51 \cr
## 4 & 6.92 & 8.44 & 0.48 & 0.85 & 0.46 & 0.51 & 0.05 & 0.46 & 0.51 \cr
## 5 & 3.37 & 21.98 & 0.79 & 0.65 & 0.04 & -0.55 & 0.12 & 0.71 & 0.19 \cr
## 6 & 15.07 & 7.33 & 0.09 & 0.83 & -0.14 & 0.82 & -0.20 & 0.66 & 0.57 \cr
## 7 & 2.91 & 1.34 & 0.89 & 0.84 & 0.79 & -0.41 & -0.23 & 0.70 & 0.54 \cr
## 8 & 7.47 & 8.27 & 0.36 & 0.84 & 0.43 & 0.70 & 0.11 & 0.60 & 0.56 \cr
## 9 & 3.58 & 0.44 & 0.73 & 0.52 & 0.36 & -0.65 & -0.12 & 0.74 & 0.12 \cr
## 10 & 20.28 & 7.10 & 0.11 & 0.90 & -0.07 & 0.90 & -0.09 & 0.81 & 0.64 \cr
## 11 & 2.66 & 1.98 & 0.88 & 0.81 & 0.82 & -0.38 & -0.29 & 0.67 & 0.54 \cr
## 12 & 7.45 & 8.24 & 0.49 & 0.88 & 0.50 & 0.55 & 0.09 & 0.45 & 0.62 \cr
## 13 & 3.27 & 22.74 & 0.70 & 0.52 & 0.07 & -0.64 & 0.06 & 0.73 & 0.24 \cr
## 14 & 22.89 & 7.35 & 0.15 & 0.84 & -0.13 & 0.83 & -0.21 & 0.65 & 0.57 \cr
## 15 & 2.61 & 1.84 & 0.87 & 0.82 & 0.79 & -0.34 & -0.30 & 0.61 & 0.59 \cr
## 16 & 8.47 & 7.23 & 0.44 & 0.82 & 0.52 & 0.49 & 0.07 & 0.24 & 0.52 \cr
## 17 & 3.37 & 5.23 & 0.28 & 0.37 & 0.36 & -0.03 & -0.07 & 0.89 & 0.73 \cr
## \hline
## \end{tabular}
## \end{scriptsize}
## \end{center}
## \label{default}
## \end{table}
The R
packages nlme
and lme4
handle a variety of multilevel modeling procedures and can be used to
conduct random coefficient modeling (RCM), which is the formal term for
models that vary at more than one level.
RCM is done in nlme
with the lme
function
and in lme4
with the lmer
function. These
functions allow for the specification of fixed effects and random
effects within and across multiple levels. Therefore, main effects of
traits and states can be modeled using these functions, as can random
effects of states across traits (or any higher level variables).
To do this analysis we first found the mean positive and negative affect for each subject, and then centered these data around each individual’s overall mean (producing positive.cent and negative.cent). We show these effects (estimated with lme) and the random coefficients for each individual (extracted using the coef function) for the two variables (positive and negative affect) derived from the Fisher data set in Table~\(\ref{tab:ml:effects}\).
We see that negative emotional states lead to lower positive emotions, but that trait negative emotion does not affect state positive affect.
We prepare the Fisher data for random coefficient modeling by computing aggregate means (level 2 data) for each participant, merging the level 2 data with the level 1 state data, group-mean centering the state predictor variable around each participant’s mean, and merging the centered data. Then random coefficient models are conducted in the nlme package with the lme function and in the lme4 package with the lmer function. (We might need to install these packages first.)
Random coefficients for each participant are extracted with the coef function (Table~\(\ref{tab:ml:effects}\)).
Functions that work on nested data typically require using the
long
format, instead of the more traditional
wide
format. Thus, we need to do some work to convert the
data into the form we want. There are several ways of doing this.
The aggregate function will apply an arbitrary function to each subset of the data defined by a list; the statsBy function will report statistics for each value of a grouping variable.
#compute aggregate means for each participant
#affect.mean <- aggregate(fisher,list(affect.df$id),mean, na.rm = TRUE) #one way
affect.mean <- statsBy(affect.df,group="id")$mean
# another way
affect.mean.df <- data.frame(group= rownames(affect.mean),affect.mean)
#rename columns to prepare for merge
colnames(affect.mean.df) <- c("id","id.1","time.mean","positive.mean",
"negative.mean")
#merge participant means with individual responses
affect.mean.df <- merge(affect.df,affect.mean.df,by="id")
#group-mean center positive and negative affect
affect.centered <- affect.mean.df[,c(3,4)] - affect.mean.df[,c(7,8)]
#rename columns to prepare for merge
colnames(affect.centered) <- c("positive.cent","negative.cent")
#add centered variables to data frame
affect.mean.centered <- cbind(affect.mean.df,affect.centered)
We first fit the model with the nlme package (we may need to install it first).
#using the nlme package
library(nlme)
#this model predicts positive affect from time and negative affect (centered),
#and it allows the slopes relating positive affect to time and negative affect
# to vary across participants
pa.na.time.nlme <- lme(positive ~ time + negative.cent + negative.mean
+ negative.cent:negative.mean,
random= ~time + negative.cent|id,
data=affect.mean.centered,
na.action = na.omit)
summary(pa.na.time.nlme) #shows fixed and random effects
## Linear mixed-effects model fit by REML
## Data: affect.mean.centered
## AIC BIC logLik
## 6005.934 6061.953 -2990.967
##
## Random effects:
## Formula: ~time + negative.cent | id
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 18.5638708 (Intr) time
## time 0.1276904 -0.728
## negative.cent 0.3737404 -0.510 0.746
## Residual 9.9587116
##
## Fixed effects: positive ~ time + negative.cent + negative.mean + negative.cent:negative.mean
## Value Std.Error DF t-value p-value
## (Intercept) 44.67385 9.662604 779 4.623376 0.0000
## time -0.06154 0.043767 779 -1.406018 0.1601
## negative.cent -0.66095 0.218081 779 -3.030780 0.0025
## negative.mean -0.21649 0.254564 8 -0.850428 0.4198
## negative.cent:negative.mean 0.00886 0.005636 779 1.572081 0.1163
## Correlation:
## (Intr) time ngtv.c ngtv.m
## time -0.433
## negative.cent -0.180 0.379
## negative.mean -0.791 -0.001 0.014
## negative.cent:negative.mean 0.013 0.003 -0.816 -0.017
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.26512039 -0.53855711 -0.02485865 0.49572673 5.11318894
##
## Number of Observations: 792
## Number of Groups: 10
#extract the coefficients for each participant
coef.pa.time.nlme <- coef(pa.na.time.nlme)
round(coef.pa.time.nlme,2)
## (Intercept) time negative.cent negative.mean negative.cent:negative.mean
## 1 68.93 -0.25 -0.97 -0.22 0.01
## 2 79.08 -0.23 -1.32 -0.22 0.01
## 3 48.53 0.03 -0.11 -0.22 0.01
## 4 34.54 -0.13 -0.73 -0.22 0.01
## 5 22.71 0.13 -0.43 -0.22 0.01
## 6 53.14 -0.02 -0.41 -0.22 0.01
## 7 34.83 -0.06 -0.59 -0.22 0.01
## 8 38.58 -0.02 -0.53 -0.22 0.01
## 9 29.06 0.04 -0.60 -0.22 0.01
## 10 37.33 -0.11 -0.92 -0.22 0.01
describe(coef.pa.time.nlme) #describe the coefficients
## vars n mean sd median trimmed mad min max range skew
## (Intercept) 1 10 44.67 17.88 37.96 43.12 14.43 22.71 79.08 56.37 0.68
## time 2 10 -0.06 0.12 -0.04 -0.06 0.12 -0.25 0.13 0.37 -0.19
## negative.cent 3 10 -0.66 0.34 -0.59 -0.65 0.25 -1.32 -0.11 1.22 -0.34
## negative.mean 4 10 -0.22 0.00 -0.22 -0.22 0.00 -0.22 -0.22 0.00 -Inf
## negative.cent:negative.mean 5 10 0.01 0.00 0.01 0.01 0.00 0.01 0.01 0.00 NaN
## kurtosis se
## (Intercept) -0.97 5.65
## time -1.28 0.04
## negative.cent -0.77 0.11
## negative.mean NaN 0.00
## negative.cent:negative.mean NaN 0.00
We repeat the analysis with the lme4 package (we may need to install it first).
#using the lme4 package
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
pa.na.time.lme4 <- lmer(positive ~ time + negative.cent + negative.mean +
negative.cent:negative.mean + (time|id) + (negative.cent|id),
data = affect.mean.centered,
na.action = na.omit)
## boundary (singular) fit: see ?isSingular
summary(pa.na.time.lme4) #the summary function gives the important results
## Linear mixed model fit by REML ['lmerMod']
## Formula: positive ~ time + negative.cent + negative.mean + negative.cent:negative.mean +
## (time | id) + (negative.cent | id)
## Data: affect.mean.centered
##
## REML criterion at convergence: 5986.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1201 -0.5499 -0.0150 0.4816 5.1057
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 138.78189 11.7806
## time 0.01648 0.1284 -1.00
## id.1 (Intercept) 182.06416 13.4931
## negative.cent 0.14889 0.3859 -0.14
## Residual 99.02096 9.9509
## Number of obs: 792, groups: id, 10
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 45.245478 9.808903 4.613
## time -0.057221 0.044013 -1.300
## negative.cent -0.805451 0.274386 -2.935
## negative.mean -0.241187 0.265696 -0.908
## negative.cent:negative.mean 0.013242 0.007846 1.688
##
## Correlation of Fixed Effects:
## (Intr) time ngtv.c ngtv.m
## time -0.376
## negativ.cnt -0.114 0.004
## negative.mn -0.813 0.000 0.106
## ngtv.cnt:n. 0.099 0.002 -0.880 -0.123
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
coef.pa.na.time.lme4 <- coef(pa.na.time.lme4)
coef.pa.na.time.lme4
## $id
## (Intercept) time negative.cent negative.mean negative.cent:negative.mean
## 1 80.983299 -0.25191598 -0.9685410 -0.2411869 0.01324175
## 2 70.288405 -0.19364719 -1.4950520 -0.2411869 0.01324175
## 3 31.408686 0.01816544 -0.2780713 -0.2411869 0.01324175
## 4 59.004320 -0.13218390 -0.7478873 -0.2411869 0.01324175
## 5 7.784831 0.14686024 -0.6105931 -0.2411869 0.01324175
## 6 39.324214 -0.02495805 -0.5845356 -0.2411869 0.01324175
## 7 48.274951 -0.07372901 -0.6389093 -0.2411869 0.01324175
## 8 38.511729 -0.02053682 -0.7201792 -0.2411869 0.01324175
## 9 23.545957 0.06099432 -0.8363275 -0.2411869 0.01324175
## 10 53.328386 -0.10126053 -1.1744120 -0.2411869 0.01324175
##
## attr(,"class")
## [1] "coef.mer"
The statsBy
function from psych
will give
some useful statistics on the within subject correlational structure of
the data.
sb.affect <- statsBy(affect.df,group="id",cors=TRUE)
sb.affect #how much do the subjects differ?
## Statistics within and between groups
## Call: statsBy(data = affect.df, group = "id", cors = TRUE)
## Intraclass Correlation 1 (Percentage of variance due to groups)
## id time positive negative
## 1.00 0.10 0.64 0.70
## Intraclass Correlation 2 (Reliability of group differences)
## id time positive negative
## 1.00 0.90 0.99 0.99
## eta^2 between groups
## time.bg positive.bg negative.bg
## 0.10 0.62 0.68
##
## To see the correlations between and within groups, use the short=FALSE option in your print statement.
## Many results are not shown directly. To see specific objects select from the following list:
## mean sd n F ICC1 ICC2 ci1 ci2 r r.ci within pooled sd.r raw rbg ci.bg pbg rwg nw ci.wg pwg etabg etawg nWg nwg nG Call
names(sb.affect) #what are the possible objects to explore?
## [1] "mean" "sd" "n" "F" "ICC1" "ICC2" "ci1" "ci2" "r" "r.ci"
## [11] "within" "pooled" "sd.r" "raw" "rbg" "ci.bg" "pbg" "rwg" "nw" "ci.wg"
## [21] "pwg" "etabg" "etawg" "nWg" "nwg" "nG" "Call"
round(sb.affect$within,2) # what are the correlations within subjects
## time-postv time-negtv postv-negtv
## 1 -0.38 -0.18 -0.36
## 2 -0.48 0.53 -0.60
## 3 0.05 -0.03 0.35
## 4 -0.29 -0.36 -0.28
## 5 0.43 0.13 -0.28
## 6 0.03 -0.22 -0.05
## 7 -0.04 -0.32 -0.24
## 8 -0.01 -0.22 -0.16
## 9 0.29 0.11 -0.43
## 10 0.09 -0.48 -0.74
Compare these correlations with the graphics we drew earlier.
mlPlot(affect.df, grp = "id",time="time", items= 3:4, typ="p", pch=c(20,21))
Modern data collection techniques allow for intensive measurement within subjects. Analyzing this type of data requires working at the within-subject as well as the between-subject level. Although sometimes conclusions will be the same at both levels, it is frequently the case that examining within-subject data will show much more complex patterns of results than when the data are simply aggregated. This tutorial is a simple introduction to the kind of data analytic strategies that are possible.