Personality Project

Enhance your data visualisation with R

install.packages("ctv")
library(ctv)
install.views("Psychometrics")

library(psych)
library(sem) 

?psych

objects(package:psych)

.First <- function(library(psych))

help(read.table)  #or
?read.table   #another way of asking for help 

apropos("read") #returns all available functions with that term in their name.
RSiteSearch("read") #opens a webbrowser and searches voluminous files

apropos(table)  #lists all the commands that have the word "table" in them

apropos(table)
 [1]"ftable"              "model.tables" "pairwise.table"   "print.ftable"        "r2dtable"           
 [6]"read.ftable"         "write.ftable" ".__C__mtable"     ".__C__summary.table" ".__C__table"        
[11]"as.data.frame.table" "as.table"     "as.table.default" "is.table"           "margin.table"       
[16]"print.summary.table" "print.table"  "prop.table"       "read.table"       "read.table.url"     
[21]"summary.table"       "table"        "write.table"      "write.table0"   

#specify the name and address of the remote file

datafilename <- file.choose()     # use the OS to find the file 

#e.g.,  datafilename <- "Desktop/epi.big5.txt"  #locate the local directory  

person.data  <- read.table(datafilename,header=TRUE)  #read the data file

#Alternatively, to read in a comma delimited file:
#person.data  <- read.table(datafilename,header=TRUE,sep=",")
 my.data <- read.clipboard()      #or
 my.data <- read.clipboard.csv()  #if comma delimited
 my.data <- read.clipboard.tab()  #if tab delimited (e.g., from Excel

#specify the name and address of the remote file
datafilename <- "https://personality-project.org/r/datasets/maps.mixx.epi.bfi.data"
#datafilename <- "Desktop/epi.big5.txt"  #read from local directory  or
# datafilename <- file.choose()     # use the OS to find the file 
#in all cases
person.data  <- read.table(datafilename,header=TRUE)  #read the data file

#Alternatively, to read in a comma delimited file:
#person.data  <- read.table(datafilename,header=TRUE,sep=",")  

names(person.data)  #list the names of the variables

> datafilename <- "https://personality-project.org/r/datasets/maps.mixx.epi.bfi.data"
> #datafilename <- "Desktop/epi.big5.txt"  #read from local directory
> #person.data  <- read.table(datafilename,header=TRUE)  #read the data file
> person.data <- read.file(datafilename)  #this assumes header information
> #Alternatively, to read in a comma delimited file:
> #person.data  <- read.table(datafilename,header=TRUE,sep=",")  

> names(person.data)  #list the names of the variables
 [1] "epiE"     "epiS"     "epiImp"   "epilie"    "epiNeur"   "bfagree"  "bfcon"    "bfext" 
 [9] "bfneur"   "bfopen"   "bdi"      "traitanx"  "stateanx"
person.data[5,8]         #the 5th subject, 8th variable  or
person.data[5,"bfext"]   #5th subject,  "Big Five Inventory - Extraversion" variable
#or  
person.data[5,]         #To list all the data for a particular subject (e.g, the 5th subject) 
person.data[5:10,]      #list cases 5 - 10
person.data[5:10,"bfext"]  #list  just the extraversion scores for subjects 5-10
person.data[5:10,4:8]      #list the 4th through 8th variables for subjects 5 - 10.
subset(person.data,epilie>6)    #print the data meeting the logical condition epilie>6
#also try
person.data[person.data["epilie"]>6,]

epiE epiS epiImp epilie epiNeur bfagree bfcon bfext bfneur bfopen bdi traitanx stateanx
16    11    5      6      7      13     126    78   112     83    132   4       45       28
212    6    4      1      7       4     147   119   102     81    142   2       26       21
subset(person.data[,2:5],epilie>5 & epiNeur<3) #notice that we are taking the logical 'and'  

 epiS epiImp epilie epiNeur
12    12      3      6       1
118    8      2      6       2

subset(person.data[,3:7], epilie>6 | epiNeur ==2 ) #do a logical 'or' of the two conditions

epi <- subset(data,select=epiE:epiNeur)  #select particular variables from a data frame

datafilename <- "https://personality-project.org/r/datasets/finkel.sav"   #remote file

eli.data <- read.file(datafilename) 

save(object,file="local name") #save an object (e.g., a correlation matrix) for later analysis
load(file)         	       #gets the object (e.g., the correlation matrix back)
load(url("https://personality-project.org/r/datasets/big5r.txt"))  #get the correlation matrix
ls() #shows the variables in the workspace
help (name) #provides help about a function "name"  

? name #does the same

rm(variable) #removes that variable

rm(list = ls()) #removes all variables from the work space
attach("table") #makes the elements of "table" available to be called directly (make sure to always detach later. 
#  The use of attach ... detach has become discouraged.

names(table) #what are the variables inside the table

variable <-c(value1,value2,value3 ...)   #assigns values to a variable.
 # column bind (combine) variables to make up a new array with these variables

newvariable <- cbind (variable1, variable2, variable3 ... variable n)  

#  e.g.

ls()     #show the variables in the workspace

datafilename <- "https://personality-project.org/R/datasets/maps.mixx.epi.bfi.data" 
person.data <-  read.file(datafilename)  #read the data file
names(person.data)  #list the names of the variables
colnames(person.data) #specify that you want the names of the columns

attach(person.data)      #make the separate variable available -- always do detach when finished. 
             #The with construct is better.

new.epi <-  cbind(epiE,epiS,epiImp,epilie,epiNeur)       #form a new variable "epi" 
epi.df <- data.frame(new.epi)      #actually, more useful to treat this variable as a data frame 

bfi.df <- data.frame(cbind(bfext,bfneur,bfagree,bfcon,bfopen))    
	#create bfi as a data frame as well

detach(person.data)   #  very important to detach after an attach

#alternatively:
with(person.data,{
    epi <-  cbind(epiE,epiS,epiImp,epilie,epiNeur)       #form a new variable "epi" 
    epi.df <- data.frame(epi)      #actually, more useful to treat this variable as a data frame 

bfi.df <- data.frame(cbind(bfext,bfneur,bfagree,bfcon,bfopen))    
	#create bfi as a data frame as well
   epi.df <- data.frame(epi)      #actually, more useful to treat this variable as a data frame 

bfi.df <- data.frame(cbind(bfext,bfneur,bfagree,bfcon,bfopen))    
	#create bfi as a data frame as well

describe(bfi.df)
}  #end of the stuff to be done within the with command
)   #end of the with command

#alternatively:
epi.df <- data.frame(with(person.data,{ cbind(epiE,epiS,epiImp,epilie,epiNeur)}  )) #actually, more useful to treat this variable as a data frame 

bfi.df <- data.frame(with(person.data,cbind(bfext,bfneur,bfagree,bfcon,bfopen)) )   
	#create bfi as a data frame as well
   epi.df <- data.frame(epi)      #actually, more useful to treat this variable as a data frame 

epi <-  person.data[c("epiE","epiS","epiImp","epilie","epiNeur") ]      #form a new variable "epi" 
epi.df <- data.frame(epi)      #actually, more useful to treat this variable as a data frame 

bfi.df <- data.frame(person.data[c(9,10,7,8,11)])   #create bfi as a data frame as well

ls()    #show the variables
y <- edit(person.data)    
	#show the data.frame or matrix x in a text editor and save changes to y
fix(person.data)          
	#show the data.frame or matrix x in a text editor 
invisibible(edit(x))
        #creates an edit window without also printing to console -- directly make changes.
        #Similar to the most basic spreadsheet.  Very dangerous!
head(x)  #show the first few lines of a data.frame or matrix
tail(x)  #show the last few lines of a data.frame or matrix
str(x)  #show the structure of x

attach(person.data)   #make the variables inside person data available
imp2 <- epiImp*epiImp    #square epiImp
si <- epiS+epiImp        #sum sociability and impulsivity
statetotrait <- stateanx/traitanx  #find the ratio of state to trait anxiety
}) 
weirdproduct <- epi*bfi     #multiply respective elements in two tables
meanimp <- mean(imp)
stdevimp <- sd(imp)
standarizedimp <- scale(imp,scale=TRUE)    #center and then divide by the standard deviation
detach(person.data)   #and make sure to detach the variable when you are finished with it

person.data[person.data == -9] <- NA    
	#replace all cases where a particular code is observed with NA

males=subset(person.data |gender ==1)
	#x=subset(y|condition)

iris2 <- round(iris[1:10,1:4])   #get a sample data set
key <- iris2[1,]    #make up an arbitary answer key
score <- t(t( rowSums((iris2 == c(key[]))+0)))   #look for correct item and add them up
mydata <- data.frame(iris2,score)     #
key         #what is the scoring key
mydata      #show the data with the number of items that match the key

describe(epi.df)  

         var   n  mean   sd median trimmed  mad min max range  skew kurtosis   se
epiE      1 231 13.33 4.14     14   13.49 4.45   1  22    21 -0.33    -0.06 0.27
epiS      2 231  7.58 2.69      8    7.77 2.97   0  13    13 -0.57    -0.02 0.18
epiImp    3 231  4.37 1.88      4    4.36 1.48   0   9     9  0.06    -0.62 0.12
epilie    4 231  2.38 1.50      2    2.27 1.48   0   7     7  0.66     0.24 0.10
epiNeur   5 231 10.41 4.90     10   10.39 4.45   0  23    23  0.06    -0.50 0.32

 epiz <- scale(epi)          #centers (around the mean) and scales by the sd  
 epic <- scale(epi,scale=FALSE) #centers but does not Scale

 describe(epiz)
 describe(epic)

 >  describe(epiz)
        var   n mean sd median trimmed  mad   min  max range  skew kurtosis   se
epiE      1 231    0  1   0.16    0.04 1.08 -2.98 2.10  5.08 -0.33    -0.06 0.07
epiS      2 231    0  1   0.15    0.07 1.10 -2.82 2.01  4.84 -0.57    -0.02 0.07
epiImp    3 231    0  1  -0.20    0.00 0.79 -2.32 2.46  4.78  0.06    -0.62 0.07
epilie    4 231    0  1  -0.25   -0.07 0.99 -1.59 3.09  4.68  0.66     0.24 0.07
epiNeur   5 231    0  1  -0.08    0.00 0.91 -2.12 2.57  4.69  0.06    -0.50 0.07
>  describe(epic)
        var   n mean   sd median trimmed  mad    min   max range  skew kurtosis   se
epiE      1 231    0 4.14   0.67    0.16 4.45 -12.33  8.67    21 -0.33    -0.06 0.27
epiS      2 231    0 2.69   0.42    0.18 2.97  -7.58  5.42    13 -0.57    -0.02 0.18
epiImp    3 231    0 1.88  -0.37   -0.01 1.48  -4.37  4.63     9  0.06    -0.62 0.12
epilie    4 231    0 1.50  -0.38   -0.11 1.48  -2.38  4.62     7  0.66     0.24 0.10
epiNeur   5 231    0 4.90  -0.41   -0.02 4.45 -10.41 12.59    23  0.06    -0.50 0.32

 round(apply(epi,2,mean),1)
 round(apply(epi,2,var),2)
 round(apply(epi,2,sd),3)

apply(epi,2,fivenum)      #give the lowest, 25%, median, 75% and highest value  (compare to summary)

 describe(epi.df)      #use the describe function 

        var   n  mean   sd median trimmed  mad min max range  skew kurtosis   se
epiE      1 231 13.33 4.14     14   13.49 4.45   1  22    21 -0.33    -0.06 0.27
epiS      2 231  7.58 2.69      8    7.77 2.97   0  13    13 -0.57    -0.02 0.18
epiImp    3 231  4.37 1.88      4    4.36 1.48   0   9     9  0.06    -0.62 0.12
epilie    4 231  2.38 1.50      2    2.27 1.48   0   7     7  0.66     0.24 0.10
epiNeur   5 231 10.41 4.90     10   10.39 4.45   0  23    23  0.06    -0.50 0.32

 stem(person.data$bfneur)       #stem and leaf diagram 

  The decimal point is 1 digit(s) to the right of the |

   3 | 45
   4 | 26778
   5 | 000011122344556678899
   6 | 000011111222233344567888899
   7 | 00000111222234444566666777889999
   8 | 0011111223334444455556677789
   9 | 00011122233333444555667788888888899
  10 | 00000011112222233333444444455556667778899
  11 | 000122222444556677899
  12 | 03333577889
  13 | 0144
  14 | 224
  15 | 2

 round(cor(epi.df),2)

 #correlation matrix with values rounded to 2 decimals

         epiE  epiS epiImp epilie epiNeur
epiE     1.00  0.85   0.80  -0.22   -0.18
epiS     0.85  1.00   0.43  -0.05   -0.22
epiImp   0.80  0.43   1.00  -0.24   -0.07
epilie  -0.22 -0.05  -0.24   1.00   -0.25
epiNeur -0.18 -0.22  -0.07  -0.25    1.00

 round(cor(epi.df,bfi.df),2)

 #cross correlations between the 5 EPI scales and the 5 BFI scales

       bfext bfneur bfagree bfcon bfopen
epiE     0.54  -0.09    0.18 -0.11   0.14
epiS     0.58  -0.07    0.20  0.05   0.15
epiImp   0.35  -0.09    0.08 -0.24   0.07
epilie  -0.04  -0.22    0.17  0.23  -0.03
epiNeur -0.17   0.63   -0.08 -0.13   0.09

 corr.test(sat.act)

  > corr.test(epi.df)
Call:corr.test(x = epi.df)
Correlation matrix 
         epiE  epiS epiImp epilie epiNeur
epiE     1.00  0.85   0.80  -0.22   -0.18
epiS     0.85  1.00   0.43  -0.05   -0.22
epiImp   0.80  0.43   1.00  -0.24   -0.07
epilie  -0.22 -0.05  -0.24   1.00   -0.25
epiNeur -0.18 -0.22  -0.07  -0.25    1.00
Sample Size 
        epiE epiS epiImp epilie epiNeur
epiE     231  231    231    231     231
epiS     231  231    231    231     231
epiImp   231  231    231    231     231
epilie   231  231    231    231     231
epiNeur  231  231    231    231     231
Probability values (Entries above the diagonal are adjusted for multiple tests.) 
        epiE epiS epiImp epilie epiNeur
epiE    0.00 0.00   0.00   0.00    0.02
epiS    0.00 0.00   0.00   0.53    0.00
epiImp  0.00 0.00   0.00   0.00    0.53
epilie  0.00 0.43   0.00   0.00    0.00
epiNeur 0.01 0.00   0.26   0.00    0.00

#see the graphic window for the output

boxplot(epi)        #boxplot of the five epi scales
hist(epiE)          #simple histogram
plot(epiE,epiImp)    #simple scatter plot
pairs(epi.df)           #splom plot

#tell where the data come from
datafilename="https://personality-project.org/r/datasets/R.appendix1.data"
data.ex1=read.table(datafilename,header=T)   #read the data into a table

aov.ex1 = aov(Alertness~Dosage,data=data.ex1)  #do the analysis of variance
summary(aov.ex1)                                    #show the summary table
print(model.tables(aov.ex1,"means"),digits=3)
	#report the means and the number of subjects/cell
boxplot(Alertness~Dosage,data=data.ex1)
	#graphical summary appears in graphics window

datafilename="https://personality-project.org/R/datasets/R.appendix2.data"
data.ex2=read.table(datafilename,header=T)   #read the data into a table
data.ex2                                      #show the data
aov.ex2 = aov(Alertness~Gender*Dosage,data=data.ex2)         #do the analysis of variance
summary(aov.ex2)                                    #show the summary table
print(model.tables(aov.ex2,"means"),digits=3)      
	#report the means and the number of subjects/cell
boxplot(Alertness~Dosage*Gender,data=data.ex2) 
	#graphical summary of means of the 4 cells
attach(data.ex2)
interaction.plot(Dosage,Gender,Alertness)  #another way to graph the means 
detach(data.ex2)

                      #Run the analysis:
datafilename="https://personality-project.org/r/datasets/R.appendix3.data"
data.ex3=read.table(datafilename,header=T)   #read the data into a table
data.ex3                                      #show the data
aov.ex3 = aov(Recall~Valence+Error(Subject/Valence),data.ex3)
summary(aov.ex3)
print(model.tables(aov.ex3,"means"),digits=3)       
	#report the means and the number of subjects/cell
boxplot(Recall~Valence,data=data.ex3)          #graphical output

datafilename="https://personality-project.org/r/datasets/R.appendix4.data"
data.ex4=read.table(datafilename,header=T)   #read the data into a table
 data.ex4                                      #show the data
 aov.ex4=aov(Recall~(Task*Valence)+Error(Subject/(Task*Valence)),data.ex4 )

summary(aov.ex4)
print(model.tables(aov.ex4,"means"),digits=3)       
	#report the means and the number of subjects/cell
boxplot(Recall~Task*Valence,data=data.ex4) #graphical summary of means of the 6 cells
attach(data.ex4)
interaction.plot(Valence,Task,Recall)    #another way to graph the interaction
detach(data.ex4)

datafilename="https://personality-project.org/r/datasets/R.appendix5.data"
data.ex5=read.table(datafilename,header=T)   #read the data into a table
#data.ex5                                      #show the data
aov.ex5 = 
aov(Recall~(Task*Valence*Gender*Dosage)+Error(Subject/(Task*Valence))+
(Gender*Dosage),data.ex5)

summary(aov.ex5)             
print(model.tables(aov.ex5,"means"),digits=3)       
	#report the means and the number of subjects/cell
boxplot(Recall~Task*Valence*Gender*Dosage,data=data.ex5) 
	#graphical summary of means of the 36 cells
boxplot(Recall~Task*Valence*Dosage,data=data.ex5)
	#graphical summary of means of  18 cells

datafilename="/Users/bill/Desktop/R.tutorial/datasets/recall1.data"
recall.data=read.table(datafilename,header=TRUE)
recall.data    #show the data

raw=recall.data[,1:8]              #just trial data 
#First set some specific paremeters for the analysis -- this allows
numcases=27                  #How many subjects are there?
numvariables=8               #How many repeated measures are there?
numreplications=2            #How many replications/subject?
numlevels1=2                 #specify the number of levels for within subject variable 1
numlevels2=2                 #specify the number of levels for within subject variable 2

stackedraw=stack(raw)        #convert the data array into a vector 
                             #add the various coding variables for the conditions
                             #make sure to check that this coding is correct
recall.raw.df=data.frame(recall=stackedraw,
	subj=factor(rep(paste("subj", 1:numcases, sep=""), numvariables)),
	replication=factor(rep(rep(c("1","2"), c(numcases, numcases)),
		numvariables/numreplications)),
	time=factor(rep(rep(c("short", "long"),
		c(numcases*numreplications, numcases*numreplications)),numlevels1)),
	study=rep(c("d45", "d90") ,c(numcases*numlevels1*numreplications,
		numcases*numlevels1*numreplications)))

recall.aov= aov(recall.values ~ time * study + Error(subj/(time * study)), data=recall.raw.df) 
	#do the ANOVA
summary(recall.aov)                                #show the output
print(model.tables(recall.aov,"means"),digits=3)   #show the cell means for the anova table

datafilename="https://personality-project.org/r/datasets/maps.mixx.epi.bfi.data"  
	#where are the data
personality.data =read.file(datafilename)  #read the data file
colnames(personality.data)     #what variables are in the data set?

model1 = lm(bdi~epiNeur,data=personality.data)  #simple regression of  beck depression on Neuroticism
summary(model1)   # basic statistical summary

Call:
lm(formula = bdi ~ epiNeur, data = personality.data)

Residuals:
     Min       1Q   Median       3Q      Max 
-11.9548  -3.1577  -0.7707   2.0452  16.4092 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.32129    0.73070   -0.44    0.661    
epiNeur      0.68200    0.06353   10.74   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4.721 on 229 degrees of freedom
Multiple R-squared:  0.3348,	Adjusted R-squared:  0.3319 
F-statistic: 115.3 on 1 and 229 DF,  p-value: < 2.2e-16

                  #pass parameters to the graphics device
op <- par(mfrow = c(2, 2), # 2 x 2 pictures on one plot
               pty = "s")       # square plotting region,
                                # independent of device size

plot(model1)     #diagnostic plots in the graphics window

model2=lm(bdi~epiNeur+traitanx)    #add in trait anxiety
summary(model2)    #basic output
plot(model2)

anova(model1,model2)             #compare the difference between the two models
model2.5=lm(bdi~epiNeur*traitanx) 
	#test for the interaction, note that the main effects are incorrect
summary(model2.5)               #because we need to 0 center the data
anova(model2,model2.5)          #compare the two models 

                                #rescale the data to do the analysis  
cneur=scale(epiNeur,scale=F)    #0 center epiNeur
zneur=scale(epiNeur,scale=T)     #standardize epiNeur
ctrait = scale(traitanx,scale=F) #0 center traitAnx

model3=lm(bdi~cneur+ctrait+cneur*ctrait)
summary(model3)                    #explicitly list the additive and interactive terms
plot(model3)

model4=lm(bdi~cneur*ctrait)
summary(model4)                    #note how this is exactly the same as the previous model

epi=cbind(epiS,epiImp,epilie,epiNeur) 
	#form a new variable "epi" without overall extraversion
epi=as.matrix(epi)      #actually, more useful to treat this variable as a matrix

bfi=as.matrix(cbind(bfext,bfneur,bfagree,bfcon,bfopen))    #create bfi as a matrix as well
epi=scale(epi,scale=T)   #standardize the epi
bfi=scale(bfi,scale=T)   #standardize the bfi

model5=lm(bdi~bfi)                 #model beck depression by the Big 5 inventory
summary(model5)
model6=lm(bdi~bfi+epi)  		
summary(model6)

model7 = lm(bdi~bfi*epi)          
	#additive model of epi and bfi as well as the interactions between the sets
summary(model7)                   #given as an example of overfitting
 ## At end of plotting, reset to previous settings:
     par(op)

#get the data
datafilename="https://personality-project.org/R/datasets/extraversion.items.txt" 
	#where are the data

items=read.table(datafilename,header=TRUE)                    #read the data

attach(items)         #make this the active path
E1=q_262 -q_1480 +q_819 -q_1180 +q_1742  +14  
					#find a five item extraversion scale 
                    #note that because the item responses ranged from 1-6, to reverse an item
                    #we subtract it from the maximum response possible + the minimum.  
                    #Since there were two reversed items, this is the same as adding 14

E1.df = data.frame(q_262 ,q_1480 ,q_819 ,q_1180 ,q_1742 ) #put these items into a data frame

summary(E1.df)                             #give summary statistics for these items
round(cor(E1.df,use="pair"),2)             #correlate the 5 items, rounded off to 2 decimals,
						#use pairwise cases
round(cor(E1.df,E1,use="pair"),2)          #show the item by scale correlations
                                           #define a function to find the alpha coefficient

alpha.scale=function (x,y)   #create a reusable function to find coefficient alpha
	#input to the function are a scale and a data.frame of the items in the scale
        {
                Vi=sum(diag(var(y,na.rm=TRUE)))     #sum of item variance
                Vt=var(x,na.rm=TRUE)                #total test variance
                n=dim(y)[2]     #how many items are in the scale?  (calculated dynamically)
                ((Vt-Vi)/Vt)*(n/(n-1))}             #alpha

E.alpha=alpha.scale(E1,E1.df)  
	#find the alpha for the scale E1 made up of the 5 items in E1.df 
detach(items) 
	#take them out of the search path   

summary(E1.df)      #give summary statistics for these items
     q_262          q_1480          q_819           q_1180          q_1742     
 Min.   :1.00   Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
 1st Qu.:2.00   1st Qu.:2.000   1st Qu.:4.000   1st Qu.:2.000   1st Qu.:3.750  
 Median :3.00   Median :3.000   Median :5.000   Median :4.000   Median :5.000  
 Mean   :3.07   Mean   :2.885   Mean   :4.565   Mean   :3.295   Mean   :4.385  
 3rd Qu.:4.00   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:6.000  
 Max.   :6.00   Max.   :6.000   Max.   :6.000   Max.   :6.000   Max.   :6.000  

round(cor(E1.df,use="pair"),2)              
	#correlate the 5 items, rounded off to 2 decimals, use complete cases
       q_262 q_1480 q_819 q_1180 q_1742
q_262   1.00  -0.26  0.41  -0.51   0.48
q_1480 -0.26   1.00 -0.66   0.52  -0.47
q_819   0.41  -0.66  1.00  -0.41   0.65
q_1180 -0.51   0.52 -0.41   1.00  -0.49
q_1742  0.48  -0.47  0.65  -0.49   1.00

round(cor(E1.df,E1,use="pair"),2)      #show the item by scale correlations
        [,1]
q_262   0.71
q_1480 -0.75
q_819   0.80
q_1180 -0.78
q_1742  0.80

alpha.scale=function (x,y)   
	#find coefficient alpha given a scale and a data.frame of the items in the scale
+ 	{
+ 	Vi=sum(diag(var(y,na.rm=TRUE)))     #sum of item variance
+ 	Vt=var(x,na.rm=TRUE)                #total test variance
+   	n=dim(y)[2]                #how many items are in the scale? (calculated dynamically)
+   	((Vt-Vi)/Vt)*(n/(n-1))}    #alpha
> 
> 
>

E.alpha=alpha.scale(E1,E1.df,5)      
	#find the alpha for the scale E1 made up of the 5 items in E1.df 

E.alpha
[1] 0.822683

E1.df <- with(items,data.frame(q_262 ,q_1480 ,q_819 ,q_1180 ,q_1742 ))  
	#another way to create the data.frame
alpha(E1.df) 

Reliability analysis   
Call: alpha(x = E1.df)

  raw_alpha std.alpha G6(smc) average_r mean   sd
      0.82      0.83    0.83      0.49  3.6 0.52

 Reliability if an item is dropped:
        raw_alpha std.alpha G6(smc) average_r
q_262        0.82      0.82    0.80      0.53
q_1480-      0.79      0.80    0.76      0.49
q_819        0.77      0.77    0.74      0.46
q_1180-      0.79      0.79    0.77      0.49
q_1742       0.77      0.78    0.77      0.46

 Item statistics 
          n    r r.cor r.drop mean  sd
q_262   200 0.70  0.58   0.52  3.1 1.5
q_1480- 200 0.76  0.70   0.60  2.9 1.4
q_819   200 0.82  0.78   0.69  4.6 1.2
q_1180- 200 0.77  0.69   0.62  3.3 1.5
q_1742  200 0.80  0.74   0.67  4.4 1.4

Non missing response frequency for each item
          0    1    2    3    4    5    6 miss
q_262  0.00 0.18 0.20 0.22 0.22 0.12 0.06    0
q_1480 0.00 0.18 0.25 0.18 0.26 0.08 0.03    0
q_819  0.00 0.02 0.06 0.12 0.17 0.42 0.22    0
q_1180 0.01 0.14 0.19 0.16 0.30 0.14 0.06    0
q_1742 0.00 0.04 0.08 0.13 0.22 0.26 0.27    0
Warning message:
In alpha(E1.df) :
  Some items were negatively correlated with total scale and were automatically reversed

iris2 <- round(iris[1:10,1:4])   #get a sample data set
key <- iris2[1,]    #make up an arbitary answer key
score <- t(t( rowSums((iris2 == c(key[]))+0)))   #look for correct item and add them up
mydata <- data.frame(iris2,score)     #
key         #what is the scoring key
mydata      #show the data with the number of items that match the key

data(iqitems)
iq.keys <- c(4,4,3,1,4,3,2,3,1,4,1,3,4,3)
score.multiple.choice(iq.keys,iqitems)
#convert them to true false 
iq.scrub <- scrub(iqitems,isvalue=0)  #first get rid of the zero responses
iq.tf <-  score.multiple.choice(iq.keys,iq.scrub,score=FALSE) 
#convert to wrong (0) and correct (1) for analysis
describe(iq.tf)

Call: score.multiple.choice(key = iq.keys, data = iqitems)

(Unstandardized) Alpha:
[1] 0.63

Average item correlation:
[1] 0.11

item statistics 
     key    0    1    2    3    4    5    6 miss    r    n mean   sd  skew kurtosis   se
iq1    4 0.04 0.01 0.03 0.09 0.80 0.02 0.01    0 0.59 1000 0.80 0.40 -1.51     0.27 0.01
iq8    4 0.03 0.10 0.01 0.02 0.80 0.01 0.04    0 0.39 1000 0.80 0.40 -1.49     0.22 0.01
iq10   3 0.10 0.22 0.09 0.37 0.04 0.13 0.04    0 0.35 1000 0.37 0.48  0.53    -1.72 0.02
iq15   1 0.03 0.65 0.16 0.15 0.00 0.00 0.00    0 0.35 1000 0.65 0.48 -0.63    -1.60 0.02
iq20   4 0.03 0.02 0.03 0.03 0.85 0.02 0.01    0 0.42 1000 0.85 0.35 -2.00     2.01 0.01
iq44   3 0.03 0.10 0.06 0.64 0.02 0.14 0.01    0 0.42 1000 0.64 0.48 -0.61    -1.64 0.02
iq47   2 0.04 0.08 0.59 0.06 0.11 0.07 0.05    0 0.51 1000 0.59 0.49 -0.35    -1.88 0.02
iq2    3 0.07 0.08 0.31 0.32 0.15 0.05 0.02    0 0.26 1000 0.32 0.46  0.80    -1.37 0.01
iq11   1 0.04 0.87 0.03 0.01 0.01 0.01 0.04    0 0.54 1000 0.87 0.34 -2.15     2.61 0.01
iq16   4 0.05 0.05 0.08 0.07 0.74 0.01 0.00    0 0.56 1000 0.74 0.44 -1.11    -0.77 0.01
iq32   1 0.04 0.54 0.02 0.14 0.10 0.04 0.12    0 0.50 1000 0.54 0.50 -0.17    -1.97 0.02
iq37   3 0.07 0.10 0.09 0.26 0.13 0.02 0.34    0 0.23 1000 0.26 0.44  1.12    -0.74 0.01
iq43   4 0.04 0.07 0.04 0.02 0.78 0.03 0.00    0 0.50 1000 0.78 0.41 -1.35    -0.18 0.01
iq49   3 0.06 0.27 0.09 0.32 0.14 0.08 0.05    0 0.28 1000 0.32 0.47  0.79    -1.38 0.01
> #convert them to true false 
> iq.scrub <- scrub(iqitems,isvalue=0)  #first get rid of the zero responses
> iq.tf <-  score.multiple.choice(iq.keys,iq.scrub,score=FALSE)
#convert to wrong (0) and correct (1) for analysis
> describe(iq.tf)
     var   n mean   sd median trimmed mad min max range  skew kurtosis   se
iq1    1 965 0.83 0.38      1    0.91   0   0   1     1 -1.75     1.08 0.01
iq8    2 972 0.82 0.38      1    0.90   0   0   1     1 -1.68     0.83 0.01
iq10   3 900 0.41 0.49      0    0.39   0   0   1     1  0.36    -1.88 0.02
iq15   4 968 0.67 0.47      1    0.72   0   0   1     1 -0.73    -1.46 0.02
iq20   5 972 0.88 0.33      1    0.97   0   0   1     1 -2.31     3.36 0.01
iq44   6 971 0.66 0.47      1    0.71   0   0   1     1 -0.69    -1.52 0.02
iq47   7 955 0.61 0.49      1    0.64   0   0   1     1 -0.47    -1.78 0.02
iq2    8 929 0.34 0.47      0    0.30   0   0   1     1  0.68    -1.54 0.02
iq11   9 964 0.90 0.30      1    1.00   0   0   1     1 -2.63     4.93 0.01
iq16  10 953 0.78 0.41      1    0.85   0   0   1     1 -1.35    -0.19 0.01
iq32  11 962 0.56 0.50      1    0.58   0   0   1     1 -0.26    -1.93 0.02
iq37  12 928 0.27 0.45      0    0.22   0   0   1     1  1.01    -0.99 0.01
iq43  13 958 0.81 0.39      1    0.89   0   0   1     1 -1.61     0.60 0.01
iq49  14 939 0.34 0.47      0    0.30   0   0   1     1  0.69    -1.53 0.02

#specify the name and address of the remote file
datafilename="https://personality-project.org/r/datasets/maps.mixx.msq1.epi.bf.txt"
#note that it is also available as built in example in the psych package named msq
msq =read.table(datafilename,header=TRUE)  #read the data file
mymsq=msq[,2:72]             #select the subset of items in the MSQ

mymsq[mymsq=="9"] = NA       # change all occurences of 9 to be missing values 
mymsq <- data.frame(mymsq)    #convert the input matrix into a data frame for easier manipulation

names(mymsq)                        #what are the variables?
describe(mymsq)                     #basic summary statistics -- check for miscodings

cleaned <- na.omit(mymsq)     #remove the cases with missing values
f2 <- fa(cleaned,2,rotation="varimax")   		#factor analyze the resulting item
#(f2)                                              #show the result
load=loadings(f2)
print(load,sort=TRUE,digits=2,cutoff=0.01)  #show the loadings       
plot(load)                                 #plot factor 1 by 2
identify(load,labels=names(msq))           
 	#put names of selected points onto the figure  -- to stop, click with command key
 plot(f2,labels=names(msq))

 msqcovar=cov(mymsq,use="pair")  #find the covariance matrix for later factoring
 f3=factanal(x, factors=3, data = NULL, covmat = msqcovar, n.obs = 300,start=NULL,
 	rotation = "varimax")
 f4=factanal(x, factors=4, data = NULL, covmat = msqcovar, n.obs = 300,start=NULL,
 	rotation = "varimax")

factor.congruence(list(f2,f3,f4))

          MR1   MR2 Factor1 Factor2 Factor3 Factor1 Factor2 Factor3 Factor4
MR1      1.00 -0.01    0.99   -0.09   -0.14    0.99   -0.09   -0.02   -0.64
MR2     -0.01  1.00   -0.11    0.99   -0.25   -0.08    0.99   -0.37    0.18
Factor1  0.99 -0.11    1.00   -0.18   -0.07    0.99   -0.18    0.06   -0.64
Factor2 -0.09  0.99   -0.18    1.00   -0.14   -0.15    1.00   -0.27    0.29
Factor3 -0.14 -0.25   -0.07   -0.14    1.00   -0.03   -0.13    0.95    0.46
Factor1  0.99 -0.08    0.99   -0.15   -0.03    1.00   -0.15    0.06   -0.56
Factor2 -0.09  0.99   -0.18    1.00   -0.13   -0.15    1.00   -0.25    0.25
Factor3 -0.02 -0.37    0.06   -0.27    0.95    0.06   -0.25    1.00    0.19
Factor4 -0.64  0.18   -0.64    0.29    0.46   -0.56    0.25    0.19    1.00

library(psych)
my.vss <- VSS(mydata,n=8,rotate="none",diagonal=FALSE,...)   #compares up to 8 factors
op <- par(mfrow=c(1,2))  #make a two panel graph
VSS.plot(my.vss)  #shows a simple summary of VSS
VSS.scree(cor(mydata),main="scree plot of principal components of mydata")

r.mat<- Harman74.cor$cov
ic.demo <- ICLUST(r.mat)
ICLUST.graph(ic.demo,out.file = ic.demo.dot)

pnorm(1,mean=0,sd=1)
[1] 0.8413447

pnorm(1)       #default values of mean=0, sd=1 are used)
pnorm(1,1,10)  #parameters may be passed if in default order or by name

samplesize=1000
size.r=.6
theta=rnorm(samplesize,0,1)      #generate some random normal deviates
e1=rnorm(samplesize,0,1)         #generate errors for x
e2=rnorm(samplesize,0,1)         #generate errors for y
weight=sqrt(size.r)              #weight as a function of correlation
x=weight*theta+e1*sqrt(1-size.r) #combine true score (theta) with error
y=weight*theta+e2*sqrt(1-size.r)
cor(x,y)                          #correlate the resulting pair
df=data.frame(cbind(theta,e1,e2,x,y))  #form a data frame to hold all of the elements
round(cor(df),2)               #show the correlational structure
pairs.panels(df)                #plot the correlational structure (assumes psych package)

library(mvtnorm)
samplesize=1000
size.r=.6
sigmamatrix <- matrix( c(1,sqrt(size.r),sqrt(size.r),sqrt(size.r),1,size.r,
	sqrt(size.r),size.r,1),ncol=3)
xy <- rmvnorm(samplesize,sigma=sigmamatrix)
round(cor(xy),2)
pairs.panels(xy) 	#assumes the psych package

source("https://personality-project.org/r/useful.r")  #some basic data manipulation procedures
source("https://personality-project.org/r/vss.r")     #the Very Simple Structure package

                                         #read files with labels in first row
read.table(filename,header=TRUE)           #read a tab or space delimited file
read.table(filename,header=TRUE,sep=',')   #read csv files (comma separated)

x=c(1,2,4,8,16 )                             #create a data vector with specified elements
y=c(1:8,1:4)                               #creat a data vector with 12 entries
matr=rbind(1:8,1:4)                         #create two rows in a    2 * 8 matrix
matc=cbind(1:8,1:4)                         #create two columns in a 8 * 2 matrix
n=10
x1=c(rnorm(n))                              #create a n item vector of random normal deviates
y1=c(runif(n))+n         
	#create another n item vector that has n added to each random uniform distribution
z=rbinom(n,size,prob)                      
	#create n samples of size "size" with probability prob from the binomialitem
sample(x, size, replace = FALSE, prob = NULL)  
	#take a sample (with or without replacement) of size from x

vect=c(x,y)                                #combine them into one vector of length 2n
mat=cbind(x,y)                             #combine them into a n x 2 matrix (column wise)
mat[4,2]                                   #display the 4th row and the 2nd column
mat[3,]                                    #display the 3rd row
mat[,2]                                    #display the 2nd column
mat=cbind(rep(1:4,2),rep(4:1,2))           #create a 8 * 2 matrix with repeating elements
subset(data,logical)                       #those objects meeting a logical criterion
subset(data.df,select=variables,logical)  
	#get those objects from a data frame that meet a criterion 

ls()                                 #list the variables in the workspace
rm(x)                                #remove x from the workspace
rm(list=ls())                        #remove all the variables from the workspace
attach(mat)                          #make the names of the variables in the matrix available
detach(mat)                          #releases the names
new=old[,-n]                         #drop the nth column
new=old[n,]                          #drop the nth row
new=subset(old,logical)              #select those cases that meet the logical condition
complete = subset(data,complete.cases(data)) 
	#find those cases with no missing values
new=old[n1:n2,n3:n4]              #select the n1 through n2 rows of variables n3 through n4)

x.df=data.frame(x1,x2,x3 ...)        #combine different kinds of data into a data frame
	as.data.frame()
	is.data.frame()
x=as.matrix()
scale()         #converts a data frame to standardized scores
factor() 	#converts a numeric variable into a factor (essential for ANOVA)
gl(n,k,length)	#makes an n-level,k replicates, length long vectof factors
y <- edit(x) 	#opens a screen editor and saves changes made to x intoy
fix(x)		#opens a screen editor window and makes and saves changes to x

 max()
 min()
 mean()
 median()
 sum()
 var()     		#produces the variance covariance matrix
 sd()      		#standard deviation
 mad()     		#(median absolute deviation)
 fivenum() 		#Tukey fivenumbers min, lowerhinge, median, upper hinge, max
 scale(data,scale=T)   	#centers around the mean and scales by the sd)
 colSums(), rowSums(), colMeans(), rowMeans()   #see also apply(x,1,sum)
 rowsum(x,group)     	#sum by group

 cor(x,y,use="pair")   
 	#correlation matrix for pairwise complete data, use="complete" for complete cases

 t.test(x,y) #x is a data vector, y is a grouping vector  independent groups
 t.test(x,y,pair=TRUE) #x is a data vector, y is a grouping vector --  paired groups
 pairwise.t.test(x,g) does multiple comparisons of all groups defined by g

 aov(x~y,data=datafile)  #where x and y can be matrices
	 aov.ex1 = aov(Alertness~Dosage,data=data.ex1)  
	 	#do the analysis of variance or
	 aov.ex2 = aov(Alertness~Gender*Dosage,data=data.ex2)         
	 	#do a two way analysis of variance
	summary(aov.ex1)                                    
		#show the summary table
	print(model.tables(aov.ex1,"means"),digits=3)       
		#report the means and the number of subjects/cell
	boxplot(Alertness~Dosage,data=data.ex1)       
		#graphical summary appears in graphics window

 lm(x~y,data=dataset)                      #basic linear model where x and y can be matrices

 lm(Y~X)                                 #Y and X can be matrices
 lm(Y~X1+X2)
 lm(Y~X|W)                               #separate analyses for each level of W                             
 solve(A,B)                              #inverse of A * B   - used for linear regression
 solve(A)                                #inverse of A

     colSums (x, na.rm = FALSE, dims = 1)
     rowSums (x, na.rm = FALSE, dims = 1)
     colMeans(x, na.rm = FALSE, dims = 1)
     rowMeans(x, na.rm = FALSE, dims = 1)
     rowsum(x, group, reorder = TRUE, ...)         
     	#finds row sums for each level of a grouping variable
     apply(X, MARGIN, FUN, ...)                   
     	#applies the function (FUN) to either rows (1) or columns (2) on object X
     apply(x,1,min)                            
    	#finds the minimum for each row
     apply(x,2,max)                            
     	#finds the maximum for each column
     col.max(x)                                   
     	#another way to find which column has the maximum value for each row 
     which.min(x)
     which.max(x)
     z=apply(big5r,1,which.min)               
     	#tells the row with the minimum value for every column

stem()                                   #stem and leaf diagram

par(mfrow=c(2,1))                        #number of rows and columns to graph

boxplot(x,notch=T,names= grouping, main="title")                  #boxplot (box and whiskers)

hist()                                   #histogram
plot()
  plot(x,y,xlim=range(-1,1),ylim=range(-1,1),main=title)
  par(mfrow=c(1,1))     #change the graph window back to one figure
  symb=c(19,25,3,23)
  colors=c("black","red","green","blue")
  charact=c("S","T","N","H")
  plot(x,y,pch=symb[group],col=colors[group],bg=colors[condit],cex=1.5,main="main title")
  points(mPA,mNA,pch=symb[condit],cex=4.5,col=colors[condit],bg=colors[condit])

curve()
  abline(a,b)
  abline(a, b, untf = FALSE, ...)
  abline(h=, untf = FALSE, ...)
  abline(v=, untf = FALSE, ...)
  abline(coef=, untf = FALSE, ...)
  abline(reg=, untf = FALSE, ...)

identify()
	plot(eatar,eanta,xlim=range(-1,1),ylim=range(-1,1),main=title)
	identify(eatar,eanta,labels=labels(energysR[,1])  )       
		#dynamically puts names on the plots
locate()
pairs()                                  #SPLOM (scatter plot Matrix)

matplot ()                             #ordinate is row of the matrix
biplot ()                              #factor loadings and factor scores on same graph
coplot(x~y|z)                          #x by y conditioned on z
symb=c(19,25,3,23)                              #choose some nice plotting symbols
colors=c("black","red","green","blue")          #choose some nice colors

barplot()                  #simple bar plot
interaction.plot ()         #shows means for an ANOVA design

plot(degreedays,therms)               #show  the data points
by(heating,Location,function(x) abline(lm(therms~degreedays,data=x)))  
	#show the best fitting regression for each group

x= recordPlot()            #save the current plot device output in the object x
replayPlot(x)              #replot object x
dev.control                #various control functions for printing/saving graphic files

pnorm(1,mean=0,sd=1)
[1] 0.8413447

pnorm(1)       #default values of mean=0, sd=1 are used)
pnorm(1,1,10)  #parameters may be passed if in default order or by name

samplesize=1000
size.r=.6
theta=rnorm(samplesize,0,1)      #generate some random normal deviates
e1=rnorm(samplesize,0,1)         #generate errors for x
e2=rnorm(samplesize,0,1)         #generate errors for y
weight=sqrt(size.r)              #weight as a function of correlation
x=weight*theta+e1*sqrt(1-size.r) #combine true score (theta) with error
y=weight*theta+e2*sqrt(1-size.r)
cor(x,y)                          #correlate the resulting pair
df=data.frame(cbind(theta,e1,e2,x,y))  #form a data frame to hold all of the elements
round(cor(df),2)               #show the correlational structure
pairs.panels(df)                #plot the correlational structure (assumes psych package)

library(mvtnorm)
samplesize=1000
size.r=.6
sigmamatrix <- matrix( c(1,sqrt(size.r),sqrt(size.r),sqrt(size.r),1,size.r,
	sqrt(size.r),size.r,1),ncol=3)
xy <- rmvnorm(samplesize,sigma=sigmamatrix)
round(cor(xy),2)
pairs.panels(xy) 	#assumes the psych package

psych-package	A package for personality, psychometric, and psychological research
%+%	A function to add two vectors or matrices
alpha.scale	Cronbach alpha for a scale
circ.sim	Generate simulated data structures for circumplex or simple structure
circ.simulation	Simulations of circumplex and simple structure
circ.tests	Apply four tests of circumplex versus simple structure
cluster.cor	Find correlations of composite variables from a larger matrix
cluster.fit	cluster Fit: fit of the cluster model to a correlation matrix
cluster.loadings	Find item by cluster correlations, corrected for overlap and reliability
correct.cor	Find dis-attenuated correlations and give alpha reliabilities
count.pairwise	Count number of pairwise cases for a data set with missing (NA) data.
describe	Basic descriptive statistics useful for psychometrics
describe.by	Basic summary statistics by group
eigen.loadings	Extract eigen vectors, eigen values, show loadings
error.crosses	Plot x and y error bars
factor.congruence	Coefficient of factor congruence
factor.fit	How well does the factor model fit a correlation matrix. Part of the VSS package
factor.model	Find R = F F' + U2 is the basic factor model
fa	Minimum Residual, Principal Axis, GLS, MLE factor analysis
factor.pa	Principal Axis Factor Analysis
factor.residuals	R* = R- F F'
factor.rotate	"Hand" rotate a factor loading matrix
factor2cluster	Extract cluster definitions from factor loadings
fisherz	Fisher z transform of r
geometric.mean	Find the geometric mean of a vector or columns of a data.frame.
harmonic.mean	Find the harmonic mean of a vector, matrix, or columns of a data.frame
ICLUST	ICLUST: Item Cluster Analysis - Hierarchical cluster analysis using psychometric principles
ICLUST	iclust: Item Cluster Analysis - Hierarchical cluster analysis using psychometric principles
ICLUST.cluster	Function to form hierarchical cluster analysis of items
ICLUST.graph	create control code for ICLUST graphical output
ICLUST.sort	sort items by absolute size of cluster loadins
irt.fa	Item Response Theory using factor analysis of tetrachoric or polychoric correlations
irt.0p	Item Response Theory estimate of theta (ability) using a Rasch (like) model
irt.1p	Item Response Theory estimate of theta (ability) using a Rasch (like) model
irt.2p	Item Response Theory estimate of theta (ability) using a Rasch (like) model
irt.discrim	Simple function to estimate item difficulties using IRT concepts
irt.item.diff.rasch	Simple function to estimate item difficulties using IRT concepts
irt.person.rasch	Item Response Theory estimate of theta (ability) using a Rasch (like) model
kurtosi	Kurtosis of a vector, matrix, or data frame
make.hierarchical	Create a population or sample correlation matrix with hierachical structure.
mat.regress	Multiple Regression from matrix input
multi.hist	Multiple histograms on one screen
omega	Calculate the omega estimate of factor saturation
paired.r	Test the difference between paired correlations
pairs.panels	SPLOM, histograms and correlations for a data matrix
panel.cor	SPLOM, histograms and correlations for a data matrix
panel.hist	SPLOM, histograms and correlations for a data matrix
phi	Find the phi coefficient of correlation between two dichotomous variables
phi2poly	Convert a phi coefficient to a polychoric correlation
polychoric	Find polychoric correlations
principal	Principal components analysis
psych	A package for personality, psychometric, and psychological research
psycho.demo	Create demo data for psychometrics
read.clipboard	shortcut for reading from the clipboard
schmid	Apply the Schmid Leiman transformation to a correlation matrix
score.alpha	Score scales and find Cronbach's alpha as well as associated statistics
score.items	Score item composite scales and find Cronbach's alpha as well as associated statistics
skew	Calculate skew for a vector, matrix, or data.frame
VSS	Apply the Very Simple Structure criterion to determine the appropriate number of factors.
VSS.parallel	Compare real and random VSS solutions
VSS.plot	Plot VSS fits
VSS.scree	Plot a scree test
VSS.simulate	create VSS like data

Help and Guidance

Package vignettes

Help and Guidance

From a text file

From the web

Data input example

Basic descriptive statistics

The describe function.

Simple graphical descriptions: the stem and leaf diagram

Correlations

Testing the significance of a set of correlations

Reorganizing the data for within subject analyses

Scale Construction and Reliability

Using the alpha function from psych

Scoring multiple choice tests

Factor Analysis

Factor rotations

Multidimensional scaling

Further topics

Structural Equation Modeling

Multilevel Models/ Hierarchical Level Models

Simulating data structures and probability tables

Input and display

moving around

data manipulation

Statistics and transformations

Useful additional commands

Graphics