Using the psych package for descriptive statistics

library(psych) # attach the psych package
library(psychTools) # attach psychTools (necessary for read.file)
# specify a file to read (here, a remote text file)
filename <- "http://personality-project.org/r/datasets/simulation.txt"
# read the file into my.data
my.data <- read.file(filename)
## Data from the .txt file http://personality-project.org/r/datasets/simulation.txt has been loaded.

#Get basic descriptives

First, find out how many rows and columns there are and then, if there are not too many, show the first and last few rows.

dim(my.data) # number of rows and columns
## [1] 72  7
names(my.data) # column (variable) names
## [1] "Time"        "Anxiety"     "Impulsivity" "sex"         "Arousal"     "Tension"    
## [7] "Performance"
headTail(my.data) # show the first and last few rows, with an ellipsis row in between
##     Time Anxiety Impulsivity sex Arousal Tension Performance
## 1      9       4           9   1      50      55          40
## 2     19       8           8   1      70      64          90
## 3      9       5          10   2      50      69          48
## 4      9       4           1   2      57      55          68
## ...  ...     ...         ... ...     ...     ...         ...
## 69    19       6           1   1      66      53          88
## 70     9       5          10   2      48      63          40
## 71    19       6           8   2      69      60          95
## 72    19      10           1   2      66      48          93

Then get the basic descriptive statistics

describe(my.data) # one row of summary statistics (n, mean, sd, median, ..., se) per variable
##             vars  n  mean    sd median trimmed   mad min max range  skew kurtosis   se
## Time           1 72 14.28  5.03   19.0   14.34  0.00   9  19    10 -0.11    -2.02 0.59
## Anxiety        2 72  5.24  2.18    5.0    5.24  2.97   0  10    10 -0.04    -0.65 0.26
## Impulsivity    3 72  4.90  3.98    4.5    4.88  5.19   0  10    10  0.02    -1.83 0.47
## sex            4 72  1.50  0.50    1.5    1.50  0.74   1   2     1  0.00    -2.03 0.06
## Arousal        5 72 60.90  8.10   66.0   61.29  5.93  48  70    22 -0.27    -1.67 0.96
## Tension        6 72 56.83  6.29   57.0   57.14  5.93  38  69    31 -0.53     0.42 0.74
## Performance    7 72 72.21 17.41   78.0   73.19 18.53  38  98    60 -0.43    -1.10 2.05

#Basic descriptive graphs: box plots and violin plots

boxplot(my.data[5:7]) # box plots of columns 5-7 (Arousal, Tension, Performance)

violinBy(my.data) # violin plots for my.data

#descriptives by groups

describeBy(my.data,"sex") # the same descriptive statistics, reported separately for each value of sex
## 
##  Descriptive statistics by group 
## group: 1
##             vars  n  mean    sd median trimmed   mad min max range  skew kurtosis   se
## Time           1 36 14.56  5.04     19   14.67  0.00   9  19    10 -0.21    -2.01 0.84
## Anxiety        2 36  5.50  1.93      5    5.53  1.48   1   9     8 -0.08    -0.68 0.32
## Impulsivity    3 36  4.64  4.06      2    4.57  2.97   0  10    10  0.11    -1.86 0.68
## sex            4 36  1.00  0.00      1    1.00  0.00   1   1     0   NaN      NaN 0.00
## Arousal        5 36 61.39  7.97     66   61.83  5.93  48  70    22 -0.39    -1.55 1.33
## Tension        6 36 56.58  6.65     56   56.90  5.19  38  68    30 -0.49     0.42 1.11
## Performance    7 36 72.69 17.30     78   73.57 17.79  40  98    58 -0.48    -1.17 2.88
## --------------------------------------------------------------------------- 
## group: 2
##             vars  n  mean    sd median trimmed   mad min max range  skew kurtosis   se
## Time           1 36 14.00  5.07   14.0   14.00  7.41   9  19    10  0.00    -2.05 0.85
## Anxiety        2 36  4.97  2.40    5.0    4.90  2.97   0  10    10  0.09    -0.84 0.40
## Impulsivity    3 36  5.17  3.93    7.5    5.20  3.71   0  10    10 -0.07    -1.85 0.66
## sex            4 36  2.00  0.00    2.0    2.00  0.00   2   2     0   NaN      NaN 0.00
## Arousal        5 36 60.42  8.32   61.5   60.63 11.12  48  70    22 -0.14    -1.81 1.39
## Tension        6 36 57.08  5.99   58.0   57.40  5.19  41  69    28 -0.53     0.07 1.00
## Performance    7 36 71.72 17.74   75.5   72.53 18.53  38  98    60 -0.37    -1.13 2.96
violinBy(my.data,grp="sex") # violin plots with sex as the grouping variable

# bfi is not created in this script; presumably an example data set supplied by the loaded packages -- confirm
violinBy(bfi[1:25]) # violin plots for the first 25 columns of bfi

#Now let's try some bivariate descriptives – The correlation

cor(my.data)  # the traditional output: the full symmetric matrix, unrounded
##                    Time      Anxiety Impulsivity         sex      Arousal    Tension Performance
## Time         1.00000000  0.013216858  0.06121135 -0.05564149  0.956608901 0.05050668  0.87406181
## Anxiety      0.01321686  1.000000000  0.15052463 -0.12197796  0.002116381 0.34330479  0.04696584
## Impulsivity  0.06121135  0.150524635  1.00000000  0.06677940 -0.065812947 0.08773475 -0.15565556
## sex         -0.05564149 -0.121977956  0.06677940  1.00000000 -0.060408880 0.04004630 -0.02812176
## Arousal      0.95660890  0.002116381 -0.06581295 -0.06040888  1.000000000 0.05358998  0.91904738
## Tension      0.05050668  0.343304791  0.08773475  0.04004630  0.053589983 1.00000000  0.02194427
## Performance  0.87406181  0.046965844 -0.15565556 -0.02812176  0.919047382 0.02194427  1.00000000
lowerCor(my.data) # the same correlations, rounded to two decimals and shown as a lower-triangular matrix
##             Time  Anxty Impls sex   Arosl Tensn Prfrm
## Time         1.00                                    
## Anxiety      0.01  1.00                              
## Impulsivity  0.06  0.15  1.00                        
## sex         -0.06 -0.12  0.07  1.00                  
## Arousal      0.96  0.00 -0.07 -0.06  1.00            
## Tension      0.05  0.34  0.09  0.04  0.05  1.00      
## Performance  0.87  0.05 -0.16 -0.03  0.92  0.02  1.00
corPlot(my.data,numbers=FALSE)  # show them in a heat map without showing values

corPlot(my.data) # with the values

corPlot(my.data, numbers = TRUE, stars=TRUE) # with the values plus "magic asterisks" (stars=TRUE)

corPlot(my.data,numbers=TRUE,upper=FALSE,diag=FALSE) # upper=FALSE, diag=FALSE: presumably draws only the lower triangle, without the diagonal -- confirm

# Let's try some more conventional ways of showing it

cor.test(my.data[,1],my.data[,2]) # test a single correlation: column 1 (Time) with column 2 (Anxiety)
## 
##  Pearson's product-moment correlation
## 
## data:  my.data[, 1] and my.data[, 2]
## t = 0.11059, df = 70, p-value = 0.9123
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2191226  0.2441379
## sample estimates:
##        cor 
## 0.01321686
corr.test(my.data) # correlations, sample size, and probability values for every pair of variables at once
## Call:corr.test(x = my.data)
## Correlation matrix 
##              Time Anxiety Impulsivity   sex Arousal Tension Performance
## Time         1.00    0.01        0.06 -0.06    0.96    0.05        0.87
## Anxiety      0.01    1.00        0.15 -0.12    0.00    0.34        0.05
## Impulsivity  0.06    0.15        1.00  0.07   -0.07    0.09       -0.16
## sex         -0.06   -0.12        0.07  1.00   -0.06    0.04       -0.03
## Arousal      0.96    0.00       -0.07 -0.06    1.00    0.05        0.92
## Tension      0.05    0.34        0.09  0.04    0.05    1.00        0.02
## Performance  0.87    0.05       -0.16 -0.03    0.92    0.02        1.00
## Sample Size 
## [1] 72
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##             Time Anxiety Impulsivity  sex Arousal Tension Performance
## Time        0.00    1.00        1.00 1.00    0.00    1.00           0
## Anxiety     0.91    0.00        1.00 1.00    1.00    0.06           1
## Impulsivity 0.61    0.21        0.00 1.00    1.00    1.00           1
## sex         0.64    0.31        0.58 0.00    1.00    1.00           1
## Arousal     0.00    0.99        0.58 0.61    0.00    1.00           0
## Tension     0.67    0.00        0.46 0.74    0.65    0.00           1
## Performance 0.00    0.70        0.19 0.81    0.00    0.85           0
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option

#What about bootstrap resampling?

# Bootstrap resampling draws random samples, so fix the random seed first;
# otherwise the confidence intervals (and the plots) change on every run.
set.seed(42)
ci <- corCi(my.data) # bootstrapped confidence intervals for the correlations

cor.plot.upperLowerCi(ci)  #with the confidence intervals