Using R for Structural Equation Modeling

Structural Equation Modeling in R

Structural equation models combine measurement models (e.g., reliability) with structural models (e.g., regression). The sem package, developed by John Fox, allows for some basic structural equation models. To use it, add the sem package by using the package manager.
Structural Equation Modeling may be thought of as regression corrected for attentuation. The sem package developed by John Fox uses the RAM path notation of Jack McCardle and is fairly straightforward. The examples in the package are quite straightforward. A text book, such as John Loehlin's Latent Variable Models (4th Edition) is helpful in understanding the algorithm.
For much more detail on using R to do structural equation modeling, see the course notes for sem (primarily using R) available at the syllabus for my sem course. Also see John Fox's notes that he has prepared as a brief description of SEM techniques as an appendix to his statistics text.
#Loehlin problem 2.5 
 obs.var2.5 = c('Ach1',  'Ach2',  'Amb1',  'Amb2',  'Amb3')
 R.prob2.5 = matrix(c(
  1.00 ,  .60  , .30,  .20,   .20,                                               
   .60,  1.00,   .20,   .30,   .10,                                                
   .30,   .20,  1.00,   .70,   .60 ,                                               
   .20,   .30,   .70,  1.00,   .50,                                                
   .20,   .10,   .60,  .50,  1.00), ncol=5,byrow=TRUE)    
  
  
  
  #correlated factors structure (ambition <-> Achievement) 
  model2.5=matrix(c(
	'Ambit ->  Amb1',      'a', NA,
 	'Ambit -> Amb2' ,      'b', NA,
 	'Ambit -> Amb3' ,      'c', NA,
	'Achieve -> Ach1',     'd', NA,
	'Achieve -> Ach2',     'e', NA,
	'Ambit <-> Achieve',   'f', NA,
	'Amb1 <-> Amb1' ,      'u', NA,
 	'Amb2 <-> Amb2' ,      'v', NA,
	'Amb3 <-> Amb3' ,      'w', NA,
 	'Ach1 <-> Ach1' ,      'x', NA,
	'Ach2 <-> Ach2' ,      'y', NA,
	'Achieve <-> Achieve',  NA, 1,
 	'Ambit <-> Ambit',      NA, 1),
 	ncol=3, byrow=TRUE)
 sem2.5= sem(model2.5,R.prob2.5,60, obs.var2.5)
  summary(sem2.5,digits=3)

 Model Chisquare =  9.74   Df =  4 Pr(>Chisq) = 0.0450
 Goodness-of-fit index =  0.964
 Adjusted goodness-of-fit index =  0.865
 RMSEA index =  0.120   90 % CI: (0.0164, 0.219)
 BIC =  -15.1 

 Normalized Residuals
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-5.77e-01 -3.78e-02 -2.04e-06  4.85e-03  3.87e-05  1.13e+00 

 Parameter Estimates
  Estimate Std Error z value Pr(>|z|)                   
a    0.920    0.0924   9.966 0.00e+00    Amb1 <--- Ambit
b    0.761    0.0955   7.974 1.55e-15    Amb2 <--- Ambit
c    0.652    0.0965   6.753 1.45e-11    Amb3 <--- Ambit
d    0.879    0.1762   4.986 6.16e-07  Ach1 <--- Achieve
e    0.683    0.1509   4.525 6.03e-06  Ach2 <--- Achieve
f    0.356    0.1138   3.127 1.76e-03 Achieve <--> Ambit
u    0.153    0.0982   1.557 1.20e-01     Amb1 <--> Amb1
v    0.420    0.0898   4.679 2.88e-06     Amb2 <--> Amb2
w    0.575    0.0949   6.061 1.35e-09     Amb3 <--> Amb3
x    0.228    0.2791   0.816 4.15e-01     Ach1 <--> Ach1
y    0.534    0.1837   2.905 3.67e-03     Ach2 <--> Ach2

 Iterations =  26 
>  #causal structure with errors in achievement
>   #ambition -> achievement
>   
>  model2.51=matrix(c(
+ 	'Ambit ->  Amb1',      'a', NA,
+ 	'Ambit -> Amb2' ,      'b', NA,
+ 	'Ambit -> Amb3' ,      'c', NA,
+ 	'Achieve -> Ach1',     'd', NA,
+ 	'Achieve -> Ach2',     'e', NA,
+ 	'Ambit -> Achieve',   'f', NA,
+ 	'Amb1 <-> Amb1' ,      'u', NA,
+ 	'Amb2 <-> Amb2' ,      'v', NA,
+ 	'Amb3 <-> Amb3' ,      'w', NA,
+ 	'Ach1 <-> Ach1' ,      'x', NA,
+ 	'Ach2 <-> Ach2' ,      'y', NA,
+ 	'Achieve <-> Achieve',  NA, 1,
+ 	'Ambit <-> Ambit',      NA, 1),
+ 	ncol=3, byrow=TRUE)




> 	
>  sem2.51= sem(model2.51,R.prob2.5,100, obs.var2.5)
>  summary(sem2.51,digits=3)

 Model Chisquare =  9.74   Df =  4 Pr(>Chisq) = 0.0450
 Goodness-of-fit index =  0.964
 Adjusted goodness-of-fit index =  0.865
 RMSEA index =  0.120   90 % CI: (0.0164, 0.219)
 BIC =  -15.1 

 Normalized Residuals
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-5.77e-01 -3.78e-02 -6.33e-06  4.85e-03  3.06e-05  1.13e+00 

 Parameter Estimates
  Estimate Std Error z value Pr(>|z|)                   
a    0.920    0.0924   9.966 0.00e+00    Amb1 <--- Ambit
b    0.761    0.0955   7.974 1.55e-15    Amb2 <--- Ambit
c    0.652    0.0965   6.753 1.45e-11    Amb3 <--- Ambit
d    0.821    0.1781   4.613 3.98e-06  Ach1 <--- Achieve
e    0.638    0.1343   4.752 2.01e-06  Ach2 <--- Achieve
f    0.381    0.1394   2.731 6.32e-03 Achieve <--- Ambit
u    0.153    0.0982   1.557 1.20e-01     Amb1 <--> Amb1
v    0.420    0.0898   4.679 2.88e-06     Amb2 <--> Amb2
w    0.575    0.0949   6.061 1.35e-09     Amb3 <--> Amb3
x    0.228    0.2791   0.816 4.15e-01     Ach1 <--> Ach1
y    0.534    0.1837   2.906 3.67e-03     Ach2 <--> Ach2

 Iterations =  27 
> #causal structure with errors in achievement
>   #ambition -> achievement
>   
>  model2.52=matrix(c(
+ 	'Ambit ->  Amb1',      'a', NA,
+ 	'Ambit -> Amb2' ,      'b', NA,
+ 	'Ambit -> Amb3' ,      'c', NA,
+ 	'Achieve -> Ach1',     'd', NA,
+ 	'Achieve -> Ach2',     'e', NA,
+ 	'Ambit -> Achieve',   'f', NA,
+ 	'Amb1 <-> Amb1' ,      'u', NA,
+ 	'Amb2 <-> Amb2' ,      'v', NA,
+ 	'Amb3 <-> Amb3' ,      'w', NA,
+ 	'Ach1 <-> Ach1' ,      'x', NA,
+ 	'Ach2 <-> Ach2' ,      'y', NA,
+ 	'Achieve <-> Achieve', 'z', NA,
+ 	'Ambit <-> Ambit',      NA, 1),
+ 	ncol=3, byrow=TRUE)
> 	
>  sem2.52= sem(model2.52,R.prob2.5,100, obs.var2.5)
Warning message: 
Negative parameter variances.
Model is probably underidentified.
 in: sem.default(ram = ram, S = S, N = N, param.names = pars, var.names = vars,  
>  summary(sem2.52,digits=3)

 Model Chisquare =  9.74   Df =  3 Pr(>Chisq) = 0.0209
 Goodness-of-fit index =  0.964
 Adjusted goodness-of-fit index =  0.82
 RMSEA index =  0.151   90 % CI: (0.0517, 0.261)
 BIC =  -8.9 

 Normalized Residuals
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-5.77e-01 -3.78e-02 -1.62e-06  4.85e-03  3.43e-05  1.13e+00 

 Parameter Estimates
  Estimate Std Error z value Pr(>|z|)                     
a    0.920    0.0924   9.966 0.00e+00      Amb1 <--- Ambit
b    0.761    0.0955   7.974 1.55e-15      Amb2 <--- Ambit
c    0.652    0.0965   6.753 1.45e-11      Amb3 <--- Ambit
d    0.905       NaN     NaN      NaN    Ach1 <--- Achieve
e    0.703       NaN     NaN      NaN    Ach2 <--- Achieve
f    0.346       NaN     NaN      NaN   Achieve <--- Ambit
u    0.153    0.0982   1.557 1.20e-01       Amb1 <--> Amb1
v    0.420    0.0898   4.679 2.88e-06       Amb2 <--> Amb2
w    0.575    0.0949   6.061 1.35e-09       Amb3 <--> Amb3
x    0.228    0.2791   0.816 4.15e-01       Ach1 <--> Ach1
y    0.534    0.1837   2.905 3.67e-03       Ach2 <--> Ach2
z    0.824       NaN     NaN      NaN Achieve <--> Achieve

 Iterations =  27 

 Aliased parameters: d e f z 
Warning message: 
NaNs produced in: sqrt(diag(object$cov)) 
>   #causal structure with errors in achievement
>   #ambition -> achievement
>   #fix achievement to Loehlin answer
>   
>  model2.53=matrix(c(
+ 	'Ambit ->  Amb1',      'a', NA,
+ 	'Ambit -> Amb2' ,      'b', NA,
+ 	'Ambit -> Amb3' ,      'c', NA,
+ 	'Achieve -> Ach1',     'd', NA,
+ 	'Achieve -> Ach2',     'e', NA,
+ 	'Ambit -> Achieve',   'f', NA,
+ 	'Amb1 <-> Amb1' ,      'u', NA,
+ 	'Amb2 <-> Amb2' ,      'v', NA,
+ 	'Amb3 <-> Amb3' ,      'w', NA,
+ 	'Ach1 <-> Ach1' ,      'x', NA,
+ 	'Ach2 <-> Ach2' ,      'y', NA,
+ 	'Achieve <-> Achieve', NA, .873,
+ 	'Ambit <-> Ambit',      NA, 1),
+ 	ncol=3, byrow=TRUE)
> 	
>  sem2.53= sem(model2.53,R.prob2.5,100, obs.var2.5)
>  summary(sem2.53,digits=3)\
Error: syntax error
> summary(sem2.53,digits=3)

 Model Chisquare =  9.74   Df =  4 Pr(>Chisq) = 0.0450
 Goodness-of-fit index =  0.964
 Adjusted goodness-of-fit index =  0.865
 RMSEA index =  0.120   90 % CI: (0.0164, 0.219)
 BIC =  -15.1 

 Normalized Residuals
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-5.77e-01 -3.78e-02 -1.14e-06  4.85e-03  3.19e-05  1.13e+00 

 Parameter Estimates
  Estimate Std Error z value Pr(>|z|)                   
a    0.920    0.0924   9.966 0.00e+00    Amb1 <--- Ambit
b    0.761    0.0955   7.974 1.55e-15    Amb2 <--- Ambit
c    0.652    0.0965   6.753 1.45e-11    Amb3 <--- Ambit
d    0.879    0.1906   4.612 3.98e-06  Ach1 <--- Achieve
e    0.683    0.1437   4.752 2.01e-06  Ach2 <--- Achieve
f    0.356    0.1303   2.731 6.32e-03 Achieve <--- Ambit
u    0.153    0.0982   1.557 1.20e-01     Amb1 <--> Amb1
v    0.420    0.0898   4.679 2.88e-06     Amb2 <--> Amb2
w    0.575    0.0949   6.061 1.35e-09     Amb3 <--> Amb3
x    0.228    0.2791   0.816 4.15e-01     Ach1 <--> Ach1
y    0.534    0.1837   2.906 3.67e-03     Ach2 <--> Ach2

 Iterations =  27 
> 


ÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄ

 #Loehlin problem from table 2-12
 #Note that version a is a classic example of congeneric measurement.
 #Alternatively, this could be thought of as underidentified higher order model

 obs.var2.12 = c('a',  'b',  'c',  'd')
  R.prob2.12 = matrix(c(
  1.00 ,  .30,    .20,   .10,                                               
   .30,  1.00,   .20,   .20,                                               
   .20,   .20,  1.00,   .30,                                            
   .10,   .20,   .30,  1.00), 
   ncol=4,byrow=TRUE)

  model2.12a=matrix(c(
 	'g ->  a',      'a1', NA,
 	'g -> b' ,      'b1', NA,
 	'g -> c' ,      'c1', NA,
 	'g -> d',     'd1', NA,
 	'a <-> a',      'e1', NA,
 	'b <-> b',      'e2', NA,
 	'c <-> c',      'e3', NA,
 	'd <-> d',      'e4', NA,
 	'g <-> g',       NA, 1),
	ncol=3, byrow=TRUE)
  sem2.12a= sem(model2.12a,R.prob2.12,120, obs.var2.12)
 summary(sem2.12a,digits=3)

#a 1 degree of freedom model
model2.12b=matrix(c(
 	'g ->  a',      'a1', NA,
 	'g -> b' ,      'b1', NA,
 	'f -> c' ,      'c1', NA,
 	'f -> d',     'd1', NA,
 	'a <-> a',      'e1', NA,
 	'b <-> b',      'e2', NA,
 	'c <-> c',      'e3', NA,
 	'd <-> d',      'e4', NA,
 	'g <-> g',       NA, 1,
 	'f <-> f',       NA, 1,
 	'g <-> f',        'fg', NA),
	ncol=3, byrow=TRUE)
  sem2.12b= sem(model2.12b,R.prob2.12,120, obs.var2.12)
 summary(sem2.12b,digits=3)
 
#the following higher level model has 0 degrees of freedom
 model2.12c=matrix(c(
 	'g ->  a',      'a1', NA,
 	'g -> b' ,      'b1', NA,
 	'f -> c' ,      'c1', NA,
 	'f -> d',       'd1', NA,
 	'a <-> a',      'e1', NA,
 	'b <-> b',      'e2', NA,
 	'c <-> c',      'e3', NA,
 	'd <-> d',      'e4', NA,
 	'g <-> g',       NA, 1,
 	'f <-> f',       NA, 1,
 	'h -> g',        'hg', NA,
 	'h -> f',        NA,1,
	'h <-> h',        NA,1),
	ncol=3, byrow=TRUE)
  sem2.12c= sem(model2.12c,R.prob2.12,120, obs.var2.12)
 summary(sem2.12c,digits=3)
 
 
 
 ÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄ
 
 #Loehlin problem 2.9
 
  obs.var2.09 = c('w',  'x',  'y',  'z')
  R.prob2.09 = matrix(c(
  1.00 ,  .40,    .50,   .30,                                               
   .40,  1.00,   .55,   .35,                                               
   .50,   .55,  1.00,   .40,                                            
   .30,   .35,   .40,  1.00), 
   ncol=4,byrow=TRUE)

  model2.09=matrix(c(
 	'g ->  w',      'a1', NA,
 	'g -> x' ,      'b1', NA,
 	'g -> y' ,      'c1', NA,
 	'g -> z',     'd1', NA,
 	'w <-> w',      'e1', NA,
 	'x <-> x',      'e2', NA,
 	'y <-> y',      'e3', NA,
 	'z <-> z',      'e4', NA,
 	'g <-> g',       NA, 1),
	ncol=3, byrow=TRUE)
  sem2.09= sem(model2.09,R.prob2.09,500, obs.var2.09)
 summary(sem2.09,digits=3)
 
 
  obs.var2.09b = c('w',  'x',  'y',  'z')
  
  R.prob2.09b = matrix(c(
  1.00 ,  .40,    .50,   .30,                                               
   .40,  1.00,   .55,   .35,                                               
   .50,   .55,  1.00,   .40,                                            
   .30,   .35,   .40,  1.00), 
   ncol=4,byrow=TRUE)

  model2.09b=matrix(c(
 	'g ->  w',      NA,1,
 	'g -> x' ,      'b1', NA,
 	'g -> y' ,      'c1', NA,
 	'g -> z',     'd1', NA,
 	'w <-> w',      'e1', NA,
 	'x <-> x',      'e2', NA,
 	'y <-> y',      'e3', NA,
 	'z <-> z',      'e4', NA,
 	'g <-> g',       'e',NA),
	ncol=3, byrow=TRUE)
	
  sem2.09b= sem(model2.09b,R.prob2.09b,500, obs.var2.09b)
 summary(sem2.09b,digits=3)
part of a short guide to R
Version of February 15, 2007
William Revelle Department of Psychology
Northwestern University