Thurstonian scaling
#Thurstone case V (assumption of equal and uncorrelated error variances)
#Version of March 22, 2005
#Do a Thurstone scaling (Case 5) of either a square choice matrix or a rectangular rank order matrix
#Need to add output options to allow users to see progress
#data are either a set of rank orders or
#a set of of choices (columns chosed over row)
#output is a list of
# scale scores
#goodness of fit estimate (1 - sumsq(error)/sumsq(original) not very good
# the model choice matrix
#the error (residual) matrix
#the original choice matrix
thurstone <- function(x, ranks = FALSE,opt=TRUE) { #the main routine
if (ranks) {choice <- choice.mat(x)
} else {if (is.matrix(x)) choice <- x
choice <- as.matrix(x)}
scale.values <- scale.vect(choice) #convert rank order information to choice matrix
fit <- thurstone.fit(scale.values,choice,opt) #see how well this model fits
thurstone <- list("scale values ="=round(scale.values,2), "goodness of fit"= fit,"choice matrix "= choice)
}
#the functions used by thurstone:
#this next function does almost all of the work, by transforming choices to normal deviates
scale.mat <- function(x) {scale.mat <- qnorm(x)} #convert choice matrix to normal deviates
#find the average normal deviate score for each item, and subtract from the minimum
scale.vect <- function(x) #do the actual thurstonian scaling
{nvar <- dim(x)[2]
score <- colSums(scale.mat(x)) #find the summed scores
minscore <-min(score) #rescale to 0 as minumum
relative <- score - minscore
relative <- relative/nvar
return(relative) } #the thurstone scale values
#next two functions are used for finding goodness of fit -- not a very powerful measure
thurstone.model <- function (x,opt) { #returns the lower diagonal distances
if (opt) {return(1-pnorm(dist(x,diag=TRUE)))} else return(pnorm(dist(x,diag=TRUE))) }
thurstone.fit <- function (s,d,opt) { #s is the thurstone scale values, d is the data matrix
model <- thurstone.model(s,opt)
dm <- as.dist(d) #makes the distance matrix a lower diagonal
error <- model - dm #error = model - data or more conventionally data = model+error
fit <- 1- sum(error*error)/sum(dm*dm) #a typical fit measure -- squared error/squared original
thurstone.fit <- list("goodness of fit = "=round(fit,2),"model matrix"=round(model,2),"error matrix "=round(error,2))
}
#if we have rank order data, then convert them to a choice matrix
#convert a rank order matrix into a choice matrix
orders.to.choice <- function(x,y) { #does one subject (row) at a time
nvar<-dim(x)[2]
for (i in 1:nvar) {
for (j in 1:nvar) {
if (x[j]< x[i] ) {y[i,j] <- y[i,j]+1
} else if (x[j] == x[i]) {
y[j,i] <- y[j,i]+.5 #take ties into account
y[i,j] <- y[i,j] + .5
} else y[j,i] <- y[j,i]+1
}}
return(y)}
# makes repeated calls to orders.to.choice -- can we vectorize this?
choice.mat <-function(x) {
nsubs<- dim(x)[1]
nvar <- dim(x)[2]
y<- array(rep(0,nvar*nvar),dim=c(nvar,nvar))
for (k in 1:nsubs) {
y<-orders.to.choice(x[k,],y) } #is there a way to vectorize this?
d <- diag(y)
y<- y/(2*d)
lower <- 1/(4*nsubs) #in case of 0 or 1, we put limits
upper <- 1- lower
y[yupper] <- upper
return(y) }
###end of thurstone functions
#now some test data
#Nunnally's data (Nunnally and Bernstein took these from Guilford)
.500 .818 .770 .811 .878 .892 .899 .892 .926
.182 .500 .601 .723 .743 .736 .811 .845 .858
.230 .399 .500 .561 .736 .676 .845 .797 .818
.189 .277 .439 .500 .561 .588 .676 .601 .730
.122 .257 .264 .439 .500 .493 .574 .709 .764
.108 .264 .324 .412 .507 .500 .628 .682 .628
.101 .189 .155 .324 .426 .372 .500 .527 .642
.108 .155 .203 .399 .291 .318 .473 .500 .628
.074 .142 .182 .270 .236 .372 .358 .372 .500
#ranking data
#data set 1 (from http://marketing.byu.edu/htmlpages/books/pcmds/THURSTONE.html#N_1_)
1 2 3 4 5
5 4 3 2 1
4 2 1 3 4
3 2 1 4 5
2 2 2 1 1
3 3 3 3 2
5 1 1 2 4
5 2 1 2 4
2 1 3 2 1
1 2 2 2 4
#data set 2 (from http://marketing.byu.edu/htmlpages/books/pcmds/THURSTONE.html#N_1_)
0.50 0.44 0.42 0.43 0.52 0.48 0.37
0.56 0.50 0.40 0.56 0.63 0.35 0.33
0.58 0.60 0.50 0.44 0.45 0.44 0.32
0.57 0.44 0.56 0.50 0.35 0.48 0.59
0.48 0.38 0.55 0.65 0.50 0.50 0.44
0.52 0.65 0.56 0.52 0.50 0.50 0.80
0.63 0.67 0.68 0.41 0.56 0.20 0.50
part of a short guide to R
Version of April 20, 2005
William Revelle
Department of Psychology
Northwestern University