### R code used in the paper "Analyses of Wine Tasting Data: A Tutorial"
### by Ingram Olkin, Ying Lou, Lynne Stokes, and Jing Cao

#  This code is free for academic use only. It is distributed in the hope that it will 
#  be useful, but WITHOUT ANY WARRANTY.
#   
#  Copyright (C) 2014 Jing Cao (jcao@smu.edu)
#  Department of Statistical Science, Southern Methodist University, Dallas, TX


###########  Pearson's Correlation ##############################################

## read data

x = c(80, 84, 84, 84, 88, 88, 88, 88, 90, 92, 92, 94)
y = c(80, 80, 80, 84, 80, 80, 96, 96, 92, 94, 94, 90)
n = length(x)


## Pearson's correlation: expression (2.1)

r = cor(x,y)   
               

## Fisher's transformation: expression (2.2)

Z = log((1+r)/(1-r))/2


## 95% CI for rho: (rho1, rho2)

zeta1 = Z-1.96*sqrt(1/(n-3))
zeta2 = Z+1.96*sqrt(1/(n-3))

rho1 = (exp(2*zeta1)-1)/(exp(2*zeta1)+1)
rho2 = (exp(2*zeta2)-1)/(exp(2*zeta2)+1)




###########  Spearman's Rank Correlation ##############################################

## Spearman's rank correlation with adjustment for ties

r_as = cor(x,y,method="spearman")




###########  Kendall's Tau ###########################################################

## Kendall's tau: expression (2.8)

tau = cor(x,y,method="kendall")

var_tau = 2*(2*n+5)/(9*n*(n-1))




###########  Cohen's Kappa ############################################################

## add library (psych)

library(psych)


## Cohen's kappa: expression (2.9) and (2.10)

kappa = cohen.kappa(cbind(x,y))$kappa

var_kappa = cohen.kappa(cbind(x,y))$var.kappa



## transform x and y to rank data
 
xr=x; yr=y
category = sort(unique(c(x,y)))
nc = length(category)

for(i in 1:nc) 
   { xr[x==category[i]] = i
     yr[y==category[i]] = i
   }



## cohen.weights: quadratic weights 

cohen.weights = matrix(0,ncol=nc, nrow=nc)
for(i in 1:nc)
   { for (j in 1:nc) 
     { cohen.weights[i,j] = (i-j)^2
     }
   }


    
################################## Cohen's Weighted Kappa: expression (2.11)  

kappa_w = cohen.kappa(cbind(xr,yr),cohen.weights)$weighted.kappa


## Variance of Cohen's weighted kappa: expression (2.12)

pij = matrix(0,ncol=nc,nrow=nc)

for (i in 1:n) pij[xr[i], yr[i]] = pij[xr[i], yr[i]] + 1/n

pi = apply(pij,1,sum)  
pj = apply(pij,2,sum)    

pew = 0
for(i in 1:nc) {for (j in 1:nc) pew = pew+cohen.weights[i,j]*pi[i]*pj[j]}      
 
pw = 0
for(i in 1:nc) {for (j in 1:nc) pw = pw+cohen.weights[i,j]*pij[i,j]}    
 
temp = 0
for(i in 1:nc) {for (j in 1:nc) temp = temp+cohen.weights[i,j]^2*pij[i,j]}
var_kappa_w = (temp-pw^2)/n/pew^2

# 1-pw/pew
#var_kappa_w = cohen.kappa(cbind(xr,yr),cohen.weights)$var.weighted 

 



###########  Intraclass Correlation (ICC)##############################################

## ICC: expression (3.3)

library(psych)
data=read.table("2012AllMax.txt")
ICC(data)

## the ICC in this paper is ICC3 (Single_fixed_raters), 95% CI is also provided





###########  Product-moment Correlations Matrix ########################################

 
R = cor(data)

tempR = R
diag(tempR) = 0

njudge = dim(R)[1]

## expression (3.5)

g = mean(tempR)*(njudge^2)/(njudge^2-njudge)


## expression (3.6)

r.i = colMeans(tempR)*njudge/(njudge-1)






############ Bayesian ordinal model ###################################################

# ordinal data analysis with  common category cutoffs for all raters, the wine data
# no replicate, only one panel

## ni: number of wines per panel
## nj: number of judges per panel
## n0: number of burn-in in MCMC
## ntotal: total number of samples in MCMC including burn-in samples


n0=10000
ntotal=110000

ni=108; nj=5 
 
temp = scan("2012AllMax.txt") 
y=matrix(temp, byrow=T, nrow=108)

## transform data on the original measurement (82 - 98) to the ordinal scale (1 - 17)

y[y==82]=1
y[y==83]=2
y[y==84]=3
y[y==85]=4
y[y==86]=5
y[y==87]=6
y[y==88]=7
y[y==89]=8
y[y==90]=9
y[y==91]=10
y[y==92]=11
y[y==93]=12
y[y==94]=13
y[y==95]=14
y[y==96]=15
y[y==97]=16
y[y==98]=17



## set up initial value for category bounds

T=17
c=0:T
c[1]=-10; c[T+1]=10
for(t in 2:(T)) c[t]=qnorm((t-1)/T)


#####################  assign starting values for MCMC  

x = y
x[!is.na(y)]=(y[!is.na(y)]-mean(y[!is.na(y)]))/sd(y[!is.na(y)])

a=rep(0,nj)
b=rep(1,nj)
theta=rep(0,ni)
sigma2.a = sigma2.b = 0.1
sigma2.r = rep(0.1,nj)

g1 = 2; g2 = 1                   ## hyperparameter for inverse gamma prior
g1 = 0.01; g2 = 0.01 

a.sum = b.sum = sigma2.r.sum = rep(0,nj)
theta.sum = rep(0,ni)
c.sum = rep(0,(T+1))
 

a.trace = b.trace = sigma2.r.trace = array(0,dim=c(nj,ntotal))
theta.trace = array(0,dim=c(ni,ntotal))


 
#####################  run MCMC  

for(mc in 1:ntotal){
   print(mc)
 
          for(j in 1:nj){       ## update x, the continuous estimate of item trait
             for(i in 1:ni){                
                 u = a[j]+b[j]*theta[i]
                 sd = sqrt(sigma2.r[j])
                 u1 = pnorm(c[y[i,j]],u,sd)
                 u2 = pnorm(c[(y[i,j]+1)],u,sd)
                 impute.p = runif(1,u1,u2)
                 x[i,j] = qnorm(impute.p, u, sd)                  
             }
          }
  

 

    for(i in 1:ni){            ## update theta, item latent trait    
        u=0; s=1
        for(j in 1:nj){             
            u = u+sum((x[i,j]-a[j])*b[j])/sigma2.r[j]
            s = s+(b[j]^2/sigma2.r[j])           
        }
        theta[i] = u/s+rnorm(1)/sqrt(s)      
    }
    theta.trace[,mc] = theta



    for(j in 1:nj){           ## update a, rater-specific bias     
        u=0; s=1/sigma2.a
        for(i in 1:ni){
           u = u+sum((x[i,j]-b[j]*theta[i]))/sigma2.r[j]
           s = s+1/sigma2.r[j]
        }
        a[j] = u/s+rnorm(1)/sqrt(s)      
    }
    a.trace[,mc] = a



    for(j in 1:nj){           ## update b, rater-specific discrimination
        u=1/sigma2.b; s=1/sigma2.b
        for(i in 1:ni){
            u = u+sum((x[i,j]-a[j])*theta[i])/sigma2.r[j]
            s = s+theta[i]^2/sigma2.r[j]
        }
        b[j] = u/s+rnorm(1)/sqrt(s)
    }
    b.trace[,mc] = b



    for(j in 1:nj){           ## update sigma2.r, rater-specific variance
        sha=g1; sca=g2
        for(i in 1:ni){
           sha = sha+sum(!is.na(x[i,j]))/2
           sca = sca+sum((x[i,j]-a[j]-b[j]*theta[i])^2,na.rm=T)/2
        }
        sigma2.r[j] = sca/rgamma(1,shape=sha)
    }
    sigma2.r.trace[,mc] = sigma2.r


      
    sha = sum(!is.na(a))/2+g1             ## update sigma2.a, variance of a 
    sca = sum(a^2,na.rm=T)/2+g2
    sigma2.a = sca/rgamma(1,shape=sha)


    sha = sum(!is.na(b))/2+g1             ## update sigma2.b, variance of b 
    sca = sum((b-1)^2,na.rm=T)/2+g2
    sigma2.b = sca/rgamma(1,shape=sha)


    for(t in 1:(T-1)){                    ## update common bound c
       temp1 = max(x[y>0 & y<=t],na.rm=T)
       temp2 = min(x[y>0 & y>=(t+1)],na.rm=T)
#       c[t+1]=temp1+runif(1)*(temp2-temp1)
       c1 = pnorm(temp1)
       c2 = pnorm(temp2)
       impute.c = runif(1,c1,c2)
       c[t+1] = qnorm(impute.c)
    }

 

#####################  save samples after burn-in  

    if(mc > n0){
       a.sum=a.sum+a
       b.sum=b.sum+b
       theta.sum = theta.sum+theta
       sigma2.r.sum = sigma2.r.sum+sigma2.r 
       c.sum = c.sum + c
        
   }

}



#####################  summarize the estimates  


## theta: wine quality
## a: judge bias
## b: judge discrimination
## sigma2.r: variance of judge-specific measure error

npost = ntotal-n0
theta = theta.sum/npost
a = a.sum/npost
b = b.sum/npost
sigma2.r = sigma2.r.sum/npost
c = c.sum/npost
Bayes.rank = rank(theta)





########### Kendall’s Coefficient of Concordance W ##############################################

## W: expression (3.10)
 
data = read.table("2012AllMax.txt")
ratings = data     
ratings = as.matrix(na.omit(ratings))
n.row = nrow(ratings)
n.col = ncol(ratings)
ratings.rank = apply(ratings, 2, rank)
Tj = 0

for (i in 1:n.col) 
    { rater = table(ratings.rank[, i])
      ties = rater[rater > 1]
      l = as.numeric(ties)
      Tj = Tj + sum(l^3 - l)
    }

W = (12 * var(apply(ratings.rank, 1, sum)) * (n.row - 1))/(n.col^2 * (n.row^3 - n.row) - n.col * Tj)
    
stat.test = n.col * (n.row - 1) * W
df1 = n.row - 1
p.value = pchisq(stat.test, df1, lower.tail = FALSE)







########### Concordance Across Years Based on Correlations #######################################

#######  2-year comparison

 R10=c(0.606,0.692, 0.587)
 R11=c(0.580, 0.492, 0.447)
 R12=c(0.585,0.566,0.509)

## Fisher's transformation

 Z10 = log((1+R10)/(1-R10))/2
 Z11 = log((1+R11)/(1-R11))/2
 Z12 = log((1+R12)/(1-R12))/2

## Test statistic
 n=70
 d=(n-3)*(n-3)/(n+n-6)

 T10.11 = sqrt(d)*max(abs(Z10-Z11))
 T10.12 = sqrt(d)*max(abs(Z10-Z12))
 T11.12 = sqrt(d)*max(abs(Z11-Z12))

## p-value
 p10.11 = 1-(pnorm(T10.11)-pnorm(-T10.11))^3
 p10.12 = 1-(pnorm(T10.12)-pnorm(-T10.12))^3
 p11.12 = 1-(pnorm(T11.12)-pnorm(-T11.12))^3


#######  3-year comparison

  k=3
  Zbar=(Z10+Z11+Z12)/k
  S = (n-3)*((Z10-Zbar)^2+(Z11-Zbar)^2+(Z12-Zbar)^2)
  Smax=max(S)  ## 3.41223136 
  
  alpha = 0.05; alpha.tilde = (1-alpha)^(2/k/(k-1))
  critical = qchisq(alpha.tilde,k-1)  ## rejection region bound,  8.154688

 
 

########### The Sign Test to Compare Two Wines #######################################

score = c(1,1,2,1,2,0,0,1,1,1)

n1 = sum(score==1)
n2 = sum(score==2)

## expression (5.1)
C = (abs(n1-n2)-1)^2/(n1+n2)

## p-value of C
p.C = 1-pchisq(C,1)