### R code used in the paper "Analyses of Wine Tasting Data: A Tutorial" ### by Ingram Olkin, Ying Lou, Lynne Stokes, and Jing Cao # This code is free for academic use only. It is distributed in the hope that it will # be useful, but WITHOUT ANY WARRANTY. # # Copyright (C) 2014 Jing Cao (jcao@smu.edu) # Department of Statistical Science, Southern Methodist University, Dallas, TX ########### Pearson's Correlation ############################################## ## read data x = c(80, 84, 84, 84, 88, 88, 88, 88, 90, 92, 92, 94) y = c(80, 80, 80, 84, 80, 80, 96, 96, 92, 94, 94, 90) n = length(x) ## Pearson's correlation: expression (2.1) r = cor(x,y) ## Fisher's transformation: expression (2.2) Z = log((1+r)/(1-r))/2 ## 95% CI for rho: (rho1, rho2) zeta1 = Z-1.96*sqrt(1/(n-3)) zeta2 = Z+1.96*sqrt(1/(n-3)) rho1 = (exp(2*zeta1)-1)/(exp(2*zeta1)+1) rho2 = (exp(2*zeta2)-1)/(exp(2*zeta2)+1) ########### Spearman's Rank Correlation ############################################## ## Spearman's rank correlation with adjustment for ties r_as = cor(x,y,method="spearman") ########### Kendall's Tau ########################################################### ## Kendall's tau: expression (2.8) tau = cor(x,y,method="kendall") var_tau = 2*(2*n+5)/(9*n*(n-1)) ########### Cohen's Kappa ############################################################ ## add library (psych) library(psych) ## Cohen's kappa: expression (2.9) and (2.10) kappa = cohen.kappa(cbind(x,y))$kappa var_kappa = cohen.kappa(cbind(x,y))$var.kappa ## transform x and y to rank data xr=x; yr=y category = sort(unique(c(x,y))) nc = length(category) for(i in 1:nc) { xr[x==category[i]] = i yr[y==category[i]] = i } ## cohen.weights: quadratic weights cohen.weights = matrix(0,ncol=nc, nrow=nc) for(i in 1:nc) { for (j in 1:nc) { cohen.weights[i,j] = (i-j)^2 } } ################################## Cohen's Weighted Kappa: expression (2.11) kappa_w = cohen.kappa(cbind(xr,yr),cohen.weights)$weighted.kappa ## Variance of Cohen's weighted kappa: expression (2.12) pij = matrix(0,ncol=nc,nrow=nc) for (i in 1:n) pij[xr[i], yr[i]] = pij[xr[i], yr[i]] + 1/n pi = apply(pij,1,sum) pj = apply(pij,2,sum) pew = 0 for(i in 1:nc) {for (j in 1:nc) pew = pew+cohen.weights[i,j]*pi[i]*pj[j]} pw = 0 for(i in 1:nc) {for (j in 1:nc) pw = pw+cohen.weights[i,j]*pij[i,j]} temp = 0 for(i in 1:nc) {for (j in 1:nc) temp = temp+cohen.weights[i,j]^2*pij[i,j]} var_kappa_w = (temp-pw^2)/n/pew^2 # 1-pw/pew #var_kappa_w = cohen.kappa(cbind(xr,yr),cohen.weights)$var.weighted ########### Intraclass Correlation (ICC)############################################## ## ICC: expression (3.3) library(psych) data=read.table("2012AllMax.txt") ICC(data) ## the ICC in this paper is ICC3 (Single_fixed_raters), 95% CI is also provided ########### Product-moment Correlations Matrix ######################################## R = cor(data) tempR = R diag(tempR) = 0 njudge = dim(R)[1] ## expression (3.5) g = mean(tempR)*(njudge^2)/(njudge^2-njudge) ## expression (3.6) r.i = colMeans(tempR)*njudge/(njudge-1) ############ Bayesian ordinal model ################################################### # ordinal data analysis with common category cutoffs for all raters, the wine data # no replicate, only one panel ## ni: number of wines per panel ## nj: number of judges per panel ## n0: number of burn-in in MCMC ## ntotal: total number of samples in MCMC including burn-in samples n0=10000 ntotal=110000 ni=108; nj=5 temp = scan("2012AllMax.txt") y=matrix(temp, byrow=T, nrow=108) ## transform data on the original measurement (82 - 98) to the ordinal scale (1 - 17) y[y==82]=1 y[y==83]=2 y[y==84]=3 y[y==85]=4 y[y==86]=5 y[y==87]=6 y[y==88]=7 y[y==89]=8 y[y==90]=9 y[y==91]=10 y[y==92]=11 y[y==93]=12 y[y==94]=13 y[y==95]=14 y[y==96]=15 y[y==97]=16 y[y==98]=17 ## set up initial value for category bounds T=17 c=0:T c[1]=-10; c[T+1]=10 for(t in 2:(T)) c[t]=qnorm((t-1)/T) ##################### assign starting values for MCMC x = y x[!is.na(y)]=(y[!is.na(y)]-mean(y[!is.na(y)]))/sd(y[!is.na(y)]) a=rep(0,nj) b=rep(1,nj) theta=rep(0,ni) sigma2.a = sigma2.b = 0.1 sigma2.r = rep(0.1,nj) g1 = 2; g2 = 1 ## hyperparameter for inverse gamma prior g1 = 0.01; g2 = 0.01 a.sum = b.sum = sigma2.r.sum = rep(0,nj) theta.sum = rep(0,ni) c.sum = rep(0,(T+1)) a.trace = b.trace = sigma2.r.trace = array(0,dim=c(nj,ntotal)) theta.trace = array(0,dim=c(ni,ntotal)) ##################### run MCMC for(mc in 1:ntotal){ print(mc) for(j in 1:nj){ ## update x, the continuous estimate of item trait for(i in 1:ni){ u = a[j]+b[j]*theta[i] sd = sqrt(sigma2.r[j]) u1 = pnorm(c[y[i,j]],u,sd) u2 = pnorm(c[(y[i,j]+1)],u,sd) impute.p = runif(1,u1,u2) x[i,j] = qnorm(impute.p, u, sd) } } for(i in 1:ni){ ## update theta, item latent trait u=0; s=1 for(j in 1:nj){ u = u+sum((x[i,j]-a[j])*b[j])/sigma2.r[j] s = s+(b[j]^2/sigma2.r[j]) } theta[i] = u/s+rnorm(1)/sqrt(s) } theta.trace[,mc] = theta for(j in 1:nj){ ## update a, rater-specific bias u=0; s=1/sigma2.a for(i in 1:ni){ u = u+sum((x[i,j]-b[j]*theta[i]))/sigma2.r[j] s = s+1/sigma2.r[j] } a[j] = u/s+rnorm(1)/sqrt(s) } a.trace[,mc] = a for(j in 1:nj){ ## update b, rater-specific discrimination u=1/sigma2.b; s=1/sigma2.b for(i in 1:ni){ u = u+sum((x[i,j]-a[j])*theta[i])/sigma2.r[j] s = s+theta[i]^2/sigma2.r[j] } b[j] = u/s+rnorm(1)/sqrt(s) } b.trace[,mc] = b for(j in 1:nj){ ## update sigma2.r, rater-specific variance sha=g1; sca=g2 for(i in 1:ni){ sha = sha+sum(!is.na(x[i,j]))/2 sca = sca+sum((x[i,j]-a[j]-b[j]*theta[i])^2,na.rm=T)/2 } sigma2.r[j] = sca/rgamma(1,shape=sha) } sigma2.r.trace[,mc] = sigma2.r sha = sum(!is.na(a))/2+g1 ## update sigma2.a, variance of a sca = sum(a^2,na.rm=T)/2+g2 sigma2.a = sca/rgamma(1,shape=sha) sha = sum(!is.na(b))/2+g1 ## update sigma2.b, variance of b sca = sum((b-1)^2,na.rm=T)/2+g2 sigma2.b = sca/rgamma(1,shape=sha) for(t in 1:(T-1)){ ## update common bound c temp1 = max(x[y>0 & y<=t],na.rm=T) temp2 = min(x[y>0 & y>=(t+1)],na.rm=T) # c[t+1]=temp1+runif(1)*(temp2-temp1) c1 = pnorm(temp1) c2 = pnorm(temp2) impute.c = runif(1,c1,c2) c[t+1] = qnorm(impute.c) } ##################### save samples after burn-in if(mc > n0){ a.sum=a.sum+a b.sum=b.sum+b theta.sum = theta.sum+theta sigma2.r.sum = sigma2.r.sum+sigma2.r c.sum = c.sum + c } } ##################### summarize the estimates ## theta: wine quality ## a: judge bias ## b: judge discrimination ## sigma2.r: variance of judge-specific measure error npost = ntotal-n0 theta = theta.sum/npost a = a.sum/npost b = b.sum/npost sigma2.r = sigma2.r.sum/npost c = c.sum/npost Bayes.rank = rank(theta) ########### Kendall¡¯s Coefficient of Concordance W ############################################## ## W: expression (3.10) data = read.table("2012AllMax.txt") ratings = data ratings = as.matrix(na.omit(ratings)) n.row = nrow(ratings) n.col = ncol(ratings) ratings.rank = apply(ratings, 2, rank) Tj = 0 for (i in 1:n.col) { rater = table(ratings.rank[, i]) ties = rater[rater > 1] l = as.numeric(ties) Tj = Tj + sum(l^3 - l) } W = (12 * var(apply(ratings.rank, 1, sum)) * (n.row - 1))/(n.col^2 * (n.row^3 - n.row) - n.col * Tj) stat.test = n.col * (n.row - 1) * W df1 = n.row - 1 p.value = pchisq(stat.test, df1, lower.tail = FALSE) ########### Concordance Across Years Based on Correlations ####################################### ####### 2-year comparison R10=c(0.606,0.692, 0.587) R11=c(0.580, 0.492, 0.447) R12=c(0.585,0.566,0.509) ## Fisher's transformation Z10 = log((1+R10)/(1-R10))/2 Z11 = log((1+R11)/(1-R11))/2 Z12 = log((1+R12)/(1-R12))/2 ## Test statistic n=70 d=(n-3)*(n-3)/(n+n-6) T10.11 = sqrt(d)*max(abs(Z10-Z11)) T10.12 = sqrt(d)*max(abs(Z10-Z12)) T11.12 = sqrt(d)*max(abs(Z11-Z12)) ## p-value p10.11 = 1-(pnorm(T10.11)-pnorm(-T10.11))^3 p10.12 = 1-(pnorm(T10.12)-pnorm(-T10.12))^3 p11.12 = 1-(pnorm(T11.12)-pnorm(-T11.12))^3 ####### 3-year comparison k=3 Zbar=(Z10+Z11+Z12)/k S = (n-3)*((Z10-Zbar)^2+(Z11-Zbar)^2+(Z12-Zbar)^2) Smax=max(S) ## 3.41223136 alpha = 0.05; alpha.tilde = (1-alpha)^(2/k/(k-1)) critical = qchisq(alpha.tilde,k-1) ## rejection region bound, 8.154688 ########### The Sign Test to Compare Two Wines ####################################### score = c(1,1,2,1,2,0,0,1,1,1) n1 = sum(score==1) n2 = sum(score==2) ## expression (5.1) C = (abs(n1-n2)-1)^2/(n1+n2) ## p-value of C p.C = 1-pchisq(C,1)