R version 3.2.4 (2016-03-10) -- "Very Secure Dishes" Copyright (C) 2016 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ## code to reproduce Table 3 > library(splines) > set.seed(3001) > options(width=150) > sessionInfo() R version 3.2.4 (2016-03-10) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Red Hat Enterprise Linux Server release 6.8 (Santiago) locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] splines stats graphics grDevices utils datasets methods base > > ################################################ > ## initial work on CSEM function from real data > ################################################ > > ## define CSEM function corresponding to Figure 1: > case <- data.frame(score = c(232,236,239,242,245,248,251,253,256,258,260,263,265,267,269,271,273,275,277,279,281,283,285,286,288,290,292,294,296,298,300,302,304,306,309,311,313,316,318,321,324,327,330,333,337,341,350,351,357,365,376,393,423), csem = c(10,9,9,9,8,8,8,8,8,8,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,9,9,9,10,10,12,12,13,15,18,25,45)) > case$cvar <- case$csem^2 > > ## interpolate variance function and go flat outside range of data > varfunc <- approxfun(x = case$score, y = case$cvar, rule=2) > ## check: > ## x <- seq(from=0.8*min(case$score), to = 1.2*max(case$score), length=1000) > ## plot(x, varfunc(x), type="l") > ## points(case$score, case$cvar, col="blue") > > ## using raw achievement data (which cannot be posted) and deconvolution methods > ## we estimated X in the population had mean 334.3 and standard deviation 32.8: > mux <- 334.3 > sdx <- 32.8 > > ################################################ > ## use Monte Carlo methods to approximate A(W) > ################################################ > xgrid <- seq(from=min(case$score), to=max(case$score), length=200) > nreps <- 30000 > d <- data.frame(x=rep(xgrid, each=nreps)) > d$target <- varfunc(d$x) > > d$w <- d$x + rnorm(nrow(d), mean=0, sd = sqrt(d$target)) > s <- bs(d$w, df=10, intercept=TRUE) > colnames(s) <- paste("phi",colnames(s),sep="") > > b <- aggregate(s, by=list(x=d$x), FUN=mean) > b <- merge(b, unique(d[,c("x","target")]), all.x=TRUE) > > mod <- lm(as.formula(paste("target ~", paste(colnames(s), collapse=" + "), "-1")), data=b) > d$fit <- s %*% coef(mod) > > A1 <- approxfun(x = d$w, y = d$fit, rule=2) > > ################################################ > ## Loop over lambda to create raw data > ## break simulation into 5 chunks of 50,000,000 each to keep RAM manageable > ################################################ > nchunk <- 5 > n <- 50000000 > bigres <- vector(nchunk, mode="list") > > for(ch in 1:nchunk){ + lambdaseq <- c(1,2) + nlam <- length(lambdaseq) + res <- vector(nlam, mode="list") + + ## loop over lambda + for(wh in 1:nlam){ + lambda <- lambdaseq[wh] + + cat(paste("\n\n#########################",ch,lambda,"#########################\n")) + + ## generate X + d <- data.frame(x1 = rnorm(n, mean = mux, sd = sdx)) + + ## generate Ws as conditionally normal given X + g1 <- varfunc + d$w1 <- d$x1 + rnorm(n, sd = sqrt(g1(d$x1))) + + ## get other plug-ins + d$egx <- mean(g1(d$x1)) + + ## brute force condition on w by rounding it to nearest integer + d$wcat <- as.factor(round(d$w1, digits=0)) + d$exgivenw <- ave(d$x1, d$wcat) + d$egxgivenw <- ave(g1(d$x1), d$wcat) + + ####################################################### + ## generate different values of W* + ## mean center to get central moments later + ####################################################### + ## 1) using true variance + d$w1star.gx <- d$w1 + rnorm(n, sd = sqrt(lambda * g1(d$x1))) - mux + ## 2) fresh sample using right variance function + d$w1star.fresh <- d$x1 + rnorm(n, sd = sqrt((1 + lambda) * g1(d$x1))) - mux + ## 3) average g(X) + d$w1star.avggx <- d$w1 + rnorm(n, sd = sqrt(lambda * d$egx)) - mux + ## 4) average g(W) + d$w1star.avggw <- d$w1 + rnorm(n, sd = sqrt(lambda * mean(g1(d$w1)))) - mux + ## 5) g(W) plugin + d$w1star.gw <- d$w1 + rnorm(n, sd = sqrt(lambda * g1(d$w1))) - mux + ## 6) g(E[X|W]) + d$w1star.gexw <- d$w1 + rnorm(n, sd = sqrt(lambda * g1(d$exgivenw))) - mux + ## 7) A(W) + d$w1star.aw <- d$w1 + rnorm(n, sd = sqrt(lambda * A1(d$w1))) - mux + ## 8) E[g(X)|W] + d$w1star.rcal <- d$w1 + rnorm(n, sd = sqrt(lambda * d$egxgivenw)) - mux + + + m1 <- c(mean(d$w1star.gx), mean(d$w1star.gx^2), mean(d$w1star.gx^3), mean(d$w1star.gx^4), mean(d$w1star.gx^5), mean(d$w1star.gx^6), mean(d$w1star.gx^7),mean(d$w1star.gx^8)) + m2 <- c(mean(d$w1star.fresh), mean(d$w1star.fresh^2), mean(d$w1star.fresh^3), mean(d$w1star.fresh^4), mean(d$w1star.fresh^5), mean(d$w1star.fresh^6), mean(d$w1star.fresh^7),mean(d$w1star.fresh^8)) + m3 <- c(mean(d$w1star.avggx), mean(d$w1star.avggx^2), mean(d$w1star.avggx^3), mean(d$w1star.avggx^4), mean(d$w1star.avggx^5), mean(d$w1star.avggx^6), mean(d$w1star.avggx^7), mean(d$w1star.avggx^8)) + m4 <- c(mean(d$w1star.avggw), mean(d$w1star.avggw^2), mean(d$w1star.avggw^3), mean(d$w1star.avggw^4), mean(d$w1star.avggw^5), mean(d$w1star.avggw^6), mean(d$w1star.avggw^7),mean(d$w1star.avggw^8)) + m5 <- c(mean(d$w1star.gw), mean(d$w1star.gw^2), mean(d$w1star.gw^3), mean(d$w1star.gw^4), mean(d$w1star.gw^5), mean(d$w1star.gw^6), mean(d$w1star.gw^7),mean(d$w1star.gw^8)) + m6 <- c(mean(d$w1star.gexw), mean(d$w1star.gexw^2), mean(d$w1star.gexw^3), mean(d$w1star.gexw^4), mean(d$w1star.gexw^5), mean(d$w1star.gexw^6), mean(d$w1star.gexw^7),mean(d$w1star.gexw^8)) + m7 <- c(mean(d$w1star.aw), mean(d$w1star.aw^2), mean(d$w1star.aw^3), mean(d$w1star.aw^4), mean(d$w1star.aw^5), mean(d$w1star.aw^6), mean(d$w1star.aw^7), mean(d$w1star.aw^8)) + m8 <- c(mean(d$w1star.rcal), mean(d$w1star.rcal^2), mean(d$w1star.rcal^3), mean(d$w1star.rcal^4), mean(d$w1star.rcal^5), mean(d$w1star.rcal^6), mean(d$w1star.rcal^7), mean(d$w1star.rcal^8)) + + m <- round(rbind(m1, m2, m3, m4, m5, m6, m7, m8), digits=3) + colnames(m) <- paste("mom",1:8,sep="") + print(tmp <- data.frame(lambda = lambda, method=c("gx","fresh","avggx","avggw","gw","gexw","aw","rcal"), m)) + + ## rearrange tmp to align with table order in paper and save + res[[wh]] <- tmp[c(1,2,7,3,8,5,4,6),] + rm(d) + gc() + } + + ## store results for this chunk + bigres[[ch]] <- res + rm(res) + gc() + } ######################### 1 1 ######################### lambda method mom1 mom2 mom3 mom4 mom5 mom6 mom7 mom8 m1 1 gx -0.001 1413.504 33313.21 9377572 832290414 171868185009 2.788860e+13 5.973624e+15 m2 1 fresh 0.001 1413.637 33320.24 9373523 829977477 170940261591 2.755978e+13 5.861239e+15 m3 1 avggx -0.002 1413.336 16627.09 7505410 357636870 88205998576 8.930403e+12 1.838900e+15 m4 1 avggw 0.001 1434.142 16662.27 7684285 361834837 90741993372 9.136757e+12 1.904799e+15 m5 1 gw -0.004 1434.058 41211.18 10953857 1140278549 231636659089 3.982853e+13 8.438811e+15 m6 1 gexw 0.000 1394.552 31596.63 9131134 806809540 169276088631 2.784832e+13 6.045168e+15 m7 1 aw -0.002 1414.865 40593.48 11742863 1545814136 386817397493 9.498508e+13 2.800664e+16 m8 1 rcal -0.002 1413.384 33295.65 9304081 813281156 166593808678 2.652764e+13 5.600272e+15 ######################### 1 2 ######################### lambda method mom1 mom2 mom3 mom4 mom5 mom6 mom7 mom8 m1 2 gx -0.004 1582.163 49980.58 13125440 1508361709 330283438331 6.542459e+13 1.585801e+16 m2 2 fresh -0.006 1582.810 50072.33 13147081 1512045802 331072378977 6.552751e+13 1.587237e+16 m3 2 avggx -0.002 1582.615 16749.42 9039350 389167523 109660044448 1.035599e+13 2.321808e+15 m4 2 avggw -0.001 1624.457 16692.46 9439648 394624761 115365590241 1.066981e+13 2.453864e+15 m5 2 gw -0.003 1624.630 65996.48 16625688 2302356104 510809845095 1.085383e+14 2.654757e+16 m6 2 gexw -0.006 1544.391 46544.91 12430311 1410678177 312302480824 6.261470e+13 1.540126e+16 m7 2 aw -0.003 1586.063 64735.40 18366994 3348073090 985625994299 3.107403e+14 1.131605e+17 m8 2 rcal -0.003 1582.250 50001.40 12842676 1435959445 308001208995 5.949299e+13 1.415854e+16 ######################### 2 1 ######################### lambda method mom1 mom2 mom3 mom4 mom5 mom6 mom7 mom8 m1 1 gx 0.003 1414.090 33410.96 9394028 834727637 172088532414 2.784950e+13 5.937792e+15 m2 1 fresh 0.001 1413.669 33361.28 9381102 832408764 171726925054 2.783168e+13 5.957003e+15 m3 1 avggx 0.000 1413.662 16689.24 7513539 359015468 88434011910 8.979976e+12 1.849711e+15 m4 1 avggw 0.001 1434.749 16678.15 7688275 361392099 90489468000 9.058216e+12 1.880343e+15 m5 1 gw 0.001 1434.397 41246.55 10947297 1135867153 229873350058 3.924194e+13 8.252666e+15 m6 1 gexw 0.004 1395.090 31692.61 9150701 810353066 169968894037 2.797620e+13 6.068168e+15 m7 1 aw 0.000 1415.116 40607.46 11739502 1541647416 384530187754 9.388482e+13 2.752337e+16 m8 1 rcal 0.002 1413.913 33370.30 9314497 814505271 166770481100 2.656318e+13 5.613295e+15 ######################### 2 2 ######################### lambda method mom1 mom2 mom3 mom4 mom5 mom6 mom7 mom8 m1 2 gx 0.003 1581.472 49938.50 13103229 1503639490 328878106797 6.502020e+13 1.574697e+16 m2 2 fresh 0.002 1581.600 49956.78 13104421 1502779197 328331522215 6.475663e+13 1.562394e+16 m3 2 avggx -0.004 1581.434 16679.76 9019963 386660840 109079140983 1.025514e+13 2.301085e+15 m4 2 avggw -0.001 1623.631 16649.72 9423693 393039127 114829524503 1.058587e+13 2.429825e+15 m5 2 gw -0.001 1623.521 65877.91 16580101 2292438996 508222104220 1.079474e+14 2.641690e+16 m6 2 gexw 0.002 1543.917 46585.83 12425295 1411647187 312498255824 6.272120e+13 1.544197e+16 m7 2 aw 0.002 1585.340 64719.09 18371462 3356001431 991480288682 3.148683e+14 1.160173e+17 m8 2 rcal 0.000 1581.417 49947.19 12820770 1431501736 306571302697 5.908833e+13 1.403445e+16 ######################### 3 1 ######################### lambda method mom1 mom2 mom3 mom4 mom5 mom6 mom7 mom8 m1 1 gx 0.002 1412.970 33335.53 9373852 832062470 171521896561 2.774553e+13 5.914455e+15 m2 1 fresh -0.001 1412.786 33332.02 9377022 833766185 172007341607 2.788629e+13 5.951195e+15 m3 1 avggx 0.002 1413.179 16686.35 7505232 358141327 88185986655 8.930012e+12 1.837921e+15 m4 1 avggw -0.001 1434.076 16649.88 7680831 360870940 90433525726 9.058674e+12 1.883148e+15 m5 1 gw 0.002 1434.547 41422.36 10984684 1147063627 232739041951 4.004038e+13 8.475827e+15 m6 1 gexw -0.001 1394.410 31624.87 9129767 806477482 168944575851 2.773753e+13 6.006241e+15 m7 1 aw -0.001 1414.444 40590.57 11731585 1543085017 385484958290 9.444001e+13 2.777802e+16 m8 1 rcal 0.000 1413.322 33326.55 9302989 813823427 166938914866 2.673111e+13 5.693948e+15 ######################### 3 2 ######################### lambda method mom1 mom2 mom3 mom4 mom5 mom6 mom7 mom8 m1 2 gx 0.003 1581.654 50046.38 13132115 1513872617 3.321709e+11 6.614226e+13 1.612101e+16 m2 2 fresh 0.008 1581.885 50038.02 13125296 1509226463 3.302782e+11 6.536823e+13 1.582124e+16 m3 2 avggx 0.005 1581.575 16674.73 9021409 387073155 1.093941e+11 1.034592e+13 2.331096e+15 m4 2 avggw 0.004 1623.632 16707.26 9427410 394412714 1.151581e+11 1.067534e+13 2.456581e+15 m5 2 gw 0.001 1623.691 65932.26 16591326 2295246898 5.089120e+11 1.081432e+14 2.648226e+16 m6 2 gexw 0.002 1544.051 46604.25 12425321 1410261095 3.115875e+11 6.226134e+13 1.522739e+16 m7 2 aw 0.006 1585.467 64913.16 18450338 3397051729 1.010316e+12 3.231579e+14 1.193701e+17 m8 2 rcal 0.001 1581.672 49937.52 12816454 1430889913 3.068307e+11 5.930670e+13 1.414239e+16 ######################### 4 1 ######################### lambda method mom1 mom2 mom3 mom4 mom5 mom6 mom7 mom8 m1 1 gx 0.006 1413.748 33493.44 9404491 839516694 173393924016 2.827296e+13 6.065967e+15 m2 1 fresh 0.004 1413.514 33435.74 9394636 837352879 172837299796 2.811305e+13 6.017722e+15 m3 1 avggx 0.007 1413.539 16759.58 7519917 361232851 88843939297 9.078555e+12 1.870608e+15 m4 1 avggw 0.006 1434.537 16757.18 7698817 364472118 91137428540 9.210392e+12 1.916022e+15 m5 1 gw 0.008 1434.699 41428.16 10980802 1145815910 232431782812 3.998547e+13 8.468489e+15 m6 1 gexw 0.004 1394.449 31724.85 9151007 812152204 170367222235 2.812374e+13 6.113087e+15 m7 1 aw 0.005 1415.040 40757.78 11772606 1554384126 388835781128 9.544493e+13 2.805573e+16 m8 1 rcal 0.006 1413.726 33443.34 9327222 818991832 168015467264 2.696050e+13 5.737917e+15 ######################### 4 2 ######################### lambda method mom1 mom2 mom3 mom4 mom5 mom6 mom7 mom8 m1 2 gx 0.001 1582.231 50060.98 13135138 1511770801 330973936046 6.557553e+13 1.587850e+16 m2 2 fresh 0.005 1582.174 50094.80 13133045 1511266019 330845952148 6.561523e+13 1.592992e+16 m3 2 avggx -0.002 1582.173 16671.83 9030222 387433793 109451632508 1.033241e+13 2.323037e+15 m4 2 avggw 0.003 1624.129 16707.60 9434698 394872187 115357899523 1.070440e+13 2.464029e+15 m5 2 gw 0.002 1624.252 65947.49 16596885 2295785415 508991270535 1.081397e+14 2.647215e+16 m6 2 gexw 0.000 1544.516 46665.60 12447776 1416360198 313497680178 6.286551e+13 1.542632e+16 m7 2 aw -0.003 1585.125 64655.82 18361945 3361669191 996312818713 3.179080e+14 1.175162e+17 m8 2 rcal -0.002 1581.772 49960.04 12825700 1433452604 307345569654 5.941919e+13 1.417248e+16 ######################### 5 1 ######################### lambda method mom1 mom2 mom3 mom4 mom5 mom6 mom7 mom8 m1 1 gx 0.002 1413.753 33447.27 9410053 840727297 173952698297 2.841554e+13 6.108273e+15 m2 1 fresh 0.004 1413.866 33398.66 9381222 831461996 171067155907 2.756634e+13 5.854410e+15 m3 1 avggx 0.004 1413.893 16721.69 7515355 359688640 88524689613 9.009920e+12 1.856543e+15 m4 1 avggw 0.004 1434.527 16697.62 7690662 362279669 90814245219 9.135285e+12 1.903442e+15 m5 1 gw 0.006 1434.714 41370.89 10970504 1142473989 231540898446 3.970919e+13 8.384188e+15 m6 1 gexw 0.003 1394.912 31685.13 9144075 809137181 169508513148 2.783541e+13 6.016882e+15 m7 1 aw 0.006 1415.553 40760.04 11779728 1556608436 390220007608 9.611540e+13 2.839270e+16 m8 1 rcal 0.005 1413.611 33396.78 9317250 816933624 167552133386 2.685134e+13 5.712520e+15 ######################### 5 2 ######################### lambda method mom1 mom2 mom3 mom4 mom5 mom6 mom7 mom8 m1 2 gx -0.007 1581.590 49953.56 13117570 1508953260 330758519945 6.568493e+13 1.597130e+16 m2 2 fresh -0.004 1582.115 49980.60 13112309 1504720363 328782276791 6.489278e+13 1.565998e+16 m3 2 avggx -0.005 1581.803 16655.24 9024521 386983116 109316005302 1.031057e+13 2.317948e+15 m4 2 avggw -0.007 1623.767 16676.83 9430159 394308858 115162305956 1.065992e+13 2.449577e+15 m5 2 gw -0.004 1624.063 66008.09 16624641 2306444540 512263821700 1.091858e+14 2.679699e+16 m6 2 gexw -0.008 1544.142 46604.87 12442486 1417215917 314198509906 6.319608e+13 1.555647e+16 m7 2 aw -0.008 1584.124 64390.90 18271566 3327467892 981211335026 3.106615e+14 1.138189e+17 m8 2 rcal -0.004 1582.097 50065.91 12846438 1438927818 308421163707 5.958508e+13 1.415177e+16 > > ################################################ > ## Create table > ################################################ > res <- vector(nchunk, mode="list") > for(ch in 1:nchunk){ + res[[ch]] <- rbind(bigres[[ch]][[1]], bigres[[ch]][[2]]) + res[[ch]]$sim <- ch + } > > res <- do.call("rbind", res) > res$mom1 <- NULL > moms <- names(res)[grep("mom",names(res))] > res <- subset(res, method != "fresh") > > ## average moments across simulation replications > for(v in moms){ + res[,v] <- ave(res[,v], list(res$lambda, res$method)) + } > res <- unique(res[,c("lambda","method",moms)]) > res$index <- 1:nrow(res) > > ## get benchmark moments from gx cases and merge them onto res > b <- subset(res, method=="gx") > names(b)[which(names(b) %in% moms)] <- paste(names(b)[which(names(b) %in% moms)],"bench",sep="") > b$index <- NULL > b$method <- NULL > res <- merge(res, b, all.x=TRUE) > res <- res[order(res$index),] > > ## calculate percent bias > for(v in moms){ + res[,v] <- round(100 * (res[,v] - res[,paste(v,"bench",sep="")]) / res[,paste(v,"bench",sep="")], digits=0) + } > > ## reorganize table, get rid of irrelevant columns > res <- subset(res, method != "gx", select=c("method","lambda",moms)) > > ## resort to put the different lambdas adjacent for the same method > nest <- nrow(res)/2 > print(res[c(t(cbind(1:nest, (nest+1):nrow(res)))),]) method lambda mom2 mom3 mom4 mom5 mom6 mom7 mom8 2 aw 1 0 22 25 85 124 239 366 9 aw 2 0 29 40 122 200 381 629 3 avggx 1 0 -50 -20 -57 -49 -68 -69 10 avggx 2 0 -67 -31 -74 -67 -84 -85 4 rcal 1 0 0 -1 -2 -3 -5 -5 11 rcal 2 0 0 -2 -5 -7 -9 -11 5 gw 1 1 24 17 37 34 42 40 12 gw 2 3 32 27 52 54 65 67 6 avggw 1 1 -50 -18 -57 -47 -67 -68 13 avggw 2 3 -67 -28 -74 -65 -84 -85 7 gexw 1 -1 -5 -3 -3 -2 0 1 14 gexw 2 -2 -7 -5 -6 -5 -4 -3 > > proc.time() user system elapsed 3700.921 70.270 3770.931