## Code to reproduce Table 2
set.seed(5003)
options(width=150)
sessionInfo()

################################################
## Loop over lambda to create raw data.
## Break the simulation into 5 chunks of 50,000,000 observations each
## to keep RAM manageable.
################################################
nchunk <- 5
n <- 50000000
bigres <- vector(nchunk, mode="list")

for(ch in 1:nchunk){

    lambdaseq <- c(1,2)
    nlam <- length(lambdaseq)
    res <- vector(nlam, mode="list")

    ## loop over lambda
    for(wh in 1:nlam){
        lambda <- lambdaseq[wh]
        cat(paste("\n\n#########################",ch,lambda,"#########################\n"))

        ## generate X
        d <- data.frame(x1 = rnorm(n))

        ## generate W as conditionally normal given X
        g1 <- function(x){ 0.07*(x+1)^2 }
        ## check reliability:
        ## var(d$x1) / (var(d$x1) + mean(g1(d$x1)))
        d$w1 <- d$x1 + rnorm(n, sd = sqrt(g1(d$x1)))

        ## corresponding A(W) function, closed form in this case
        A1 <- function(w){ (.07/(.07 + 1))*(w+1)^2 }

        ## get other plug-ins
        d$egx <- mean(g1(d$x1))

        ## brute-force conditioning on W by rounding it to the nearest 0.1
        d$wcat <- as.factor(round(d$w1, digits=1))
        d$exgivenw <- ave(d$x1, d$wcat)
        d$egxgivenw <- ave(g1(d$x1), d$wcat)

        #######################################################
        ## generate different values of W*
        #######################################################
        ## 1) using true variance
        d$w1star.gx <- d$w1 + rnorm(n, sd = sqrt(lambda * g1(d$x1)))
        ## 2) fresh sample using right variance function
        d$w1star.fresh <- d$x1 + rnorm(n, sd = sqrt((1 + lambda) * g1(d$x1)))
        ## 3) average g(X)
        d$w1star.avggx <- d$w1 + rnorm(n, sd = sqrt(lambda * d$egx))
        ## 4) average g(W)
        d$w1star.avggw <- d$w1 + rnorm(n, sd = sqrt(lambda * mean(g1(d$w1))))
        ## 5) g(W) plug-in
        d$w1star.gw <- d$w1 + rnorm(n, sd = sqrt(lambda * g1(d$w1)))
        ## 6) g(E[X|W])
        d$w1star.gexw <- d$w1 + rnorm(n, sd = sqrt(lambda * g1(d$exgivenw)))
        ## 7) A(W)
        d$w1star.aw <- d$w1 + rnorm(n, sd = sqrt(lambda * A1(d$w1)))
        ## 8) E[g(X)|W]
        d$w1star.rcal <- d$w1 + rnorm(n, sd = sqrt(lambda * d$egxgivenw))

        ## first eight raw sample moments of each W* version
        m1 <- c(mean(d$w1star.gx),   mean(d$w1star.gx^2),   mean(d$w1star.gx^3),   mean(d$w1star.gx^4),
                mean(d$w1star.gx^5), mean(d$w1star.gx^6),   mean(d$w1star.gx^7),   mean(d$w1star.gx^8))
        m2 <- c(mean(d$w1star.fresh),   mean(d$w1star.fresh^2), mean(d$w1star.fresh^3), mean(d$w1star.fresh^4),
                mean(d$w1star.fresh^5), mean(d$w1star.fresh^6), mean(d$w1star.fresh^7), mean(d$w1star.fresh^8))
        m3 <- c(mean(d$w1star.avggx),   mean(d$w1star.avggx^2), mean(d$w1star.avggx^3), mean(d$w1star.avggx^4),
                mean(d$w1star.avggx^5), mean(d$w1star.avggx^6), mean(d$w1star.avggx^7), mean(d$w1star.avggx^8))
        m4 <- c(mean(d$w1star.avggw),   mean(d$w1star.avggw^2), mean(d$w1star.avggw^3), mean(d$w1star.avggw^4),
                mean(d$w1star.avggw^5), mean(d$w1star.avggw^6), mean(d$w1star.avggw^7), mean(d$w1star.avggw^8))
        m5 <- c(mean(d$w1star.gw),   mean(d$w1star.gw^2), mean(d$w1star.gw^3), mean(d$w1star.gw^4),
                mean(d$w1star.gw^5), mean(d$w1star.gw^6), mean(d$w1star.gw^7), mean(d$w1star.gw^8))
        m6 <- c(mean(d$w1star.gexw),   mean(d$w1star.gexw^2), mean(d$w1star.gexw^3), mean(d$w1star.gexw^4),
                mean(d$w1star.gexw^5), mean(d$w1star.gexw^6), mean(d$w1star.gexw^7), mean(d$w1star.gexw^8))
        m7 <- c(mean(d$w1star.aw),   mean(d$w1star.aw^2), mean(d$w1star.aw^3), mean(d$w1star.aw^4),
                mean(d$w1star.aw^5), mean(d$w1star.aw^6), mean(d$w1star.aw^7), mean(d$w1star.aw^8))
        m8 <- c(mean(d$w1star.rcal),   mean(d$w1star.rcal^2), mean(d$w1star.rcal^3), mean(d$w1star.rcal^4),
                mean(d$w1star.rcal^5), mean(d$w1star.rcal^6), mean(d$w1star.rcal^7), mean(d$w1star.rcal^8))

        m <- round(rbind(m1, m2, m3, m4, m5, m6, m7, m8), digits=3)
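
        ## ---- optional equivalence check (a sketch added for clarity; not required) ----
        ## The rows m1..m8 above are simply the first eight raw sample moments,
        ## mean(w^k) for k = 1..8, of each W* column. The compact helper below
        ## (mom8 is a name introduced only here) reproduces one row as a cheap
        ## cross-check on a single column.
        mom8 <- function(w) sapply(1:8, function(k) mean(w^k))
        stopifnot(all.equal(m1, mom8(d$w1star.gx)))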
        colnames(m) <- paste("mom", 1:8, sep="")
        print(tmp <- data.frame(lambda = lambda,
                                method = c("gx","fresh","avggx","avggw","gw","gexw","aw","rcal"),
                                m))

        ## rearrange tmp to align with the table order in the paper and save
        res[[wh]] <- tmp[c(1,2,7,3,8,5,4,6),]

        rm(d)
        gc()
    }

    ## store results for this chunk
    bigres[[ch]] <- res
    rm(res)
    gc()
}

################################################
## Create table
################################################
res <- vector(nchunk, mode="list")
for(ch in 1:nchunk){
    res[[ch]] <- rbind(bigres[[ch]][[1]], bigres[[ch]][[2]])
    res[[ch]]$sim <- ch
}
res <- do.call("rbind", res)

## drop the first moment and the fresh-sample rows
res$mom1 <- NULL
moms <- names(res)[grep("mom", names(res))]
res <- subset(res, method != "fresh")

## average moments across simulation replications
for(v in moms){
    res[,v] <- ave(res[,v], list(res$lambda, res$method))
}
res <- unique(res[,c("lambda","method",moms)])
res$index <- 1:nrow(res)

## get benchmark moments from the gx cases and merge them onto res
b <- subset(res, method=="gx")
names(b)[which(names(b) %in% moms)] <- paste(names(b)[which(names(b) %in% moms)], "bench", sep="")
b$index <- NULL
b$method <- NULL
res <- merge(res, b, all.x=TRUE)
res <- res[order(res$index),]

## calculate percent bias relative to the benchmark moments
for(v in moms){
    res[,v] <- round(100 * (res[,v] - res[,paste(v,"bench",sep="")]) / res[,paste(v,"bench",sep="")],
                     digits=0)
}

## reorganize the table, dropping irrelevant columns
res <- subset(res, method != "gx", select=c("method","lambda",moms))

## re-sort so that the two lambda values for the same method are adjacent
nest <- nrow(res)/2
print(res[c(t(cbind(1:nest, (nest+1):nrow(res)))),])
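
################################################
## Optional sanity check (a quick sketch, not needed to build the table):
## the commented-out reliability check inside the loop has a closed form,
## because for X ~ N(0,1), E[g1(X)] = 0.07 * E[(X+1)^2] = 0.07 * 2 = 0.14,
## so var(X) / (var(X) + E[g1(X)]) = 1/1.14, roughly 0.877. The check below
## reuses g1() as defined in the loop above; ncheck and xcheck are throwaway
## names introduced only here, with a small sample size so it runs fast.
################################################
ncheck <- 1e6
xcheck <- rnorm(ncheck)
print(var(xcheck) / (var(xcheck) + mean(g1(xcheck))))  ## empirical reliability, near 0.877
print(1 / 1.14)                                        ## closed-form value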