###############################################################
# Replications for Figure 4 and false positive/false negative
# simulations
# A Quantitative Method for Substantive Robustness Assessment
# by Justin Esarey and Nathan Danneman
#
# version: 5/11/2014
###############################################################



##################################
# Replicate Figure 4
##################################

# size/power ratios for substantive + statistical significance
# analysis of power (false negatives) first

rm(list=ls())
set.seed(129083)

source("ols_cstar_loss.r")

p.val<-c()
cstar.val<-c()
pb<-txtProgressBar(min=0, max=1000, style=3)
for(i in 1:1000){
  
  setTxtProgressBar(pb, value=i)
  x<-runif(100)
  y<-2+0.75*x+rnorm(100, mean=0, sd=0.5)
  lin.mod<-lm(y~x)
  p.val[i]<-coefficients(summary(lin.mod))[2,4]
  cstar.val[i]<-cstar(lin.mod, r=2)[2]
  
}
close(pb)

p.rate.power<-c()
r.rate.power<-c()
comb.rate.power<-c()
p.thresh<-seq(from=0.005, to = 0.2, by=0.005)

for(k in 1:length(p.thresh)){
  p.rate.power[k]<-sum(p.val<=p.thresh[k])/1000
  r.rate.power[k]<-sum(cstar.val>0)/1000
  comb.rate.power[k]<-sum(cstar.val>=0 & p.val<=p.thresh[k])/1000
}


# now the size (false positive rates)

set.seed(328932)

p.val<-c()
cstar.val<-c()
pb<-txtProgressBar(min=0, max=1000, style=3)
for(i in 1:1000){
  
  setTxtProgressBar(pb, value=i)
  x<-runif(100)
  y<-2+0*x+rnorm(100, mean=0, sd=0.5)
  lin.mod<-lm(y~x)
  p.val[i]<-coefficients(summary(lin.mod))[2,4]
  cstar.val[i]<-cstar(lin.mod, r=2)[2]
  
}
close(pb)

p.rate.size<-c()
r.rate.size<-c()
comb.rate.size<-c()
p.thresh<-seq(from=0.005, to = 0.2, by=0.005)

for(k in 1:length(p.thresh)){
  p.rate.size[k]<-sum(p.val<=p.thresh[k])/1000
  r.rate.size[k]<-sum(cstar.val>0)/1000
  comb.rate.size[k]<-sum(cstar.val>=0 & p.val<=p.thresh[k])/1000
}

plot(p.rate.power~p.rate.size, type="l", ylab="true positive rate (power)", xlab="false positive rate (size)", main=expression(beta[x]~"= 0.75"))
lines(comb.rate.power~comb.rate.size, lty=2, lwd=2)
legend("bottomright", lwd=c(1,2), lty=c(1,2), legend=c("statistical significance only", "significance + substantive robustness"))

dev.copy2eps(file="sizepower.eps")     ##   This is Figure 4 Panel (a)


# This is where we show that there are no cases where the null is false
# and the result is statistically but not substantively significant
p.rate.power[10]
comb.rate.power[10]
r.rate.power[10]





# change the characteristics of the test, as before
# size/power ratios for substantive + statistical significance
# analysis of power (false negatives) first

rm(list=ls())
set.seed(129083)

source("ols_cstar_loss.r")

p.val<-c()
cstar.val<-c()
pb<-txtProgressBar(min=0, max=1000, style=3)
for(i in 1:1000){
  
  setTxtProgressBar(pb, value=i)
  x<-runif(100)
  y<-2+0.25*x+rnorm(100, mean=0, sd=0.5)
  lin.mod<-lm(y~x)
  p.val[i]<-coefficients(summary(lin.mod))[2,4]
  cstar.val[i]<-cstar(lin.mod, r=2)[2]
  
}
close(pb)

p.rate.power<-c()
r.rate.power<-c()
comb.rate.power<-c()
p.thresh<-seq(from=0.005, to = 0.2, by=0.005)

for(k in 1:length(p.thresh)){
  p.rate.power[k]<-sum(p.val<=p.thresh[k])/1000
  r.rate.power[k]<-sum(cstar.val>0)/1000
  comb.rate.power[k]<-sum(cstar.val>=0 & p.val<=p.thresh[k])/1000
}


# now the size (false positive rates)

set.seed(328932)

p.val<-c()
cstar.val<-c()
pb<-txtProgressBar(min=0, max=1000, style=3)
for(i in 1:1000){
  
  setTxtProgressBar(pb, value=i)
  x<-runif(100)
  y<-2+0*x+rnorm(100, mean=0, sd=0.5)
  lin.mod<-lm(y~x)
  p.val[i]<-coefficients(summary(lin.mod))[2,4]
  cstar.val[i]<-cstar(lin.mod, r=2)[2]
  
}
close(pb)

p.rate.size<-c()
r.rate.size<-c()
comb.rate.size<-c()
p.thresh<-seq(from=0.005, to = 0.2, by=0.005)

for(k in 1:length(p.thresh)){
  p.rate.size[k]<-sum(p.val<=p.thresh[k])/1000
  r.rate.size[k]<-sum(cstar.val>0)/1000
  comb.rate.size[k]<-sum(cstar.val>=0 & p.val<=p.thresh[k])/1000
}

plot(p.rate.power~p.rate.size, type="l", ylab="true positive rate (power)", xlab="false positive rate (size)", main=expression(beta[x]~"= 0.25"))
lines(comb.rate.power~comb.rate.size, lty=2, lwd=2)
legend("bottomright", lwd=c(1,2), lty=c(1,2), legend=c("statistical significance only", "significance + substantive robustness"))

dev.copy2eps(file="sizepower_alt.eps")   ##   This is Figure 4 Panel (b)


# This is where we show that there are no cases where the null is false
# and the result is statistically but not substantively significant
p.rate.power[10]
comb.rate.power[10]
r.rate.power[10]






# change characteristics of the DGP to assess updated false
# positive rate when coefficient is statistically significant
# but substantively not significant

# This is where we calculate Pr(null false | stat sig., not robust)
# under a wider noise for the DGP

set.seed(123012)
p.val<-c()
cstar.val<-c()
pb<-txtProgressBar(min=0, max=1000, style=3)
for(i in 1:1000){
  
  setTxtProgressBar(pb, value=i)
  x<-runif(100)
  y<-2+0.25*x+rnorm(100, mean=0, sd=2.5)
  lin.mod<-lm(y~x)
  p.val[i]<-coefficients(summary(lin.mod))[2,4]
  cstar.val[i]<-cstar(lin.mod, r=2)[2]
  
}
close(pb)

thresh.vec<-seq(from=0.00, to=0.5, by=0.01)
p.rate<-c()
r.rate<-c()
comb.rate<-c()
st.no.sb.rate<-c()


for(k in 1:length(thresh.vec)){
  p.rate[k]<-sum(p.val<=0.05)/1000
  r.rate[k]<-sum(cstar.val>=thresh.vec[k])/1000
  comb.rate[k]<-sum(cstar.val>=thresh.vec[k] & p.val<=0.05)/1000
  st.no.sb.rate[k]<-sum(cstar.val<thresh.vec[k] & p.val<=0.05)/1000
}

st.no.sb.rate

# same thing under null

set.seed(123012)
p.val<-c()
cstar.val<-c()
pb<-txtProgressBar(min=0, max=1000, style=3)
for(i in 1:1000){
  
  setTxtProgressBar(pb, value=i)
  x<-runif(100)
  y<-2+0*x+rnorm(100, mean=0, sd=2.5)
  lin.mod<-lm(y~x)
  p.val[i]<-coefficients(summary(lin.mod))[2,4]
  cstar.val[i]<-cstar(lin.mod, r=2)[2]
  
}
close(pb)

thresh.vec<-seq(from=0.00, to=0.5, by=0.01)
p.rate<-c()
r.rate<-c()
comb.rate<-c()
st.no.sb.rate<-c()


for(k in 1:length(thresh.vec)){
  p.rate[k]<-sum(p.val<=0.05)/1000
  r.rate[k]<-sum(cstar.val>=thresh.vec[k])/1000
  comb.rate[k]<-sum(cstar.val>=thresh.vec[k] & p.val<=0.05)/1000
  st.no.sb.rate[k]<-sum(cstar.val<thresh.vec[k] & p.val<=0.05)/1000
}

st.no.sb.rate