# Sunday, 23.9.2001    Replication of Oneal Russett Model, per original files.
# Monday, 24.9.2001    Get this running with splines, in clean directory
#                      "C:\\Documents and Settings\\Default\\Desktop\\JohnOneal\\_data"
#                      set missing values on yrspeace (NA) to 0.0  in triangle database
#                      must have gee objects loaded as well as window.var
#                      compares, logit, gee, huber, wsev variance calculations for
#                      generic model
# Wednesday, 26.9.2001 corrected data set received from John Oneal, rerun
#                      output in triangle2.out in JohnOneal directory
# Monday, 10.12.2001   Minor formatting changes for PA archive.

sink("...triangle2-standard.out")
options(digits=3)
library(gee)
mydir <- "..._data"
attach("..._data",pos=1)
# these data are available at http://www.yale.edu/unsy/democ/democ1.htm
# though variables may have different names, especially yrspeace

example.logit.fit<-glm(formula = dispute1 ~ smldmat + smldep  + lcaprat2 + allies + 
    contigkb+ logdstab+ majpower+ s(yrspeace,df=4) , 
    family   = binomial(link = logit), data = triangle2, 
    na.action = na.exclude, control = list(epsilon = 0.0001, 
    maxit    = 50, trace = F),x=T,y=T)
    
example.gam.fit<-gam(formula = dispute1 ~ smldmat + smldep  +lcaprat2 + allies + 
    contigkb+ logdstab+ majpower+ s(yrspeace,df=4) , 
    family = binomial(link = logit), data = triangle2, na.action = na.exclude)
    
summary(example.logit.fit)

1-pchisq(summary.gam(example.logit.fit)$null.deviance-
   summary.gam(example.logit.fit)$deviance,df=(summary.gam(example.logit.fit))$df[1])

example.gee.fit<-gee(
    formula = dispute1 ~ smldmat + smldep  + lcaprat2 + allies + 
    contigkb+ logdstab+ majpower + s(yrspeace,df=4), id=dyadid,
    data=triangle2, na.action=na.omit,
    tol=0.001, maxiter=as.integer(50),
    family = binomial(link=logit), 
    silent=T, contrasts=NULL,
    scale.fix = F, scale.value = 1)
summary(example.gee.fit)

# now use WSEV sampler
attach(triangle2)
y <- dispute1
id <- triangle2$year
fit <- glm(formula = y ~  smldmat + smldep + lcaprat2 + allies + 
    contigkb+ logdstab+ majpower +s(yrspeace,df=4),
    family= binomial(link = logit), data = triangle2, 
    na.action = na.exclude, control = list(epsilon = 0.0001, 
    maxit = 50, trace = F), model=T)
X <- model.matrix( ~ smldmat + smldep + lcaprat2 + allies + 
    contigkb+ logdstab+ majpower+ s(yrspeace,df=4))
#X <- fit$model
m <- trunc( 4 * (length(unique(triangle2$year)))^(0.33) )
#
out <- window.var( y, X, fit, m=m, tvec=id )
#
out


gee.coefficients <-example.gee.fit$coefficients
gee.naive.se <- diag(sqrt(example.gee.fit$naive.variance))
gee.robust.se <- diag(sqrt(example.gee.fit$robust.variance))
indep.se <- diag(sqrt(out$independence))
WSEV.0.se <- diag(sqrt(out$window.0))
WSEV.1.se <- diag(sqrt(out$window.1))
WSEV.1.2.indep <-WSEV.1.se/indep.se
t.WSEV.1 <- fit$coefficients/WSEV.1.se
triangle.out <- cbind(gee.coefficients,indep.se,gee.robust.se,
                WSEV.1.se,WSEV.1.2.indep,t.WSEV.1)
triangle.out
# independent std.errors are from logit, or gee with independence
# WSEV.0.se are the windows sampling empirical variance estimators, 
#     using clusters of 17 years, that overlap through the sample
# WSEV.1.se are the windows sampling empirical variance estimators, 
#     using clusters of 17 years, overlapping as above, but with
#       the one step approach described in Heagerty, Ward, Gleditsch (2001).
# most standard errors appear to increase by about 3 from 
# independence to the one-step approach, but all variables retain their
# significance.
#
sink()