# Sunday, 23.9.2001 Replication of Oneal Russett Model, per original files. # Monday, 24.9.2001 Get this running with splines, in clean directory # "C:\\Documents and Settings\\Default\\Desktop\\JohnOneal\\_data" # set missing values on yrspeace (NA) to 0.0 in triangle database # must have gee objects loaded as well as window.var # compares, logit, gee, huber, wsev variance calculations for # generic model # Wednesday, 26.9.2001 corrected data set received from John Oneal, rerun # output in triangle2.out in JohnOneal directory # Monday, 10.12.2001 Minor formatting changes for PA archive. sink("...triangle2-standard.out") options(digits=3) library(gee) mydir <- "..._data" attach("..._data",pos=1) # these data are available at http://www.yale.edu/unsy/democ/democ1.htm # though variables may have different names, especially yrspeace example.logit.fit<-glm(formula = dispute1 ~ smldmat + smldep + lcaprat2 + allies + contigkb+ logdstab+ majpower+ s(yrspeace,df=4) , family = binomial(link = logit), data = triangle2, na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = F),x=T,y=T) example.gam.fit<-gam(formula = dispute1 ~ smldmat + smldep +lcaprat2 + allies + contigkb+ logdstab+ majpower+ s(yrspeace,df=4) , family = binomial(link = logit), data = triangle2, na.action = na.exclude) summary(example.logit.fit) 1-pchisq(summary.gam(example.logit.fit)$null.deviance- summary.gam(example.logit.fit)$deviance,df=(summary.gam(example.logit.fit))$df[1]) example.gee.fit<-gee( formula = dispute1 ~ smldmat + smldep + lcaprat2 + allies + contigkb+ logdstab+ majpower + s(yrspeace,df=4), id=dyadid, data=triangle2, na.action=na.omit, tol=0.001, maxiter=as.integer(50), family = binomial(link=logit), silent=T, contrasts=NULL, scale.fix = F, scale.value = 1) summary(example.gee.fit) # now use WSEV sampler attach(triangle2) y <- dispute1 id <- triangle2$year fit <- glm(formula = y ~ smldmat + smldep + lcaprat2 + allies + contigkb+ logdstab+ majpower +s(yrspeace,df=4), family= binomial(link = logit), data = triangle2, na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = F), model=T) X <- model.matrix( ~ smldmat + smldep + lcaprat2 + allies + contigkb+ logdstab+ majpower+ s(yrspeace,df=4)) #X <- fit$model m <- trunc( 4 * (length(unique(triangle2$year)))^(0.33) ) # out <- window.var( y, X, fit, m=m, tvec=id ) # out gee.coefficients <-example.gee.fit$coefficients gee.naive.se <- diag(sqrt(example.gee.fit$naive.variance)) gee.robust.se <- diag(sqrt(example.gee.fit$robust.variance)) indep.se <- diag(sqrt(out$independence)) WSEV.0.se <- diag(sqrt(out$window.0)) WSEV.1.se <- diag(sqrt(out$window.1)) WSEV.1.2.indep <-WSEV.1.se/indep.se t.WSEV.1 <- fit$coefficients/WSEV.1.se triangle.out <- cbind(gee.coefficients,indep.se,gee.robust.se, WSEV.1.se,WSEV.1.2.indep,t.WSEV.1) triangle.out # independent std.errors are from logit, or gee with independence # WSEV.0.se are the windows sampling empirical variance estimators, # using clusters of 17 years, that overlap through the sample # WSEV.1.se are the windows sampling empirical variance estimators, # using clusters of 17 years, overlapping as above, but with # the one step approach described in Heagerty, Ward, Gleditsch (2001). # most standard errors appear to increase by about 3 from # independence to the one-step approach, but all variables retain their # significance. # sink()