#---------------------------------------------------------------------------------#
### information: 
#---------------------------------------------------------------------------------#
### authors: björn bremer (bremer@mpifg.de), reto bürgisser (buergisser@ipz.uzh.ch)
### last updated: 12 December 2021

#---------------------------------------------------------------------------------#
### description
#---------------------------------------------------------------------------------#

### this file replicates the analysis of the following paper:
### "Do Citizens Care About Government Debt? Evidence From Survey Experiments on Budgetary Priorities"
### this file focuses on the results from the CONJOINT EXPERIMENT shown in the MAIN TEXT
### NOTE: The file uses code for parallelization that runs on MacOS and Unix (parallel::mclapply()). 
### NOTE: The code needs to be adjusted to run on Windows (parallel::parLapply()).

#---------------------------------------------------------------------------------#

# Fix the random-number generator and the seed before any sampling happens, so
# that the CV fold assignments and bootstrap draws below can be reproduced.
# NOTE(review): "L'Ecuyer-CMRG" is presumably chosen because parallel::mclapply()
# derives its per-worker random streams from it -- confirm against the parallel
# package documentation.
RNGkind("L'Ecuyer-CMRG")
set.seed(1234567)

#--------------------------------------------------------
### load libraries ###
#--------------------------------------------------------

#library(rstudioapi)
library(plyr)
library(dplyr)
library(ggplot2) 
library(ggpubr)    # necessary to combine plots
library(glmnet)    # necessary for ridge regression
library(parallel)  # necessary for parallel computing
library(doMC)      # necessary for parallel computing
library(broom)     # necessary to get tidy regression output
library(xtable)    # necessary to create and print tex table
library(reshape2)  # necessary to reshape the data inside the function

#--------------------------------------------------------
### set the number of cores ###
#--------------------------------------------------------

# register a doMC backend with four workers; cv.glmnet(parallel = TRUE) is
# called further down and relies on a registered parallel backend
registerDoMC(cores = 4)

#--------------------------------------------------------
### load data ###
#--------------------------------------------------------

# Set the working directory to the location of this file. The call is
# namespace-qualified because rstudioapi is not attached in the library section;
# outside RStudio the current working directory is kept instead of failing.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
      setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
} else {
      message("RStudio API not available: keeping the current working directory; ",
              "start R in the folder that contains this file.")
}
load("../data/df_cj_ejpr_replication.Rdata") # load the file

# all figures written by ggsave() below use file names relative to this folder
setwd("../figurestables/")

#--------------------------------------------------------
### bootstrap functions to calculate coefficients and marginal means with ridge regression ###
#--------------------------------------------------------

# Number of bootstrap replications; the sections below pass 1:B to mclapply().
B <- 1000  # n of bootstrap draws
#B <- 2 ## to test

## function to calculate amces
# Draws one respondent-level bootstrap sample from `data`, refits the ridge
# regression of `selected` on the six package attributes, and returns the
# tidied coefficients of that fit.
#
# Arguments:
#   x       replication index handed over by mclapply(); not used in the body
#   data    data frame holding Response.ID, selected and the attribute factors
#           Pensions, Education, Income.tax, Rich.tax, VAT and Debt
#   lambda  ridge penalty; defaults to the global `bestlambda.ridge`, so a
#           cv.glmnet() step must have defined it before the first call
#
# Value: the output of tidy() for the refitted glmnet object.
boot.ridge <- function(x, data, lambda = bestlambda.ridge){
      # resample respondents (not single rows) with replacement, so that all
      # rows of a drawn respondent enter the bootstrap sample together
      ids <- unique(data$Response.ID)
      rid.b <- sample(ids, length(ids), replace = TRUE)
      # index the rows of every respondent once and expand the draws from that
      # lookup; this yields the same rows in the same order as appending the
      # which() result draw by draw, without re-scanning the data for every
      # draw and without regrowing the index vector
      rows.by.id <- split(seq_len(nrow(data)), match(data$Response.ID, ids))
      ind.b <- unlist(rows.by.id[match(rid.b, ids)], use.names = FALSE)
      DD.b <- data[ind.b,]
      # design matrix built with the contrasts stored on each attribute factor
      attrs <- c("Pensions", "Education", "Income.tax", "Rich.tax", "VAT", "Debt")
      x.b <- model.matrix(~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt, data=DD.b,
                          contrasts.arg=lapply(DD.b[attrs], contrasts))
      y.b <- DD.b$selected
      # alpha = 0 requests the ridge penalty; the fit uses the single given lambda
      fit.ridge.b <- glmnet(x.b, y.b, alpha = 0, lambda = lambda)
      tidy(fit.ridge.b)
}

## function to calculate mms
# Draws one respondent-level bootstrap sample from `data`, refits the ridge
# regression and returns, for every attribute level, the mean fitted value over
# the rows that show this level.
#
# Arguments:
#   x       replication index handed over by mclapply(); not used in the body
#   data    data frame holding Response.ID, selected and the attribute factors
#           Pensions, Education, Income.tax, Rich.tax, VAT and Debt
#   lambda  ridge penalty; defaults to the global `bestlambda.ridge`, so a
#           cv.glmnet() step must have defined it before the first call
#
# Value: data frame with the columns variable, value (always 1) and mean. The
# intercept column of the design matrix is part of the result under the name
# "X.Intercept." because data.frame() makes the column names syntactic.
boot.ridge.mm <- function(x, data, lambda = bestlambda.ridge){
      # resample respondents (not single rows) with replacement, so that all
      # rows of a drawn respondent enter the bootstrap sample together
      ids <- unique(data$Response.ID)
      rid.b <- sample(ids, length(ids), replace = TRUE)
      # index the rows of every respondent once and expand the draws from that
      # lookup; same rows and order as appending the which() result draw by draw
      rows.by.id <- split(seq_len(nrow(data)), match(data$Response.ID, ids))
      ind.b <- unlist(rows.by.id[match(rid.b, ids)], use.names = FALSE)
      DD.b <- data[ind.b,]
      # design matrix built with the contrasts stored on each attribute factor
      attrs <- c("Pensions", "Education", "Income.tax", "Rich.tax", "VAT", "Debt")
      x.b <- model.matrix(~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt, data=DD.b,
                          contrasts.arg=lapply(DD.b[attrs], contrasts))
      y.b <- DD.b$selected
      # alpha = 0 requests the ridge penalty; the fit uses the single given lambda
      fit.ridge.b <- glmnet(x.b, y.b, alpha = 0, lambda = lambda)
      # fitted value for every row of the bootstrap sample
      means.b <- predict(fit.ridge.b, newx = x.b, type = "response", s = lambda)
      mm.b <- data.frame(x.b, fitted = means.b[, 1L, drop = TRUE])
      # the design matrix has no column for the reference level ("Nochange");
      # add a 0/1 indicator that is 1 when neither Increase nor Decrease is set
      for(a in attrs){
            mm.b[[paste0(a, "Nochange")]] <-
                  as.numeric(mm.b[[paste0(a, "Increase")]] == 0 & mm.b[[paste0(a, "Decrease")]] == 0)
      }
      # long format: one row per (observation, indicator column)
      mm_long.b <- melt(mm.b, id.vars = c("fitted"))
      mm_sum.b <- ddply(mm_long.b, c("variable", "value"), summarise,
                        mean=mean(fitted))
      # keep only the means of the rows where the indicator is switched on
      mm_sum.b[which(mm_sum.b$value == 1),]
}

##########################################################################################
### figure 4: amces from the conjoint survey experiment, pooled ###
##########################################################################################

# design matrix for the pooled sample, built with the contrasts stored on each
# attribute factor
x <- model.matrix(
      ~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt,
      data = df_cj,
      contrasts.arg = lapply(
            df_cj[c("Pensions", "Education", "Income.tax", "Rich.tax", "VAT", "Debt")],
            contrasts
      )
)

# dependent variable
y <- df_cj$selected

# assign every observation at random to one of ten cross-validation folds
foldid <- sample(seq_len(10), size = length(y), replace = TRUE)

# cross-validate the ridge penalty over a grid from 10^10 down to 10^-2 and
# keep the value stored as lambda.min
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0,
                          lambda = 10^seq(10, -2, length.out = 200),
                          foldid = foldid, parallel = TRUE)
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# run the B bootstrap replications of boot.ridge() on four cores and report the
# elapsed time
start_time <- Sys.time()
out_coef <- mclapply(seq_len(B), boot.ridge, data = df_cj, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# stack the outputs of all bootstrap replications into one data frame
df_out_coef <- do.call(rbind.data.frame, out_coef)

# level used for the bootstrap percentile interval (also read by later sections)
alpha <- .05

# per term: mean of the bootstrap estimates and their alpha/2 and 1 - alpha/2
# quantiles
coef_sum <- ddply(df_out_coef, "term", summarise,
                  mean = mean(estimate),
                  low = quantile(estimate, alpha / 2),
                  high = quantile(estimate, 1 - alpha / 2))
print(coef_sum)

# add the baseline level of every attribute with all values set to zero
# (only for the coefficients)
ridge_baselines <- data.frame(
      term = c("EducationNochange", "PensionsNochange", "Income.taxNochange",
               "Rich.taxNochange", "VATNochange", "DebtNochange"),
      mean = rep(0, 6), low = rep(0, 6), high = rep(0, 6)
)
coef_sum <- rbind(coef_sum, ridge_baselines)

# add one row per attribute name with missing values; these rows only carry
# the attribute headings of the plot
ridge_attributes <- data.frame(
      term = c("(Education)", "(Pensions)", "(Income.tax)", "(Rich.tax)", "(VAT)", "(Debt)"),
      mean = rep(NA, 6), low = rep(NA, 6), high = rep(NA, 6)
)
coef_sum <- rbind(coef_sum, ridge_attributes)

# drop the intercept
coef_sum <- subset(coef_sum, term != "(Intercept)")

# turn the terms into a factor whose level order fixes the order on the axis
coef_sum$names_ordered <- factor(
      coef_sum$term,
      levels = c("DebtIncrease", "DebtDecrease", "DebtNochange", "(Debt)",
                 "VATIncrease", "VATDecrease", "VATNochange", "(VAT)",
                 "Rich.taxIncrease", "Rich.taxDecrease", "Rich.taxNochange", "(Rich.tax)",
                 "Income.taxIncrease", "Income.taxDecrease", "Income.taxNochange", "(Income.tax)",
                 "PensionsDecrease", "PensionsIncrease", "PensionsNochange", "(Pensions)",
                 "EducationDecrease", "EducationIncrease", "EducationNochange", "(Education)")
)

# coefficient plot: point = bootstrap mean, bar = percentile interval.
# The axis labels and the font faces are given by position, i.e. in the same
# order as the factor levels above (every fourth entry is an attribute heading).
coefplot_all <- ggplot(coef_sum, aes(x = names_ordered)) +
      geom_errorbar(aes(ymin = low, ymax = high), width = 0, size = 1) +
      geom_point(aes(y = mean), size = 1.5) +
      geom_hline(yintercept = 0, size = .5, colour = "black", linetype = "dotted") +
      coord_flip() +
      theme_bw() +
      scale_x_discrete(labels = c("Increase", "Decrease", "No change", "(Government debt)",
                                  "Increase", "Decrease", "No change", "(Value added tax)",
                                  "Increase", "Decrease", "No change", "(Top income tax)",
                                  "Increase", "Decrease", "No change", "(Income tax)",
                                  "Decrease", "Increase", "No change", "(Pension spending)",
                                  "Decrease", "Increase", "No change", "(Education spending)")) +
      theme(axis.text.y = element_text(face = rep(c("plain", "plain", "plain", "bold"), times = 6)),
            legend.position = "none") +
      labs(x = "", y = "Change in Pr(Support for fiscal package)")
coefplot_all

# write the coefficient plot to disk
ggsave(filename = "figure4.eps", plot = coefplot_all, width = 15, height = 10, units = "cm")

##########################################################################################
### figure 5: distribution of the ratings of all fiscal packages by government debt attribute level ###
##########################################################################################

# dodged bar chart of the ratings, one set of bars per level of the Debt
# attribute; the bar height is the proportion computed by geom_bar (..prop..)
distributon_rank_debt <- ggplot(df_cj, aes(x = rating, fill = Debt, colour = Debt)) +
      geom_bar(aes(y = ..prop..), position = "dodge") +
      #geom_density(alpha = 0, adjust = 2) +
      scale_x_continuous(breaks = seq(0, 10, by = 1)) +
      scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "fill") +
      scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour") +
      theme_bw() +
      labs(x = "Rating of fiscal package (0-10)", y = "Proportion",
           fill = "Government debt", colour = "Government debt")
distributon_rank_debt

# write the plot to disk
ggsave(filename = "figure5.eps", plot = distributon_rank_debt, width = 15, height = 10, units = "cm")

##########################################################################################
### figure 6: estimated marginal means from conjoint survey experiment by income group and partisanship ###
##########################################################################################

###-----------------------------------
### figure 6a: by income
###-----------------------------------

## low income

# rows of the low-income group (income_brackets_three == 1), selected once and
# reused for the design matrix, the outcome and the bootstrap
cj_income_low <- df_cj[which(df_cj$income_brackets_three == 1), ]

# design matrix for this group; the contrasts are taken from the full data set
x <- model.matrix(
      ~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt,
      data = cj_income_low,
      contrasts.arg = lapply(
            df_cj[c("Pensions", "Education", "Income.tax", "Rich.tax", "VAT", "Debt")],
            contrasts
      )
)

# dependent variable
y <- cj_income_low$selected

# assign every observation at random to one of ten cross-validation folds
foldid <- sample(seq_len(10), size = length(y), replace = TRUE)

# cross-validate the ridge penalty and keep the value stored as lambda.min
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0,
                          lambda = 10^seq(10, -2, length.out = 200),
                          foldid = foldid, parallel = TRUE)
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# run the B bootstrap replications of boot.ridge.mm() on four cores and report
# the elapsed time
start_time <- Sys.time()
out_mm_low <- mclapply(seq_len(B), boot.ridge.mm, data = cj_income_low, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# stack the outputs of all bootstrap replications into one data frame
df_out_mm_low <- do.call(rbind.data.frame, out_mm_low)

# per term: mean of the bootstrap marginal means and the lower/upper bound of
# the percentile interval
df_out_mm_low$term <- as.character(df_out_mm_low$variable)
df_out_mm_low$estimate <- as.numeric(df_out_mm_low$mean)

mm_sum_low <- ddply(df_out_mm_low, "term", summarise,
                    mean = mean(estimate),
                    low = quantile(estimate, alpha / 2),
                    high = quantile(estimate, 1 - alpha / 2))
print(mm_sum_low)

# add one row per attribute name with missing values; these rows only carry
# the attribute headings of the plot
ridge_attributes <- data.frame(
      term = c("(Education)", "(Pensions)", "(Income.tax)", "(Rich.tax)", "(VAT)", "(Debt)"),
      mean = rep(NA, 6), low = rep(NA, 6), high = rep(NA, 6)
)

mm_sum_low <- rbind(mm_sum_low, ridge_attributes)

###-----------------------------------
## medium income

# rows of the medium-income group (income_brackets_three == 2), selected once
# and reused for the design matrix, the outcome and the bootstrap
cj_income_med <- df_cj[which(df_cj$income_brackets_three == 2), ]

# design matrix for this group; the contrasts are taken from the full data set
x <- model.matrix(
      ~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt,
      data = cj_income_med,
      contrasts.arg = lapply(
            df_cj[c("Pensions", "Education", "Income.tax", "Rich.tax", "VAT", "Debt")],
            contrasts
      )
)

# dependent variable
y <- cj_income_med$selected

# assign every observation at random to one of ten cross-validation folds
foldid <- sample(seq_len(10), size = length(y), replace = TRUE)

# cross-validate the ridge penalty and keep the value stored as lambda.min
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0,
                          lambda = 10^seq(10, -2, length.out = 200),
                          foldid = foldid, parallel = TRUE)
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# run the B bootstrap replications of boot.ridge.mm() on four cores and report
# the elapsed time
start_time <- Sys.time()
out_mm_med <- mclapply(seq_len(B), boot.ridge.mm, data = cj_income_med, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# stack the outputs of all bootstrap replications into one data frame
df_out_mm_med <- do.call(rbind.data.frame, out_mm_med)

# per term: mean of the bootstrap marginal means and the lower/upper bound of
# the percentile interval
df_out_mm_med$term <- as.character(df_out_mm_med$variable)
df_out_mm_med$estimate <- as.numeric(df_out_mm_med$mean)

mm_sum_med <- ddply(df_out_mm_med, "term", summarise,
                    mean = mean(estimate),
                    low = quantile(estimate, alpha / 2),
                    high = quantile(estimate, 1 - alpha / 2))
print(mm_sum_med)

# append the attribute-heading rows (ridge_attributes is defined further up)
mm_sum_med <- rbind(mm_sum_med, ridge_attributes)

###-----------------------------------
## high income

# rows of the high-income group (income_brackets_three == 3), selected once and
# reused for the design matrix, the outcome and the bootstrap
cj_income_high <- df_cj[which(df_cj$income_brackets_three == 3), ]

# design matrix for this group; the contrasts are taken from the full data set
x <- model.matrix(
      ~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt,
      data = cj_income_high,
      contrasts.arg = lapply(
            df_cj[c("Pensions", "Education", "Income.tax", "Rich.tax", "VAT", "Debt")],
            contrasts
      )
)

# dependent variable
y <- cj_income_high$selected

# assign every observation at random to one of ten cross-validation folds
foldid <- sample(seq_len(10), size = length(y), replace = TRUE)

# cross-validate the ridge penalty and keep the value stored as lambda.min
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0,
                          lambda = 10^seq(10, -2, length.out = 200),
                          foldid = foldid, parallel = TRUE)
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# run the B bootstrap replications of boot.ridge.mm() on four cores and report
# the elapsed time
start_time <- Sys.time()
out_mm_high <- mclapply(seq_len(B), boot.ridge.mm, data = cj_income_high, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# stack the outputs of all bootstrap replications into one data frame
df_out_mm_high <- do.call(rbind.data.frame, out_mm_high)

# per term: mean of the bootstrap marginal means and the lower/upper bound of
# the percentile interval
df_out_mm_high$term <- as.character(df_out_mm_high$variable)
df_out_mm_high$estimate <- as.numeric(df_out_mm_high$mean)

mm_sum_high <- ddply(df_out_mm_high, "term", summarise,
                     mean = mean(estimate),
                     low = quantile(estimate, alpha / 2),
                     high = quantile(estimate, 1 - alpha / 2))
print(mm_sum_high)

# append the attribute-heading rows (ridge_attributes is defined further up)
mm_sum_high <- rbind(mm_sum_high, ridge_attributes)

###-------------------------------------------
## combine the marginal means from all income groups

# label every summary with its income group before stacking
mm_sum_low$income <- "Low"
mm_sum_med$income <- "Medium"
mm_sum_high$income <- "High"

mm_sum_income <- rbind(mm_sum_low, mm_sum_med, mm_sum_high)
mm_sum_income$income <- factor(
      mm_sum_income$income,
      levels = c("Low", "Medium", "High")
)

# drop the intercept; boot.ridge.mm() reports it under the name "X.Intercept."
mm_sum_income <- subset(mm_sum_income, term != "X.Intercept.")

# turn the terms into a factor whose level order fixes the order on the axis
mm_sum_income$names_ordered <- factor(
      mm_sum_income$term,
      levels = c("DebtIncrease", "DebtDecrease", "DebtNochange", "(Debt)",
                 "VATIncrease", "VATDecrease", "VATNochange", "(VAT)",
                 "Rich.taxIncrease", "Rich.taxDecrease", "Rich.taxNochange", "(Rich.tax)",
                 "Income.taxIncrease", "Income.taxDecrease", "Income.taxNochange", "(Income.tax)",
                 "PensionsDecrease", "PensionsIncrease", "PensionsNochange", "(Pensions)",
                 "EducationDecrease", "EducationIncrease", "EducationNochange", "(Education)")
)

# marginal-means plot by income group: point = bootstrap mean, bar = percentile
# interval, dotted reference line at 0.5. The axis labels and the font faces
# are given by position, i.e. in the order of the levels of names_ordered
# (every fourth entry is an attribute heading).
mmplot_income <- ggplot(mm_sum_income, aes(x = names_ordered, colour = income)) +
      geom_errorbar(aes(ymin = low, ymax = high), width = 0, size = 1,
                    position = position_dodge(0.8)) +
      geom_point(aes(y = mean, shape = income), size = 2,
                 position = position_dodge(0.8)) +
      geom_hline(yintercept = 0.5, size = .5, colour = "black", linetype = "dotted") +
      coord_flip() +
      theme_bw() +
      scale_x_discrete(labels = c("Increase", "Decrease", "No change", "(Government debt)",
                                  "Increase", "Decrease", "No change", "(Value added tax)",
                                  "Increase", "Decrease", "No change", "(Top income tax)",
                                  "Increase", "Decrease", "No change", "(Income tax)",
                                  "Decrease", "Increase", "No change", "(Pension spending)",
                                  "Decrease", "Increase", "No change", "(Education spending)")) +
      scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour") +
      guides(colour = guide_legend(reverse = TRUE),
             shape = guide_legend(reverse = TRUE)) +
      labs(x = "", y = "Marginal mean", colour = "Income", shape = "Income") +
      theme(axis.text.y = element_text(face = rep(c("plain", "plain", "plain", "bold"), times = 6)),
            legend.position = "bottom")
mmplot_income

# write the mm plot to disk
ggsave(filename = "figure6a.eps", plot = mmplot_income, width = 11, height = 22, units = "cm")

###-----------------------------------
### figure 6b: by partisanship
###-----------------------------------

###-----------------------------------
## left

# rows of the respondents with party_lr == "Left", selected once and reused for
# the design matrix, the outcome and the bootstrap
cj_party_left <- df_cj[which(df_cj$party_lr == "Left"), ]

# design matrix for this group; the contrasts are taken from the full data set
x <- model.matrix(
      ~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt,
      data = cj_party_left,
      contrasts.arg = lapply(
            df_cj[c("Pensions", "Education", "Income.tax", "Rich.tax", "VAT", "Debt")],
            contrasts
      )
)

# dependent variable
y <- cj_party_left$selected

# assign every observation at random to one of ten cross-validation folds
foldid <- sample(seq_len(10), size = length(y), replace = TRUE)

# cross-validate the ridge penalty and keep the value stored as lambda.min
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0,
                          lambda = 10^seq(10, -2, length.out = 200),
                          foldid = foldid, parallel = TRUE)
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# run the B bootstrap replications of boot.ridge.mm() on four cores and report
# the elapsed time
start_time <- Sys.time()
out_mm_party_l <- mclapply(seq_len(B), boot.ridge.mm, data = cj_party_left, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# stack the outputs of all bootstrap replications into one data frame
df_out_mm_party_l <- do.call(rbind.data.frame, out_mm_party_l)

# per term: mean of the bootstrap marginal means and the lower/upper bound of
# the percentile interval
df_out_mm_party_l$term <- as.character(df_out_mm_party_l$variable)
df_out_mm_party_l$estimate <- as.numeric(df_out_mm_party_l$mean)

mm_sum_party_l <- ddply(df_out_mm_party_l, "term", summarise,
                        mean = mean(estimate),
                        low = quantile(estimate, alpha / 2),
                        high = quantile(estimate, 1 - alpha / 2))
print(mm_sum_party_l)

# add one row per attribute name with missing values; these rows only carry
# the attribute headings of the plot
ridge_attributes <- data.frame(
      term = c("(Education)", "(Pensions)", "(Income.tax)", "(Rich.tax)", "(VAT)", "(Debt)"),
      mean = rep(NA, 6), low = rep(NA, 6), high = rep(NA, 6)
)

mm_sum_party_l <- rbind(mm_sum_party_l, ridge_attributes)

###-----------------------------------
## right

# rows of the respondents with party_lr == "Right", selected once and reused
# for the design matrix, the outcome and the bootstrap
cj_party_right <- df_cj[which(df_cj$party_lr == "Right"), ]

# design matrix for this group; the contrasts are taken from the full data set
x <- model.matrix(
      ~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt,
      data = cj_party_right,
      contrasts.arg = lapply(
            df_cj[c("Pensions", "Education", "Income.tax", "Rich.tax", "VAT", "Debt")],
            contrasts
      )
)

# dependent variable
y <- cj_party_right$selected

# assign every observation at random to one of ten cross-validation folds
foldid <- sample(seq_len(10), size = length(y), replace = TRUE)

# cross-validate the ridge penalty and keep the value stored as lambda.min
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0,
                          lambda = 10^seq(10, -2, length.out = 200),
                          foldid = foldid, parallel = TRUE)
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# run the B bootstrap replications of boot.ridge.mm() on four cores and report
# the elapsed time
start_time <- Sys.time()
out_mm_party_r <- mclapply(seq_len(B), boot.ridge.mm, data = cj_party_right, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# stack the outputs of all bootstrap replications into one data frame
df_out_mm_party_r <- do.call(rbind.data.frame, out_mm_party_r)

# per term: mean of the bootstrap marginal means and the lower/upper bound of
# the percentile interval
df_out_mm_party_r$term <- as.character(df_out_mm_party_r$variable)
df_out_mm_party_r$estimate <- as.numeric(df_out_mm_party_r$mean)

mm_sum_party_r <- ddply(df_out_mm_party_r, "term", summarise,
                        mean = mean(estimate),
                        low = quantile(estimate, alpha / 2),
                        high = quantile(estimate, 1 - alpha / 2))
print(mm_sum_party_r)

# append the attribute-heading rows (ridge_attributes is defined further up)
mm_sum_party_r <- rbind(mm_sum_party_r, ridge_attributes)

###-------------------------------------------
## combine the marginal means from both partisan groups

# label every summary with its group before stacking
mm_sum_party_l$party_lr <- "Left"
mm_sum_party_r$party_lr <- "Right"

mm_sum_party_lr <- rbind(mm_sum_party_l, mm_sum_party_r)

mm_sum_party_lr$party_lr <- factor(
      mm_sum_party_lr$party_lr,
      levels = c("Left", "Right")
)

# drop the intercept; boot.ridge.mm() reports it under the name "X.Intercept."
mm_sum_party_lr <- subset(mm_sum_party_lr, term != "X.Intercept.")

# turn the terms into a factor whose level order fixes the order on the axis
mm_sum_party_lr$names_ordered <- factor(
      mm_sum_party_lr$term,
      levels = c("DebtIncrease", "DebtDecrease", "DebtNochange", "(Debt)",
                 "VATIncrease", "VATDecrease", "VATNochange", "(VAT)",
                 "Rich.taxIncrease", "Rich.taxDecrease", "Rich.taxNochange", "(Rich.tax)",
                 "Income.taxIncrease", "Income.taxDecrease", "Income.taxNochange", "(Income.tax)",
                 "PensionsDecrease", "PensionsIncrease", "PensionsNochange", "(Pensions)",
                 "EducationDecrease", "EducationIncrease", "EducationNochange", "(Education)")
)

# marginal-means plot by partisanship: point = bootstrap mean, bar = percentile
# interval, dotted reference line at 0.5. The axis labels and the font faces
# are given by position, i.e. in the order of the levels of names_ordered
# (every fourth entry is an attribute heading).
mmplot_party_lr <- ggplot(mm_sum_party_lr, aes(x = names_ordered, colour = party_lr)) +
      geom_errorbar(aes(ymin = low, ymax = high), width = 0, size = 1,
                    position = position_dodge(0.8)) +
      geom_point(aes(y = mean, shape = party_lr), size = 2,
                 position = position_dodge(0.8)) +
      geom_hline(yintercept = 0.5, size = .5, colour = "black", linetype = "dotted") +
      coord_flip() +
      theme_bw() +
      scale_x_discrete(labels = c("Increase", "Decrease", "No change", "(Government debt)",
                                  "Increase", "Decrease", "No change", "(Value added tax)",
                                  "Increase", "Decrease", "No change", "(Top income tax)",
                                  "Increase", "Decrease", "No change", "(Income tax)",
                                  "Decrease", "Increase", "No change", "(Pension spending)",
                                  "Decrease", "Increase", "No change", "(Education spending)")) +
      scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour") +
      guides(colour = guide_legend(reverse = TRUE),
             shape = guide_legend(reverse = TRUE)) +
      labs(x = "", y = "Marginal mean", colour = "Partisanship", shape = "Partisanship") +
      theme(axis.text.y = element_text(face = rep(c("plain", "plain", "plain", "bold"), times = 6)),
            legend.position = "bottom")
mmplot_party_lr

# write the mm plot to disk
ggsave(filename = "figure6b.eps", plot = mmplot_party_lr, width = 11, height = 22, units = "cm")

## combine the two plots 
#mmplot_combined <- ggarrange(mmplot_income, mmplot_party_lr)
#ggsave(mmplot_combined, width = 22, height = 22, units = c("cm"), file ="figure6.eps")

##########################################################################################
### figure 7: estimated marginal means from conjoint survey experiment by country ###
##########################################################################################

###-----------------------------------
## germany

# restrict the conjoint data to the german sample once, so that the design matrix,
# the outcome and the resampling step below are all built from the same rows
df_cj_de <- df_cj[which(df_cj$country == "DE"), ]

# create a matrix from the dataset (the contrasts are taken from the full sample)
x <- model.matrix(~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt,
                  data = df_cj_de,
                  contrasts.arg = list(Pensions = contrasts(df_cj$Pensions, contrasts = TRUE),
                                       Education = contrasts(df_cj$Education, contrasts = TRUE),
                                       Income.tax = contrasts(df_cj$Income.tax, contrasts = TRUE),
                                       Rich.tax = contrasts(df_cj$Rich.tax, contrasts = TRUE),
                                       VAT = contrasts(df_cj$VAT, contrasts = TRUE),
                                       Debt = contrasts(df_cj$Debt, contrasts = TRUE)))

# create a dv
y <- df_cj_de$selected

# randomly assign every observation to one of 10 cross-validation folds
foldid <- sample(1:10, size = length(y), replace = TRUE)

# find the optimal lambda for the ridge penalty (alpha = 0) on a grid of
# 200 values running from 10^10 down to 10^-2
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid,
                          lambda = 10^seq(10, -2, length.out = 200))
plot(cv.fit.ridge)
# NOTE(review): bestlambda.ridge is not passed to boot.ridge.mm below; presumably
# the function reads it from the global environment - confirm before reordering
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means: boot.ridge.mm is called once for
# every index in 1..B on 4 cores, and the run time is printed
start_time <- Sys.time()
out_mm_country_de <- mclapply(seq_len(B), boot.ridge.mm, data = df_cj_de, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe (one set of rows per call)
df_out_mm_country_de <- do.call(rbind.data.frame, out_mm_country_de)

# calculate mean and higher/lower CI of the marginal means:
# per term, the mean and the alpha / 2 and 1 - alpha / 2 quantiles over all calls
df_out_mm_country_de$term <- as.character(df_out_mm_country_de$variable)
df_out_mm_country_de$estimate <- as.numeric(df_out_mm_country_de$mean)

mm_sum_country_de <- ddply(df_out_mm_country_de, c("term"), summarise,
                           mean = mean(estimate),
                           low = quantile(estimate, alpha / 2),
                           high = quantile(estimate, 1 - alpha / 2))
print(mm_sum_country_de)

# include the attribute names as a level: rows without estimates that only serve
# as headings in the plot (this object is reused for the other countries below)
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(Income.tax)", "(Rich.tax)", "(VAT)", "(Debt)"),
                               mean = rep(NA, 6), low = rep(NA, 6), high = rep(NA, 6))

mm_sum_country_de <- rbind(mm_sum_country_de, ridge_attributes)

###-----------------------------------
## uk

# restrict the conjoint data to the uk sample once, so that the design matrix,
# the outcome and the resampling step below are all built from the same rows
df_cj_uk <- df_cj[which(df_cj$country == "UK"), ]

# create a matrix from the dataset (the contrasts are taken from the full sample)
x <- model.matrix(~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt,
                  data = df_cj_uk,
                  contrasts.arg = list(Pensions = contrasts(df_cj$Pensions, contrasts = TRUE),
                                       Education = contrasts(df_cj$Education, contrasts = TRUE),
                                       Income.tax = contrasts(df_cj$Income.tax, contrasts = TRUE),
                                       Rich.tax = contrasts(df_cj$Rich.tax, contrasts = TRUE),
                                       VAT = contrasts(df_cj$VAT, contrasts = TRUE),
                                       Debt = contrasts(df_cj$Debt, contrasts = TRUE)))

# create a dv
y <- df_cj_uk$selected

# randomly assign every observation to one of 10 cross-validation folds
foldid <- sample(1:10, size = length(y), replace = TRUE)

# find the optimal lambda for the ridge penalty (alpha = 0) on a grid of
# 200 values running from 10^10 down to 10^-2
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid,
                          lambda = 10^seq(10, -2, length.out = 200))
plot(cv.fit.ridge)
# NOTE(review): bestlambda.ridge is not passed to boot.ridge.mm below; presumably
# the function reads it from the global environment - confirm before reordering
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means: boot.ridge.mm is called once for
# every index in 1..B on 4 cores, and the run time is printed
start_time <- Sys.time()
out_mm_country_uk <- mclapply(seq_len(B), boot.ridge.mm, data = df_cj_uk, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe (one set of rows per call)
df_out_mm_country_uk <- do.call(rbind.data.frame, out_mm_country_uk)

# calculate mean and higher/lower CI of the marginal means:
# per term, the mean and the alpha / 2 and 1 - alpha / 2 quantiles over all calls
df_out_mm_country_uk$term <- as.character(df_out_mm_country_uk$variable)
df_out_mm_country_uk$estimate <- as.numeric(df_out_mm_country_uk$mean)

mm_sum_country_uk <- ddply(df_out_mm_country_uk, c("term"), summarise,
                           mean = mean(estimate),
                           low = quantile(estimate, alpha / 2),
                           high = quantile(estimate, 1 - alpha / 2))
print(mm_sum_country_uk)

# include the attribute names as a level (heading rows without estimates)
mm_sum_country_uk <- rbind(mm_sum_country_uk, ridge_attributes)

###-----------------------------------
## italy 

# restrict the conjoint data to the italian sample once, so that the design matrix,
# the outcome and the resampling step below are all built from the same rows
df_cj_it <- df_cj[which(df_cj$country == "IT"), ]

# create a matrix from the dataset (the contrasts are taken from the full sample)
x <- model.matrix(~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt,
                  data = df_cj_it,
                  contrasts.arg = list(Pensions = contrasts(df_cj$Pensions, contrasts = TRUE),
                                       Education = contrasts(df_cj$Education, contrasts = TRUE),
                                       Income.tax = contrasts(df_cj$Income.tax, contrasts = TRUE),
                                       Rich.tax = contrasts(df_cj$Rich.tax, contrasts = TRUE),
                                       VAT = contrasts(df_cj$VAT, contrasts = TRUE),
                                       Debt = contrasts(df_cj$Debt, contrasts = TRUE)))

# create a dv
y <- df_cj_it$selected

# randomly assign every observation to one of 10 cross-validation folds
foldid <- sample(1:10, size = length(y), replace = TRUE)

# find the optimal lambda for the ridge penalty (alpha = 0) on a grid of
# 200 values running from 10^10 down to 10^-2
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid,
                          lambda = 10^seq(10, -2, length.out = 200))
plot(cv.fit.ridge)
# NOTE(review): bestlambda.ridge is not passed to boot.ridge.mm below; presumably
# the function reads it from the global environment - confirm before reordering
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means: boot.ridge.mm is called once for
# every index in 1..B on 4 cores, and the run time is printed
start_time <- Sys.time()
out_mm_country_it <- mclapply(seq_len(B), boot.ridge.mm, data = df_cj_it, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe (one set of rows per call)
df_out_mm_country_it <- do.call(rbind.data.frame, out_mm_country_it)

# calculate mean and higher/lower CI of the marginal means:
# per term, the mean and the alpha / 2 and 1 - alpha / 2 quantiles over all calls
df_out_mm_country_it$term <- as.character(df_out_mm_country_it$variable)
df_out_mm_country_it$estimate <- as.numeric(df_out_mm_country_it$mean)

mm_sum_country_it <- ddply(df_out_mm_country_it, c("term"), summarise,
                           mean = mean(estimate),
                           low = quantile(estimate, alpha / 2),
                           high = quantile(estimate, 1 - alpha / 2))
print(mm_sum_country_it)

# include the attribute names as a level (heading rows without estimates)
mm_sum_country_it <- rbind(mm_sum_country_it, ridge_attributes)

###-----------------------------------
## spain

# restrict the conjoint data to the spanish sample once, so that the design matrix,
# the outcome and the resampling step below are all built from the same rows
# (the resampling step previously received the italian subset by mistake)
df_cj_es <- df_cj[which(df_cj$country == "ES"), ]

# create a matrix from the dataset (the contrasts are taken from the full sample)
x <- model.matrix(~ Pensions + Education + Income.tax + Rich.tax + VAT + Debt,
                  data = df_cj_es,
                  contrasts.arg = list(Pensions = contrasts(df_cj$Pensions, contrasts = TRUE),
                                       Education = contrasts(df_cj$Education, contrasts = TRUE),
                                       Income.tax = contrasts(df_cj$Income.tax, contrasts = TRUE),
                                       Rich.tax = contrasts(df_cj$Rich.tax, contrasts = TRUE),
                                       VAT = contrasts(df_cj$VAT, contrasts = TRUE),
                                       Debt = contrasts(df_cj$Debt, contrasts = TRUE)))

# create a dv
y <- df_cj_es$selected

# randomly assign every observation to one of 10 cross-validation folds
foldid <- sample(1:10, size = length(y), replace = TRUE)

# find the optimal lambda for the ridge penalty (alpha = 0) on a grid of
# 200 values running from 10^10 down to 10^-2
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid,
                          lambda = 10^seq(10, -2, length.out = 200))
plot(cv.fit.ridge)
# NOTE(review): bestlambda.ridge is not passed to boot.ridge.mm below; presumably
# the function reads it from the global environment - confirm before reordering
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means: boot.ridge.mm is called once for
# every index in 1..B on 4 cores, and the run time is printed
start_time <- Sys.time()
out_mm_country_es <- mclapply(seq_len(B), boot.ridge.mm, data = df_cj_es, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe (one set of rows per call)
df_out_mm_country_es <- do.call(rbind.data.frame, out_mm_country_es)

# calculate mean and higher/lower CI of the marginal means:
# per term, the mean and the alpha / 2 and 1 - alpha / 2 quantiles over all calls
df_out_mm_country_es$term <- as.character(df_out_mm_country_es$variable)
df_out_mm_country_es$estimate <- as.numeric(df_out_mm_country_es$mean)

mm_sum_country_es <- ddply(df_out_mm_country_es, c("term"), summarise,
                           mean = mean(estimate),
                           low = quantile(estimate, alpha / 2),
                           high = quantile(estimate, 1 - alpha / 2))
print(mm_sum_country_es)

# include the attribute names as a level (heading rows without estimates)
mm_sum_country_es <- rbind(mm_sum_country_es, ridge_attributes)

###-------------------------------------------
## combine the coefficients from all countries

# tag every country summary with its country code
mm_sum_country_de[["country"]] <- "DE"
mm_sum_country_es[["country"]] <- "ES"
mm_sum_country_it[["country"]] <- "IT"
mm_sum_country_uk[["country"]] <- "UK"

# stack the four summaries (DE, ES, IT, UK) into one data frame
mm_sum_country <- rbind(
  mm_sum_country_de,
  mm_sum_country_es,
  mm_sum_country_it,
  mm_sum_country_uk
)

# exclude the intercept from the df
mm_sum_country <- mm_sum_country[which(mm_sum_country$term != "X.Intercept."), ]

# order the factored variable: three levels per attribute followed by the
# attribute heading; the plot labels below are matched to this order by position
mm_sum_country$names_ordered <- factor(
  mm_sum_country$term,
  levels = c(
    "DebtIncrease", "DebtDecrease", "DebtNochange", "(Debt)",
    "VATIncrease", "VATDecrease", "VATNochange", "(VAT)",
    "Rich.taxIncrease", "Rich.taxDecrease", "Rich.taxNochange", "(Rich.tax)",
    "Income.taxIncrease", "Income.taxDecrease", "Income.taxNochange", "(Income.tax)",
    "PensionsDecrease", "PensionsIncrease", "PensionsNochange", "(Pensions)",
    "EducationDecrease", "EducationIncrease", "EducationNochange", "(Education)"
  )
)

# create the plot: one dodged interval and point per country for each attribute
# level, with a dotted reference line at 0.5
# NOTE: the axis labels and font faces are assigned by position, so they rely
# on the level order of names_ordered
mmplot_country <- ggplot(
  data = mm_sum_country,
  mapping = aes(x = names_ordered, colour = country)
) +
  geom_errorbar(
    mapping = aes(ymin = low, ymax = high),
    position = position_dodge(0.8), width = 0, size = 1
  ) +
  geom_point(
    mapping = aes(y = mean, shape = country),
    position = position_dodge(0.8), size = 2
  ) +
  geom_hline(yintercept = 0.5, linetype = "dotted", colour = "black", size = .5) +
  scale_x_discrete(labels = c(
    "Increase", "Decrease", "No change", "(Government debt)",
    "Increase", "Decrease", "No change", "(Value added tax)",
    "Increase", "Decrease", "No change", "(Top income tax)",
    "Increase", "Decrease", "No change", "(Income tax)",
    "Decrease", "Increase", "No change", "(Pension spending)",
    "Decrease", "Increase", "No change", "(Education spending)"
  )) +
  scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour") +
  coord_flip() +
  labs(
    x = "", y = "Marginal mean",
    colour = "Country", shape = "Country"
  ) +
  guides(
    colour = guide_legend(reverse = TRUE),
    shape = guide_legend(reverse = TRUE)
  ) +
  theme_bw() +
  theme(
    # three plain attribute levels followed by one bold attribute heading, six times
    axis.text.y = element_text(face = rep(c("plain", "plain", "plain", "bold"), times = 6))
  )

# display the plot
mmplot_country

# save the mm plot (17.5 x 22.5 cm) as figure7.eps, relative to the working directory
# the arguments are named in full rather than relying on `file` partially matching `filename`
ggsave(filename = "figure7.eps", plot = mmplot_country,
       width = 17.5, height = 22.5, units = "cm")

#--------------------------------------------------------
### save the results from the regression models ###
#--------------------------------------------------------

# write the four summary objects to a single .RData file
# (the path is relative to the working directory)
save(
  list = c("coef_sum", "mm_sum_income", "mm_sum_party_lr", "mm_sum_country"),
  file = "../data/results/cj_results_main.RData"
)