##### Clean replication code ##### 
##### Updated as of 12/5/2021 ##### 

# ---- packages

# Start from an empty workspace so results do not depend on leftover objects.
# NOTE(review): this wipes the caller's global environment; prefer running the
# script in a fresh R session.
remove(list = ls())

# Install pacman on first use. requireNamespace() only checks availability, and
# p_load() is called through pacman:: so it also works directly after a fresh
# install (require() returns FALSE in that case and leaves p_load() undefined).
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")

# kableExtra is included because the correlation tables call kable_styling().
pacman::p_load(rio, here, tidyverse, skimr, panelr, ggpubr, corrplot, corrr, xtable,
               stargazer, knitr, huxtable, jtools, lavaan, semPlot, plm, haven, lmtest,
               lfe, ggExtra, kableExtra)

# NOTE(review): this executes code fetched over plain HTTP at run time; consider
# keeping a local, version-controlled copy of the script instead.
source("http://andy.egge.rs/code/xtsumR.R") # xtsumR function

here() # just check the top directory

# ---- Import datasets and some cleaning

# Read the three long-format panel files (Stata .dta) from the project's
# "data/working data" folder.
bes <- import(here("data", "working data", "BES_panel_long.dta"))
shp <- import(here("data", "working data", "shp_panel.dta"))
liss <- import(here("data", "working data", "liss netherlands", "merged", "liss_working.dta"))

# Give the studies common variable names (id, polint, leftright, gender) so the
# same model formulas can be used for each of them further down.
shp <- rename(
  shp,
  id = idpers,
  polint = pp01,
  leftright = pp10,
  gender = sex
)

bes <- rename(bes, polint = polAttention)


# ---- Within-between variation

# use xtsumR function to get the tables
# One xtsumR() summary per study: trust item first, respondent id second.
variance_liss <- xtsumR(liss$confpolt, liss$id)
variance_bes <- xtsumR(bes$trustMPs, bes$id)
variance_shp <- xtsumR(shp$pp04, shp$id)

# Stack the three summaries and label the first row of each study's chunk
# (rows 1, 4 and 7) in the first column.
var_all <- rbind(variance_liss, variance_bes, variance_shp)
var_all[c(1, 4, 7), 1] <- c("LISS", "BES", "SHP")

# Write the stacked table to tables/wbsumstats.tex without row names.
var_all <- xtable(var_all)
print(var_all, include.rownames = FALSE, file = here("tables", "wbsumstats.tex"))

# Ratio of the second to the first row of each chunk, column 4.
# The percentages below are the values recorded by the original author.
var_all[2, 4] / var_all[1, 4] # 87% LISS variation is between
var_all[5, 4] / var_all[4, 4] # 87% BES variation is between
var_all[8, 4] / var_all[7, 4] # 91% SHP variation is between


# ---- Trust aggregate graphs, by wave

# Calculate mean of trusting by wave, by panel study

# Mean of the trust item per wave in each panel study; missing answers are
# ignored when averaging.
besmean <- bes %>%
  group_by(wave) %>%
  summarise(trust_bes = mean(trustMPs, na.rm = TRUE))

lissmean <- liss %>%
  group_by(wave) %>%
  summarise(trust_liss = mean(confpolt, na.rm = TRUE))

shpmean <- shp %>%
  group_by(wave) %>%
  summarise(trust_shp = mean(pp04, na.rm = TRUE))

# Distribution of the wave means.
# NOTE(review): the captions (min/max) and dashed reference lines in the plots
# below are hard-coded; presumably they were read off summaries like this one
# and need updating by hand if the data change.
summary(besmean$trust_bes, na.rm = TRUE)

# Lollipop chart: one horizontal segment from 0 to the wave mean, per wave.
plot1bes <- ggplot(besmean, aes(trust_bes, wave)) +
  geom_segment(aes(x = 0, y = wave, xend = trust_bes, yend = wave), color = "grey50", size = 1) +
  geom_point() +
  labs(title = "Mean trust in MPs by BES wave",
       subtitle = "How much trust do you have in Members of Parliament in general?",
       y = "Wave",
       x = "1 (No trust) - 7 (A great deal)",
       caption = "Min = 2.3, max = 3.4") +
  geom_vline(xintercept = 3.02, linetype = "dashed") +
  theme_minimal()

# summary(lissmean$trust_liss, na.rm=T)

plot1liss <- ggplot(lissmean, aes(trust_liss, wave)) +
  geom_segment(aes(x = 0, y = wave, xend = trust_liss, yend = wave), color = "grey50", size = 1) +
  geom_point() +
  labs(title = "Mean confidence in Dutch politicians by LISS wave",
       subtitle = "How much confidence do you personally have in each of the following institutions? Politicians",
       y = "Wave",
       x = "0 (No Confidence) - 10 (Full Confidence)",
       caption = "Min = 4.5, max = 5.2") +
  geom_vline(xintercept = 4.7991, linetype = "dashed") +
  scale_y_continuous(breaks = seq(0, 12, 4)) +
  theme_minimal()

# summary(shpmean$trust_shp, na.rm=T)

plot1shp <- ggplot(shpmean, aes(trust_shp, wave)) +
  geom_segment(aes(x = 0, y = wave, xend = trust_shp, yend = wave), color = "grey50", size = 1) +
  geom_point() +
  labs(title = "Mean confidence in Swiss Government by SHP wave",
       subtitle = "How much confidence do you have in the federal government?",
       y = "Wave",
       x = "0 (No Confidence) - 10 (Full Confidence)",
       caption = "Min = 5.3, max = 6.3") +
  geom_vline(xintercept = 5.7, linetype = "dashed") +
  theme_minimal()

# Stack the three charts in a single column.
meansplot <- ggarrange(plot1bes, plot1shp, plot1liss, ncol = 1)

# Pass the arranged figure explicitly: without `plot =`, ggsave() writes
# whatever last_plot() happens to be, which is not guaranteed to be meansplot.
ggsave("meansplot.png",
       plot = meansplot,
       path = here("figures"))


# ---- Correlations between waves 

# shp
# One row per respondent, one column per wave, holding the SHP trust item.
shp_wide <- shp %>%
  select(id, wave, pp04) %>%
  spread(key = wave, value = pp04) %>%
  ungroup() %>% # needs this otherwise keeps the id
  select(-id)

# Wave-by-wave correlation matrix (pairwise deletion), computed once and reused
# for the LaTeX table, the heat map and the first-wave series.
shp_cor <- cor(shp_wide, use = "pairwise.complete.obs")

# kable_styling() lives in kableExtra; the namespace is spelled out because the
# package is not attached by the script's original package list.
shp_cor %>%
  kable(booktabs = TRUE, format = "latex", caption = "Correlations across waves: SHP", digits = 2) %>%
  kableExtra::kable_styling(latex_options = "scale_down")

# Heat map of the upper triangle, written to figures/shp_corr.png.
png(file = here("figures", "shp_corr.png"), width = 1920, height = 1080, res = 200)

corrplotshp <- corrplot(shp_cor, method = "color", type = "upper", tl.col = "black", diag = FALSE,
                        addCoefasPercent = TRUE, addCoef.col = "black")
dev.off()

correlation <- shp_cor[1, ] # first wave correlated with each wave
# Positional wave index derived from the matrix itself; a hard-coded 1:20 would
# error or silently recycle if the number of waves in the data were different.
wave <- seq_along(correlation)
panel <- "SHP"
shp_corw1 <- data.frame(correlation, wave, panel) # make into DF
ggplot(shp_corw1, aes(wave, correlation)) + geom_point() # plot

# liss 
# One row per respondent, one column per value of `time`, holding the LISS
# trust item (this panel is spread on `time`, not `wave`).
liss_wide <- liss %>%
  select(id, time, confpolt) %>%
  spread(key = time, value = confpolt) %>%
  ungroup() %>% # needs this otherwise keeps the id
  select(-id)

# Correlation matrix with pairwise deletion, computed once and reused below.
# The LaTeX table previously used use = "complete.obs", so it reported different
# numbers from the heat map and from the SHP/BES tables; it now shares this
# matrix.
liss_cor <- cor(liss_wide, use = "pairwise.complete.obs")

# kable_styling() lives in kableExtra; the namespace is spelled out because the
# package is not attached by the script's original package list.
liss_cor %>%
  kable(booktabs = TRUE, format = "latex", caption = "Correlations across waves: LISS", digits = 2) %>%
  kableExtra::kable_styling(latex_options = "scale_down")

# Heat map of the upper triangle, written to figures/liss_corr.png.
png(file = here("figures", "liss_corr.png"), width = 1920, height = 1080, res = 200)

corrplotliss <- corrplot(liss_cor, method = "color", type = "upper", tl.col = "black", diag = FALSE,
                         addCoefasPercent = TRUE, addCoef.col = "black")

dev.off()

correlation <- liss_cor[1, ] # first wave correlated with each wave
# Positional wave index derived from the matrix itself; a hard-coded 1:12 would
# error or silently recycle if the number of waves in the data were different.
wave <- seq_along(correlation)
panel <- "LISS"
liss_corw1 <- data.frame(correlation, wave, panel) # make into DF
ggplot(liss_corw1, aes(wave, correlation)) + geom_line() # plot

# bes

# One row per respondent, one column per wave, holding the BES trust item.
bes_wide <- bes %>%
  select(id, wave, trustMPs) %>%
  spread(key = wave, value = trustMPs) %>%
  ungroup() %>% # needs this otherwise keeps the id
  select(-id)

# Wave-by-wave correlation matrix (pairwise deletion), computed once and reused
# for the LaTeX table, the heat map and the first-wave series.
bes_cor <- cor(bes_wide, use = "pairwise.complete.obs")

# kable_styling() lives in kableExtra; the namespace is spelled out because the
# package is not attached by the script's original package list.
bes_cor %>%
  kable(booktabs = TRUE, format = "latex", caption = "Correlations across waves: BES", digits = 2) %>%
  kableExtra::kable_styling(latex_options = "scale_down")

# Heat map of the upper triangle, written to figures/bes_corr.png.
png(file = here("figures", "bes_corr.png"), width = 1920, height = 1080, res = 200)
corrplotbes <- corrplot(bes_cor, method = "color", type = "upper", tl.col = "black", diag = FALSE,
                        addCoefasPercent = TRUE, addCoef.col = "black")
dev.off()

# just first wave and subsequent waves
correlation <- bes_cor[1, ] # first wave correlated with each wave
# Positional wave index derived from the matrix itself; a hard-coded 1:20 would
# error or silently recycle if the number of waves in the data were different.
wave <- seq_along(correlation)
panel <- "BES"
bes_corw1 <- data.frame(correlation, wave, panel) # make into DF
ggplot(bes_corw1, aes(wave, correlation)) + geom_point() # plot

# ---- Correlation between first and subsequent waves

# Stack the three first-wave series and drop wave 1 itself (its correlation
# with itself carries no information).
firstwcorr <- filter(
  rbind(bes_corw1, shp_corw1, liss_corw1),
  wave != 1
)

# Arrow head shared by the two call-out curves.
callout_arrow <- arrow(length = unit(2, "mm"))

# Bubble chart: wave on x, correlation on y (also mapped to point size),
# coloured by panel. The y axis is hidden; two hard-coded call-outs label
# individual points.
ggplot(
  firstwcorr,
  aes(wave, correlation, colour = factor(panel), size = correlation)
) +
  geom_point() +
  labs(
    title = "Correlation between the first wave and subsequent waves in three panel studies",
    subtitle = "Variable: trust",
    x = "Wave number",
    colour = "Panel",
    size = "Correlation"
  ) +
  theme_minimal() +
  theme(
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  # first call-out
  annotate(
    "text",
    x = 7.1, y = 0.72,
    label = "Corr = 0.708",
    colour = "black", fontface = 2
  ) +
  annotate(
    geom = "curve",
    x = 5, y = 0.72, xend = 2.3, yend = 0.708,
    curvature = -.3, arrow = callout_arrow, colour = "black"
  ) +
  # second call-out
  annotate(
    "text",
    x = 12.5, y = 0.459,
    label = "Corr = 0.461",
    colour = "black", fontface = 2
  ) +
  annotate(
    geom = "curve",
    x = 12.5, y = 0.47, xend = 15.1, yend = 0.461,
    curvature = -.3, arrow = callout_arrow, colour = "black"
  )

# ---- Models: Prep

# Restrict to those who completed enough waves then show number of distinct individuals

# Keep person-waves with a non-missing trust answer, then keep respondents who
# have at least the required number of such waves (12 LISS, 13 BES, 14 SHP).
# Missingness is tested with is.na() rather than by comparing to the string "NA".
comp_liss <- liss %>%
  filter(!is.na(confpolt)) %>%
  group_by(id) %>%
  mutate(count = n()) %>%
  filter(count >= 12) %>%
  ungroup()
comp_bes <- bes %>%
  filter(!is.na(trustMPs)) %>%
  group_by(id) %>%
  mutate(count = n()) %>%
  filter(count >= 13) %>%
  ungroup()
comp_shp <- shp %>%
  filter(!is.na(pp04)) %>%
  group_by(id) %>%
  mutate(count = n()) %>%
  filter(count >= 14) %>%
  ungroup()

# Number of distinct respondents retained (values recorded by the original author).
n_distinct(comp_bes$id, na.rm = TRUE) # 1347
n_distinct(comp_liss$id, na.rm = TRUE) # 1421
n_distinct(comp_shp$id, na.rm = TRUE) # 1353

# There's some odd missing wave rows for LISS, so remove those.
# The is.na() guard matters: indexing with a logical NA does not drop the row,
# it returns an all-NA row instead.
liss <- liss[!is.na(liss$wave) & liss$wave != "", ]
comp_liss <- comp_liss[!is.na(comp_liss$wave) & comp_liss$wave != "", ]

# Share of respondents with exactly three rows (person-waves) in each panel,
# printed as a percentage. The proportion is multiplied by 100 so the "%"
# suffix is correct.
# NOTE(review): the original comment here read "how many interviewed in each
# wave"; confirm that n == 3 is the intended threshold.
shp %>%
  group_by(id) %>%
  summarise(n = n()) %>%
  summarise(share = round(100 * mean(n == 3), 1)) %>%
  pull(share) %>%
  paste0("%")
liss %>%
  group_by(id) %>%
  summarise(n = n()) %>%
  summarise(share = round(100 * mean(n == 3), 1)) %>%
  pull(share) %>%
  paste0("%")
bes %>%
  group_by(id) %>%
  summarise(n = n()) %>%
  summarise(share = round(100 * mean(n == 3), 1)) %>%
  pull(share) %>%
  paste0("%")

# ---- Models: Pooled with clustered errors

# Pooled OLS on the restricted samples. In felm() the four formula parts are
# covariates | fixed effects | instruments | cluster variable, so these models
# have no fixed effects and cluster the standard errors by respondent.
liss_pooled_cl <- felm(confpolt ~ age + edu3 + workstatus + income10 + leftright + polint + gender | 0 | 0 | id,
                       data = comp_liss)
bes_pooled_cl <- felm(trustMPs ~ age + edu3 + workstatus + income10 + leftright + polint + gender | 0 | 0 | id,
                      data = comp_bes)
shp_pooled_cl <- felm(pp04 ~ age + edu3 + workstatus + income10 + leftright + polint + gender | 0 | 0 | id,
                      data = comp_shp)

stargazer(liss_pooled_cl, shp_pooled_cl, bes_pooled_cl, model.names = FALSE, model.numbers = NULL,
          header = FALSE, dep.var.caption = "Political trust", dep.var.labels.include = FALSE,
          covariate.labels = c("Age", "Education", "Work status", "Income", "Left right", "Political interest", "Sex"),
          column.labels = c("LISS", "SHP", "BES"),
          no.space = TRUE, notes = "Standard errors clustered by individual", notes.append = TRUE,
          title = "Pooled OLS models predicting political trust",
          label = "lab:pooledols",
          keep.stat = c("n", "rsq"))

# ---- Models:  pooled but FE

# Same specifications with respondent fixed effects and no clustering. Gender
# is left out of these formulas.
liss_pooled_fe <- felm(confpolt ~ age + edu3 + workstatus + income10 + leftright + polint | id | 0 | 0,
                       data = comp_liss)
bes_pooled_fe <- felm(trustMPs ~ age + edu3 + workstatus + income10 + leftright + polint | id | 0 | 0,
                      data = comp_bes)
shp_pooled_fe <- felm(pp04 ~ age + edu3 + workstatus + income10 + leftright + polint | id | 0 | 0,
                      data = comp_shp)

# table to compare pooled and FE models
# The FE models are passed in LISS, SHP, BES order so they line up with
# column.labels; they were previously passed as LISS, BES, SHP, which put the
# BES estimates under "SHP (FEs)" and vice versa.
stargazer(liss_pooled_cl, shp_pooled_cl, bes_pooled_cl, liss_pooled_fe, shp_pooled_fe, bes_pooled_fe,
          model.names = FALSE, model.numbers = NULL,
          header = FALSE, dep.var.caption = "Political trust", dep.var.labels.include = FALSE,
          covariate.labels = c("Age", "Education", "Work status", "Income", "Left right", "Political interest", "Sex"),
          column.labels = c("LISS", "SHP", "BES", "LISS (FEs)", "SHP (FEs)", "BES (FEs)"),
          no.space = TRUE, notes = "Cols 1-3, Standard errors clustered by individual; \n Cols 4-6 fixed effects", notes.append = TRUE,
          title = "Pooled and fixed-effects OLS models predicting political trust",
          label = "lab:pooledolsfe",
          keep.stat = c("n", "rsq"))

# ---- Models: WBM

# This changes to panelR object to use the wbm() function

# Display labels for the model output: names are the labels shown, values are
# the coefficient names to match ("imean(x)" terms are labelled "Btw", the
# plain terms "W-i").
coefs_names <- c(
  "Btw: Left-right" = "`imean(leftright)`",
  "W-i: Left-right" = "leftright",
  "Btw: Age" = "`imean(age)`",
  "W-i: Age" = "age",
  "Btw: Political interest" = "`imean(polint)`",
  "W-i: Political interest" = "polint",
  "Btw: Education" = "`imean(edu3)`",
  "W-i: Education" = "edu3",
  "Btw: Income" = "`imean(income10)`",
  "W-i: Income" = "income10",
  "Btw: Work status" = "`imean(workstatus)`",
  "W-i: Work status" = "workstatus",
  "Btw: Sex" = "gender"
)

# wbm() needs panelr's panel_data objects, so convert the restricted samples.
liss_pan <- panel_data(comp_liss, id = id, wave = wave)
bes_pan <- panel_data(comp_bes, id = id, wave = wave)
shp_pan <- panel_data(comp_shp, id = id, wave = wave)

# Within-between models: predictors before the bar, gender after it.
wbm_liss <- wbm(
  confpolt ~ age + edu3 + workstatus + income10 + leftright + polint | gender,
  data = liss_pan
)
wbm_bes <- wbm(
  trustMPs ~ age + edu3 + workstatus + income10 + leftright + polint | gender,
  data = bes_pan
)
wbm_shp <- wbm(
  pp04 ~ age + edu3 + workstatus + income10 + leftright + polint | gender,
  data = shp_pan
)

# Summaries with HC3 robust errors; the SHP one is also clustered on "idhous"
# (household, per the table note below). An unclustered SHP summary is kept too.
summ_clust_shp <- summ(wbm_shp, robust = "HC3", cluster = "idhous")
summ_woclust <- summ(wbm_shp, robust = "HC3")
summ_robust_bes <- summ(wbm_bes, robust = "HC3")
summ_robust_liss <- summ(wbm_liss, robust = "HC3")

# TODO (open question to HANNAH from the original author): these estimates
# should sit alongside the pooled and FE ones, but stargazer cannot take these
# objects and huxreg does not output nicely to LaTeX. Any ideas?

huxreg(
  "SHP" = summ_clust_shp,
  "BES" = summ_robust_bes,
  "LISS" = summ_robust_liss,
  coefs = coefs_names,
  note = "{stars}. \n
       Robust standard errors, clustered by household for SHP."
)

# WBM output - figure with the scaled coefficients of all three models.

plot_summs(
  summ_robust_liss, summ_robust_bes, summ_clust_shp,
  coefs = coefs_names,
  scale = TRUE,
  model.names = c("LISS", "BES", "SHP")
) +
  labs(
    title = "Within and between effects on political trust in three panel studies",
    caption = "Robust standard errors (LISS/BES) \n Robust standard errors clustered by household (SHP)"
  )

ggsave("wbm_all.png",
       path = here("figures"))


# ---- Models: LGM

# https://www.alexcernat.com/estimating-and-visualizing-change-in-time-using-latent-growth-models-with-r/

# Build lavaan syntax for a linear latent growth model: every indicator
# `<var>_<wave>` loads 1 on the intercept `i` and <wave> on the slope `s`.
# Generating the string avoids hand-typed slips such as doubled `+` operators.
lgm_syntax <- function(var, waves) {
  indicators <- str_c(var, "_", waves)
  str_c(
    "i =~ ", str_c("1*", indicators, collapse = " + "), "\n",
    "s =~ ", str_c(waves, "*", indicators, collapse = " + ")
  )
}

# Plot each person's model-implied linear trajectory (intercept + wave * slope)
# for waves 1..n_waves, with the cross-person mean overlaid in red.
plot_lgm_trajectories <- function(fit, n_waves) {
  scores <- predict(fit) # column 1 = intercept scores, column 2 = slope scores
  waves <- seq_len(n_waves)
  wave_labels <- str_c("Wave", waves)

  # Row r, column w holds intercept_r + w * slope_r.
  trajectories <- scores[, 1] + outer(scores[, 2], waves)

  trajectories %>%
    as.data.frame() %>%
    setNames(wave_labels) %>%
    mutate(id = row_number()) %>%
    pivot_longer(-id, names_to = "wave", values_to = "pred") %>%
    # explicit levels keep Wave10+ after Wave9 instead of sorting alphabetically
    mutate(wave = factor(wave, levels = wave_labels)) %>%
    ggplot(aes(wave, pred, group = id)) + # what variables to plot?
    geom_line(alpha = 0.05) + # add a transparent line for each person
    stat_summary( # add average line
      aes(group = 1),
      fun = mean,
      geom = "line",
      size = 1.5,
      color = "red"
    ) +
    theme_bw() + # makes graph look nicer
    labs(y = "Trust", # labels
         x = "Wave")
}

liss_lgcdf <- panel_data(comp_liss, id = id, wave = wave)
bes_lgcdf <- panel_data(comp_bes, id = id, wave = wave)
shp_lgcdf <- panel_data(comp_shp, id = id, wave = wave)

## LISS

# The piped data frame is widen_panel()'s data argument; the previous call also
# passed the not-yet-created liss_mm_df as an extra positional argument.
liss_mm_df <- liss_lgcdf %>%
  select(id, wave, confpolt, confep, confgov, confparl) %>%
  widen_panel(separator = "_")

model <- lgm_syntax("confpolt", 1:12)

fit_lgm <- growth(model, data = liss_mm_df)

summary(fit_lgm, standardized = TRUE)

# This section previously had a stray `i` statement and no plot of its own, so
# ggsave() wrote whichever figure was drawn last under the name lgc_liss.png.
# The trajectory plot is now built the same way as for BES and SHP.
lgc_liss_plot <- plot_lgm_trajectories(fit_lgm, n_waves = 12)
print(lgc_liss_plot)

ggsave("lgc_liss.png",
       plot = lgc_liss_plot,
       path = here("figures"))

## BES

bes_mm_df <- bes_lgcdf %>%
  select(id, wave, trustMPs) %>%
  filter(wave < 12) %>%
  widen_panel(separator = "_")

# Wave 5 is not among the indicators (as in the original model string).
model <- lgm_syntax("trustMPs", c(1:4, 6:10))

fit_lgm <- growth(model, data = bes_mm_df)

summary(fit_lgm, standardized = TRUE)

lgc_bes_plot <- plot_lgm_trajectories(fit_lgm, n_waves = 10)
print(lgc_bes_plot)

ggsave("lgc_bes.png",
       plot = lgc_bes_plot,
       path = here("figures"))

## SHP

shp_mm_df <- shp_lgcdf %>%
  select(id, wave, pp04) %>%
  widen_panel(separator = "_")

# Indicators are waves 1-11, 13, 16 and 19 (as in the original model string).
model <- lgm_syntax("pp04", c(1:11, 13, 16, 19))

fit_lgm <- growth(model, data = shp_mm_df)

summary(fit_lgm, standardized = TRUE)

# Trajectories are drawn for waves 1-20, as in the original plot.
lgc_shp_plot <- plot_lgm_trajectories(fit_lgm, n_waves = 20)
print(lgc_shp_plot)

ggsave("lgc_shp.png",
       plot = lgc_shp_plot,
       path = here("figures"))

# ---- Models: Impact functions

# get mean partisanship by respondent...
# Flag incumbency switches and add event-time variables measured relative to
# each respondent's most recent loss of incumbency status.
add_loss_timing <- function(df) {
  df %>%
    group_by(id) %>%
    arrange(id, year) %>%
    mutate(
      lag_incumb = dplyr::lag(incumbency, 1), # dplyr:: since there's a base R lag
      # loser = incumbency is 0 now but the previous row was neither 0 nor missing
      # (%nin% is "not %in%")
      loser = ifelse(incumbency == 0 & lag_incumb %nin% c(0, NA), 1, 0),
      # winner = incumbency is 1 now but the previous row was neither 1 nor missing
      winner = ifelse(incumbency == 1 & lag_incumb %nin% c(1, NA), 1, 0)
    ) %>%
    group_by(id) %>%
    mutate(
      incumbch_yr = ifelse(loser != 1, 0, year), # year you BECAME a loser, else 0
      incumbch_yr = max(incumbch_yr),            # latest loss year per respondent
      time_ch = year - incumbch_yr               # time to/from loser change
    ) %>%
    # respondents who never lost have incumbch_yr == 0, hence time_ch == year,
    # and are removed by this filter
    filter(time_ch < 18) %>%
    ungroup() %>%
    select(-incumbch_yr) %>%
    mutate(
      time_ch_short = ifelse(time_ch %in% c(-18:-3), -3,
                             ifelse(time_ch %in% c(3:18), 3, time_ch)),
      tsqu = time_ch^2,
      toch = ifelse(time_ch < 1, time_ch, 0),    # time until change
      fromch = ifelse(time_ch > -1, time_ch, 0), # time from incumbency change
      lost = ifelse(time_ch > -1, 1, 0)          # dummy for having lost
    )
}

# Keep the analysis variables, drop rows with any missing value, and convert to
# a plm panel frame indexed by respondent and year, with value labels removed.
as_impact_panel <- function(df) {
  df %>%
    select(id, year, lost, fromch, toch, tsqu, time_ch, incumbency, winner, loser,
           age, income10, edu3, workstatus, gender, leftright, confpolt, vote) %>%
    filter_all(all_vars(!is.na(.))) %>%
    arrange(id, year) %>%
    pdata.frame(index = c("id", "year")) %>%
    zap_labels()
}

# Mean confidence in politicians by event time, within four years of the loss.
mean_trust_by_event_time <- function(panel_df) {
  panel_df %>%
    filter(time_ch %in% c(-4:4)) %>%
    group_by(time_ch) %>%
    summarise(av_trst = mean(confpolt))
}

# Axis, reference line, theme and labels shared by the two published figures.
impact_layers <- function() {
  list(
    scale_x_discrete(limits = c(-4:4)),
    geom_vline(xintercept = 0, linetype = "dashed"),
    theme_minimal(),
    labs(x = "Time (in years) to/since losing incumbency status",
         y = "Average confidence in politicians (0-10)",
         title = "The effect of losing an election on trust in politicians",
         caption = "Data: LISS Panel Study")
  )
}

# Mean incumbency status per respondent across their waves.
liss <- liss %>%
  group_by(id) %>%
  mutate(mean_incumb = mean(incumbency, na.rm = TRUE)) %>%
  ungroup()

# Keep respondents whose incumbency changed at some point (mean strictly
# between 0 and 1) and who appear in more than one wave.
liss_incumbdf <- liss %>%
  filter(!is.na(incumbency), mean_incumb > 0 & mean_incumb < 1) %>%
  group_by(id) %>%
  mutate(count = n()) %>%
  filter(count > 1) %>%
  ungroup()
n_distinct(liss_incumbdf$id) # 4553 (value recorded by the original author)

# need to create a variable to measure time to/from event.
liss_incumbdf <- add_loss_timing(liss_incumbdf)

# subset
impact_df <- as_impact_panel(liss_incumbdf)

# First-difference models (m1, m2) and a within/fixed-effects model (m3).
m1 <- plm(confpolt ~ 0 + lost, data = impact_df, model = "fd")
m2 <- plm(confpolt ~ 0 + lost + age + income10 + edu3 + workstatus + gender + leftright,
          data = impact_df, model = "fd")
m3 <- plm(confpolt ~ 0 + lost + age + income10 + edu3 + workstatus + gender + leftright,
          data = impact_df, model = "within")

# HC0 covariance matrices clustered by group, computed once per model and
# reused for both the coefficient tests and the table.
cov1 <- vcovHC(m1, type = "HC0", cluster = "group")
cov2 <- vcovHC(m2, type = "HC0", cluster = "group")
cov3 <- vcovHC(m3, type = "HC0", cluster = "group")

coeftest(m1, vcov = cov1)
coeftest(m2, vcov = cov2)
coeftest(m3, vcov = cov3)

robust.se1 <- sqrt(diag(cov1))
robust.se2 <- sqrt(diag(cov2))
robust.se3 <- sqrt(diag(cov3))

stargazer(m1, m2, m3, header = FALSE, dep.var.labels = "Trust in politicians",
          keep.stat = c("rsq", "n"), se = list(robust.se1, robust.se2, robust.se3, NULL),
          column.labels = c("FD", "FE"),
          column.separate = c(2, 1), no.space = TRUE,
          type = "text")

exmpl <- mean_trust_by_event_time(impact_df)

impact_plot <- ggplot(exmpl, aes(x = time_ch, y = av_trst)) +
  geom_line(size = 1, colour = "black") +
  impact_layers()
print(impact_plot)

# Save the figure explicitly rather than relying on the last plot drawn.
ggsave("impactfunction.png",
       plot = impact_plot,
       path = here("figures"))

# Repeat for those who completed all waves (12 rows in the sample above).

liss_incumbdf2 <- liss_incumbdf %>%
  group_by(id) %>%
  mutate(count = n()) %>%
  filter(count == 12) %>%
  ungroup()
n_distinct(liss_incumbdf2$id) # 488 (value recorded by the original author)

liss_incumbdf2 <- add_loss_timing(liss_incumbdf2)

# subset
impact_df2 <- as_impact_panel(liss_incumbdf2)

exmpl2 <- mean_trust_by_event_time(impact_df2)

# quick look at the all-waves series on its own
ggplot(exmpl2, aes(x = time_ch, y = av_trst)) + geom_line() +
  scale_x_discrete(limits = c(-4:4))

## plot both together

exmpl2$sample <- "All waves"
exmpl$sample <- ">=2 waves"

impactfun <- rbind(exmpl, exmpl2) %>%
  arrange(time_ch, sample)

impact_plot2 <- ggplot(impactfun, aes(x = time_ch, y = av_trst)) +
  geom_line(aes(color = sample), size = 1) +
  impact_layers()
print(impact_plot2)

ggsave("impactfunction2.png",
       plot = impact_plot2,
       path = here("figures"))


