#LCAmodel 
#All analyses carried out using R studio Version 1.0.143 for Mac

setwd("~/Desktop")

#clear workshpace
rm(list = ls())

#installing packages
install.packages("scatterplot3d")
install.packages("MASS")
install.packages("poLCA")
install.packages("ggplot2")


library(scatterplot3d)
library(MASS)
library(poLCA)
library(ggplot2)
library(haven)

ideologyEU7USAUS <- read_dta("ideologyEU7USAUS.dta")
View(ideologyEU7USAUS)
df <- ideologyEU7USAUS

f <- cbind(EXP1, EXP2, EXP3, EXP4, AP1, AP2,AP3, AP4, EL1, EL2, POP1, POP2, POP3, POP4, POP5)~1 #formula of the poLCA model


#Running LCA models starting from 1-class model up to 8-class model to find best fit
#Table 16 in SI Appendix shows Fit Statistics for all models (1-class to 8-class model).These can be found at the end of every model calculated
#Model Fit Statistics (BIC) are also reported in the manuscript (p.19)
model1 <- poLCA(formula = f, maxiter=1000, nclass=1, nrep=80, data=df, na.rm=T) # run latent profile model 1 class
model2 <- poLCA(formula = f, maxiter=1000, nclass=2, nrep=80, data=df, na.rm=T) # run latent profile model 2 classes
model3 <- poLCA(formula = f, maxiter=1000, nclass=3, nrep=80, data=df, na.rm=T) # run latent profile model 3 classes
model4 <- poLCA(formula = f, maxiter=1000, nclass=4, nrep=80, data=df, na.rm=T) # run latent profile model 4 classes
model5 <- poLCA(formula = f, maxiter=1000, nclass=5, nrep=80, data=df, na.rm=T) # run latent profile model 5 classes
model6 <- poLCA(formula = f, maxiter=1000, nclass=6, nrep=80, data=df, na.rm=T) # run latent profile model 6 classes
model7 <- poLCA(formula = f, maxiter=1000, nclass=7, nrep=80, data=df, na.rm=T) # run latent profile model 7 classes
model8 <- poLCA(formula = f, maxiter=1000, nclass=8, nrep=80, data=df, na.rm=T) # run latent profile model 8 classes

#########################################################################################
#########################################################################################
############ 7 class model and graph
set.seed(5)
modelbase <- poLCA(formula = f, maxiter=1000, nclass=7, nrep=80, data=df, na.rm=T) # run latent profile model 7 class
probs.start.new <- poLCA.reorder(modelbase$probs.start,order(modelbase$P,decreasing=TRUE)) #set starting probabilities so that the model returns classes ordered from the biggest to smallest class (otherwise class order is random)
model <- poLCA(formula = f, maxiter=1000, nclass=7, nrep=80, data=df, probs.start=probs.start.new, na.rm=T) # re-run latent profile model with set starting probabilities

model$predclass <- as.numeric(model$predclass)
df$class <- model$predclass # take the predictor for the class and add it to the data frame


mean_class1 <- apply(df[which(df$class==1),1:15], 2, mean, na.rm=T)
mean_class2 <- apply(df[which(df$class==2),1:15], 2, mean)
mean_class3 <- apply(df[which(df$class==3),1:15], 2, mean)
mean_class4 <- apply(df[which(df$class==4),1:15], 2, mean)
mean_class5 <- apply(df[which(df$class==5),1:15], 2, mean)
mean_class6 <- apply(df[which(df$class==6),1:15], 2, mean)
mean_class7 <- apply(df[which(df$class==7),1:15], 2, mean)

mean <- c(mean_class1, mean_class2, mean_class3,mean_class4, mean_class5, mean_class6, mean_class7 ) # group means into 1 variable
id <- c(rep("Latent Class 1", length(mean_class1)), # create id to separate lines in graph per class
        rep("Latent Class 2", length(mean_class2)),
        rep("Latent Class 3", length(mean_class3)),
        rep("Latent Class 4", length(mean_class4)),
        rep("Latent Class 5", length(mean_class5)), 
        rep("Latent Class 6", length(mean_class6)), 
        rep("Latent Class 7", length(mean_class7)))

items <- c("EXP1","EXP2", "EXP3", "EXP4", "AP1", "AP2", #create variable with the items
           "AP3", "AP4", "EL1", "EL2",
           "POP1", "POP2", "POP3", "POP4", "POP5")

items <- factor(items, labels = c("EXP1","EXP2", "EXP3", "EXP4", "AP1", "AP2", #create variable with the items
                                  "AP3", "AP4", "EL1", "EL2",
                                  "POP1", "POP2", "POP3", "POP4", "POP5"),
                levels = c("EXP1","EXP2", "EXP3", "EXP4", "AP1", "AP2", #create variable with the items
                           "AP3", "AP4", "EL1", "EL2",
                           "POP1", "POP2", "POP3", "POP4", "POP5"))

sd_class1 <- apply(df[which(df$class==1),1:15], 2, sd)
sd_class2 <- apply(df[which(df$class==2),1:15], 2, sd)
sd_class3 <- apply(df[which(df$class==3),1:15], 2, sd)
sd_class4 <- apply(df[which(df$class==4),1:15], 2, sd)
sd_class5 <- apply(df[which(df$class==5),1:15], 2, sd)
sd_class6 <- apply(df[which(df$class==6),1:15], 2, sd)
sd_class7 <- apply(df[which(df$class==7),1:15], 2, sd)


stdev <- c(sd_class1, sd_class2, sd_class3,sd_class4, sd_class5, sd_class6, sd_class7)

data <- data.frame(items,id, mean,stdev)

plot <- ggplot(data, aes(x = items, y = mean, group = id, colour = id, shape = id)) +
  geom_line(size=1.5) + geom_point(size=3.5) + # have a line and a point through/for all the items
  #geom_errorbar(aes(ymin = mean - 1.96*stdev, ymax = mean + 1.96*stdev)) + #add errorbars to the graph, destroy the readability
  scale_color_manual(values=c("forestgreen","darkorchid" , "gold2",  "gray48", "red", "gray68", "green" )) + #http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
  scale_shape_manual(values=c(0, 1, 2, 3, 4, 8, 5))+
  ylab("") + # title y axis
  xlab("") + #title x axis
  theme_minimal() + #white background, no black lines surrounding
  scale_y_continuous("Mean Score on Items", breaks = c(1,2,3,4,5,6,7)) + # make sure every item score gets a tick in the graph
  theme(legend.position="bottom") + #position of the legend
  theme(legend.title=element_blank()) +# no title on the legend
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) #display items on x axis with an angle
ggsave("mean_scores_class.eps", width=10, height=8, dpi=900)

plot + theme(axis.title.x = element_text(face="bold", size=20),
             axis.text.x  = element_text( size=20),
             axis.title.y = element_text(face="bold", size=20),
             axis.text.y  = element_text( size=16),
             legend.text = element_text( size = 20, face = "bold"))



#Assign each respondent to one class according to modal posterior probability
ideologyEU7USAUS$classfin <- model$predclass


#Save the predicted probability of assignment to each class as a continuous variable

#df$predtech
#df$predPD
#df$predpopulist
#df$predmodpop
#df$predtrack
#df$predmidres
#df$predextrpd


model$posterior
polca.results <- model$posterior
colnames(polca.results)<-c("predPD", "predtrack", "predmidres","predpopulist", "predtech","predmobpop","predextrpd")
f <- cbind(ideologyEU7USAUS,polca.results) #formula of the poLCA model
is.numeric(f$predextrpd)
View(f)

# export data frame to Stata binary format 
library(foreign)
write.dta(f, "ideologyEU7USAUS.merge.dta")
