#Import FPED data
FPED <- read.csv(file.choose())

#Import demographic & NLit score data
corr <- read.csv(file.choose())

##Aggregate servings of expanded variables by Respondent ID
Butter <- stats::aggregate(. ~ `Respondent ID`, data = Butter, FUN = "sum")
Coffee <- stats::aggregate(. ~ `Respondent ID`, data = Coffee, FUN = "sum")
Fried_Seafood <- stats::aggregate(. ~ `Respondent ID`, data = Fried_Seafood, FUN = "sum")
HF_Dairy <- stats::aggregate(. ~ `Respondent ID`, data = HF_Dairy, FUN = "sum")
Hn3_Seafood <- stats::aggregate(. ~ `Respondent ID`, data = Hn3_Seafood, FUN = "sum")
LF_Dairy <- stats::aggregate(. ~ `Respondent ID`, data = LF_Dairy, FUN = "sum")
Ln3_Seafood <- stats::aggregate(. ~ `Respondent ID`, data = Ln3_Seafood, FUN = "sum")
SSB <- stats::aggregate(. ~ `Respondent ID`, data = SSB, FUN = "sum")
Sweets <- stats::aggregate(. ~ `Respondent ID`, data = Sweets, FUN = "sum")
Tea <- stats::aggregate(. ~ `Respondent ID`, data = Tea, FUN = "sum")
Veg_Oil <- stats::aggregate(. ~ `Respondent ID`, data = Veg_Oil, FUN = "sum")
Margarine <- stats::aggregate(. ~ `Respondent ID`, data = Margarine, FUN = "sum")
RF_Margarine <- stats::aggregate(. ~ `Respondent ID`, data = RF_Margarine, FUN = "sum")
Lard <- stats::aggregate(. ~ `Respondent ID`, data = Lard, FUN = "sum")
Olive_Oil <- stats::aggregate(. ~ `Respondent ID`, data = Olive_Oil, FUN = "sum")
Fried_Potatoes <- stats::aggregate(. ~ `Respondent ID`, data = Fried_Potatoes, FUN = "sum")
Non_fried_Potatoes <- stats::aggregate(. ~ `Respondent ID`, data = Non_fried_Potatoes, FUN = "sum")

Non_fried_Potatoes$`Respondent ID` <- gsub("NLit", "", Non_fried_Potatoes$`Respondent ID`)
Fried_Potatoes$`Respondent ID` <- gsub("NLit", "", Fried_Potatoes$`Respondent ID`)

#Merge the expanded variables with selected FPED variables
FPED <- merge(FPED, Butter, all = T)
FPED <- merge(FPED, Coffee, all = T)
FPED <- merge(FPED, Fried_Seafood, all = T)
FPED <- merge(FPED, HF_Dairy, all = T)
FPED <- merge(FPED, Hn3_Seafood, all = T)
FPED <- merge(FPED, Lard, all = T)
FPED <- merge(FPED, LF_Dairy, all = T)
FPED <- merge(FPED, Ln3_Seafood, all = T)
FPED <- merge(FPED, Olive_Oil, all = T)
FPED <- merge(FPED, SSB, all = T)
FPED <- merge(FPED, Sweets, all = T)
FPED <- merge(FPED, Tea, all = T)
FPED <- merge(FPED, Veg_Oil, all = T)
FPED <- merge(FPED, RF_Margarine, all = T)
FPED <- merge(FPED, Margarine, all = T)
FPED <- merge(FPED, Fried_Potatoes, all = T)
FPED <- merge(FPED, Non_fried_Potatoes, all = T)


## PCA
library(psych)
pca.vars <- FPED[c(3:18, 21:39)]
pca.vars2 <- FPED2[c(3:37)]
pca <- principal(pca.vars, rotate="varimax", nfactors = 2, scores=TRUE)
pca$loadings
loadings <- as.data.frame(unclass(pca$loadings))
#export PCA loadings to excel to name and sort
write.csv(loadings, file = "pca loadings.csv")
#import PCA Western and Med diet loadings
loadings <- read.csv(file.choose())
pc <- as.data.frame(unclass(pca$scores))
#export PCA scores to excel
write.csv(pc, file = "pca scores.csv")
#import log transformed PCA scores for Western and Med Patterns
pc <- read.csv(file.choose())


## PCovR
library(PCovR)
nlit_scores <- as.data.frame(corr$NLit)
#perform PCovR - diet variables already centered and standardized, standardize nlit scores
fped_results <- pcovr(pca.vars, nlit_scores, prepY = "cent")
summary(fped_results)
PCovR_loadings <- as.data.frame(unclass(fped_results$Px))
PCovR <- as.data.frame(fped_results$Te)
PCovR <- log(PCovR)

## Merge diet patterns with corr file
corr <- merge(corr, PCovR, by = `Respondent ID`)
corr <- merge(corr, pc, by = `Respondent ID`)


#### REGRESSION MODELS ####

## PCovR Prudent Diet regression table
library(sjPlot)
pcovr_model_1 <- lm(PCovR ~ NLit, data = corr)

pcovr_model_2 <- lm(PCovR ~ Age +
                    Income +
                    Education +
                    BMI +
                    Race_Name +
                    Diabetes, data = corr)

pcovr_model_3 <- lm(PCovR ~ Age +
                    Income +
                    Education +
                    BMI +
                    Race_Name +
                    Diabetes +
                    NLit +
                    Sex, data = corr)

pcovr_model_4 <- lm(PCovR ~ Age +
                    Income +
                    BMI +
                    Race_Name +
                    Diabetes +
                    NLit =
                    Sex, data = corr)


pcovr_reg_table <- sjt.lm(pcovr_model_1, 
                        pcovr_model_2,
                        pcovr_model_3,
                        pcovr_model_4,
                        show.std = T,
                        show.se = F,
                        show.ci = T,
                        string.est = "Estimate",
                        string.ci = "95% CI",
                        string.std = "β",
                        show.header = T,
                        string.dv = "PCovR Prudent Diet Pattern",
                        show.fstat = T,
                        group.pred = F)
pcovr_reg_table


## PCA Western Diet regression table
western_model_1 <- lm(Western.Diet ~ NLit, data = corr)

western_model_2 <- lm(Western.Diet ~ Age +
                        Income +
                        Education +
                        BMI +
                        Race_Name +
                        Diabetes, data = corr)

western_model_3 <- lm(Western.Diet ~ Age +
                        Income +
                        Education +
                        BMI +
                        Race_Name +
                        Diabetes +
                        NLit + 
                        Sex, data = corr)

western_model_4 <- lm(Western.Diet ~ Age +
                        Income +
                        BMI +
                        Race_Name +
                        Diabetes +
                        NLit +
                        Sex, data = corr)

western_reg_table <- sjt.lm(western_model_1,
                            western_model_2,
                            western_model_3,
                            western_model_4,
                            show.std = T,
                            show.se = F,
                            show.ci = T,
                            string.est = "Estimate",
                            string.ci = "95% CI",
                            string.std = "β",
                            show.header = T,
                            string.dv = "PCA Westen Diet Pattern",
                            show.fstat = T,
                            group.pred = F)

western_reg_table

## PCA Med Diet regression table
med_model_1 <- lm(Med ~ NLit, data = corr)

med_model_2 <- lm(Med ~ Age +
                    Income +
                    Education +
                    BMI +
                    Race_Name +
                    Diabetes, data = corr)

med_model_3 <- lm(Med ~ Age +
                    Income +
                    Education +
                    BMI +
                    Race_Name +
                    Diabetes +
                    NLit +
                    Sex, data = corr)

med_model_4 <- lm(Med ~ Age +
                    Income +
                    BMI +
                    Race_Name +
                    Diabetes +
                    NLit +
                    Sex, data = corr)

med_reg_table <- sjt.lm(med_model_1,
                        med_model_2,
                        med_model_3,
                        med_model_4,
                        show.std = T,
                        show.se = F,
                        show.ci = T,
                        string.est = "Estimate",
                        string.ci = "95% CI",
                        string.std = "β",
                        show.header = T,
                        string.dv = "PCA Mediterranean Pattern",
                        show.fstat = T,
                        group.pred = F)

med_reg_table

#### Visualization ####
library(ggplot2)
library(ggpubr)

####Plots for combined figures
## PCovR Loading Plot

pcovr.load <- ggplot(PCovR,aes(x= reorder(Group,PCovR),PCovR)) + 
  geom_bar(stat ="identity", fill = "black") + 
  scale_y_continuous(position = "right") +
  theme(axis.title = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(axis.text.y = element_text(family = "Times New Roman", color = "black", size = 8.5)) +
  theme(axis.text.x = element_text(family = "Times New Roman", color = "black", size = 7)) +
  geom_hline(yintercept = 0, color = "black") +
  ggtitle(label = "PCovR Prudent Pattern", subtitle = "Explains 5% of Dietary Intake Variation & 23% of Variation in Global NLit Scores") +
  theme(plot.title = element_text(size = 9.25, family = "Times New Roman", face = "bold", hjust = 0.175)) +
  theme(plot.subtitle = element_text(size=8.25, family = "Times New Roman", hjust = -9)) +
  theme(title = element_text()) +
  theme(panel.grid.major.x = element_line(color = "gray85")) +
  theme(plot.margin = margin(0.1, 0.1, 0.1, .75, "cm")) +
  coord_flip()

pcovr.plot <- ggplot(corr, aes(x=levels, y=PCovR)) + 
  stat_summary(aes(y = (RRR*-1), group=1), fun.y=mean, geom="bar",group=1, width=.6) + 
  stat_summary(fun.data=mean_se, geom="errorbar", width=.15) +
  theme(axis.title = element_blank()) + 
  theme(panel.background = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(axis.text.y = element_text(family = "Times New Roman", color = "black", size = 7)) +
  theme(axis.text.x = element_text(family = "Times New Roman", color = "black", size = 8.5)) +
  geom_hline(yintercept = 0, color = "black") +
  ggtitle(label = "PCovR Prudent Pattern Adherence") +
  theme(plot.title = element_text(size = 9.25, family = "Times New Roman", face = "bold", hjust = 0.5)) +
  theme(plot.subtitle = element_text(size=8.25, family = "Times New Roman", hjust = 0.5)) +
  theme(title = element_text()) +
  theme(panel.grid.major.y = element_line(color = "gray85")) +
  theme(plot.margin = margin(.25, .5, .25, .5, "cm")) +
  geom_signif(comparisons = list(c("Poor", "Good")), 
              map_signif_level=TRUE, vjust = .5, test = "bonferroni", y_position = 1.0,
              tip_length = .004, textsize = 4) +
  geom_signif(comparisons = list(c("Possibly Poor", "Good")), 
              map_signif_level=TRUE, vjust = .5, test = "bonferroni", y_position = 0.8,
              tip_length = .004, textsize = 4) +  
  geom_signif(comparisons = list(c("Poor", "Possibly Poor")), 
              map_signif_level=TRUE, vjust = .5, test = "bonferroni", y_position = 0.8,
              tip_length = .004, textsize = 4)

#Figures by diet pattern
pcovr.theme <- ggarrange(pcovr.load, pcovr.plot,
                       heights = c(2.25,1),
                       nrow = 2,
                       labels = c("A", "B"),
                       font.label = list(size = 11, family = "Times New Roman"))

## export to tiff
tiff("Figure 1.tiff", width = 6, height = 7, units = 'in', res = 600, compression = 'lzw', type = c("cairo"))
pcovr.theme
dev.off()


## PCA Western Diet Loading Plot
pca.load <- ggplot(loadings,aes(x= reorder(Group,Western),Western)) + 
  geom_bar(stat ="identity", fill = "black") + 
  scale_y_continuous(position = "right") +
  theme(axis.title = element_blank()) + 
  theme(panel.background = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(axis.text.y = element_text(family = "Times New Roman", color = "black", size = 8.5)) +
  theme(axis.text.x = element_text(family = "Times New Roman", color = "black", size = 7)) +
  geom_hline(yintercept = 0, color = "black") +
  ggtitle(label = "PCA Western Diet Pattern", subtitle = "Explains 13% of Dietary Intake Variation") +
  theme(plot.title = element_text(size = 9.25, family = "Times New Roman", face = "bold", hjust = 0.225)) +
  theme(plot.subtitle = element_text(size=8.5, family = "Times New Roman", hjust = 0.175)) +
  theme(title = element_text()) +
  theme(panel.grid.major.x = element_line(color = "gray85")) +
  coord_flip()

## PCA Western Diet by Category with p-value
pca.plot <- ggplot(corr, aes(x=levels, y=Western)) + 
  stat_summary(aes(y = Western.Diet, group=1), fun.y=mean, geom="bar",group=1, width=.6) + 
  stat_summary(fun.data=mean_se, geom="errorbar", width=.15) +
  theme(axis.title = element_blank()) + 
  theme(panel.background = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(axis.text.y = element_text(family = "Times New Roman", color = "black", size = 7)) +
  theme(axis.text.x = element_text(family = "Times New Roman", color = "black", size = 8.5)) +
  geom_hline(yintercept = 0, color = "black") +
  ggtitle(label = "PCA Western Diet Adherence") +
  theme(plot.title = element_text(size = 9.25, family = "Times New Roman", face = "bold", hjust = 0.5)) +
  theme(plot.subtitle = element_text(size=8.25, family = "Times New Roman", hjust = 0.5)) +
  theme(title = element_text()) +
  theme(panel.grid.major.y = element_line(color = "gray85")) +
  theme(plot.margin = margin(.25, .5, .25, .5, "cm")) +
  geom_signif(comparisons = list(c("Poor", "Good")), 
              map_signif_level=TRUE, vjust = .5, test = "bonferroni", y_position = .8,
              tip_length = .004, textsize = 4) +
  geom_signif(comparisons = list(c("Poor", "Possibly Poor")), 
              map_signif_level=TRUE, vjust = .5, test = "bonferroni", y_position = .7,
              tip_length = .004, textsize = 4)

pca.theme <- ggarrange(pca.load, pca.plot,
                       heights = c(2.25,1),
                       nrow = 2,
                       labels = c("A", "B"),
                       font.label = list(size = 11, family = "Times New Roman"))

## export to tiff
tiff("Figure 2.tiff", width = 6, height = 7, units = 'in', res = 600, compression = 'lzw', type = c("cairo"))
pca.theme
dev.off()


## PCA Med Loading Plot
med.load <- ggplot(loadings,aes(x= reorder(Group,Med),Med)) + 
  geom_bar(stat ="identity", fill = "black") + 
  scale_y_continuous(position = "right") +
  theme(axis.title = element_blank()) + 
  theme(panel.background = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(axis.text.y = element_text(family = "Times New Roman", color = "black", size = 8.5)) +
  theme(axis.text.x = element_text(family = "Times New Roman", color = "black", size = 7)) +
  geom_hline(yintercept = 0, color = "black") +
  ggtitle(label = "PCA Mediterranean Diet Pattern", subtitle = "Explains 11% of Dietary Intake Variation") +
  theme(plot.title = element_text(size = 9.25, family = "Times New Roman", face = "bold", hjust = 0.225)) +
  theme(plot.subtitle = element_text(size=8.5, family = "Times New Roman", hjust = 0.175)) +
  theme(title = element_text()) +
  theme(panel.grid.major.x = element_line(color = "gray85")) +
  coord_flip()

#Mediterranean Diet Pattern
med.plot <- ggplot(corr, aes(x=levels, y=Med)) + 
  stat_summary(aes(y = VF.Diet, group=1), fun.y=mean, geom="bar",group=1, width=.6) + 
  stat_summary(fun.data=mean_se, geom="errorbar", width=.15) +
  theme(axis.title = element_blank()) + 
  theme(panel.background = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(axis.text.y = element_text(family = "Times New Roman", color = "black", size = 7)) +
  theme(axis.text.x = element_text(family = "Times New Roman", color = "black", size = 8.5)) +
  geom_hline(yintercept = 0, color = "black") +
  ggtitle(label = "PCA Mediterranean Diet Adherence") +
  theme(plot.title = element_text(size = 9.25, family = "Times New Roman", face = "bold", hjust = 0.5)) +
  theme(plot.subtitle = element_text(size=8.25, family = "Times New Roman", hjust = 0.5)) +
  theme(title = element_text()) +
  theme(panel.grid.major.y = element_line(color = "gray85")) +
  theme(plot.margin = margin(.25, .5, .25, .5, "cm")) +
  geom_signif(comparisons = list(c("Poor", "Good")), 
              map_signif_level=TRUE, vjust = .5, test = "bonferroni", y_position = .3,
              tip_length = .004, textsize = 4)

med.theme <- ggarrange(med.load, med.plot,
                       heights = c(2.25,1),
                       nrow = 2,
                       labels = c("A", "B"),
                       font.label = list(size = 11, family = "Times New Roman"))

## export to tiff
tiff("Figure 3.tiff", width = 6, height = 7, units = 'in', res = 600, compression = 'lzw', type = c("cairo"))
med.theme
dev.off()


#### Produce table for manuscript ####
library(compareGroups)
char <- compareGroups(levels ~ ., data = corr)
chartab <- createTable(char, type = 2, digits = 1, sd.type = 2, hide.no = "no", show.n = TRUE, show.all = TRUE)
View(chartab)
char
export2word(chartab, file = "diet differences.docx", header.labels = c(p.overall = "p-value"))


#### Visualization of standard intake between poor and good NLit ####
library(plyr)
FPED2 <- rename(FPED2, c("Fried Seafood" = "Fried Fish & Shrimp"))
FPED2$Level <- factor(FPED2$Level, levels = c("Poor", "Possibly Poor", "Good"))
FPED_reduced <- FPED2[c(1,2,5,6,8,9,11,14,15,16,20,21,23,24,25,26,29,31,32,37)]
FPED_reduced <- subset(FPED_reduced, Level == "Poor" | Level == "Good",
                       select = c(1:20))
FPED_reduced$Level <- as.factor(FPED_reduced$Level)
FPED_reduced$Level <- factor(FPED_reduced$Level, levels = c("Good", "Poor"))
test <- melt(FPED_reduced, id = c("Respondent.ID", "Level"))
test$variable <- factor(test$variable, levels = c("Orange & Red Vegetables", "Nuts & Seeds", "Low Fat Dairy",
                                                  "Olive Oil", "Other Vegetables", "Coffee", "Soy Products", 
                                                  "Dark Green Vegetables", "Margarine", "High Fat Dairy", "Fruit Juice", "Butter", "Potatoes", 
                                                  "Refined Grains", "Sugar Sweetened Beverages", "Red Meat",
                                                  "Processed Meat", "Fried Fish & Shrimp"))


#Z Plot
z_change <- ggplot(test, aes(x= variable, y=value, fill = Level)) + 
  geom_bar(position = "stack", stat = "summary", fun.y=mean, width=.85) + 
  theme(axis.title.y = element_blank(),
        axis.title.x = element_blank()) +
  theme(legend.position = c(0.85, 0.8),
        legend.title = element_text(family = "Times New Roman", face = "bold", color = "black", size = 10),
        legend.text = element_text(family = "Times New Roman", color = "black", size = 10),
        legend.key.size = unit(0.5,"cm"),
        legend.direction = "vertical",
        legend.background = element_rect(linetype = 1, size = 0.5, colour = 1)) +
  guides(fill=guide_legend(title="Nutrition Literacy")) +
  scale_y_continuous(position = "right") +
  theme(panel.background = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(axis.text.y = element_text(family = "Times New Roman", color = "black", size = 12)) +
  theme(axis.text.x = element_text(family = "Times New Roman", color = "black", size = 10)) +
  theme(axis.text.x = element_text(hjust = 0.5, vjust = 0.5)) +
  geom_hline(yintercept = 0, color = "black") +
  ggtitle(label = "") +
  theme(plot.title = element_text(size = 12, family = "Times New Roman", face = "bold", hjust = 0.1)) +
  theme(plot.subtitle = element_text(size=11, family = "Times New Roman", hjust = -0.5)) +
  theme(title = element_text()) +
  theme(panel.grid.major.x = element_line(color = "gray85")) +
  scale_fill_manual(values = c("#79AF97FF", "#001933")) +
  coord_flip()

z_change

tiff("z change.tiff", width = 6, height = 7, units = 'in', res = 600, compression = 'lzw', type = c("cairo"))
z_change
dev.off()