Metabarcoding Read Processing of Paired-end Reads via DADA2 Denoising Pipeline A. TRIMMING AND QUALITY FILTERING library(dada2); packageVersion("dada2") path <- "D:/IGB_fellowship/EUK15_Run3_2019" list.files(path) e153.fnFs <- sort(list.files(path, pattern="_R1_001.fastq", full.names = TRUE)) e153.fnRs <- sort(list.files(path, pattern="_R2_001.fastq", full.names = TRUE)) e153.sample.names <- sapply(strsplit(basename(e153.fnFs), "_"), `[`, 1) e153.sample.names plotQualityProfile(e153.fnFs[1:12]) plotQualityProfile(e153.fnRs[1:12]) e153.filtFs <- file.path(path, "e153.filtered", paste0(e153.sample.names, "_F_filt.fastq.gz")) e153.filtRs <- file.path(path, "e153.filtered", paste0(e153.sample.names, "_R_filt.fastq.gz")) e153.out <- filterAndTrim(e153.fnFs, e153.filtFs, e153.fnRs, e153.filtRs, trimLeft=c(20,20), truncLen=c(260,200), maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE, compress=TRUE, multithread=TRUE) head(e153.out) e153.errF <- learnErrors(e153.filtFs, multithread=TRUE) e153.errR <- learnErrors(e153.filtRs, multithread=TRUE) plotErrors(e153.errF, nominalQ=TRUE) B. DEREPLICATION e153.derepFs <- derepFastq(e153.filtFs, verbose=TRUE) e153.derepRs <- derepFastq(e153.filtRs, verbose=TRUE) names(e153.derepFs) <- e153.sample.names names(e153.derepRs) <- e153.sample.names e153.dadaFs <- dada(e153.derepFs, err=e153.errF, multithread=TRUE) e153.dadaRs <- dada(e153.derepRs, err=e153.errR, multithread=TRUE) e153.dadaFs[[1]] C. MERGE PAIRED READS e153.mergers <- mergePairs(e153.dadaFs, e153.derepFs, e153.dadaRs, e153.derepRs, verbose=TRUE) head(e153.mergers[[1]]) D. CONSTRUCT SEQUENCE TABLE e153.seqtab <- makeSequenceTable(e153.mergers) dim(e153.seqtab) table(nchar(getSequences(e153.seqtab))) saveRDS(e153.seqtab, "D:/IGB_fellowship/EUK15_Run3_2019/e153.seqtab.rds") E. REMOVE CHIMERAS e153.seqtab.nochim <- removeBimeraDenovo(e153.seqtab, method="consensus", multithread=TRUE, verbose=TRUE) dim(e153.seqtab.nochim) sum(e153.seqtab.nochim)/sum(e153.seqtab) saveRDS(e153.seqtab.nochim, "D:/IGB_fellowship/EUK15_Run3_2019/e153.seqtab.nochim.rds") write.table(t(e153.seqtab.nochim), "D:/IGB_fellowship/EUK15_Run3_2019/e153.seqtab_nochim.tsv") getN <- function(x) sum(getUniques(x)) e153.track <- cbind(e153.out, sapply(e153.dadaFs, getN), sapply(e153.dadaRs, getN), sapply(e153.mergers, getN), rowSums(e153.seqtab.nochim)) colnames(e153.track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") rownames(e153.track) <- e153.sample.names head(e153.track) write.table(e153.track, "D:/IGB_fellowship/EUK15_Run3_2019/e153.track.csv") # Merge multiple runs (if necessary) st1 <- readRDS("D:/IGB_fellowship/EUK15_Run1_2015-2017/e151.seqtab.nochim.rds") st2 <- readRDS("D:/IGB_fellowship/EUK15_Run2_2018/e152.seqtab.nochim.rds") st3 <- readRDS("D:/IGB_fellowship/EUK15_Run3_2019/e153.seqtab.nochim.rds") st.all <- mergeSequenceTables(st1, st2, st3) # Remove chimeras all_seqtab <- removeBimeraDenovo(st.all, method="consensus", multithread=TRUE) dim(all_seqtab) saveRDS(all_seqtab, "D:/IGB_fellowship/EUK15_all/euk15_seqtab.nonchim.rds") # Assign taxonomy euk_tax_pr2 <- assignTaxonomy(all_seqtab, "D:/IGB_fellowship/EUK15_all/pr2_version_4.12.0_18S_dada2.fasta.gz", tryRC=TRUE, multithread=TRUE, taxLevels = c("Kingdom","Supergroup","Division","Class","Order","Family","Genus","Species")) saveRDS(euk_tax_pr2, "D:/IGB_fellowship/EUK15_all/euk15_tax_pr2.RDS") write.csv(cbind(t(all_seqtab), euk_tax_pr2), "D:/IGB_fellowship/EUK15_all/euk15_seqtab_nonchim_pr2_id.csv", quote=FALSE)