生信喵 发表于 2023-5-11 20:26:12

dada2物种分类鉴定

本帖最后由 生信喵 于 2023-5-11 21:02 编辑

物种分类鉴定
#方法一:dada2自带注释函数,官网下载
#https://benjjneb.github.io/dada2/training.html
# taxa <- assignTaxonomy(seqtab.nochim, "MiSeq_SOP/tax/silva_nr99_v138.1_train_set.fa.gz", multithread=12)
# taxa <- addSpecies(taxa, "MiSeq_SOP/tax/silva_species_assignment_v138.1.fa.gz")
#
# taxa.print <- taxa
# rownames(taxa.print) <- NULL
# head(taxa.print)

#方法二: DECIPHER
download.file(url="http://www2.decipher.codes/Classification/TrainingSets/SILVA_SSU_r138_2019.RData", destfile="SILVA_SSU_r138_2019.RData")

#加载CECIPHER SILVA库
load("SILVA_SSU_r138_2019.RData")

#加载DECIPHER
library(DECIPHER)
packageVersion("DECIPHER")

#创建dna序列
dna <- DNAStringSet(getSequences(seqtab.nochim))

ids <- IdTaxa(dna, trainingSet, strand="top", processors=12, verbose=FALSE)
ranks <- c("domain", "phylum", "class", "order", "family", "genus", "species") # ranks of interest
# Convert the output object of class "Taxa" to a matrix analogous to the output from assignTaxonomy
taxid <- t(sapply(ids, function(x) {
m <- match(ranks, x$rank)
taxa <- x$taxon
taxa <- NA
taxa
}))
colnames(taxid) <- ranks
rownames(taxid) <- getSequences(seqtab.nochim)评估结果
unqs.mock <- seqtab.nochim["Mock",]
unqs.mock <- sort(unqs.mock, decreasing=TRUE)
cat("DADA2 inferred", length(unqs.mock), "sample sequences present in the Mock community.\n")

mock.ref <- getSequences(file.path(path, "HMP_MOCK.v35.fasta"))
match.ref <- sum(sapply(names(unqs.mock), function(x) any(grepl(x, mock.ref))))
cat("Of those,", sum(match.ref), "were exact matches to the expected reference sequences.\n")画树
library(msa)

seqs <- getSequences(seqtab.nochim)
names(seqs) <- seqs
mult <- msa(seqs, method="ClustalW", type="dna", order="input")

library("phangorn")
phang.align <- as.phyDat(mult, type="DNA", names=getSequence(seqtab))
dm <- dist.ml(phang.align)
treeNJ <- NJ(dm) # Note, tip order != sequence order
fit = pml(treeNJ, data=phang.align)
## negative edges length changed to 0!
fitGTR <- update(fit, k=4, inv=0.2)
fitGTR <- optim.pml(fitGTR, model="GTR", optInv=TRUE, optGamma=TRUE,
                  rearrangement = "stochastic", control = pml.control(trace = 0))
fitGTR$tree
页: [1]
查看完整版本: dada2物种分类鉴定