|
发表于 2023-5-11 20:26:12
|
查看: 2822 |
回复: 0
本帖最后由 生信喵 于 2023-5-11 21:02 编辑
物种分类鉴定
- #方法一:dada2自带注释函数,官网下载
- #https://benjjneb.github.io/dada2/training.html
- # taxa <- assignTaxonomy(seqtab.nochim, "MiSeq_SOP/tax/silva_nr99_v138.1_train_set.fa.gz", multithread=12)
- # taxa <- addSpecies(taxa, "MiSeq_SOP/tax/silva_species_assignment_v138.1.fa.gz")
- #
- # taxa.print <- taxa
- # rownames(taxa.print) <- NULL
- # head(taxa.print)
- #方法二: DECIPHER
- download.file(url="http://www2.decipher.codes/Classification/TrainingSets/SILVA_SSU_r138_2019.RData", destfile="SILVA_SSU_r138_2019.RData")
- #加载CECIPHER SILVA库
- load("SILVA_SSU_r138_2019.RData")
- #加载DECIPHER
- library(DECIPHER)
- packageVersion("DECIPHER")
- #创建dna序列
- dna <- DNAStringSet(getSequences(seqtab.nochim))
- ids <- IdTaxa(dna, trainingSet, strand="top", processors=12, verbose=FALSE)
- ranks <- c("domain", "phylum", "class", "order", "family", "genus", "species") # ranks of interest
- # Convert the output object of class "Taxa" to a matrix analogous to the output from assignTaxonomy
- taxid <- t(sapply(ids, function(x) {
- m <- match(ranks, x$rank)
- taxa <- x$taxon[m]
- taxa[startsWith(taxa, "unclassified_")] <- NA
- taxa
- }))
- colnames(taxid) <- ranks
- rownames(taxid) <- getSequences(seqtab.nochim)
复制代码 评估结果
- unqs.mock <- seqtab.nochim["Mock",]
- unqs.mock <- sort(unqs.mock[unqs.mock>0], decreasing=TRUE)
- cat("DADA2 inferred", length(unqs.mock), "sample sequences present in the Mock community.\n")
- mock.ref <- getSequences(file.path(path, "HMP_MOCK.v35.fasta"))
- match.ref <- sum(sapply(names(unqs.mock), function(x) any(grepl(x, mock.ref))))
- cat("Of those,", sum(match.ref), "were exact matches to the expected reference sequences.\n")
复制代码 画树
- library(msa)
- seqs <- getSequences(seqtab.nochim)
- names(seqs) <- seqs
- mult <- msa(seqs, method="ClustalW", type="dna", order="input")
- library("phangorn")
- phang.align <- as.phyDat(mult, type="DNA", names=getSequence(seqtab))
- dm <- dist.ml(phang.align)
- treeNJ <- NJ(dm) # Note, tip order != sequence order
- fit = pml(treeNJ, data=phang.align)
- ## negative edges length changed to 0!
- fitGTR <- update(fit, k=4, inv=0.2)
- fitGTR <- optim.pml(fitGTR, model="GTR", optInv=TRUE, optGamma=TRUE,
- rearrangement = "stochastic", control = pml.control(trace = 0))
- fitGTR$tree
复制代码
|
|