|
发表于 2022-4-14 17:58:09
|
查看: 1675 |
回复: 0
1 不同 kmer 大小- mkdir 1.kmer
- cd 1.kmer
- cp ../36.illumina/lib.list .
- SOAPdenovo-31mer all -s lib.list -K 31 -o kmer31 -D 1 -d 1 -u 2 -p 12 >kmer31.log
- SOAPdenovo-63mer all -s lib.list -K 63 -o kmer63 -D 1 -d 1 -u 2 -p 12 >kmer63.log
- SOAPdenovo-127mer all -s lib.list -K 127 -o kmer105 -D 1 -d 1 -u 2 -p 12 >kmer105.log
- ll *.scafSeq
- seqkit stat *.scafSeq
- mv *.scafSeq ../
- rm -rf kmer*
- #修改-d
- SOAPdenovo-63mer all -s lib.list -K 63 -o kmer63 -D 1 -d 0 -u 2 -p 12 >kmer63_0.log
- SOAPdenovo-63mer all -s lib.list -K 63 -o kmer63 -D 1 -d 1 -u 2 -p 12 >kmer63_1.log
- SOAPdenovo-63mer all -s lib.list -K 63 -o kmer63 -D 1 -d 2 -u 2 -p 12 >kmer63_2.log
- seqkit stat *.scafSeq
复制代码
2 文库大小
小片段文库,写入文件 lib.list
- #大片段文库
- max_rd_len=150
- [LIB]
- avg_ins=500
- reverse_seq=0
- asm_flags=3
- rank=1
- pair_num_cutoff=3
- q1=/cleandata/500_clean.1.fq.gz
- q2=/cleandata/500_clean.2.fq.gz
- [LIB]
- avg_ins=2000
- reverse_seq=1
- asm_flags=2
- rank=2
- pair_num_cutoff=3
- q1=/cleandata/2000_clean.1.fq.gz
- q2=/cleandata/2000_clean.2.fq.gz
- SOAPdenovo-63mer all -s lib.list -K 63 -o kmer1 -D 1 -d 1 -u 2 -p 12 >kmer1.log
- SOAPdenovo-63mer all -s lib2.list -K 63 -o kmer2 -D 1 -d 1 -u 2 -p 12 >kmer2.log
- ll *.scafSeq
- seqkit stat *.scafSeq
- seqkit seq -m 500 kmer1.scafSeq | seqkit stat
- seqkit seq -m 500 kmer2.scafSeq | seqkit stat
复制代码 换spades软件看大片段文库
- echo "spades.py --isolate -o spades1 -t 12 -1 /share/home/xiehs/05.assembly/data/illumina._1.fastq.gz -2 /share/home/xiehs/05.assembly/data/illumina_2.fastq.gz -t 12 1>spades1.log 2>spades1.err" > spades.sh
- echo "spades.py --isolate -o spades2 -t 12 -1 /share/home/xiehs/05.assembly/data/illumina._1.fastq.gz -2 /share/home/xiehs/05.assembly/data/illumina_2.fastq.gz -t 12 --mp1-1 /cleandata/2000_clean.1.fq.gz --mp1-2 /cleandata/2000_clean.2.fq.gz 1>spades2.log 2>spades2.err" >> spades.sh
- nohup sh spades.sh &
- seqkit seq -m 500 spades1/scaffolds.fasta | seqkit stat
- seqkit seq -m 500 spades2/scaffolds.fasta | seqkit stat
复制代码
3 数据质量
比较过滤前后拼接结果差别;
- mkdir 3.filter
- SOAPdenovo-63mer all -s lib.list -K 63 -o kmer63 -D 1 -d 0 -u 2 -p 12 >kmer63.log
- seqkit seq -m 500 kmer63.scafSeq | seqkit stat
- seqkit seq -m 500 ../1.kmer/kmer63_1.scafSeq | seqkit stat
复制代码
4 数据量大小
分别抽取 10%,30%,50%,80%进行比较。
- seqkit sample -p 0.1 -s 1234 /share/home/xiehs/05.assembly/data/illumina_1.fastq.gz | gzip >reads.0.1_1.fq.gz
- seqkit sample -p 0.1 -s 1234 /share/home/xiehs/05.assembly/data/illumina_2.fastq.gz | gzip >reads.0.1_2.fq.gz
- for i in {0.1,0.3,0.5,0.8};do
- seqkit sample -p ${i} -s 1234 /share/home/xiehs/05.assembly/data/illumina_1.fastq.gz | gzip >reads.${i}_1.fq.gz
- seqkit sample -p ${i} -s 1234 /share/home/xiehs/05.assembly/data/illumina_2.fastq.gz | gzip >reads.${i}_2.fq.gz;
- done;
- ls -1 *.fq.gz | xargs -n 2 | while read {i,j};do echo spades.py -o spades_${i} -t 12 -1 ${i} -2 ${j};done;
- spades.py -o spades_reads.0.1_1.fq.gz -t 12 -1 reads.0.1_1.fq.gz -2 reads.0.1_2.fq.gz
- spades.py -o spades_reads.0.3_1.fq.gz -t 12 -1 reads.0.3_1.fq.gz -2 reads.0.3_2.fq.gz
- spades.py -o spades_reads.0.5_1.fq.gz -t 12 -1 reads.0.5_1.fq.gz -2 reads.0.5_2.fq.gz
- spades.py -o spades_reads.0.8_1.fq.gz -t 12 -1 reads.0.8_1.fq.gz -2 reads.0.8_2.fq.gz
- _1.fq.gz删掉作为目录-o
- nohup sh spades.sh &
- seqkit stat spades_reads*/scaffolds.fasta
复制代码
5 reads 长度的影响
- #不同reads长度
- #利用wgsim分别模拟长度70与150bp长度reads
- cp /share/home/xiehs/05.assembly/data/MGH78578.fasta .
- wgsim MGH78578.fasta read.50_1.fq read.50_2.fq -1 50 -2 50
- wgsim MGH78578.fasta read.300_1.fq read.300_2.fq -1 300 -2 300
- spades.py -o spades50 -t 12 -1 read.50_1.fq -2 read.50_2.fq
- spades.py -o spades300 -t 12 -1 read.300_1.fq -2 read.300_2.fq
- seqkit stat spades50/scaffolds.fasta
- seqkit stat spades300/scaffolds.fasta
复制代码
6 不同错误率
- #不同错误率的影响
- wgsim -e 0.01 -1 50 -2 50 MGH78578.fasta read.0.01_1.fq read.0.01_2.fq
- wgsim -e 0.1 -1 50 -2 50 MGH78578.fasta read.0.1_1.fq read.0.1_2.fq
- #用soapdenovo拼接
- SOAPdenovo-63mer all -s lib.list -K 35 -o kmer35 -D 1 -d 1 -u 2 -p 12
- SOAPdenovo-63mer all -s lib.list -K 35 -o kmer35_01 -D 1 -d 1 -u 2 -p 12
- seqkit stat kmer35.scafSeq
复制代码
|
|