生信喵 发表于 2023-6-19 17:43:47

bam文件处理

1.samtools 处理
# Write one "samtools sort" command per SAM file in this directory into sam2bam.sh.
# - Loop over the glob instead of parsing ls output, so unusual file names survive.
# - printf %q (bash) shell-quotes each name for the generated script.
# - A single ">" on the loop rebuilds sam2bam.sh from scratch, so re-running
#   this step does not append duplicate commands.
for i in *.sam; do
  [ -e "$i" ] || continue   # glob matched nothing: emit no command
  printf 'samtools sort -@ 12 -O bam -o %q %q\n' "${i%.sam}.sorted.bam" "$i"
done >sam2bam.sh
# Workaround for an error hit at this step (reference: https://www.cnblogs.com/huanping/p/13786701.html):
# expose the system libcrypto 1.0.2k inside the conda env under the name libcrypto.so.1.0.0.
# NOTE(review): presumably samtools failed to load libcrypto.so.1.0.0 - confirm against the linked post.
ln -s /usr/lib64/libcrypto.so.1.0.2k /share/home/xiehs/Software/miniconda3/envs/human/lib/libcrypto.so.1.0.0

# Submit the generated script to the "fat" queue with 12 slots; %J in the log names is the job ID.
bsub -q fat -n 12 -o %J.log -e %J.err sh sam2bam.sh
# Write one "samtools index" command per BAM file in this directory into bai.sh.
# The glob is expanded when this loop runs, so the BAM files must already exist.
# Looping over the glob avoids parsing ls, printf %q (bash) shell-quotes each name,
# and the single ">" rebuilds bai.sh so re-running does not append duplicates.
for i in *.bam; do
  [ -e "$i" ] || continue   # glob matched nothing: emit no command
  printf 'samtools index -@ 24 %q\n' "$i"
done >bai.sh
# Submit the generated script to the "fat" queue with 24 slots; %J in the log names is the job ID.
bsub -q fat -n 24 -o %J.log -e %J.err sh bai.sh
# 一步管道命令
# bwa mem -t 4 -R '@RG\tID:A1\tPL:illumina\tSM:human' ref.fna clean.1.fq.gz clean.2.fq.gz | samtools view -Sb - | samtools sort - > A1.sort.bam


# Write the merge command into merge.sh. *.bam is expanded here, at generation
# time, so it picks up every .bam currently in the directory (including a
# leftover merge.sorted.bam from an earlier run - remove that first).
# The original passed a bare -t before -o; samtools merge's -t expects a TAG
# value, so -o would have been consumed as that value. The stray -t is dropped.
echo samtools merge -@ 24 -o merge.sorted.bam *.bam >merge.sh
# Submit the merge to the "fat" queue with 24 slots; %J in the log names is the job ID.
bsub -q fat -n 24 -o %J.log -e %J.err sh merge.sh
# Index the merged BAM.
# NOTE(review): bsub normally returns as soon as the job is queued, so run this
# only after the merge job has finished.
samtools index merge.sorted.bam
samtools quickcheck merge.sorted.bam

2.标记 Duplication
# Run GATK MarkDuplicates on the merged BAM: the result goes to
# merge.sorted.markdup.bam and the metrics to merge.markdup_metrics.txt.
gatk MarkDuplicates \
  -I merge.sorted.bam \
  -O merge.sorted.markdup.bam \
  -M merge.markdup_metrics.txt
# Index the MarkDuplicates output.
samtools index merge.sorted.markdup.bam
Duplication 对变异检测的影响

3.BQSR
# BQSR step 1: build the recalibration model.
# Author's note: adding --java-options "-Xmx8G -Djava.io.tmpdir=./" made the
# error go away; without it the command failed.
hg38_dir=/share/home/xiehs/data/GATK/hg38
ref_fasta=${hg38_dir}/Homo_sapiens_assembly38.fasta
time gatk BaseRecalibrator \
  --java-options "-Xmx8G -Djava.io.tmpdir=./" \
  -R "${ref_fasta}" \
  -I merge.sorted.markdup.bam \
  --known-sites "${hg38_dir}/1000G_phase1.snps.high_confidence.hg38.vcf.gz" \
  --known-sites "${hg38_dir}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" \
  --known-sites "${hg38_dir}/dbsnp_146.hg38.vcf.gz" \
  -O merge.sorted.markdup.recal_data.table \
  >bqsr.log
# BQSR step 2: apply the model to the duplicate-marked BAM.
time gatk ApplyBQSR \
  --bqsr-recal-file merge.sorted.markdup.recal_data.table \
  -R "${ref_fasta}" \
  -I merge.sorted.markdup.bam \
  -O merge.sorted.markdup.BQSR.bam
# Print alignment flag statistics for the recalibrated BAM.
samtools flagstat merge.sorted.markdup.BQSR.bam
# Build the index for the recalibrated BAM.
time samtools index merge.sorted.markdup.BQSR.bam
页: [1]
查看完整版本: bam文件处理