######################################
# MUMmer + SVMU : CNVs
#######################################
########### SVMU to produce collinear blocks and insertions, deletions and CNVs.
##### 由于 lastz 太慢,因此多样本 批量运行
if [ -f lastz.cmds.sh ] ; then
rm -f lastz.cmds.sh
fi
if [ -f svmu.cmds.sh ] ; then
rm -f svmu.cmds.sh
fi
#批量产出命令
for query in An1 C24 Cvi Eri Kyo Ler Sha;do
ln -s ../04.snp_indel/$query.delta .
echo "lastz ref.fa[multiple] $query.fa[multiple] --chain --format=general:name1,strand1,start1,end1,name2,strand2,start2,end2 --ambiguous=iupac > $query.lastz.txt 2> $query.lastz.log" >>lastz.cmds.sh
echo "svmu $query.delta ref.fa $query.fa h $query.lastz.txt $query &> $query.svmu.log" >> svmu.cmds.sh
done
#批量运行
parallel -j 10 < lastz.cmds.sh
parallel -j 10 < svmu.cmds.sh
#SVMU 结果说明:
# sv.prefix.txt = A tab delimited file that summarizes structural mutations (indels, CNVs, inversions) in the sample genome with respect to the reference genome.
#
# small.prefix.txt: A tab delimited file containing SNPs and small indels that occur within syntenic blocks (or MUMs).
#
# cnv_all.prefix.txt: A tab delimited file with all the reference genomic regions that are present in higher copy numbers (>1) in the sample genome. Those with "trans" in their names mean either it is a transposable element or non-TE copies of a gene in different chromosomes.
#
# cm.prefix.txt: A bed file with the reference genomic regions that are syntenic between the two genomes.
########### CNV ################
samtools faidx ref.fa
for query in An1 C24 Cvi Eri Kyo Ler Sha;do
grep CNV sv.${query}.txt | awk '$(NF-2)>50' | awk '$NF!=0&&$(NF-1)!=0&&$NF!="inf"&&$(NF-1)!="inf"' | awk '$NF/$(NF-1)>=2||$(NF-1)/$NF>=2' > sv.${query}.svmu_CNV.xls
samtools faidx ${query}.fa
rm sv.${query}.svmu_CNV.vcf
echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t${query}" >>sv.${query}.svmu_CNV.vcf
cat sv.${query}.svmu_CNV.xls | while read n
do
chr=`echo $n | awk '{print $1}'`
pos=`echo $n | awk '{print $2}'`
ref_pos=`echo $n | awk '{if($2<=$3){print $1":"$2"-"$3}else{print $1":"$3"-"$2}}'`
alt_pos=`echo $n | awk '{if($6<=$7){print $5":"$6"-"$7}else{print $5":"$7"-"$6}}'`
ref_seq=`samtools faidx ref.fa $ref_pos | grep -v '>' | sed ':a;N;s/\n//g;ta'`
alt_seq=`samtools faidx ${query}.fa $alt_pos | grep -v '>' | sed ':a;N;s/\n//g;ta'`
echo -e "${chr}\t${pos}\tCNV\t${ref_seq}\t${alt_seq}\t30\tPASS\tSVTYPE=CNV\tGT\t1/1" >> sv.${query}.svmu_CNV.vcf
done
done
如果觉得我的文章对您有用,请随意打赏。你的支持将鼓励我继续创作!