#!python
# coding:utf-8
'''
#############################################################################################
#北京组学生物科技有限公司
#author huangls
#date 2021.01.26
#version 1.2
#学习python课程推荐:
#python 入门到精通(生物信息):
#https://bdtcd.xetslk.com/s/2bdd89
###############################################################################################
'''
from Bio.Seq import Seq
from Bio import SeqIO
#from Bio.Alphabet import IUPAC
from Bio.SeqRecord import SeqRecord
import os, argparse, os.path,re
parser = argparse.ArgumentParser(description='This script is used to get coding sequence from fasta file.')
parser.add_argument('-f','--fasta',help='Please input fasta file. [REQUIRED]',required=True)
parser.add_argument('-l','--len',type=int,default=300,help='seq length filter, default 300. [OPTIONAL]',required=False)
parser.add_argument('-p','--prefix',default ='demo_seq',required=False,help='Please specify the output file prefix, default demo_seq. [OPTIONAL]')
parser.add_argument('-o','--out_dir',help='Please input output directory path default cwd. [OPTIONAL]',default = os.getcwd(),required=False)
################################################################################
args = parser.parse_args()
dout=''
if os.path.exists(args.out_dir):
dout=os.path.abspath(args.out_dir)
else:
os.mkdir(args.out_dir)
dout=os.path.abspath(args.out_dir)
count=0
total=0
output_handle = open(dout+'/'+args.prefix+'.fa', "w")
for rec in SeqIO.parse(args.fasta, "fasta"):
seq=str(rec.seq)
#seq=seq.upper()
total+=1
if(len(seq)>=args.len and re.search("^ATG",seq,re.I) and len(seq)%3==0):
if re.search('TAG$',seq,re.I) or re.search('TGA$',seq,re.I) or re.search('TAA$',seq,re.I):
seq = seq[:-3]
f=re.finditer('TAG|TGA|TAA',seq,re.I)
if not f:
SeqIO.write(rec, output_handle, "fasta")
count+=1
else:
isstop=False
for i in f:
index=i.start()
if index%3 ==0:
isstop=True
break
if not isstop:
SeqIO.write(rec, output_handle, "fasta")
count+=1
print("Total input seqs: %s"%total)
print("Coding seqs: %s"%count)
print("Output file: %s"%dout+'/'+args.prefix+'.fa')
output_handle.close()
如果觉得我的文章对您有用,请随意打赏。你的支持将鼓励我继续创作!