脚本名称:get_gene_exon_from_gff.pl
用法:perl ../script/get_gene_exon_from_gff.pl -in1 WRKY_domain_new_out_emoved_redundant.txt -in2 Arabidopsis_thaliana.TAIR10.31.gff3 -out gene_exon_info.gff
输出结果(部分):
脚本源代码:
use Getopt::Long; my %opts; use Data::Dumper; GetOptions( \%opts, "in1=s", "in2=s", "out=s", "h" ); if ( !defined( $opts{in1} ) || !defined( $opts{in2} ) || !defined( $opts{out} ) || defined( $opts{h} ) ) { &USAGE; } open( IN1, "$opts{in1}" ) || die "open $opts{in1} failed\n"; open( IN2, "$opts{in2}" ) || die "open $opts{in2} failed\n"; open( OUT, ">$opts{out}" ) || die "open $opts{out} failed\n"; my %gffs; while (<IN1>) { chomp; next if /^#/; my @b = split/\t/, $_; $gffs{$b[0]} = 1; } #print Dumper(\%gffs); while (<IN2>) { chomp; next if (/^#/); my @a = split /\t/, $_; next if $a[2]=~/exon/i; if ($a[2] =~/^mRNA$/i or $a[2] =~/^transcript$/i ) { ($id1) = ($a[8] =~ m/ID=([^;]*)/); }elsif ( $a[2] =~/^CDS$/i or $a[2] =~/utr/i ) { ($id1) = ($a[8] =~ m/Parent=([^;]*)/); }else{ next; } if ( exists $gffs{$id1} ) { print OUT "$_\n"; } } close OUT; close IN1; close IN2; sub USAGE { print "usage: perl $0 -in1 mRNA_id.txt -in2 genome.gff3 -out gene_location.txt "; exit; }
如果觉得我的文章对您有用,请随意打赏。你的支持将鼓励我继续创作!