5 perl双层哈希赋值

目的是,把多组重复串联序列批量、成对的提取序列。

代码编写到序列提取时:

use Bio::SeqIO;
use Bio::Seq;
use Data::Dumper;
#输入序列
$in = Bio::SeqIO->new(
 -file   => "C:/Users/Administrator/cds.fa",
 -format => 'Fasta'
);

#读取ID
my $count=-1;
my %ID;
open IN, "C:/Users/Administrator/KAKS.txt" or die "$!";        #这个文件是blast输出并经过筛选的结果。

my @read=<IN>;
foreach (@read){
 $count++;
 if(/^[^#]/){
  my @a = split /\t/,$read[$count];
   $ID{$count}{$a[0]}=1;
   $ID{$count}{$a[1]}=1;
 }
}
close (IN);
#print Dumper(\%ID);
for my $con(keys %ID){
 while ( my $seq = $in->next_seq() ) {
  my ( $id, $sequence, $desc ) = ( $seq->id, $seq->seq, $seq->desc );
  if ( exists $ID1{$con}{$id} ){
   print "$con\t$sequence\n";

   $ID{$con}{$id}=$sequence;
  }
 }
}

print Dumper(\%ID);
运行结果:

21 ATGATGAATCGGGGACTTGAAGTTCTATCTCAAACCTCTTACTTACAGACTTGGAATTGGTTGTTTCAAGAGAGCAAAGGAACAAACTGGAGTGCTGAAGAGAACAAGTGTTTCGAAAATGCTCTAGCTTTATATGATACAGACACGCCTGACCGATGGTTTAAAGTGGCAGCAATGGTTCCTGGAAAAACTGTTGAAGATGTCATCAAACAGTATAGAGAATTGGAAGAGGATGTAAGTGATATAGAGGCGGGATTGATTCCAATTCCTGGGTATACCACTGATTCTTTTAGATTGGAGTGGGTTAATGATAGTCAGGGCTTTGATGGAATTAGAAACTATTACACTCCTGGTGGTAAAAGAGGGTCTGGGTCTCGACCTTCTGATCATGAAAGGAAGAAAGGGGTGCCATGGACTGAAGAAGAACACAGGCAATTTCTAATGGGTCTTAAAAAGTATGGTAAAGGGGATTGGAGAAATATTTCACGCAATTTCGTGATGACTAGAACACCAACTCAGGTGGCAAGCCATGCTCAAAAATACTTCATTAGGCAGCTGACGGGTGGGAAGGATAAGAAGAGGACCAGTATCCATGATATCACCACGGTTAACATCCCCGATACACCCTCTTCCTCTCCGGATCACAGCAAGCCTTTGTCTCCAAACAATTCTTCTGCAGTCATACAGTCACAGCAGCAGTCGAAAGTACCCAGTGTAACAACTAAAGAGCTATTTGATTTTGAGTGGAAGCAACATAATGAAGGGGTTGCAGCCATGGTTTTCAATCAGACAAGTACCGGTAACGCCTTACTGTCCCCCTTGTGTGGGATTTCTTCATATGGAACCAAGCTGGAGGAAAAAAAATTGCCAAGAGGAACACTTCCCAGATCTCAGTTTGGATCTTATAACACTCTTTTCCAGATGCAGTCAACCTTCCTCACCGGATCACAGCAAGCCTTTGTCTCCAAACAATTCTTCTGCAGTCATACAGTCACAGCAGCAGTCGAAAGTACCCAGTATAACAACTAA
21 ATGAATAGGGGACTTGAAATTCTCTCTCCAGCCTCTTATTTACATACTTCCAATTGGTTTTTTCAAGAGAGTAAAGGTACTAAATGGACGCCTGAAGAGAACAAGTGCTTTGAAAATGCTTTGGCTTTATATGATAAAGATACCCCTGATCGATGGATTAAGGTGGCATCTATGATTCCTGGAAAAACAGTTGGGGATGTCATCAAGCAATACAGAGAATTGGAAGAAGATGTGAGTGATATAGAGGCTGGCCTGATTCCAATCCCTGGATATAGCAGTGATTCTTTTACATTGGAGTGGGGTAATGATAGTCAAGGCTTTGATGGATTTAGACACTATTACACTCCTGGTAAAAGAGGGACTGGGTCTCGTTCTTCTGATCAAGAAAGGAAGAAAGGAGTGCCATGGACTGAAGAAGAACACAGACAATTCTTAATGGGTCTTAAAAAGTACGGTAAAGGGGATTGGAGAAATATTTCACGCAATTTCGTGACTACTCGAACACCGACTCAAGTGGCAAGTCATGCTCAGAAATACTTCATCAGGCAGGTCACTGGGGGGAAAGACAAGAGGAGGTCCAGTATCCATGATATCACAACGGTTAATGTCCCTGATACTCCCTCTTCGTCACCGGATCGCTGCAAGCATTCGTCTTCAAGTGATTCTTCTGCAGTCATACAGGCACAGCAGCAAGCTAAACTAGCCACAACTAAAGAGATCGACTTTGAATGGAAGCAACAAAATGGAGCAGCTATGGTTTTCAACCGGACGAGTAATTGCAATGCTTTCCTGCCCCCCTTCTGTGGGATTTCATCATACGGACCCAAGCTGGAGGAACAAAATTTGCTTGGAGAAACTCTTCCCAGATCTCAATTCGGATCTTACAACACTCATTTCCAGATGCAGTCAATGCAACAACATCAATAA
$VAR1 = {
          '21' => {
                    'Gbscaffold781.3' => 'ATGAATAGGGGACTTGAAATTCTCTCTCCAGCCTCTTATTTACATACTTCCAATTGGTTTTTTCAAGAGAGTAAAGGTACTAAATGGACGCCTGAAGAGAACAAGTGCTTTGAAAATGCTTTGGCTTTATATGATAAAGATACCCCTGATCGATGGATTAAGGTGGCATCTATGATTCCTGGAAAAACAGTTGGGGATGTCATCAAGCAATACAGAGAATTGGAAGAAGATGTGAGTGATATAGAGGCTGGCCTGATTCCAATCCCTGGATATAGCAGTGATTCTTTTACATTGGAGTGGGGTAATGATAGTCAAGGCTTTGATGGATTTAGACACTATTACACTCCTGGTAAAAGAGGGACTGGGTCTCGTTCTTCTGATCAAGAAAGGAAGAAAGGAGTGCCATGGACTGAAGAAGAACACAGACAATTCTTAATGGGTCTTAAAAAGTACGGTAAAGGGGATTGGAGAAATATTTCACGCAATTTCGTGACTACTCGAACACCGACTCAAGTGGCAAGTCATGCTCAGAAATACTTCATCAGGCAGGTCACTGGGGGGAAAGACAAGAGGAGGTCCAGTATCCATGATATCACAACGGTTAATGTCCCTGATACTCCCTCTTCGTCACCGGATCGCTGCAAGCATTCGTCTTCAAGTGATTCTTCTGCAGTCATACAGGCACAGCAGCAAGCTAAACTAGCCACAACTAAAGAGATCGACTTTGAATGGAAGCAACAAAATGGAGCAGCTATGGTTTTCAACCGGACGAGTAATTGCAATGCTTTCCTGCCCCCCTTCTGTGGGATTTCATCATACGGACCCAAGCTGGAGGAACAAAATTTGCTTGGAGAAACTCTTCCCAGATCTCAATTCGGATCTTACAACACTCATTTCCAGATGCAGTCAATGCAACAACATCAATAA',
                    'Gbscaffold11559.61' => 'ATGATGAATCGGGGACTTGAAGTTCTATCTCAAACCTCTTACTTACAGACTTGGAATTGGTTGTTTCAAGAGAGCAAAGGAACAAACTGGAGTGCTGAAGAGAACAAGTGTTTCGAAAATGCTCTAGCTTTATATGATACAGACACGCCTGACCGATGGTTTAAAGTGGCAGCAATGGTTCCTGGAAAAACTGTTGAAGATGTCATCAAACAGTATAGAGAATTGGAAGAGGATGTAAGTGATATAGAGGCGGGATTGATTCCAATTCCTGGGTATACCACTGATTCTTTTAGATTGGAGTGGGTTAATGATAGTCAGGGCTTTGATGGAATTAGAAACTATTACACTCCTGGTGGTAAAAGAGGGTCTGGGTCTCGACCTTCTGATCATGAAAGGAAGAAAGGGGTGCCATGGACTGAAGAAGAACACAGGCAATTTCTAATGGGTCTTAAAAAGTATGGTAAAGGGGATTGGAGAAATATTTCACGCAATTTCGTGATGACTAGAACACCAACTCAGGTGGCAAGCCATGCTCAAAAATACTTCATTAGGCAGCTGACGGGTGGGAAGGATAAGAAGAGGACCAGTATCCATGATATCACCACGGTTAACATCCCCGATACACCCTCTTCCTCTCCGGATCACAGCAAGCCTTTGTCTCCAAACAATTCTTCTGCAGTCATACAGTCACAGCAGCAGTCGAAAGTACCCAGTGTAACAACTAAAGAGCTATTTGATTTTGAGTGGAAGCAACATAATGAAGGGGTTGCAGCCATGGTTTTCAATCAGACAAGTACCGGTAACGCCTTACTGTCCCCCTTGTGTGGGATTTCTTCATATGGAACCAAGCTGGAGGAAAAAAAATTGCCAAGAGGAACACTTCCCAGATCTCAGTTTGGATCTTATAACACTCTTTTCCAGATGCAGTCAACCTTCCTCACCGGATCACAGCAAGCCTTTGTCTCCAAACAATTCTTCTGCAGTCATACAGTCACAGCAGCAGTCGAAAGTACCCAGTATAACAACTAA'
                  },
          '52' => {
                    'Gbscaffold127.3' => 1,
                    'Gbscaffold46726.3' => 1
                  },
          '150' => {
                     'Gbscaffold8038.10' => 1,
                     'Gbscaffold5204.16' => 1
                   },
          '29' => {
                    'Gbscaffold16576.42' => 1,
                    'Gbscaffold11723.40' => 1
                  },
          '45' => {
                    'Gbscaffold1258.3_scaffold1258.4' => 1,
                    'Gbscaffold1258.2' => 1
                  },

————以下省略————

发现循环只进行了一次,请问何解?

我已经听完了咱们perl的全部课程。还是不明白,望高人指点!!!


请先 登录 后评论

1 个回答

omicsgene - 生物信息
擅长:重测序,遗传进化,转录组,GWAS

这个地方为什么要用两层hash??

一层hash,存储一下ID就好了,存储数字没有用;   


attachments-2018-12-66qw5Wal5c0f552869f47.jpg


还有, 读取文件最好用while循环,参考脚本:https://www.omicsclass.com/article/322


请先 登录 后评论
  • 1 关注
  • 0 收藏,3326 浏览
  • Gbolin 提出于 2018-12-11 11:16

相似问题