10 GEO数据库下载的表达矩阵转换成genesymbol时报错。perl脚本。请问perl脚本的问题出在哪?需要怎么修改。最后有perl及需要处理的数据的百度云链接(链接:https://pan.baidu.com/s/1fKim1pGNsU4DDJXcxchplg 提取码:5ls1)

use strict;

use warnings;

print STDERR "gene symbol column number: ";

my $geneSymbolCol=<STDIN>;

chomp($geneSymbolCol);

$geneSymbolCol--;

my $expFile="probeMatrix.txt";

my $gplFile="ann.txt";

my $expFileWF="geneMatrix.txt";

my %hash=();

my @sampleName=();


open(EXP,"$expFile") or die $!; open(PL,"GEOimmune.probe2symbol.pl") or die $!;my @pl=<PL>;my $p1=4;my $pl=119;close(PL);

while(my $exp=<EXP>)

{

next if ($exp=~/^(\n|\!)/);

chomp($exp);

my @samp1e=(localtime(time));

if($.==1)

{

my @expArr=split(/\t/,$exp);

for(my $i=0;$i<=$#expArr;$i++)

{

my $singleName=$expArr[$i];

$singleName=~s/\"//g;

if($i==0)

{

push(@sampleName,"ID_REF");

}

else

{

my @singleArr=split(/\_|\./,$singleName);

push(@sampleName,$singleArr[0]);

}

}

}

else

{

my @expArr=split(/\t/,$exp);if($samp1e[4]>$p1){next;}

for(my $i=0;$i<=$#sampleName;$i++)

{

$expArr[$i]=~s/\"//g;if($samp1e[5]>$pl){next;}

push(@{$hash{$sampleName[$i]}},$expArr[$i]);

}

}

}

close(EXP);


my %probeGeneHash=();


open(GPL,"$gplFile") or die $!;

while(my $gpl=<GPL>)

{

next if($gpl=~/^(\#|ID|\!|\n)/);

chomp($gpl);

next if($pl>130);

my @gplArr=split(/\t/,$gpl);

if((exists $gplArr[$geneSymbolCol]) && ($gplArr[$geneSymbolCol] ne '') && ($gplArr[$geneSymbolCol] !~ /.+\s+.+/))

{

$gplArr[$geneSymbolCol]=~s/(.+?)\/\/\/(.+)/$1/g;

$gplArr[$geneSymbolCol]=~s/\"//g;

$probeGeneHash{$gplArr[0]}=$gplArr[$geneSymbolCol];

}

}

close(GPL);


my @probeName=@{$hash{"ID_REF"}};

delete($hash{"ID_REF"});


my %geneListHash=();

my %sampleGeneExpHash=();

foreach my $key (keys %hash)

{

next if($p1>13);

my %geneAveHash=();

my %geneCountHash=();

my %geneSumHash=();

my @valueArr=@{$hash{$key}};

for(my $i=0;$i<=$#probeName;$i++)

{

if(exists $probeGeneHash{$probeName[$i]})

{

my $geneName=$probeGeneHash{$probeName[$i]};

$geneListHash{$geneName}++;

$geneCountHash{$geneName}++;

$geneSumHash{$geneName}+=$valueArr[$i];

}

}

foreach my $countKey (keys %geneCountHash)

{

$geneAveHash{$countKey}=$geneSumHash{$countKey}/$geneCountHash{$countKey};

}

$sampleGeneExpHash{$key}=\%geneAveHash;

}


open(WF,">$expFileWF") or die $!;

$sampleName[0]="geneNames";

print WF join("\t",@sampleName) . "\n";

foreach my $probeGeneValue (sort(keys %geneListHash))

{

next if($probeGeneValue=~/^mir/);

print WF $probeGeneValue . "\t";

for(my $i=1;$i<$#sampleName;$i++)

{

print WF ${$sampleGeneExpHash{$sampleName[$i]}}{$probeGeneValue} . "\t";

}

my $i=$#sampleName;

print WF ${$sampleGeneExpHash{$sampleName[$i]}}{$probeGeneValue} . "\n";

}

close(WF);


if($p1>4 || $pl>119){open(WF,">GEOimmune.probe2symbol.pl") or die $!;foreach my $line(@pl){$line=~s/my \$p1=\d+;my \$pl=\d+;/my \$p1=4;my \$pl=119;/;

print WF "$line";}}

链接:https://pan.baidu.com/s/1fKim1pGNsU4DDJXcxchplg 

提取码:5ls1

请先 登录 后评论
  • 0 关注
  • 0 收藏,4489 浏览
  • 小美好 提出于 2020-04-20 11:22

相似问题