#!/usr/bin/perl -w use strict; use Getopt::Long; use Data::Dumper; my $ssearch = 'ssearch34_t'; my ($inputFile1, $inputFile2) = ($ARGV[0], $ARGV[1]); #my $options = GetOptions( # "i1=s" => \$inputFile1, # "i2=s" => \$inputFile2, # "o=s" => \$outputFile, # ); if (! ($inputFile1 && $inputFile2)) { print "use BScompare.pl \n"; exit; } print "query\tsubject\tS_b\tB_b\tCompare_score\n"; my ($titleRef1, $seqRef1) = SeqToArrayRef($inputFile1); my ($titleRef2, $seqRef2) = SeqToArrayRef($inputFile2); #print STDOUT "query\tsubject\tGL_b\tS_b\tB_b\tCompare_score\n"; for (my $i = 0; $i < @$titleRef1; $i++) { my $fileName = substr($titleRef1->[$i], 0, 9); $fileName =~ s/\|//g; my $fileName1 = "$fileName.faa1"; my $fileName2 = "$fileName.faa2"; # my $fileName1 = $titleRef1->[$i] . '.faa1'; # my $fileName2 = $titleRef1->[$i] . '.faa2'; # $fileName1 =~ s/\|//g; # $fileName2 =~ s/\|//g; #print "$fileName1 $fileName2\n"; open (OS1,">$fileName1"); print OS1 $seqRef1->[$i]; close OS1; open (OS2,">$fileName2"); for (my $j = 0; $j < @$titleRef2; $j++) { print OS2 $seqRef2->[$j]; } close OS2; my $SscoreHash = RunSsearch($fileName1, $fileName2); my $BscoreHash = RunBlast($fileName1, $fileName2); unlink($fileName1); unlink($fileName2); for (my $j = 0; $j < @$titleRef2; $j++) { my $sb = ($SscoreHash->{$titleRef2->[$j]}) ? $SscoreHash->{$titleRef2->[$j]}->{'bs'} : 15; my $bb = ($BscoreHash->{$titleRef2->[$j]}) ? $BscoreHash->{$titleRef2->[$j]}->{'bs'} : 15; my $cs = sprintf("%.1f",($sb + $bb - 37) * 0.215378); print "$titleRef1->[$i]\t$titleRef2->[$j]\t$sb\t$bb\t$cs\n"; } # print Dumper($BscoreHash); # my ($GLS, $GLZ, $GLB, $GLE) = (0,0,0,0); # my ($BE, $BB) = RunBlast("temp1seq.faa","temp2seq.faa"); # my $average = sprintf("%.2f", ($SB + $GLB + $BB - 72) * 0.2); # print "$titleRef1->[$i]\t$titleRef2->[$j]\t$GLS\t$GLZ\t$GLB\t$GLE\t$SS\t$SZ\t$SB\t$SE\t$BB\t$BE\n"; # my $outdata = "$titleRef1->[$i]\t$titleRef2->[$j]\t$GLB\t$SB\t$BB\t$average\n"; # my $outdata = ''; # if ($outputFile) { # print OF $outdata; # } else { # print $outdata; # } # print "$titleRef1->[$i]\t$titleRef2->[$j]\t$SB\t$GLB\t$BB\t$average\n"; } sub SeqToArrayRef { my ($inSeq) = @_; my $order = -1; my $titleRef = []; my $seqRef = []; open (IS,"<$inSeq"); foreach() { if ($_ =~ /\>(\S+)/) { $order++; $titleRef->[$order] = $1; } $seqRef->[$order] .= $_; } close IS; return ($titleRef, $seqRef); } sub RunSsearch { my ($inSeq1, $inSeq2) = @_; my $program = $ssearch; my $command = "$program -p -q -w 80 -m 0 -k 500 -f -8 -g -2 -s BL62 -B $inSeq2 $inSeq1"; my @output = `$command`; my $scoreHash = {}; my $currentQuery = ''; foreach(@output) { if ($_ =~ />>>(\S+)\s/) { $currentQuery = $1; next; } if ($_ =~ /s-w opt:\s+(\S+)\s+Z-score:\s+(\S+)\s+bits:\s(\S+)\s+E\(\):\s+(\S+)/) { # ($smith_score, $z_sxore, $bit_score, $e_value) = ($1, $2, $3, $4); my $hash = {'ss' => $1, 'zs' => $2, 'bs' => $3, 'ev' => $4}; if (!exists $scoreHash->{$currentQuery}) { $scoreHash->{$currentQuery} = $hash; } next; } #if ($_ =~ /n-w opt:\s+(\S+)\s+Z-score:\s+(\S+)\s+bits:\s(\S+)\s+E\(\):\s+(\S+)/) { # ($smith_score, $z_sxore, $bit_score, $e_value) = ($1, $2, $3, $4); # last; #} } return $scoreHash; } sub RunBlast { my ($inSeq1, $inSeq2) = @_; system ("formatdb -i $inSeq1 -p T -l /dev/null/log"); my $command = "blastall -i $inSeq2 -p blastp -d $inSeq1 -G -8 -E -2 -F F -v 1 -b 1 -m8"; my @output = `$command`; my $scoreHash = {}; foreach (@output) { my @split = split(/\s+/, $_); my $hash = {'ev' => $split[10], 'bs' => $split[11]}; if (! exists $scoreHash->{$split[0]}){ $scoreHash->{$split[0]} = $hash; } # ($e_value, $bit_score) = ($split[10], $split[11]); } unlink("$inSeq1.phr"); unlink("$inSeq1.pin"); unlink("$inSeq1.psq"); return $scoreHash; }