#!/usr/bin/php -q
<?php
/*--------------------------------------------------------------------#
# GSAT V(1.0) - This program will preform a Needlemen Wunsch          #
# Global alignment on two sequences using EMBOSS NEEDLE. It is        #
# required that you have EMBOSS NEEDLE installed on your machine      #
# to run this program. This tool will align your sequences, and       #
# will shuffle your second sequence 2*X Where X=lenght of your        #
# first protein. It will then calculate the number of standard        #
# deviations your NW score is from your average shuffled NW scores    #
# Anything over 10SDs for a sequence over 80 Amino Acids is           #
# significant [Doolittle].                                            #
#                                                                     #
# This tool was designed by Vamsee Reddy. Please cite:                #
# [V. S. Reddy, M. Saier, 2010]                                       #
#---------------------------------------------------------------------*/
error_reporting(0);
Class 
GSAT
{
    var 
$seq1;
    var 
$seq2;
    var 
$shuf;
    var 
$times;
    var 
$on_score;
    var 
$needle_scores;
    var 
$sd;
    var 
$z;
    var 
$results;
    var 
$average;
    var 
$clear=FALSE;
    var 
$matrix;
    
    public function 
__Construct()
    {
        
$opt=getopt("i:g:e:r:o:m:");
        if(!
count($opt))
        {
            echo 
">> Welcome to GSAT :: The Global Sequence Alignment Tool by Vamsee Reddy <Symphony.dev@gmail.com>\n";
            echo 
">> [ USAGE ]\n>> GSAT -i <INPUT> -o <OUTPUT> -r <Number of shuffles> -g <Gap open penalty> -e <Gap extention penalty> -m <Custom scoring matrix>\n>> Only INPUT & OUTPUT are required. Default settings will be used for the rest.\n\n";
            exit;
        } else
        {
            if((!isset(
$opt['i']))||(!isset($opt['o']))) {die(">> Invalid Options\n");}
            
$file=$opt['i'];
            
$this->go=($opt['g'])?$opt['g']:8;
            
$this->ge=($opt['e'])?$opt['e']:2;
            
$out=$opt['o'];
            
$this->matrix=($opt['m'])?"-datafile {$opt['m']}":NULL;
            
$read=file_get_contents($file);
            
$read=explode(">",$read);
            unset(
$read[0]);
            foreach(
$read as $fasta)
            {
                
$fasta=explode("\n",$fasta);
                unset(
$fasta[0]);
                
$fastas[]=implode(NULL,$fasta);
            }
            
$this->seq1=$fastas[0];
            
$this->seq2=$fastas[1];
            
$this->times=($opt['r'])?$opt['r']:$this->times=strlen($this->seq1)*3;
            
$this->times=($this->times<100)?$this->times*3:$this->times;
        }
        
        
$this->on_score=$this->needle($this->seq1,$this->seq2); 
        
$this->shuf=$this->shuffle_squence($this->seq2);
        
$this->needle_all();
        
$this->sd=sd($this->needle_scores);
        
$this->get_z();
        
$info="Shuffled: {$this->times} times\t\tAverage Quality: {$this->average} +/- {$this->sd}\n\nThree Z-Scores are given based on three potential Averages\n";
        
$res="{$this->results}\n$info\nZ-Score: {$this->z} (S.Ds From average)\n\n";
        if(
$out)
        {
            
$handle=fopen($out,"w+");
            
fwrite($handle,$res);
        } else
        {
            echo 
$res;
        }
    }
    
    public function 
shuffle_squence($seq)
    {
        for(
$i=0;$i<=strlen($seq)-1;$i++)
        {
            
$array[]=$seq[$i];
        }
        for(
$i=1;$i<=$this->times;$i++)
        {
            for(
$x=0;$x<=100;$x++)
            {
                
shuffle($array);
            }
            
$shuffled[]=implode(NULL,$array);
        }
        return 
$shuffled;
    }
    
    public function 
getInput($msg)
    {
        
fwrite(STDOUT"$msg: ");
        
$varin trim(fgets(STDIN));
        return 
$varin;
    }
    
    public function 
needle($a,$b)
    {
        
$handle=fopen("/tmp/a","w+");
        
fwrite($handle,$a);
        
fclose($handle);
        
$handle=fopen("/tmp/b","w+");
        
fwrite($handle,$b);
        
fclose($handle);
        
$cmd="needle -asequence '/tmp/a' -bsequence '/tmp/b' -gapopen {$this->go} -gapextend {$this->ge} {$this->matrix} -outfile '/tmp/needle_out'";
        
system($cmd);
        if(
$this->clear){system('clear');}
        
$res=file_get_contents('/tmp/needle_out');
        
$this->results=(isset($this->results))?$this->results:$res;
        
preg_match_all('/# Score: [0-9,\.]{1,5}/',$res,$score);
        
$score=preg_replace("/[^0-9,\.]/",NULL,$score[0][0]);
        return 
$score;
    }
    
    public function 
needle_all()
    {
        
$i=1;
        foreach(
$this->shuf as $seq)
        {
            
$percent number_format(($i 100) / $this->times);
            
$this->needle_scores[]=$this->needle($this->seq1,$seq);
            
system('clear');
            echo 
"\n Calculating....$percent% Complete :: \n";
            
$i++;
        }
    }
    
    public function 
get_z()
    {
        foreach(
$this->needle_scores as $score)
        {
            @
$count+=$score;
        }
        
$average=$count/count($this->needle_scores);
        
        
$z=($this->on_score-($average))/$this->sd;
        
$z1=($this->on_score-($average+$this->sd))/$this->sd;
        
$z2=($this->on_score-($average-$this->sd))/$this->sd;
        
$avg=($z+$z1+$z2)/3;
        
$avg=round($avg,2);
        
$z=round($z,2);
        
$z1=round($z1,2);
        
$z2=round($z2,2);
        
$this->average=round($average,2);
        
$this->z="$avg ($z1,$z,$z2)";
    }
    
    
    
}
function 
sd_square($x$mean

    return 
pow($x $mean,2); 
}

function 
sd($array
{
    return 
sqrt(array_sum(array_map("sd_square"$arrayarray_fill(0,count($array), (array_sum($array) / count($array)) ) ) ) / (count($array)-1) );
}
new 
GSAT();
?>