#!/usr/bin/php
<?php
################################################################################
# Genome_Compare by Vamsee Reddy <Symphony.dev@gmail.com>                      #
# This script will compare a subject genome to multiple genome files           # 
# and report the best match sequence for each subject sequence                 #
# found in the target genomes. This will also report back tabulated TMS        #
# information. Requires HMMTOP & NCBI BLAST to run                             #
################################################################################
// Whole genome files are read into memory at once, so raise PHP's memory ceiling.
ini_set('memory_limit', '500M');
/**
 * Genome comparison pipeline.
 *
 * Constructing the object runs the whole job: it prompts for a subject genome
 * and a list of target genomes, builds a BLAST database from the targets,
 * BLASTs the subject against it, keeps the first reported hit for each query
 * and writes a tab-separated report (clean.tsv) that also carries the
 * HMMTOP-derived TMS count and the length of both sequences.
 *
 * All files are resolved relative to $path, because the constructor changes
 * into that directory before any genome file is read.
 *
 * External programs invoked: formatdb, blastall, hmmtop, clear.
 */
class GC
{
    /** Subject genome FASTA filename as entered by the user. */
    public $subject;

    /** Target genome FASTA filenames (array of trimmed names). */
    public $targets;

    /** Organism labels, index-aligned with $targets. */
    public $organisms;

    /** Working directory; created if missing and entered before any file I/O. */
    public $path = './db';

    /** Parsed BLAST rows: one array of tab-separated fields per query. */
    public $output;

    /** Contents of the combined, tab-free target FASTA file. */
    public $targetseq;

    /** Contents of the organism index ("<header> #<organism>#" per line). */
    public $organism_index;

    /** Contents of the tab-free subject FASTA file. */
    public $subjectseq;

    /**
     * Prompts for the inputs and runs every pipeline stage in order.
     *
     * @throws RuntimeException when the working directory or an input file is unusable
     */
    public function __construct()
    {
        $this->subject = $this->getInput("Enter Subject Genome Filename");
        $targets = $this->getInput("Enter Target Genome filenames. Separate with a comma");
        $organisms = $this->getInput("Enter your organism symbol/name in the same order as your genome files. Separate with a comma");

        if (!is_dir($this->path)) {
            mkdir($this->path);
        }
        if (!chdir($this->path)) {
            throw new RuntimeException("Cannot enter working directory {$this->path}");
        }

        // Trim once here so every later stage sees the same clean names
        // ("a.fa, b.fa" must not turn into " b.fa").
        $this->targets = array_map('trim', explode(",", $targets));
        $this->organisms = array_map('trim', explode(",", $organisms));

        $this->build_index();
        $this->make_blast_db();
        $this->blast();
        // No second chdir: we are already inside $path, and repeating a
        // relative chdir would point at "$path/$path".
        $this->parse_blast_output();
        $this->load_dbs();
        $this->write_file('clean.tsv');
    }

    /**
     * Prints a prompt on STDOUT and reads one line from STDIN.
     *
     * @param string $msg prompt text (a ": " suffix is appended)
     * @return string the entered line with surrounding whitespace removed;
     *                empty string on end-of-input
     */
    public function getInput($msg)
    {
        fwrite(STDOUT, "$msg: ");
        // fgets() returns false on EOF; the cast keeps trim() fed with a string.
        $varin = trim((string) fgets(STDIN));
        return $varin;
    }

    /**
     * Writes the file "organism_index": every FASTA header line found in each
     * target genome, followed by " #<organism>#", so a hit ID can later be
     * mapped back to the organism it came from.
     *
     * @throws RuntimeException when the index or a target file cannot be opened
     */
    public function build_index()
    {
        echo "\n\n> Building Organism Index...";
        $handle = fopen('organism_index', 'w+');
        if ($handle === false) {
            throw new RuntimeException("Cannot write organism_index");
        }
        foreach ($this->targets as $key => $target) {
            // Fewer organism labels than genome files: leave the label empty.
            $organism = isset($this->organisms[$key]) ? $this->organisms[$key] : '';
            $fasta = $this->read_or_fail(trim($target));
            preg_match_all("/>.{1,}/i", $fasta, $ids, PREG_PATTERN_ORDER);
            foreach ($ids[0] as $id) {
                fwrite($handle, "$id #$organism#\n");
            }
            unset($fasta); // genome files are large; release before the next one
        }
        fclose($handle);
    }

    /**
     * Concatenates all target genomes into the file "target", copies the
     * subject genome into the file "input" (tabs replaced by spaces in both),
     * and runs formatdb on "target".
     *
     * @throws RuntimeException when a genome cannot be read or a file cannot be written
     */
    public function make_blast_db()
    {
        echo "\n> Building Specific BLAST Database...";
        $genomes = array();
        foreach ($this->targets as $target) {
            $genomes[] = $this->read_or_fail(trim($target));
        }
        $combined = str_replace("\t", ' ', implode("\n", $genomes));
        unset($genomes);
        if (file_put_contents('target', $combined) === false) {
            throw new RuntimeException("Cannot write file: target");
        }
        unset($combined);

        $subject = str_replace("\t", ' ', $this->read_or_fail($this->subject));
        if (file_put_contents('input', $subject) === false) {
            throw new RuntimeException("Cannot write file: input");
        }
        unset($subject);

        // NOTE(review): "-o" is passed twice ("-o target" and "-o T"); the first
        // looks like it was meant to be "-n target". Left unchanged - confirm
        // against the installed formatdb before editing.
        system("formatdb -i target -t target -o target -p T -o T");
    }

    /**
     * Runs blastall (blastp) with query file "input" against database
     * "target", writing commented tabular output (-m 9) to the file "output".
     */
    public function blast()
    {
        echo "\n>>Blasting...\n";
        system("blastall -p blastp -d target -i input -m 9 -o output -K 10");
    }

    /**
     * Reads the file "output" and stores, for every comment header that is
     * directly followed by a data line, that first data line split on tabs.
     * The result is placed in $this->output (an empty array when there are
     * no hits).
     *
     * @throws RuntimeException when "output" cannot be read
     */
    public function parse_blast_output()
    {
        $top = array();
        $report = $this->read_or_fail('output');
        // Ungreedy: a '#' comment line, then the shortest run of non-'#' text
        // ending in a newline, i.e. only the first hit row under each header.
        $pattern = '/#.{1,}\n[^#]{1,}\n/iU';
        preg_match_all($pattern, $report, $out, PREG_PATTERN_ORDER);
        foreach ($out[0] as $block) {
            $lines = explode("\n", $block);
            $top[] = explode("\t", $lines[1]);
        }
        $this->output = $top;
    }

    /**
     * Runs hmmtop on one sequence and returns the first token of the fifth
     * whitespace-separated field of hmmtop's last output line.
     * NOTE(review): presumably that token is the predicted TMS count -
     * confirm against the installed hmmtop's output format.
     *
     * @param string $sequence raw amino-acid sequence (no FASTA header)
     * @return string the extracted token, or '' when hmmtop printed nothing usable
     * @throws RuntimeException when the temporary sequence file cannot be written
     */
    public function get_tms($sequence)
    {
        $uid = "tms_count.seq";
        $tmp = '/tmp';
        $seqfile = "$tmp/$uid";

        $fpseqfile1 = fopen($seqfile, "w+");
        if ($fpseqfile1 === false) {
            throw new RuntimeException("Cannot write file: $seqfile");
        }
        fwrite($fpseqfile1, ">MY_SEQ\n$sequence");
        fclose($fpseqfile1);

        $command = "hmmtop -if=$seqfile";
        // system() echoes the command's output; buffer and discard it, keeping
        // only the returned last line.
        ob_start();
        $can = system($command);
        ob_end_clean();
        system('clear');
        unlink($seqfile);

        $chunks = preg_split("/\s+/", (string) $can, 5);
        if (!isset($chunks[4])) {
            return '';
        }
        $chunks = preg_split("/\s+/", $chunks[4]);
        return $chunks[0];
    }

    /**
     * Loads the files "target", "organism_index" and "input" into memory for
     * the lookups performed while writing the report.
     *
     * @throws RuntimeException when any of the three files cannot be read
     */
    public function load_dbs()
    {
        $this->targetseq = $this->read_or_fail('target');
        $this->organism_index = $this->read_or_fail('organism_index');
        $this->subjectseq = $this->read_or_fail('input');
    }

    /**
     * Looks up a hit in the target data.
     *
     * @param string $id hit ID as reported by BLAST
     * @return array array(organism label, get_tms() result, sequence length);
     *               the label is '' when the ID is not in the organism index
     */
    public function load_target($id)
    {
        $seq = $this->extract_sequence($id, $this->targetseq);

        $organism = '';
        $index_line = '/' . preg_quote($id, '/') . '.{1,}\n/';
        if (preg_match($index_line, (string) $this->organism_index, $o)
            && preg_match('/#.{1,}#/', $o[0], $tag)) {
            $organism = str_replace('#', '', $tag[0]);
        }

        $tms = $this->get_tms($seq);
        $len = strlen($seq);
        return array($organism, $tms, $len);
    }

    /**
     * Looks up a query sequence in the subject data.
     *
     * @param string $id query ID as reported by BLAST
     * @return array array(get_tms() result, sequence length)
     */
    public function load_subject($id)
    {
        $seq = $this->extract_sequence($id, $this->subjectseq);
        $tms = $this->get_tms($seq);
        $len = strlen($seq);
        return array($tms, $len);
    }

    /**
     * Writes the tab-separated report: a header row, then one row per entry
     * of $this->output enriched with TMS and length data for both sequences.
     * Prints a "." per row as progress.
     *
     * @param string $out output filename (relative to the working directory)
     * @throws RuntimeException when the output file cannot be opened
     */
    public function write_file($out)
    {
        // BLAST columns: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
        // NOTE(review): BLAST's q.start/q.end land under HIT-START/HIT-END and
        // s.start/s.end under SUBJECT-START/SUBJECT-END, although the BLAST
        // query is this script's "subject". The labels look swapped; left
        // unchanged because consumers may depend on the header - confirm.
        $header = "SUBJECT ID|SUBJECT TMS|SUBJECT LENGTH|HIT ID|HIT ORGANISM|HIT TMS|HIT LENGHTH|% IDENTITY|ALIGNMENT LENGHTH|MISMATCHES|GAP OPENINGS|HIT-START|HIT-END|SUBJECT-START|SUBJECT-END|E-VALUE|BIT-SCORE\n";
        $header = str_replace('|', "\t", $header);

        $handle = fopen($out, "w+");
        if ($handle === false) {
            throw new RuntimeException("Cannot write file: $out");
        }
        fwrite($handle, $header);

        $rows = is_array($this->output) ? $this->output : array();
        foreach ($rows as $row) {
            // A well-formed tabular BLAST row has 12 fields; skip anything shorter.
            if (count($row) < 12) {
                continue;
            }
            $target = $this->load_target($row[1]);
            $subject = $this->load_subject($row[0]);
            $entry = array_merge(
                array($row[0], $subject[0], $subject[1], $row[1], $target[0], $target[1], $target[2]),
                array_slice($row, 2, 10) // BLAST fields 2..11, unchanged
            );
            fwrite($handle, implode("\t", $entry) . "\n");
            echo ".";
        }
        fclose($handle);
        echo "\n>> Done! File saves as $out";
    }

    /**
     * Returns the sequence of the first FASTA record whose header starts
     * with ">$id", with line breaks removed.
     *
     * @param string $id    record ID; quoted so '|', '.' and similar are literal
     * @param string $fasta FASTA text to search
     * @return string the sequence, or '' when no record matches
     */
    private function extract_sequence($id, $fasta)
    {
        $pattern = '/>' . preg_quote($id, '/') . '.{1,}\n.{1,}(>|$)/iUs';
        if (!preg_match($pattern, (string) $fasta, $m)) {
            return '';
        }
        // Strip the '>' markers, then drop the header line and join the rest.
        $lines = explode("\n", str_replace('>', '', $m[0]));
        unset($lines[0]);
        return implode('', $lines);
    }

    /**
     * Reads a whole file or aborts, so later stages never run on empty data.
     *
     * @param string $file filename relative to the current working directory
     * @return string file contents
     * @throws RuntimeException when the file cannot be read
     */
    private function read_or_fail($file)
    {
        $data = @file_get_contents($file);
        if ($data === false) {
            throw new RuntimeException("Cannot read file: $file");
        }
        return $data;
    }
}
// Entry point: constructing GC runs the whole comparison pipeline.
new GC();
?>