/* Program From Progressive Alignment of Amino Acid Sequences and Construction of Phylogenetic Trees from Them, by D. F. Feng and R. F. Doolittle, Methods of Enzymology, v266, p368 (1996) 2/7/1996 Read "readme.doc" before using. */ #include #include #define NSEQ 52 /*Maximum number of sequences*/ #define MAXSE 501 /*Maximum length for sequences*/ #define MAXAS MAXSE+100 /*Maximum length for aligned sequences*/ #define tnsq 2*NSEQ-3 /*Total number of branches*/ #define nsq NSEQ*(NSEQ-1)/2 /*Upper triangle*/ #define tpnsq 2*(2*NSEQ-3) /*Total number of matrix elements*/ int pam250[21][21] = { {20, 8, 6, 5, 6, 5, 4, 3, 3, 3, 5, 4, 3, 3, 6, 2, 6, 4, 8, 0, 0}, { 8,10, 9, 9, 9, 9, 9, 8, 8, 7, 7, 8, 8, 6, 7, 5, 7, 5, 5, 6, 0}, { 6, 9,11, 8, 9, 8, 8, 8, 8, 7, 7, 7, 8, 7, 8, 6, 8, 5, 5, 3, 0}, { 5, 9, 8,14, 9, 7, 7, 7, 7, 8, 8, 8, 7, 6, 6, 5, 7, 3, 3, 2, 0}, { 6, 9, 9, 9,10, 9, 8, 8, 8, 8, 7, 6, 7, 7, 7, 6, 8, 4, 5, 2, 0}, { 5, 9, 8, 7, 9,13, 8, 9, 8, 7, 6, 5, 6, 5, 5, 4, 7, 3, 3, 1, 0}, { 4, 9, 8, 7, 8, 8,10,10, 9, 9,10, 8, 9, 6, 6, 5, 6, 4, 6, 4, 0}, { 3, 8, 8, 7, 8, 9,10,12,11,10, 9, 7, 8, 5, 6, 4, 6, 2, 4, 1, 0}, { 3, 8, 8, 7, 8, 8, 9,11,12,10, 9, 7, 8, 6, 6, 5, 6, 3, 4, 1, 0}, { 3, 7, 7, 8, 8, 7, 9,10,10,12,11, 9, 9, 7, 6, 6, 6, 3, 4, 3, 0}, { 5, 7, 7, 8, 7, 6,10, 9, 9,11,14,10, 8, 6, 6, 6, 6, 6, 8, 5, 0}, { 4, 8, 7, 8, 6, 5, 8, 7, 7, 9,10,14,11, 8, 6, 5, 6, 4, 4,10, 0}, { 3, 8, 8, 7, 7, 6, 9, 8, 8, 9, 8,11,13, 8, 6, 5, 6, 3, 4, 5, 0}, { 3, 6, 7, 6, 7, 5, 6, 5, 6, 7, 6, 8, 8,14,10,12,10, 8, 6, 4, 0}, { 6, 7, 8, 6, 7, 5, 6, 6, 6, 6, 6, 6, 6,10,13,10,12, 9, 7, 3, 0}, { 2, 5, 6, 5, 6, 4, 5, 4, 5, 6, 6, 5, 5,12,10,14,10,10, 7, 6, 0}, { 6, 7, 8, 7, 8, 7, 6, 6, 6, 6, 6, 6, 6,10,12,10,12, 7, 6, 2, 0}, { 4, 5, 5, 3, 4, 3, 4, 2, 3, 3, 6, 4, 3, 8, 9,10, 7,17,15, 8, 0}, { 8, 5, 5, 3, 5, 3, 6, 4, 4, 4, 8, 4, 4, 6, 7, 7, 6,15,18, 8, 0}, { 0, 6, 3, 2, 2, 1, 4, 1, 1, 3, 5,10, 5, 4, 3, 6, 2, 8, 8,25, 0}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, }; int Gonnet[21][21] = { {20, 8, 7, 5, 9, 6, 6, 5, 5, 6, 7, 6, 5, 7, 7, 6, 8, 7, 8, 7, 0}, { 8,10,10, 8, 9, 8, 9, 9, 8, 8, 8, 8, 8, 7, 6, 6, 7, 5, 6, 5, 0}, { 7,10,11, 8, 9, 7,10, 8, 8, 8, 8, 8, 8, 7, 7, 7, 8, 6, 6, 4, 0}, { 5, 8, 8,15, 8, 6, 7, 7, 7, 8, 7, 7, 7, 6, 5, 6, 6, 4, 5, 3, 0}, { 9, 9, 9, 8,10, 9, 8, 8, 8, 8, 7, 7, 8, 7, 7, 7, 8, 6, 6, 4, 0}, { 6, 8, 7, 6, 9,15, 8, 8, 7, 7, 7, 7, 7, 4, 3, 4, 4, 3, 4, 4, 0}, { 6, 9,10, 7, 8, 8,12,10, 9, 9, 9, 8, 9, 6, 5, 5, 6, 5, 7, 4, 0}, { 5, 9, 8, 7, 8, 8,10,13,11, 9, 8, 8, 9, 5, 4, 4, 5, 3, 5, 3, 0}, { 5, 8, 8, 7, 8, 7, 9,11,12,10, 8, 8, 9, 6, 5, 5, 6, 4, 5, 4, 0}, { 6, 8, 8, 8, 8, 7, 9, 9,10,11, 9,10,10, 7, 6, 6, 6, 5, 6, 5, 0}, { 7, 8, 8, 7, 7, 7, 9, 8, 8, 9,14, 9, 9, 7, 6, 6, 6, 8,10, 7, 0}, { 6, 8, 8, 7, 7, 7, 8, 8, 8,10, 9,13,11, 6, 6, 6, 6, 5, 6, 6, 0}, { 5, 8, 8, 7, 8, 7, 9, 9, 9,10, 9,11,11, 7, 6, 6, 6, 5, 6, 4, 0}, { 7, 7, 7, 6, 7, 4, 6, 5, 6, 7, 7, 6, 7,12,11,11,10,10, 8, 7, 0}, { 7, 6, 7, 5, 7, 3, 5, 4, 5, 6, 6, 6, 6,11,12,11,11, 9, 7, 6, 0}, { 6, 6, 7, 6, 7, 4, 5, 4, 5, 6, 6, 6, 6,11,11,12,10,10, 8, 7, 0}, { 8, 7, 8, 6, 8, 4, 6, 5, 6, 6, 6, 6, 6,10,11,10,11, 8, 7, 5, 0}, { 7, 5, 6, 4, 6, 3, 5, 3, 4, 5, 8, 5, 5,10, 9,10, 8,15,13,12, 0}, { 7, 6, 6, 5, 6, 4, 7, 5, 5, 6,10, 6, 6, 8, 7, 8, 7,13,15,12, 0}, { 7, 5, 4, 3, 4, 4, 4, 3, 4, 5, 7, 6, 4, 7, 6, 7, 5,12,12,22, 0}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, }; int Blosum62[21][21] = { {13, 3, 3, 1, 4, 1, 1, 1, 0, 1, 1, 1, 1, 3, 3, 3, 3, 2, 2, 2, 0}, { 3, 8, 5, 3, 5, 4, 5, 4, 4, 4, 3, 3, 4, 3, 2, 2, 2, 2, 2, 1, 0}, { 3, 5, 9, 3, 4, 2, 4, 3, 3, 3, 2, 3, 3, 3, 3, 3, 4, 2, 2, 2, 0}, { 1, 3, 3,11, 3, 2, 2, 3, 3, 3, 2, 2, 3, 2, 1, 1, 2, 0, 1, 0, 0}, { 4, 5, 4, 3, 8, 4, 2, 2, 3, 3, 2, 3, 3, 3, 3, 3, 4, 2, 2, 1, 0}, { 1, 4, 2, 2, 4,10, 4, 3, 2, 2, 2, 2, 2, 1, 0, 0, 1, 1, 1, 2, 0}, { 1, 5, 4, 2, 2, 4,10, 5, 4, 4, 5, 4, 4, 2, 1, 1, 1, 1, 2, 0, 0}, { 1, 4, 3, 3, 2, 3, 5,10, 6, 4, 3, 2, 3, 1, 1, 0, 1, 1, 1, 0, 0}, { 0, 4, 3, 3, 3, 2, 4, 6, 9, 6, 4, 4, 5, 2, 1, 1, 2, 1, 2, 1, 0}, { 1, 4, 3, 3, 3, 2, 4, 4, 6, 9, 4, 5, 5, 4, 1, 2, 2, 1, 3, 2, 0}, { 1, 3, 2, 2, 2, 2, 5, 3, 4, 4,12, 4, 3, 2, 1, 1, 1, 3, 6, 2, 0}, { 1, 3, 3, 2, 3, 2, 4, 2, 4, 5, 4, 9, 6, 3, 1, 2, 1, 1, 2, 1, 0}, { 1, 4, 3, 3, 3, 2, 4, 3, 5, 5, 3, 6, 9, 3, 1, 2, 2, 1, 2, 1, 0}, { 3, 3, 3, 2, 3, 1, 2, 1, 2, 4, 2, 3, 3, 9, 5, 6, 5, 4, 3, 3, 0}, { 3, 2, 3, 1, 3, 0, 1, 1, 1, 1, 1, 1, 1, 5, 8, 6, 7, 4, 3, 1, 0}, { 3, 2, 3, 1, 3, 0, 1, 0, 1, 2, 1, 2, 2, 6, 6, 8, 5, 4, 3, 2, 0}, { 3, 2, 4, 2, 4, 1, 1, 1, 2, 2, 1, 1, 2, 5, 7, 5, 8, 3, 3, 1, 0}, { 2, 2, 2, 0, 2, 1, 1, 1, 1, 1, 3, 1, 1, 4, 4, 4, 3,10, 7, 5, 0}, { 2, 2, 2, 1, 2, 1, 2, 1, 2, 3, 6, 2, 2, 3, 3, 3, 3, 7,11, 6, 0}, { 2, 1, 2, 0, 1, 2, 0, 0, 1, 2, 2, 1, 1, 3, 1, 2, 1, 5, 6,15, 0}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, }; int mptable[21][21]; int index[26]={ 4, 7, 0, 7, 8,17, 5,10, 14,20,12,15,13, 6,20, 3, 9,11, 1, 2,20,16,19,20, 18, 8, }; char seq1[MAXAS], seq2[MAXAS]; /*Used in shuffle, dummy arrays*/ char seqt[NSEQ][100],code[NSEQ][8]; char seq[NSEQ][MAXAS],oseq[NSEQ][MAXAS],wseq[NSEQ][MAXAS]; char aseq1[MAXAS],aseq2[MAXAS]; char clus[NSEQ]; int L[MAXAS][MAXAS], R[MAXAS][MAXAS], M[MAXAS][MAXAS]; int lseq[NSEQ], nseq, cnum[NSEQ]; int pen; /*None negative gap penalty*/ int wgseq, wgt; int iseq, family[NSEQ][NSEQ],ZGROUP,print_flag; int loop,nrow,ncol; float rndmax = {2147483647.}; double sreal[NSEQ][NSEQ],shmean[NSEQ][NSEQ],siden[NSEQ][NSEQ]; double D[NSEQ][NSEQ],pro[NSEQ][NSEQ]; double RB[tnsq],A[nsq][tnsq],Y[nsq]; main(ac,av) int ac; char *av[]; { FILE *fd1, *fd2, *fd3, *fd4, *fd5, *fd6, *fd7, *fd8, *fd9, *fopen(); char dseq[MAXAS], dummy_array[100]; /*dummy sequence array*/ char cdum; char segment[NSEQ]; char ori_branch[400],rnd_branch[400]; char orderstring[400],temp_string[400]; int i,j,si,sj,NSDL,nax,cu_nseq,tmp_nseq,tmp_si,tmp_sj; int cflag1,cflag2,ik; int nsub_cl,kk,clus_cnt,dum; int pchange,nchange,ntimes,hits,ixx,achange,ichange; int ipp,kind,same,pc_low,pc_high; int k,t,v,q,n,ta[NSEQ],side,rn,flseq,ifront,iend; int count_x,cutoff_x,drop_x[200],ndrop_x; int adj; float bR,psd,psdmin,sum_psd,num_noneg; double sim_score1(),score1,score2,sum,ddum,cum; /* Check for correct command line */ if(ac<4) { fprintf(stderr,"\nPurpose: Sample Multiple Alignments and"); fprintf(stderr," Generate New Trees for Boostrapping\n\t"); fprintf(stderr," Procedure.\n"); fprintf(stderr,"Usage: "); fprintf(stderr,"%s rf wf1 M [>wf2]\n",av[0]); fprintf(stderr,"\trf: All sequences arranged in "); fprintf(stderr,"binary alignment order.\n"); fprintf(stderr,"\twf1: Aligned sequences.\n"); fprintf(stderr,"\tM: Scoring matrix.\n"); fprintf(stderr,"\t\tType D(for PAM250), "); fprintf(stderr,"G(for Gonnet), or B(for Blosum62).\n"); fprintf(stderr,"\twf2: Alignment results.\n"); fprintf(stderr,"Current limit: Sequence length =< %d,",MAXSE-1); fprintf(stderr," NSEQ =< %d.\n\n",NSEQ); exit(0); } /* Open files */ if((fd1 = fopen(av[1],"r")) == NULL) /*error check*/ exit(fprintf(stderr,"%s: can't open %s\n",av[1]) ); if((fd2 = fopen(av[2],"w")) == NULL) /*error check*/ exit(fprintf(stderr,"%s: can't open %s\n",av[2])); if((fd9 = fopen("rboot","w")) == NULL) /*error check*/ exit(fprintf(stderr,"%s: can't open rboot\n")); srandom(6); /* Set weighting, shuffle times and gap penalty */ wgseq=50; wgt=1; if(*av[3]=='D') { pen=8; for(i=0; i<21; i++) for(j=0; j<21; ++j) mptable[i][j] = pam250[i][j]; } else if(*av[3]=='G') { pen=8; for(i=0; i<21; i++) for(j=0; j<21; ++j) mptable[i][j] = Gonnet[i][j]; } else if(*av[3]=='B') { pen=6; for(i=0; i<21; i++) for(j=0; j<21; ++j) mptable[i][j] = Blosum62[i][j]; } else { printf("\n*** Matrix-%s is not available. ***\n",av[3]); exit(0); } print_flag = 0; /*Control printing D matrix */ psdmin = 10000.0; /*Minimum %s.d.*/ for(i=0; i<200; ++i) drop_x[i] = -1; /* Read sequences */ Read_seq(fd1); fclose(fd1); printf("\t\t-- PROGRESSIVE (similarity) ALIGNMENT --\n"); if(*av[3]=='D') printf("\t\t\t\tPAM250\n\n"); else if(*av[3]=='B') printf("\t\t\t\tBlosum62\n\n"); else if(*av[3]=='G') printf("\t\t\t\tGonnet\n\n"); printf("Sequences are entered in the following order:\n"); for(i=0; i= score2) { for(i=0; i<2; ++i) strcpy(seq[i],wseq[i]); if(nseq == 3) { strcpy(seq[2],wseq[2]); rm_cX1(3,seq); goto skip; } } else { strcpy(dummy_array,code[0]); strcpy(code[0],code[1]); strcpy(code[1],dummy_array); strcpy(dummy_array,seqt[0]); strcpy(seqt[0],seqt[1]); strcpy(seqt[1],dummy_array); switch_array(2,oseq); if(nseq == 3) goto skip; } strcpy(seq[2],oseq[2]); tmp_nseq = 3; si=0; sj=1; while(tmp_nseq <= nseq) { nsub_cl = 0; if(clus[sj+1]=='J') cflag1 = 1; else cflag1= 0; if((sj+2)0) { realign_cl(sj+1,sj+2); si++; sj++; tmp_nseq++; nsub_cl--; } goto NORM; } control(&tmp_si,&tmp_sj,&cu_nseq,tmp_nseq,nsub_cl, cflag1,cflag2); rm_cX1(tmp_nseq,seq); score1 = sim_score1(tmp_nseq); if(tmp_nseq==nseq) /*When end is reached, no exchange*/ goto skip; for(i=0; i=score2) { for(i=0; i0) { switch_array(ik+1,seq); switch_array(ik+1,oseq); strcpy(dummy_array,code[ik]); strcpy(code[ik],code[ik-1]); strcpy(code[ik-1],dummy_array); strcpy(dummy_array,seqt[ik]); strcpy(seqt[ik],seqt[ik-1]); strcpy(seqt[ik-1],dummy_array); cdum=clus[ik]; clus[ik]=clus[ik-1]; clus[ik-1]=cdum; dum=cnum[tmp_nseq]; cnum[tmp_nseq]=cnum[tmp_nseq-1]; cnum[tmp_nseq-1]=dum; realign_cl(ik-2,ik-1); si++; sj++; tmp_nseq++; nsub_cl--; ik++; } goto NORM1; } } NORM: si++; sj++; tmp_nseq++; NORM1: ; } skip: rpJwX(); rm_cX1(nseq,seq); /* Re-format the original sequences: remove the blank space at the beginning and append a '*' at the end of the sequences */ for(i=0; i10000, this number has to be increased*/ flseq = strlen(seq[0])-1; for(i=0; iifront) ifront = j; else break; } } for(i=0; i0; ) { if(seq[i][j]=='X') j--; else if(j= cutoff_x) drop_x[k++] = i; } ndrop_x = k; sum_psd = num_noneg = 0; for(loop=0; loop<18000; loop++) { /* Print computed parameters */ para(); /* Calculate Srand */ shuffle(); /* Compute difference score */ adj = 0; for(i=0; i='A' && temp_string[i]<='Z') fprintf(fd9,"%s",code[family[0][j++]-1]); else if(temp_string[i]>='a' && temp_string[i]<='z') fprintf(fd9,"%s",code[family[0][j++]-1]); else fprintf(fd9,"%c",temp_string[i]); i++; } fprintf(fd9,")\n"); if((fd4 = fopen("albr","w")) == NULL) /*error check*/ exit(fprintf(stderr,"%s: can't open albr for writing in main.\n")); for(i=0;i<=ZGROUP;++i) { if(i==0) { j=0; for(v=0;family[i][v]!=0;++v) { if(v<26) ta[family[i][v]] = j+65; else if(v<52) ta[family[i][v]] = j+71; else exit(printf("Too many sequences!\n")); j++; } } for(n=0;family[i][n]!=0;++n) fprintf(fd4,"%c",ta[family[i][n]]); fprintf(fd4,"\n"); } fprintf(fd4,"*\n"); if((fd5 = fopen("algr","w")) == NULL) /*error check*/ exit(fprintf(stderr,"%s: can't open algr for writing in main.\n")); for(i=0; i0) printf("**Certain Sreal are adjusted!**"); printf("\nMain Branch: "); side = 0; for(i=0;i<=ZGROUP;++i) { if(side==1) printf("\n Clusters: "); else if(side>1) printf("\n "); printf("(%s,%s)",code[family[i][0]-1],code[family[i][1]-1]); for(v=2;family[i][v]!=0;++v) { printf("%s)",code[family[i][v]-1]); if(v%11==0 && v==nseq-1) ; else if(v%11==0) printf("\n "); } side++; } printf("\n"); printf("Branch lengths: (r1,r2,r3,r4,r5)\n"); printf(" (r6,r7,........)\n"); for(j=0; j=9) { c = fgetc(fd); it--; if(c>='0' && c<='9') cnum[m] = 10*cnum[m] + (c - 48); } for(i=0; i MAXSE-1) { printf("%s length exceeded %d,",code[m],MAXSE); printf(" execution terminated!\n"); printf("Shorten %s or increase MAXAS in program.\n",code[m]); exit(); } seq[m][i]='\0'; oseq[m][i]='\0'; m++; nseq++; } } Align(si,sj,nsc,cf1,cf2) int si,sj,nsc,cf1,cf2; { int sigma; /*Similarity constant between pairs of letters*/ int Sim; /*Aligned sequences optimal similarity score*/ int ssigma; /*Sum of sigmas*/ int Mmax,dum1,dum2,jj,kk,kp,m,n,i,j,ij,k,c,t,dcd; int mtch,coeff; float fidty,lsmall,fsim,NAS; float fsigma,sigmaf,test; /* Construct the first row and column of the 3 matrices */ for( j=0; seq[si][j]!='\0'; ++j ) L[j][0] = R[j][0] = M[j][0] = 0; for( k=0; seq[sj][k]!='\0'; ++k ) L[0][k] = R[0][k] = M[0][k] = 0; /* Construct the remaining elements of the 3 matrices */ for( j=1; seq[si][j]!='\0'; ++j ) { jj = j - 1; for( k=1; seq[sj][k]!='\0'; ++k ) { kk = k - 1; coeff=1; dcd=sigma=mtch=0; /*Scoring current sequence against all earlier sequences*/ for(i=0; i (wgseq-2) && mtch == dcd ) coeff = wgt; fsigma = (float)sigma/(float)dcd; sigma = fsigma; sigmaf = sigma; test = fsigma - sigmaf; if(test>0.5) sigma++; sigma = coeff*sigma; Mmax = M[jj][kk]; /*Determine M[j][k]*/ dum1 = L[jj][kk] - pen; dum2 = R[jj][kk] - pen; if( Mmax < dum1 ) Mmax = dum1; if( Mmax < dum2 ) Mmax = dum2; M[j][k] = Mmax + sigma; L[j][k] = L[jj][k]; /*Determine L[j][k]*/ if( L[jj][k] < M[jj][k] ) L[j][k] = M[jj][k]; R[j][k] = R[j][kk]; /*Determine R[j][k]*/ if( R[j][kk] < M[j][kk] ) R[j][k] = M[j][kk]; } } /* Determine the optimal score of the aligned sequences */ m = strlen(seq[si])-1; n = strlen(seq[sj])-1; Sim = ( R[m][n] > L[m][n] ) ? R[m][n] : L[m][n]; Sim = ( Sim > M[m][n] ) ? Sim : M[m][n]; /* Tracing out the aligned sequences */ j = strlen(seq[si])-1; k = strlen(seq[sj])-1; c = 0; if( Sim == M[j][k] ) { aseq1[c] = seq[si][j]; aseq2[c] = seq[sj][k]; c++; branchM(si,sj,&j,&k,&c); } else if( Sim == L[j][k] ) { aseq1[c] = seq[si][j]; aseq2[c] = ' '; c++; branchL(si,sj,&j,&k,&c); } else { aseq1[c] = ' '; aseq2[c] = seq[sj][k]; c++; branchR(si,sj,&j,&k,&c); } /* Catch the N-terminal overhangs */ if( j==0 ) { while( k>-1 ) { aseq1[c] = ' '; aseq2[c++] = seq[sj][--k]; } } if( k==0 ) { while( j>-1 ) { aseq1[c] = seq[si][--j]; aseq2[c++] = ' '; } } if( c>MAXAS ) /*error check*/ exit(printf("job aborted, please increase MAXAS")); /* Replace the sequences in the aligned form */ j=c-3; i=1; if(si>0) { while(j>-1) { if(aseq1[j]==' ') seq[si][i]='U'; else seq[si][i]=aseq1[j]; i++; j--; } seq[si][i]='\0'; } else { while(j>-1) { if(aseq1[j]==' ') seq[si][i]='X'; else seq[si][i]=aseq1[j]; i++; j--; } seq[si][i]='\0'; } j=c-3; i=1; /*First element must be blank*/ while(j>-1) { if(aseq2[j]==' ') seq[sj][i]='X'; else seq[sj][i]=aseq2[j]; i++; j--; } seq[sj][i]='\0'; } branchM(si,sj,pj,pk,pc) int si,sj, *pj, *pk, *pc; { int Mmax; if( (*pj==0) || (*pk==0) ) return; (*pj)--; (*pk)--; Mmax = M[*pj][*pk]; if( Mmax < L[*pj][*pk] - pen ) Mmax = L[*pj][*pk] - pen; if( Mmax < R[*pj][*pk] - pen ) Mmax = R[*pj][*pk] -pen; if( Mmax == M[*pj][*pk] ) { aseq1[*pc] = seq[si][*pj]; aseq2[*pc] = seq[sj][*pk]; (*pc)++; branchM(si,sj,pj,pk,pc); return; } else if( Mmax == (L[*pj][*pk] -pen) ) { aseq1[*pc] = seq[si][*pj]; aseq2[*pc] = ' '; (*pc)++; branchL(si,sj,pj,pk,pc); return; } else { aseq1[*pc] = ' '; aseq2[*pc] = seq[sj][*pk]; (*pc)++; branchR(si,sj,pj,pk,pc); return; } } branchL(si,sj,pj,pk,pc) int si,sj, *pj, *pk, *pc; { if( *pj==0 ) return; (*pj)--; aseq1[*pc] = seq[si][*pj]; if( L[*pj][*pk] > M[*pj][*pk] ) { aseq2[*pc] = ' '; (*pc)++; branchL(si,sj,pj,pk,pc); return; } else { aseq2[*pc] = seq[sj][*pk]; (*pc)++; branchM(si,sj,pj,pk,pc); return; } } branchR(si,sj,pj,pk,pc) int si,sj, *pj, *pk, *pc; { if( *pk==0 ) return; (*pk)--; aseq2[*pc] = seq[sj][*pk]; if( R[*pj][*pk] > M[*pj][*pk] ) { aseq1[*pc] = ' '; (*pc)++; branchR(si,sj,pj,pk,pc); return; } else { aseq1[*pc] = seq[si][*pj]; (*pc)++; branchM(si,sj,pj,pk,pc); return; } } BkUtoX(si) /*Change Blank or 'U' to 'X'*/ int si; { int i; for(i=1; ;++i) { if(seq[si][i]=='\0') break; else if(seq[si][i]==' '||seq[si][i]=='U') seq[si][i]='X'; } seq[si][i]='\0'; } Re_align(si,sj) /*sj contains 'U' */ int si,sj; { int i, ii; char d_asi[MAXAS]; /* Dummy array */ ii=1; d_asi[0]=' '; for(i=1; ;++i) /*i=1 because 1st element is blank*/ { if(seq[sj][i]=='\0') break; else if(seq[sj][i]!='U') { d_asi[i]=seq[si][ii]; ii++; } else if(seq[sj][i]=='U') d_asi[i]='U'; } d_asi[i]='\0'; strcpy(seq[si],d_asi); return; } format_seq(si) /*Remove first blank and append '*' in seq[]*/ int si; { int i; char d_as[MAXAS]; for(i=1; ;++i) { if(seq[si][i]=='\0') break; else d_as[i-1]=seq[si][i]; } d_as[i-1]='*'; d_as[i]='\0'; strcpy(seq[si],d_as); return; } mulalign(fd,NSDL,nax) int NSDL, nax; FILE *fd; { int i, j, k, nline; /* Copy all sequences into a file */ for(i=0; i=NSDL ) printf("%c_",seq[j][k]); else printf("%-2c",seq[j][k]); } } } /* printf("\n\nSequence order after progressive alignment:\n"); for(i=0; i0 ) { (*psi)--; (*psj)--; Re_align(*psi,*psj); palign(psi,psj); } return; } para() { int asl,flag,i,iden,nar,j,k,l,ls,shtseq,rsl[NSEQ]; double fns,fls,piden[NSEQ][NSEQ],Sreal,Stotal,sum,sum1,sum2; for(i=0; i0)&&((seq[i][k]=='X')&& (seq[j][k]!='X'))) ; else if(flag>0) ; sum1 += mptable[index[seq[i][k]-'A']] [index[seq[i][k]-'A']]; sum2 += mptable[index[seq[j][k]-'A']] [index[seq[j][k]-'A']]; } if(flag>0) /*True if overhang present at C-term*/ Sreal += pen; Sreal = Sreal/10; Stotal += Sreal; sreal[i][j] = sreal[j][i] = Sreal; siden[i][j] = siden[j][i] = (sum1+sum2)/20; piden[i][j] = (float)iden*100.0/(float)nar; piden[j][i] = piden[i][j]; } } if(loop==0) { printf("\nSimilarity Scores:\n"); printf(" "); for(i=0; i0)&&((seq[i][k]=='X')&& (seq[j][k]!='X'))) ; else if(flag>0) ; } if(flag>0) /*True if overhang present at C-term*/ Sreal += pen; Sreal = Sreal/10; Stotal += Sreal; } } Stotal = Stotal/nar; return(Stotal); } switch_array(tmp_nseq,array) int tmp_nseq; char (*array)[MAXAS]; { int i; char dum_array[MAXAS]; strcpy(dum_array,array[tmp_nseq-1]); strcpy(array[tmp_nseq-1],array[tmp_nseq-2]); strcpy(array[tmp_nseq-2],dum_array); } rm_cX1(tmp_nseq,array) int tmp_nseq; char (*array)[MAXAS]; { int flag,i,j,m,n; /*Removing X's that appear in all sequences at the same location*/ for(i=1 ; array[0][i]!='\0'; ++i) { flag=0; /*means all 'X'*/ for(j=0; j0) { gps1++; gflag=0; } ii++; } gps2=0; gflag=0; for(ii=0; ii0) { gps2++; gflag=0; } ii++; } rand_score = 0; for(ii=0; ii<26; ++ii) h1[ii] = h2[ii] = 0; ttlseq1 = 0; for(ii=0; ii0) { tlgap += gaps1[ii]; tngap++; } for(ii=0; ii<20; ++ii) if(gaps2[ii]>0) { tlgap += gaps2[ii]; tngap++; } if(seq1[0]=='X') tngap--; if(seq1[tlseq1-1]=='X') tngap--; if(seq2[0]=='X') tngap--; if(seq2[tlseq1-1]=='X') tngap--; for(ii=0; ii<26; ++ii) for(jj=0; jj<26; ++jj) rand_score += (float)mptable[index[aa [ii]-'A']][index[aa[jj]-'A']]*h1[ii] *h2[jj]; rand_score1 = ((rand_score/ttlseq1)-pen*tngap)/10.0; shmean[i][j] = shmean[j][i] = rand_score1; } } } rm_cX2(plseq1,plseq2) int *plseq1, *plseq2; { int i,k,kp; for(i=0; seq1[i]!='\0'; ++i) { if(seq1[i]=='X' && seq1[i]==seq2[i]) { k=i; while(seq1[k+1]!='\0') { seq1[k]=seq1[k+1]; k++; } seq1[k]='\0'; kp=i; while(seq2[kp+1]!='\0') { seq2[kp]=seq2[kp+1]; kp++; } seq2[kp]='\0'; i--; } /* If seq have no X's, have to take into account here */ } *plseq1 = k; *plseq2 = kp; return; } rpJwX() /*Replace J in sequences with X*/ { int i,j; for(i=0; i pro[j][++k]) { *pminj1 = j; *pmink1 = k; } else continue; } ++j; k=j; } return; } NEWMATRIX() { int i,j,k,q,qq1,qq2; j=1; k=2; while(j1) { for(j=1; j='A' && *pts<='Z') { or[nseq] = *pts - 65; lastor = or[nseq] + 1; nseq++; } else if(*pts>='a' && *pts<='z') { or[nseq] = *pts - 97 + lastor; nseq++; } pts++; } /*Read distance scores*/ for(i=0; ifabs(a[l][k])) l=i; } if(l!=k) { for(j=k; j<2*n; j++) { dum=a[k][j]; a[k][j]=a[l][j]; a[l][j]=dum; } } for(i=k+1; i=0; --i) for(k=n; ka[j]) { temp = a[i]; a[i] = a[j]; a[j] = temp; } }