/* Program From Progressive Alignment of Amino Acid Sequences and Construction of Phylogenetic Trees from Them, by D. F. Feng and R. F. Doolittle, Methods of Enzymology, v266, p368 (1996) 2/7/1996 Read "readme.doc" before using. to compile on SGI: cc -lm arrange.c -o arrange */ #include #include #define NSEQ 75 #define MAXS 2000 #define MAXAS MAXS+100 int pam250[21][21] = { {20, 8, 6, 5, 6, 5, 4, 3, 3, 3, 5, 4, 3, 3, 6, 2, 6, 4, 8, 0, 0}, { 8,10, 9, 9, 9, 9, 9, 8, 8, 7, 7, 8, 8, 6, 7, 5, 7, 5, 5, 6, 0}, { 6, 9,11, 8, 9, 8, 8, 8, 8, 7, 7, 7, 8, 7, 8, 6, 8, 5, 5, 3, 0}, { 5, 9, 8,14, 9, 7, 7, 7, 7, 8, 8, 8, 7, 6, 6, 5, 7, 3, 3, 2, 0}, { 6, 9, 9, 9,10, 9, 8, 8, 8, 8, 7, 6, 7, 7, 7, 6, 8, 4, 5, 2, 0}, { 5, 9, 8, 7, 9,13, 8, 9, 8, 7, 6, 5, 6, 5, 5, 4, 7, 3, 3, 1, 0}, { 4, 9, 8, 7, 8, 8,10,10, 9, 9,10, 8, 9, 6, 6, 5, 6, 4, 6, 4, 0}, { 3, 8, 8, 7, 8, 9,10,12,11,10, 9, 7, 8, 5, 6, 4, 6, 2, 4, 1, 0}, { 3, 8, 8, 7, 8, 8, 9,11,12,10, 9, 7, 8, 6, 6, 5, 6, 3, 4, 1, 0}, { 3, 7, 7, 8, 8, 7, 9,10,10,12,11, 9, 9, 7, 6, 6, 6, 3, 4, 3, 0}, { 5, 7, 7, 8, 7, 6,10, 9, 9,11,14,10, 8, 6, 6, 6, 6, 6, 8, 5, 0}, { 4, 8, 7, 8, 6, 5, 8, 7, 7, 9,10,14,11, 8, 6, 5, 6, 4, 4,10, 0}, { 3, 8, 8, 7, 7, 6, 9, 8, 8, 9, 8,11,13, 8, 6, 5, 6, 3, 4, 5, 0}, { 3, 6, 7, 6, 7, 5, 6, 5, 6, 7, 6, 8, 8,14,10,12,10, 8, 6, 4, 0}, { 6, 7, 8, 6, 7, 5, 6, 6, 6, 6, 6, 6, 6,10,13,10,12, 9, 7, 3, 0}, { 2, 5, 6, 5, 6, 4, 5, 4, 5, 6, 6, 5, 5,12,10,14,10,10, 7, 6, 0}, { 6, 7, 8, 7, 8, 7, 6, 6, 6, 6, 6, 6, 6,10,12,10,12, 7, 6, 2, 0}, { 4, 5, 5, 3, 4, 3, 4, 2, 3, 3, 6, 4, 3, 8, 9,10, 7,17,15, 8, 0}, { 8, 5, 5, 3, 5, 3, 6, 4, 4, 4, 8, 4, 4, 6, 7, 7, 6,15,18, 8, 0}, { 0, 6, 3, 2, 2, 1, 4, 1, 1, 3, 5,10, 5, 4, 3, 6, 2, 8, 8,25, 0}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, }; int Gonnet[21][21] = { {20, 8, 7, 5, 9, 6, 6, 5, 5, 6, 7, 6, 5, 7, 7, 6, 8, 7, 8, 7, 0}, { 8,10,10, 8, 9, 8, 9, 9, 8, 8, 8, 8, 8, 7, 6, 6, 7, 5, 6, 5, 0}, { 7,10,11, 8, 9, 7,10, 8, 8, 8, 8, 8, 8, 7, 7, 7, 8, 6, 6, 4, 0}, { 5, 8, 8,15, 8, 6, 7, 7, 7, 8, 7, 7, 7, 6, 5, 6, 6, 4, 5, 3, 0}, { 9, 9, 9, 8,10, 9, 8, 8, 8, 8, 7, 7, 8, 7, 7, 7, 8, 6, 6, 4, 0}, { 6, 8, 7, 6, 9,15, 8, 8, 7, 7, 7, 7, 7, 4, 3, 4, 4, 3, 4, 4, 0}, { 6, 9,10, 7, 8, 8,12,10, 9, 9, 9, 8, 9, 6, 5, 5, 6, 5, 7, 4, 0}, { 5, 9, 8, 7, 8, 8,10,13,11, 9, 8, 8, 9, 5, 4, 4, 5, 3, 5, 3, 0}, { 5, 8, 8, 7, 8, 7, 9,11,12,10, 8, 8, 9, 6, 5, 5, 6, 4, 5, 4, 0}, { 6, 8, 8, 8, 8, 7, 9, 9,10,11, 9,10,10, 7, 6, 6, 6, 5, 6, 5, 0}, { 7, 8, 8, 7, 7, 7, 9, 8, 8, 9,14, 9, 9, 7, 6, 6, 6, 8,10, 7, 0}, { 6, 8, 8, 7, 7, 7, 8, 8, 8,10, 9,13,11, 6, 6, 6, 6, 5, 6, 6, 0}, { 5, 8, 8, 7, 8, 7, 9, 9, 9,10, 9,11,11, 7, 6, 6, 6, 5, 6, 4, 0}, { 7, 7, 7, 6, 7, 4, 6, 5, 6, 7, 7, 6, 7,12,11,11,10,10, 8, 7, 0}, { 7, 6, 7, 5, 7, 3, 5, 4, 5, 6, 6, 6, 6,11,12,11,11, 9, 7, 6, 0}, { 6, 6, 7, 6, 7, 4, 5, 4, 5, 6, 6, 6, 6,11,11,12,10,10, 8, 7, 0}, { 8, 7, 8, 6, 8, 4, 6, 5, 6, 6, 6, 6, 6,10,11,10,11, 8, 7, 5, 0}, { 7, 5, 6, 4, 6, 3, 5, 3, 4, 5, 8, 5, 5,10, 9,10, 8,15,13,12, 0}, { 7, 6, 6, 5, 6, 4, 7, 5, 5, 6,10, 6, 6, 8, 7, 8, 7,13,15,12, 0}, { 7, 5, 4, 3, 4, 4, 4, 3, 4, 5, 7, 6, 4, 7, 6, 7, 5,12,12,22, 0}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, }; int Blosum62[21][21] = { {13, 3, 3, 1, 4, 1, 1, 1, 0, 1, 1, 1, 1, 3, 3, 3, 3, 2, 2, 2, 0}, { 3, 8, 5, 3, 5, 4, 5, 4, 4, 4, 3, 3, 4, 3, 2, 2, 2, 2, 2, 1, 0}, { 3, 5, 9, 3, 4, 2, 4, 3, 3, 3, 2, 3, 3, 3, 3, 3, 4, 2, 2, 2, 0}, { 1, 3, 3,11, 3, 2, 2, 3, 3, 3, 2, 2, 3, 2, 1, 1, 2, 0, 1, 0, 0}, { 4, 5, 4, 3, 8, 4, 2, 2, 3, 3, 2, 3, 3, 3, 3, 3, 4, 2, 2, 1, 0}, { 1, 4, 2, 2, 4,10, 4, 3, 2, 2, 2, 2, 2, 1, 0, 0, 1, 1, 1, 2, 0}, { 1, 5, 4, 2, 2, 4,10, 5, 4, 4, 5, 4, 4, 2, 1, 1, 1, 1, 2, 0, 0}, { 1, 4, 3, 3, 2, 3, 5,10, 6, 4, 3, 2, 3, 1, 1, 0, 1, 1, 1, 0, 0}, { 0, 4, 3, 3, 3, 2, 4, 6, 9, 6, 4, 4, 5, 2, 1, 1, 2, 1, 2, 1, 0}, { 1, 4, 3, 3, 3, 2, 4, 4, 6, 9, 4, 5, 5, 4, 1, 2, 2, 1, 3, 2, 0}, { 1, 3, 2, 2, 2, 2, 5, 3, 4, 4,12, 4, 3, 2, 1, 1, 1, 3, 6, 2, 0}, { 1, 3, 3, 2, 3, 2, 4, 2, 4, 5, 4, 9, 6, 3, 1, 2, 1, 1, 2, 1, 0}, { 1, 4, 3, 3, 3, 2, 4, 3, 5, 5, 3, 6, 9, 3, 1, 2, 2, 1, 2, 1, 0}, { 3, 3, 3, 2, 3, 1, 2, 1, 2, 4, 2, 3, 3, 9, 5, 6, 5, 4, 3, 3, 0}, { 3, 2, 3, 1, 3, 0, 1, 1, 1, 1, 1, 1, 1, 5, 8, 6, 7, 4, 3, 1, 0}, { 3, 2, 3, 1, 3, 0, 1, 0, 1, 2, 1, 2, 2, 6, 6, 8, 5, 4, 3, 2, 0}, { 3, 2, 4, 2, 4, 1, 1, 1, 2, 2, 1, 1, 2, 5, 7, 5, 8, 3, 3, 1, 0}, { 2, 2, 2, 0, 2, 1, 1, 1, 1, 1, 3, 1, 1, 4, 4, 4, 3,10, 7, 5, 0}, { 2, 2, 2, 1, 2, 1, 2, 1, 2, 3, 6, 2, 2, 3, 3, 3, 3, 7,11, 6, 0}, { 2, 1, 2, 0, 1, 2, 0, 0, 1, 2, 2, 1, 1, 3, 1, 2, 1, 5, 6,15, 0}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, }; int mptable[21][21]; int index[26]={ 4, 7, 0, 7, 8,17, 5,10, 14,20,12,15,13, 6,20, 3, 9,11, 1, 2,20,16,19,20, 18, 8, }; char aseq1[MAXAS], aseq2[MAXAS]; char seq1[MAXAS], seq2[MAXAS]; char seqt[NSEQ][200], code[NSEQ][8]; char seq[NSEQ][MAXS]; int L[MAXS][MAXS], R[MAXS][MAXS], M[MAXS][MAXS]; int lseq[NSEQ], nseq; int pen; int bord_flag, Rbase; int family[NSEQ][NSEQ]; double pro[NSEQ][NSEQ]; double D[NSEQ][NSEQ]; main(ac,av) int ac; char *av[]; { FILE *fd1, *fopen(); int i,j,k,si,sj,len; double Align(); if(ac<3) { fprintf(stderr,"\nPurpose: Determine an Approximate"); fprintf(stderr," Branching Order.\n"); fprintf(stderr,"Usage: "); fprintf(stderr,"%s rf M [>wf]\n",av[0]); fprintf(stderr,"\trf: All sequences, any order.\n"); fprintf(stderr,"\tM: Scoring matrix.\n"); fprintf(stderr,"\t\tType D(for PAM250), "); fprintf(stderr,"G(for Gonnet), or B(for Blosum62).\n"); fprintf(stderr,"\twf: Alignment results.\n"); fprintf(stderr,"Current limit: Sequence length =<"); fprintf(stderr," %d(unaligned), %d(aligned), ",MAXS,MAXAS); fprintf(stderr,"NSEQ=<%d.\n",NSEQ); fprintf(stderr,"Note: Sequences in rf will be arranged in"); fprintf(stderr," the same order as in branching order.\n\n"); exit(0); } if( (fd1 = fopen( av[1],"r" )) == NULL ) exit(fprintf(stderr,"%s: can't open %s\n",av[1]) ); if(*av[2]=='D') { pen=8; for(i=0; i<21; i++) for(j=0; j<21; ++j) mptable[i][j] = pam250[i][j]; } else if(*av[2]=='G') { pen=8; for(i=0; i<21; i++) for(j=0; j<21; ++j) mptable[i][j] = Gonnet[i][j]; } else if(*av[2]=='B') { pen=6; for(i=0; i<21; i++) for(j=0; j<21; ++j) mptable[i][j] = Blosum62[i][j]; } else { printf("\n*** Matrix-%s is not available. ***\n",av[2]); exit(0); } Read_seq(fd1); printf("\n\n Binary comparisons between %d ",nseq); printf("protein sequences\n"); printf("\t\t\t*** (gap penalty = %d) ***\n\n",pen); for(i=0; i L[m][n] ) ? R[m][n] : L[m][n]; Sim = ( Sim > M[m][n] ) ? Sim : M[m][n]; fsim = Sim; fsim = fsim/10.0; j = m; k = n; c = 0; if( Sim == M[j][k] ) { aseq1[c] = seq1[j]; aseq2[c] = seq2[k]; c++; branchM(&j,&k,&c); } else if( Sim == L[j][k] ) { aseq1[c] = seq1[j]; aseq2[c] = 'X'; c++; branchL(&j,&k,&c); } else { aseq1[c] = 'X'; aseq2[c] = seq2[k]; c++; branchR(&j,&k,&c); } if( j==1 ) { while( k > 1 ) { aseq1[c] = 'X'; aseq2[c] = seq2[--k]; c++; } } if( k==1 ) { while( j > 1 ) { aseq1[c] = seq1[--j]; aseq2[c] = 'X'; c++; } } aseq1[c] = '\0'; aseq2[c] = '\0'; if( c>MAXAS ) exit(printf("job aborted, increase MAXAS")); lsmall = si; if( si > sj ) lsmall = sj; nc = ir= nar = 0; for( j=0; j= 'A' && aseq1[j] <= 'Z' ) { if( aseq2[j] >= 'A' && aseq2[j] <= 'Z' ) nar++; else ; } else ; } fidty = ir; fidty = fidty / nar; NAS = fsim*100.0 / nar; sum=0; for(i=1; i M[*pj][*pk] ) { aseq2[*pc] = 'X'; (*pc)++; branchL(pj,pk,pc); return; } else { aseq2[*pc] = seq2[*pk]; (*pc)++; branchM(pj,pk,pc); return; } } branchR(pj,pk,pc) int *pj,*pk,*pc; { if( *pk == 1 ) return; (*pk)--; aseq2[*pc] = seq2[*pk]; if( R[*pj][*pk] > M[*pj][*pk] ) { aseq1[*pc] = 'X'; (*pc)++; branchR(pj,pk,pc); return; } else { aseq1[*pc] = seq1[*pj]; (*pc)++; branchM(pj,pk,pc); return; } } bord() { int i,ii,ii1,j,k,q,qz,side,ss1,v,vv,w,z,zt,zz,Z; int minj1,mink1,r,s,t; side = 0; i=qz=1; i=s=Z=0; for(j=1;j1) printf("\n "); printf("(%s,%s)",code[family[i][0]-1], code[family[i][1]-1]); for(v=2;family[i][v]!=0;) { printf("%s)",code[family[i][v]-1]); v++; if(v%11==0) printf("\n "); } side++; } printf("\n"); } FINDMIN(pminj1,pmink1) int *pminj1, *pmink1; { int j,k; j=1; k=2; *pminj1 = j; *pmink1 = k; while(j pro[j][++k]) { *pminj1 = j; *pmink1 = k; } else continue; } ++j; k=j; } return; } NEWMATRIX() { int i,j,k,q,qq1,qq2; j=1; k=2; while(j0) { gps1++; gflag=0; } ii++; } gps2=0; gflag=0; for(ii=0; ii0) { gps2++; gflag=0; } ii++; } rand_score = 0; for(ii=0; ii<26; ++ii) h1[ii] = h2[ii] = 0; ttlseq1 = 0; for(ii=0; ii0) { tlgap += gaps1[ii]; tngap++; } for(ii=0; ii<20; ++ii) if(gaps2[ii]>0) { tlgap += gaps2[ii]; tngap++; } if(s1[0]=='X') tngap--; if(s1[tlseq1-1]=='X') tngap--; if(s2[0]=='X') tngap--; if(s2[tlseq1-1]=='X') tngap--; for(ii=0; ii<26; ++ii) for(jj=0; jj<26; ++jj) rand_score += (float)mptable[index[aa[ii]-'A']] [index[aa[jj]-'A']]*h1[ii]*h2[jj]; rand_score1 = ((rand_score/ttlseq1)-pen*tngap)/10.0; return(rand_score1); }