#include #define MAXS1 603 /*Maximum size for sequence 1*/ #define MAXS2 603 /*Maximum size for sequence 2*/ #define MAXAS 1000 /*Maximum size for aligned sequences*/ #define Mt 25 #define Ms 20 #define Mr 18 #define Mq 17 #define Mp 15 #define Mo 14 #define Mn 13 #define Mm 12 #define Ml 11 #define Mk 10 #define Mj 9 #define Mi 8 #define Mh 7 #define Mg 6 #define Mf 5 #define Me 4 #define Md 3 #define Mc 2 #define Mb 1 #define Ma 0 #define XB 0 int mptable[21][21] = { {Ms,Mi,Mg,Mf,Mg,Mf,Me,Md,Md,Md,Mf,Me,Md,Md,Mg,Mc,Mg,Me,Mi,Ma,XB}, {Mi,Mk,Mj,Mj,Mj,Mj,Mj,Mi,Mi,Mh,Mh,Mi,Mi,Mg,Mh,Mf,Mh,Mf,Mf,Mg,XB}, {Mg,Mj,Ml,Mi,Mj,Mi,Mi,Mi,Mi,Mh,Mh,Mh,Mi,Mh,Mi,Mg,Mi,Mf,Mf,Md,XB}, {Mf,Mj,Mi,Mo,Mj,Mh,Mh,Mh,Mh,Mi,Mi,Mi,Mh,Mg,Mg,Mf,Mh,Md,Md,Mc,XB}, {Mg,Mj,Mj,Mj,Mk,Mj,Mi,Mi,Mi,Mi,Mh,Mg,Mh,Mh,Mh,Mg,Mi,Me,Mf,Mc,XB}, {Mf,Mj,Mi,Mh,Mj,Mn,Mi,Mj,Mi,Mh,Mg,Mf,Mg,Mf,Mf,Me,Mh,Md,Md,Mb,XB}, {Me,Mj,Mi,Mh,Mi,Mi,Mk,Mk,Mj,Mj,Mk,Mi,Mj,Mg,Mg,Mf,Mg,Me,Mg,Me,XB}, {Md,Mi,Mi,Mh,Mi,Mj,Mk,Mm,Ml,Mk,Mj,Mh,Mi,Mf,Mg,Me,Mg,Mc,Me,Mb,XB}, {Md,Mi,Mi,Mh,Mi,Mi,Mj,Ml,Mm,Mk,Mj,Mh,Mi,Mg,Mg,Mf,Mg,Md,Me,Mb,XB}, {Md,Mh,Mh,Mi,Mi,Mh,Mj,Mk,Mk,Mm,Ml,Mj,Mj,Mh,Mg,Mg,Mg,Md,Me,Md,XB}, {Mf,Mh,Mh,Mi,Mh,Mg,Mk,Mj,Mj,Ml,Mo,Mk,Mi,Mg,Mg,Mg,Mg,Mg,Mi,Mf,XB}, {Me,Mi,Mh,Mi,Mg,Mf,Mi,Mh,Mh,Mj,Mk,Mo,Ml,Mi,Mg,Mf,Mg,Me,Me,Mk,XB}, {Md,Mi,Mi,Mh,Mh,Mg,Mj,Mi,Mi,Mj,Mi,Ml,Mn,Mi,Mg,Mf,Mg,Md,Me,Mf,XB}, {Md,Mg,Mh,Mg,Mh,Mf,Mg,Mf,Mg,Mh,Mg,Mi,Mi,Mo,Mk,Mm,Mk,Mi,Mg,Me,XB}, {Mg,Mh,Mi,Mg,Mh,Mf,Mg,Mg,Mg,Mg,Mg,Mg,Mg,Mk,Mn,Mk,Mm,Mj,Mh,Md,XB}, {Mc,Mf,Mg,Mf,Mg,Me,Mf,Me,Mf,Mg,Mg,Mf,Mf,Mm,Mk,Mo,Mk,Mk,Mh,Mg,XB}, {Mg,Mh,Mi,Mh,Mi,Mh,Mg,Mg,Mg,Mg,Mg,Mg,Mg,Mk,Mm,Mk,Mm,Mh,Mg,Mc,XB}, {Me,Mf,Mf,Md,Me,Md,Me,Mc,Md,Md,Mg,Me,Md,Mi,Mj,Mk,Mh,Mq,Mp,Mi,XB}, {Mi,Mf,Mf,Md,Mf,Md,Mg,Me,Me,Me,Mi,Me,Me,Mg,Mh,Mh,Mg,Mp,Mr,Mi,XB}, {Ma,Mg,Md,Mc,Mc,Mb,Me,Mb,Mb,Md,Mf,Mk,Mf,Me,Md,Mg,Mc,Mi,Mi,Mt,XB}, {XB,XB,XB,XB,XB,XB,XB,XB,XB,XB,XB,XB,XB,XB,XB,XB,XB,XB,XB,XB, 0}, }; int index[128]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 7, 0, 7, 8,17, 5, 10,14,21,12,15,13, 6,21, 3, 9,11, 1, 2,21,16,19, 20,18, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; int ratio[21][21] = { {0,2,2,2,1,1,2,1,3,1,1,2,2,2,2,3,2,1,2,1,3}, {2,0,2,1,3,2,3,2,3,2,1,2,3,1,1,1,2,2,2,1,3}, {2,2,0,2,2,2,1,1,1,1,2,1,2,1,1,2,2,2,1,2,3}, {2,1,2,0,3,2,3,2,2,2,1,2,3,2,1,2,1,1,1,2,3}, {1,3,2,3,0,2,1,1,2,2,2,2,1,2,2,2,2,1,2,1,3}, {1,2,2,2,2,0,2,2,3,1,1,1,1,1,2,3,1,2,2,2,3}, {2,3,1,3,1,2,0,2,1,1,2,2,1,1,2,2,2,2,1,2,3}, {1,2,1,2,1,2,2,0,2,2,2,1,2,2,1,2,2,1,2,1,3}, {3,3,1,2,2,3,1,2,0,2,3,2,2,1,2,2,1,1,1,2,3}, {1,2,1,2,2,1,1,2,2,0,1,2,2,2,1,3,2,2,1,2,3}, {1,1,2,1,2,1,2,2,3,1,0,2,2,2,1,2,2,2,2,2,3}, {2,2,1,2,2,1,2,1,2,2,2,0,1,1,1,2,1,2,2,2,3}, {2,3,2,3,1,1,1,2,2,2,2,1,0,1,2,2,1,2,2,2,3}, {2,1,1,2,2,1,1,2,1,2,2,1,1,0,1,1,1,2,1,1,3}, {2,1,1,1,2,2,2,1,2,1,1,1,2,1,0,1,1,2,1,1,3}, {3,1,2,2,2,3,2,2,2,3,2,2,2,1,1,0,1,2,3,1,3}, {2,2,2,1,2,1,2,2,1,2,2,1,1,1,1,1,0,1,1,2,3}, {1,2,2,1,1,2,2,1,1,2,2,2,2,2,2,2,1,0,1,1,3}, {2,2,1,1,2,2,1,2,1,1,2,2,2,1,1,3,1,1,0,2,3}, {1,1,2,2,1,2,2,1,2,2,2,2,2,1,1,1,2,1,2,0,3}, {3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0}, }; int res[128]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 1, 0, 4, 3,19, 5,18,21, 6,16, 8, 9,21, 11,12,13,14, 2,21,17,15, 20,10, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; char seq1[MAXS1], seq2[MAXS2]; char aseq1[MAXAS], aseq2[MAXAS]; int L[MAXS1][MAXS2], R[MAXS1][MAXS2], M[MAXS1][MAXS2]; int j, k; int g = {8}; /*None negative gap penalty*/ int c; /*Counter for aligned sequences*/ main(ac,av) int ac; char *av[]; { FILE *fd1, *fd2, *fopen(); /*File description 1 & 2*/ int Mmax; int dum1, dum2; int jj, kk, m, n, nc, n1, n2, n3, mv, mu; int pcek = {1}; /* To print n1,.., set pcek nonzero */ int lseq1, lseq2; /*Length of two sequences*/ int sigma; /*Similarity constant between pairs of letters*/ int Sim; /*Aligned sequences optimal similarity score*/ int ir; int ssigma; /*Sum of sigmas*/ int gaps; /*Total number of gaps introduced*/ int blanks, kgps1, kgps2, gps1[100], gps2[100]; int i, ii; int pseq = {0}; /*To print sequences, set pseq nonzero*/ int t; int mp, np, pdum, maxp, ascore; int oh[4]; /* Overhang lengths */ int bks1max, bks2max; int nMa, nMb, nMc, nMd, nMe, nMf; int nMg, nMh, nMi, nMj, nMk, nMl; int nMm, nMn, nMo, nMp, nMq, nMr; int nMs, nMt; char seqt1[80], seqt2[80]; float fidty, lsmall, fsim, NAS; float bscore; float fn1, fn2, fn1n2; float fg; /*count number of arguments to prevent large core dump*/ if(ac!=3)exit(fprintf(stderr,"usage: %s file1 file2 > [file3]\n",av[0])); /* Open fd1 and fd2 */ if( (fd1 = fopen( av[1],"r" )) == NULL ) /*error check*/ exit(fprintf(stderr,"%s: can't open %s\n",av[0],av[1]) ); else if( (fd2 = fopen( av[2],"r" )) == NULL ) /*error check*/ exit(fprintf(stderr,"%s: can't open %s\n",av[0],av[2]) ); /* Enter the title of the first sequences */ fseek(fd1,1,1); /* Start reading from column 7 */ fgets(seqt1, sizeof seqt1, fd1); printf("%s",seqt1); /* Enter the first sequence */ seq1[0]=' '; fseek(fd1,11,1); ii=0; for(i=1;(c=getc(fd1)) && c != '*'; ++i) { if(c=='\n') { --i; fseek(fd1,11,1); continue; } if(c==' ') { --i; continue; } seq1[i]=c; if(pseq != 0) { if( ii==30 ) { ii=0; printf("\n"); } printf("%2c",seq1[i]); ii++; } } lseq1=i; if( lseq1>MAXS1 ) /*error check*/ exit(fprintf(stderr,"%s: job aborted, increase MAXS1 (%d)",av[0],MAXS1-3)); printf(" (sequence length=%d)",i-1); printf("\n\n"); /* Enter the title of the second sequence */ fseek(fd2,1,1); /* Start reading from column 7 */ fgets(seqt2, sizeof seqt2, fd2); printf("%s",seqt2); /* Enter the second sequence */ seq2[0]=' '; fseek(fd2,11,1); ii = 0; for(i=1;(c=getc(fd2)) && c != '*'; ++i) { if(c=='\n') { --i; fseek(fd2,11,1); continue; } if(c==' ') { --i; continue; } seq2[i]=c; if( pseq != 0 ) { if( ii == 30 ) { ii=0; printf("\n"); } printf("%2c",seq2[i]); ii++; } } lseq2=i; if( lseq2>MAXS2 ) /*error check*/ exit(fprintf(stderr,"%s: job aborted, increase MAXS2 (%d)",av[0],MAXS2-3)); printf(" (sequence length=%d)",i-1); printf("\n\n"); fg = g; printf("Gap penalty = %3.2f\n",fg/10.0); /* Construct the first row and column of the 3 matrices */ for( j=0; j-1 ) { aseq1[c] = ' '; aseq2[c++] = seq2[--k]; } } if( k==0 ) { while( j>-1 ) { aseq1[c] = seq1[--j]; aseq2[c++] = ' '; } } if( c>MAXAS ) /*error check*/ exit(printf("job aborted, please increase MAXAS")); /* Compute percent identity */ lsmall = lseq1; if( lseq1 > lseq2 ) lsmall = lseq2; nc = 0; ir = 0; for( j=0; j-1; ) { k = j; if( (aseq1[j] != ' ') && (aseq1[--j] == ' ') ) { blanks++; while( aseq1[--j] == ' ' ) blanks++; if( j>-1 ) { gps1[blanks]++; bks1max = (bks1max>blanks ) ? bks1max : blanks; blanks=0; } } if( j==k ) j--; } blanks = 0; bks2max = 0; for( j=1; j<100; ++j ) gps2[j] = 0; for( j=c-3; j>-1; ) { k = j; if( (aseq2[j] != ' ') && (aseq2[--j] == ' ') ) { blanks++; while( aseq2[--j] == ' ' ) blanks++; if( j>-1 ) { gps2[blanks]++; bks2max = ( bks2max>blanks ) ? bks2max : blanks; blanks=0; } } if( j==k ) j--; } /* Compute lengths of overhangs */ blanks = 0; j = 0; while( aseq1[j] == ' ' ) { blanks++; j++; } oh[0] = blanks; blanks = 0; j = c-3; while( aseq1[j] == ' ' ) { blanks++; j--; } oh[1] = blanks; blanks = 0; j = 0; while( aseq2[j] == ' ' ) { blanks++; j++; } oh[2] = blanks; blanks = 0; j = c-3; while( aseq2[j] == ' ' ) { blanks++; j--; } oh[3] = blanks; /* Print the internal gap lengths of each sequence */ gaps = 0; printf("Internal gaps: freq(gap length)\n Seq 1: "); for( j=1; j<=bks1max; j++) { if( gps1[j] != 0 ) { printf("%d(%d), ",gps1[j],j); gaps += gps1[j]; } } printf("\n Seq 2: "); for( j=1; j<=bks2max; j++) { if( gps2[j] != 0 ) { printf("%d(%d), ",gps2[j],j); gaps += gps2[j]; } } printf("\n"); /* Print overhangs length */ printf("Overhangs length: N-terminal C-terminal\n"); printf(" Seq 1: %3d %3d\n",oh[1],oh[0]); printf(" Seq 2: %3d %3d\n",oh[3],oh[2]); /* Compute score from aligned sequences */ ascore = Ma*nMa + Mb*nMb + Mc*nMc + Md*nMd - g*gaps; ascore += Me*nMe + Mf*nMf + Mg*nMg + Mh*nMh + Mi*nMi; ascore += Mj*nMj + Mk*nMk + Ml*nMl + Mm*nMm + Mn*nMn; ascore += Mo*nMo + Mp*nMp + Mq*nMq + Mr*nMr; ascore += Ms*nMs + Mt*nMt; bscore = ascore; bscore = ascore/10.0; printf("Score calculated from alignment = %6.2f\n", bscore); bscore = fsim - bscore; printf("Score(matrix) - score(alignment) = %6.2f",bscore); printf("\n\n\n\n"); /* Print the aligned sequences */ printf(" --------------- SmfLOM alignment ---------------\n\n"); j = c-3; while( j>-1 ) { k = 0; t = j; while( k<30 ) { if( j>-1 ) printf("%2c",aseq1[j--]); k++; } printf("\n "); k = 0; while( k<30 ) { if( t>-1 ) { printf("%c",aseq2[t]); if( aseq1[t] != aseq2[t] ) printf(" "); else if( aseq1[t] == ' ' ) printf(" "); else printf("_"); } k++; t--; } j = t; printf("\n\n"); } } branchM() { int Mmax; if( (j==0) || (k==0) ) return; j--; k--; Mmax = M[j][k]; if( Mmax < L[j][k] - g ) Mmax = L[j][k] - g; if( Mmax < R[j][k] - g ) Mmax = R[j][k] -g; if( Mmax == M[j][k] ) { aseq1[c] = seq1[j]; aseq2[c] = seq2[k]; c++; branchM(); return; } else if( Mmax == (L[j][k] -g) ) { aseq1[c] = seq1[j]; aseq2[c] = ' '; c++; branchL(); return; } else { aseq1[c] = ' '; aseq2[c] = seq2[k]; c++; branchR(); return; } } branchL() { if( j==0 ) return; j--; aseq1[c] = seq1[j]; if( L[j][k] > M[j][k] ) { aseq2[c] = ' '; c++; branchL(); return; } else { aseq2[c] = seq2[k]; c++; branchM(); return; } } branchR() { if( k==0 ) return; k--; aseq2[c] = seq2[k]; if( R[j][k] > M[j][k] ) { aseq1[c] = ' '; c++; branchR(); return; } else { aseq1[c] = seq1[j]; c++; branchM(); return; } }