/*This is the papa3.c program. Reference: R.F. Doolittle & D.F. Feng (1990) Meth. Enzymol., 183:659-669. Note to compile this program on the Iris Indigo, type cc -cckr -signed papa3.c */ #define MAXS 500 /*Maximum size for sequence*/ #define NSEQ 50 /*Maximum number of sequences*/ #include int nseq; char seq[NSEQ][MAXS]; #define nse NSEQ*(NSEQ-1)/2 float mat[nse]; int kind[14]; int xtable[5][14] = { {30, 0, 0, 0, 0,15,15, 0, 0, 0,20,30,30,18}, { 0,30, 0, 0, 0,15,15, 0,30,30,20, 0, 0,18}, { 0, 0, 0, 0,30, 0, 0,20, 0, 0,20, 0, 0,18}, { 0, 0,30, 0, 0,15,15,20, 0,30, 0,30, 0,18}, { 0, 0, 0,30, 0,15,15,20,30, 0, 0, 0,30,18}, }; int ytable[5][14] = { {30, 0, 0, 0,15, 0,15, 0, 0, 0,30,20,30,18}, { 0, 0,30, 0,15, 0,15,30, 0,30, 0,20, 0,18}, { 0, 0, 0, 0, 0,30, 0, 0,20, 0, 0,20, 0,18}, { 0,30, 0, 0,15, 0,15, 0,20,30,30, 0, 0,18}, { 0, 0, 0,30,15, 0,15,30,20, 0, 0, 0,30,18}, }; int ztable[5][14] = { {30, 0, 0, 0,15,15, 0, 0, 0, 0,30,30,20,18}, { 0, 0, 0,30,15,15, 0,30,30, 0, 0, 0,20,18}, { 0, 0, 0, 0, 0, 0,30, 0, 0,20, 0, 0,20,18}, { 0,30, 0, 0,15,15, 0, 0,30,20,30, 0, 0,18}, { 0, 0,30, 0,15,15, 0,30, 0,20, 0,30, 0,18}, }; float xdtable[5][14] = { {1,0,0,0,0,.5,.5, 0,0,0,.7,1,1,.6}, {0,1,0,0,0,.5,.5, 0,1,1,.7,0,0,.6}, {0,0,0,0,1, 0, 0,.7,0,0,.7,0,0,.6}, {0,0,1,0,0,.5,.5,.7,0,1, 0,1,0,.6}, {0,0,0,1,0,.5,.5,.7,1,0, 0,0,1,.6}, }; float ydtable[5][14] = { {1,0,0,0,.5,0,.5,0, 0,0,1,.7,1,.6}, {0,0,1,0,.5,0,.5,1, 0,1,0,.7,0,.6}, {0,0,0,0, 0,1, 0,0,.7,0,0,.7,0,.6}, {0,1,0,0,.5,0,.5,0,.7,1,1, 0,0,.6}, {0,0,0,1,.5,0,.5,1,.7,0,0, 0,1,.6}, }; float zdtable[5][14] = { {1,0,0,0,.5,.5,0,0,0, 0,1,1,.7,.6}, {0,0,0,1,.5,.5,0,1,1, 0,0,0,.7,.6}, {0,0,0,0, 0, 0,1,0,0,.7,0,0,.7,.6}, {0,1,0,0,.5,.5,0,0,1,.7,1,0, 0,.6}, {0,0,1,0,.5,.5,0,1,0,.7,0,1, 0,.6}, }; main(ac,av) int ac; char *av[]; { FILE *fd, *fsd; int flag, i, ii, j, jj, k, l, m, n; int O,Q,sum,pprt; int item,nterm,index; int ip,jp,kp,lp,ipp,ipr,iprr; float max,cm,X,Y,Z; float length[5]; char c, stitle[NSEQ][100], code[NSEQ][5]; char s1,s2,s3,s4; if(ac<2) { fprintf(stderr,"Parsimony After Progressiv Alignment (PAPA)\n"); fprintf(stderr,"usage: %s rf [opt] [>wf]\n",av[0]); fprintf(stderr,"\trf: Aligned sequences in NEWAT format.\n"); fprintf(stderr,"\topt:Set opt=1 for intermed steps printout\n"); fprintf(stderr,"\twf: Summary Results and Branching Order!\n"); fprintf(stderr,"Current limits: NSEQ=<%d, length=<%d\n", NSEQ,MAXS); fprintf(stderr,"X,Y,or Z tree selected on max c-length\n"); fprintf(stderr,"Branch order: FM-method using diff matrix.\n"); exit(); } if((fd=fopen(av[1],"r"))==NULL) exit("Can't open %s\n",av[1]); if((pprt = atoi(av[2]))!=0) pprt = 1; nseq=0; for(i=0; ; ++i) { if((c=getc(fd))==EOF) break; ungetc(c,fd); nseq++; while((c=getc(fd))==' ') ; fgets(stitle[i], sizeof stitle[i], fd); fgetc(fd); fgets(code[i],5,fd); code[i][5] = '\0'; for(k=0; k<6; ++k) fgetc(fd); for(j=0;(c=getc(fd)) && c != '*'; ++j) { if(c=='\n') { for(k=0; k<11; ++k) fgetc(fd); --j; continue; } if(c==' ') { --j; continue; } seq[i][j]=c; } seq[i][j]='*'; seq[i][j+1]='\0'; getc(fd); } for(i=0; iY)?max:Y; max = (max>Z)?max:Z; if(max==X) { if(pprt != 0) printf("%2d,%2d,%2d,%2d X ",i+1,j+1,k+1,l+1); for(ii=0; ii<5; ++ii) { cm=0; for(jj=0; jj<14; ++jj) cm += xdtable[ii][jj]*kind[jj]; length[ii] = cm; if(pprt != 0) printf("%7.1f ",cm); } if(pprt != 0) printf("\n"); index=(ip-1)*(2*nseq-ip)/2+jp-ip; if(index==1) item++; mat[index] += length[0]+length[1]; index=(ip-1)*(2*nseq-ip)/2+kp-ip; mat[index] += length[0]+length[2]+length[3]; index=(ip-1)*(2*nseq-ip)/2+lp-ip; mat[index] += length[0]+length[2]+length[4]; index=(jp-1)*(2*nseq-jp)/2+kp-jp; mat[index] += length[1]+length[2]+length[3]; index=(jp-1)*(2*nseq-jp)/2+lp-jp; mat[index] += length[1]+length[2]+length[4]; index=(kp-1)*(2*nseq-kp)/2+lp-kp; mat[index] += length[3]+length[4]; } else if(max==Y) { if(pprt != 0) printf("%2d,%2d,%2d,%2d Y ",i+1,j+1,k+1,l+1); for(ii=0; ii<5; ++ii) { cm=0; for(jj=0; jj<14; ++jj) cm += ydtable[ii][jj]*kind[jj]; length[ii] = cm; if(pprt != 0) printf("%7.1f ",cm); } if(pprt != 0) printf("\n"); index=(ip-1)*(2*nseq-ip)/2+jp-ip; if(index==1) item++; mat[index] += length[0]+length[2]+length[3]; index=(ip-1)*(2*nseq-ip)/2+kp-ip; mat[index] += length[0]+length[1]; index=(ip-1)*(2*nseq-ip)/2+lp-ip; mat[index] += length[0]+length[2]+length[4]; index=(jp-1)*(2*nseq-jp)/2+kp-jp; mat[index] += length[1]+length[2]+length[3]; index=(jp-1)*(2*nseq-jp)/2+lp-jp; mat[index] += length[3]+length[4]; index=(kp-1)*(2*nseq-kp)/2+lp-kp; mat[index] += length[1]+length[2]+length[4]; } else { if(pprt != 0) printf("%2d,%2d,%2d,%2d Z ",i+1,j+1,k+1,l+1); for(ii=0; ii<5; ++ii) { cm=0; for(jj=0; jj<14; ++jj) cm += zdtable[ii][jj]*kind[jj]; length[ii] = cm; if(pprt != 0) printf("%7.1f ",cm); } if(pprt != 0) printf("\n"); index=(ip-1)*(2*nseq-ip)/2+jp-ip; if(index==1) item++; mat[index] += length[0]+length[2]+length[3]; index=(ip-1)*(2*nseq-ip)/2+kp-ip; mat[index] += length[0]+length[2]+length[4]; index=(ip-1)*(2*nseq-ip)/2+lp-ip; mat[index] += length[0]+length[1]; index=(jp-1)*(2*nseq-jp)/2+kp-jp; mat[index] += length[3]+length[4]; index=(jp-1)*(2*nseq-jp)/2+lp-jp; mat[index] += length[1]+length[2]+length[3]; index=(kp-1)*(2*nseq-kp)/2+lp-kp; mat[index] += length[1]+length[2]+length[4]; } } fsd = fopen("sscore","w"); printf("\n\nSummary Table:\n\n"); ipr=0; iprr = 1; printf(" "); for(i=0; i2) { grouping(newnseq); newnseq--; } strcat(code[1],code[0]); printf("\n\nOverall Branching Order:\n"); printf("%s\n",code[1]); } grouping(newnseq) int newnseq; { int imin,jmin,i,j,k; int ntimes,j1,j2,row,col; float min,sum; char c1,c2; imin = jmin = 0; min = 100000; for(i=0; i=newnseq-2) break; } } strcpy(code[newnseq-2],code[newnseq]); nmatrix[newnseq-2][newnseq-2] = 0.0; k=0; for(i=0; i='A' && c1<='Z') || (c1>='a' && c1<='z')) { j2=0; while((c2=code[i][j2])!='\0') { if((c2>='A' && c2<='Z') || (c2>='a' && c2<='z')) { if((c1>='A' && c1<='Z') && (c2>='A' && c2<='Z')) sum += score[c2-'A'] [c1-'A']; else if((c1>='A' && c1<='Z') && (c2>='a' && c2<='z')) sum += score[c2-'a'+26] [c1-'A']; else if((c1>='a' && c1<='z') && (c2>='A' && c2<='Z')) sum += score[c2-'A'] [c1-'a'+26]; else if((c1>='a' && c1<='z') && (c2>='a' && c2<='z')) sum += score[c2-'a'+26] [c1-'a'+26]; ntimes++; } j2++; } } j1++; } nmatrix[newnseq-2][k] = nmatrix[k][newnseq-2] = sum/(float)ntimes; k++; } }