/* Program From Progressive Alignment of Amino Acid Sequences and Construction of Phylogenetic Trees from Them, by D. F. Feng and R. F. Doolittle, Methods of Enzymology, v266, p368 (1996) 2/7/1996 Read "readme.doc" before using. */ #include #include int NumStrings,ndiff; char InputString[50][60]; main(ac,av) int ac; char *av[]; { FILE *B1andD, *B2andD, *fopen(); int c,i,length; char line[400],paren[200]; if(ac<2) { fprintf(stderr,"\nPurpose: Find Best Branch Order With"); fprintf(stderr," No Negative Branch Lengths.\n"); fprintf(stderr,"Usage: "); fprintf(stderr,"%s rf [>wf]\n",av[0]); fprintf(stderr,"\trf: `albr' (an output file from tree).\n"); fprintf(stderr,"\twf: Branch Lengths.\n"); fprintf(stderr,"** Use `tomac' for tree drawing **\n\n"); exit(0); } if((B1andD = fopen(av[1],"r")) == NULL) exit(fprintf(stderr,"%s: Can't open %s\n",av[1])); i=ndiff=0; while((c=getc(B1andD)) != '*') { ungetc(c,B1andD); fscanf(B1andD,"%s",InputString[i]); i++; getc(B1andD); } NumStrings = i; GetParen(InputString,paren); length = strlen(paren); for(i=1; i<(length-1); ++i) paren[i-1] = paren[i]; paren[i-1] = '\0'; B2andD = fopen("zecret","w"); fprintf(B2andD,"%s\n",paren); while((c=fgetc(B1andD)) != EOF) { fgets(line, sizeof line, B1andD); fprintf(B2andD,"%s",line); } fclose(B1andD); fclose(B2andD); for(i=0; i<60; i++) { branchlength(); setree(); flip(); } } GetParen(start,paren) char start[50][60], paren[]; { int i,j,match; char c[2],SubParen[200],oldparen[200]; paren[0] = '('; for(i=1; i<3; ++i) paren[i] = start[0][i-1]; paren[i++] = ')'; paren[i] = '\0'; i=2; while(i1) { for(j=1; j='A' && *pts<='Z') { or[nseq] = *pts - 65; nseq++; } else if(*pts>='a' && *pts<='z') { or[nseq] = *pts - 71; nseq++; } pts++; } /*Read distance scores*/ for(i=0; ifabs(a[l][k])) l=i; } if(l!=k) { for(j=k; j<2*n; j++) { dum=a[k][j]; a[k][j]=a[l][j]; a[l][j]=dum; } } /*End: Minimize roundoff error*/ for(i=k+1; i=0; --i) for(k=n; ka[j]) { temp = a[i]; a[i] = a[j]; a[j] = temp; } } setree() { FILE *fd, *fd1, *fopen(); int i,j,k; char branch_order[300], branch_length[1300][8]; char bostring[500]; char c,c1,c2,c3,sign; fd = fopen("tomac","r"); fscanf(fd,"%s",branch_order); i=0; while(fscanf(fd,"%s",branch_length[i++]) != EOF) ; fclose(fd); fd1 = fopen("duup1","w"); i=k=0; while(branch_order[i] != '\0') { c1 = branch_order[i]; c2 = branch_order[i+1]; c3 = branch_order[i+2]; if(c3 == '\0' && ((c2>='A' && c2<='Z')||(c2>='a'&&c2<='z'))) { if(branch_length[k][0]=='-') fprintf(fd1,")*%c",c2); else fprintf(fd1,")%c",c2); goto out; } else { /* ( */ if(c1=='(') fprintf(fd1,"%c",c1); /* XY */ else if((c1>='A' && c1<='Z') && (c2>='A' && c2<='Z')) { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* xy */ else if((c1>='a' && c1<='z') && (c2>='a' && c2<='z')) { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* Xy */ else if((c1>='A' && c1<='Z') && (c2>='a' && c2<='z')) { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* xY */ else if((c1>='a' && c1<='z') && (c2>='A' && c2<='Z')) { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* X) */ else if((c1>='A' && c1<='Z') && c2==')') { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* x) */ else if((c1>='a' && c1<='z') && c2==')') { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* )Y */ else if(c1==')' && (c2>='A' && c2<='Z')) { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* Y( */ else if((c1>='A' && c1<='Z') && c2=='(') { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* y( */ else if((c1>='a' && c1<='z') && c2=='(') { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* )z */ else if(c1==')' && (c2>='a' && c2<='z')) { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* )) */ else if(c1==')' && c2==')') { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* )( */ else if(c1==')' && c2=='(') { sign = branch_length[k][0]; if(sign=='-') fprintf(fd1,"%c*",c1); else fprintf(fd1,"%c",c1); k++; } /* )\0 */ else if(c1==')' && c2=='\0') fprintf(fd1,")"); /* X\0 */ else if((c1>='A' && c1<='Z') && c2=='\0') fprintf(fd1,"%c",c1); /* x\0 */ else if((c1>='a' && c1<='z') && c2=='\0') fprintf(fd1,"%c",c1); else exit(printf("Problem in setree.\n")); } i++; } out: fprintf(fd1,"\n"); fclose(fd1); } flip() { FILE *fd, *fdz, *fdd, *fopen(); int i,j,star_pos,left,ibrkt,isw,mleft,mmleft,mright; int li,ri,nstar,old_star_pos,lcluster,rcluster; int olddiff,newdiff,k,l,cleft,cright; char c,swstring[400],newstring[400]; char left_entry[100],right_entry[100]; char line[500],sub_left[50],sub_right[50]; fd=fopen("duup1","r"); fgets(swstring, sizeof swstring, fd); fclose(fd); nstar=0; for(i=0; swstring[i]!='\0'; ++i) if(swstring[i]=='*') { c = swstring[i-1]; if(c>='A'&&c<='Z' || c>='a'&&c<='z') { printf("Negative terminal branch lengths"); printf(" detected. Job aborted.\n"); printf("You may have two very similar "); printf("sequences.\n"); exit(0); } else nstar++; } if(nstar==0) { remove("duup1"); exit(0); } star_pos = 0; while(swstring[star_pos]!='*') star_pos++; /*Locate * position*/ newdiff = abs(star_pos - old_star_pos); if(newdiff==olddiff) ndiff++; else ndiff=0; /*Encountered deep node*/ if(swstring[star_pos-1]==')' && swstring[star_pos+1]==')') { /*Move left, find (*/ i=star_pos-2; ibrkt=1; while(ibrkt!=0) { if(swstring[i]==')') ibrkt++; else if(swstring[i]=='(') ibrkt--; i--; } left = i; /*1st outside the (*/ /*End: Move left, find (*/ /*Move left to corresponding (*/ mleft=left; ibrkt=0; for(;;) { if(swstring[mleft]==')') ibrkt++; else if(swstring[mleft]=='(') ibrkt--; if(ibrkt==0) break; mleft--; } /*End: Move left to corresponding (*/ i=0; for(isw=0; isw5 && star_pos>old_star_pos) { /*Encounter type 2 duplicated shallow nodes*/ lcluster=li=0; mleft=star_pos-2; mmleft=star_pos-1; if(swstring[mleft]!=')') /*No cluster*/ { lcluster++; } else { ibrkt=0; for(;;) { if(swstring[mleft]==')') ibrkt++; else if(swstring[mleft]=='(') ibrkt--; if(ibrkt==0) break; mleft--; } ibrkt=0; for(;;) { if(swstring[mmleft]==')') ibrkt++; else if(swstring[mmleft]=='(') ibrkt--; if(ibrkt==0) break; mmleft--; } } rcluster=ri=0; mright=star_pos+1; if(swstring[star_pos+1]!='(') /*No cluster*/ { right_entry[ri++] = swstring[star_pos+1]; rcluster++; } else /*Cluster found*/ { ibrkt=0; for(;;) { if(swstring[mright]=='(') ibrkt++; else if(swstring[mright]==')') ibrkt--; if(ibrkt==0) break; mright++; } for(i=star_pos+1; i<=mright; ++i) right_entry[ri++] = swstring[i]; } right_entry[ri] = '\0'; /* cluster on the left and no cluster on the right */ if(lcluster==0 && rcluster!=0) { j=0; for(i=0; i='A' && newstring[i]<='Z') newstring[j++] = newstring[i]; else if(newstring[i]>='a' && newstring[i]<='z') newstring[j++] = newstring[i]; else if(newstring[i]=='(' || newstring[i]==')') newstring[j++] = newstring[i]; else ; } newstring[j] = '\0'; fdz=fopen("zecret","r"); fdd=fopen("duup1","w"); fprintf(fdd,"%s\n",newstring); fgets(line, sizeof line, fdz); while(fgets(line, sizeof line, fdz)!=NULL) { fprintf(fdd,"%s",line); } fclose(fdz); fclose(fdd); fdz=fopen("zecret","w"); fdd=fopen("duup1","r"); while(fgets(line, sizeof line, fdd)!=NULL) { fprintf(fdz,"%s",line); } fclose(fdz); fclose(fdd); old_star_pos = star_pos; olddiff = newdiff; remove("duup1"); } checking(s) char *s; { int i,nc,noq,ncq,nn,ns,sc; nc=noq=ncq=nn=ns=sc=0; for(i=0; s[i]!='\0'; ++i) { if(s[i]>='A' && s[i]<='Z') nc++; else if(s[i]=='(') noq++; else if(s[i]==')') ncq++; else if(s[i]=='\n') nn++; else if(s[i]=='*') ns++; else sc++; } } mv_pointer_left(mleft,swstring) int *mleft; char *swstring; { int ibrkt; ibrkt=0; for(;;) { if(swstring[*mleft]==')') ibrkt++; else if(swstring[*mleft]=='(') ibrkt--; if(ibrkt==0) break; (*mleft)--; } return; }