/* This program counts the number of strongly polar (D,E,N,Q,K,R,H), anionic */ /* (D,E), cationic (R,K,H), semipolar (G,S,T,C), hydroxy (S,T), total */ /* hydrophobic (L,I,V,M,F,Y,W,A,P), amphipathic aromatic (Y,W), total */ /* aromatic (Y,W,F), total aliphatic (L,I,V,M,A,P), branched chain aliphatic */ /* (L,I,V). The percentages are given as averages in a variable window. */ /* 8/25/94 st */ #include #include main(ac,av) int ac; char *av[]; { FILE *ALIGN,*ALLout,*AAFout; char code[6],seqarray[30][1000],str[200]; int ansarrleng,begin,c,e,end,i,length,numlines,numseq,pos,posn,algn_leng,row,start,s top,wsize,x,y,z; float Acnt,Rcnt,Ncnt,Dcnt,Gcnt,Ccnt,Qcnt,Ecnt,Hcnt,Icnt,Lcnt,Kcnt,Mcnt,Fcnt,Pcnt,Scnt, Tcnt,Wcnt,Ycnt,Vcnt; float str_polar,anionic,cationic,semipolar,hydroxy,tot_hydrophob,amphi_arom,tot_arom,t ot_aliph,br_chain_aliph; float aafreq[29][1000],scorearray1[10][1000],scorearray2[10][1000],sum; wsize=atoi(av[1]); if(ac!=5 && wsize%2==0) { printf("chkall gathers statistical data on all the positions in an alignment.\nThe amino acid groups searched for are\n\n"); printf(" Strongly Polar D E N Q K R H\n"); printf(" Anionic D E\n"); printf(" Cationic R K H\n"); printf(" Semipolar G S T C\n"); printf(" Hydroxy S T\n"); printf(" Total Hydrophobic L I V M F Y W A P\n"); printf(" Amphipathic Aromatic Y W\n"); printf(" Total Aliphatic L I V M A P\n"); printf(" Branched Chain Aliphatic L I V\n\n"); printf("Usage: chkall ws rf wf1 wf2.\n"); printf(" ws = window size-must be odd\n"); printf(" rf = alignment in NEWAT format\n"); printf(" wf1= save file for various group frequencies\n"); printf(" wf2= save file frequency of amino acid appearance\n"); printf(" MAX length = 1000 MAX SEQ=30\n"); exit (1); } if((ALIGN=fopen(av[2],"r"))==NULL) { printf("File %s does not exist.\n", av[2]); exit (1); } ALLout=fopen(av[3],"w"); AAFout=fopen(av[4],"w"); /* Determine 2-D scorearray1 size */ numlines=0; fgets(str,sizeof str,ALIGN); while (strstr(str,"*")==NULL) { fgets(str,sizeof str,ALIGN); numlines++; } length=(numlines)*30; /* Read aligned sequences into the array seqarray */ rewind(ALIGN); row=0; numseq=0; for(;;) { c=fgetc(ALIGN); if(c==EOF) { fclose(ALIGN); break; } else { ungetc(c,ALIGN); fgets(str,sizeof str,ALIGN); if(str[0]=='T') { numseq++; y=0; for(i=0;i