/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* codon.c
 * 
 * Routines for dealing with codon frequencies.
 * 
 * Includes:
 *    ParseCodonFile()      -- read a codon-codon joint matrix from disk.
 *    ParseTargetFreqFile() -- read a vector of 4 target frequencies from disk.
 *    
 *    
 * ER - Wed Aug 21 12:37:47 CDT 2002
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#include "squid.h"

#ifdef MEMDEBUG
#include "dbmalloc.h"
#endif


/* Function: ParseCodonFile()
 * 
 * Purpose:  Given a pointer to an open file containing a codon-codon
 *           joint matrix, parse the file and allocate and fill a flat
 *           array of 64*64 doubles containing the matrix. Storage is
 *           row-major: entry (row,col) lives at index row*64+col.
 *          
 *           The Codon-Codon matrix is a 64x64 matrix.
 *                                       COD(x1,x2,x3)=x1+x2*4+x3*16
 *
 *           File layout expected by the parser:
 *             - zero or more leading lines starting with '#' (skipped);
 *             - one more line that is read and discarded (see the
 *               note about the separator line in the body);
 *             - 64 lines, each holding at least 64 tokens separated
 *               by spaces/tabs. Tokens are converted with atof(), so
 *               a non-numeric token silently becomes 0.0; tokens past
 *               the 64th on a line are ignored.
 *
 *           If fp is NULL, an all-zero matrix is returned.
 *           
 * Args:     fp              - open codon-codon frequencies file, or NULL
 *           ret_codon_joint - RETURN: codon-codon matrix
 * 
 * Returns:  1 on success; 0 on failure, with squid_errno set to
 *           SQERR_NODATA (premature end of file or a short row).
 *           On success *ret_codon_joint is allocated here and must be
 *           released by the caller with free(). On failure the matrix
 *           is released here and *ret_codon_joint is left untouched.
 *           Die() is called if the allocation itself fails.
 */
int
ParseCodonFile(FILE *fp, double **ret_codon_joint)
{
  double  *codon_joint;
  char     buffer[5000];    /* input buffer from fp                  */
  int      nsymbols = 64;   /* number of codons (rows and columns)   */
  char    *sptr;
  int      row, col;
  
  /* Allocate the 64x64 codon-codon matrix as one flat array.
   */
  if ((codon_joint = (double *) calloc ((size_t) nsymbols * nsymbols, sizeof(double))) == NULL)
    Die("calloc failed");

  /* No file: hand back a matrix of zeros. The entries are assigned
   * explicitly rather than relying on calloc's zero bytes.
   */
  if (fp == NULL) { 
    for (row = 0; row < nsymbols; row++) 
      for (col = 0; col < nsymbols; col++) 
        codon_joint[row*nsymbols+col] = 0.0;

    *ret_codon_joint = codon_joint;
    return 1;
  }

  /* Skip the leading comment lines; the loop stops after reading the
   * first line that does not start with '#'. Testing buffer[0]
   * directly (instead of dereferencing the result of strtok) keeps a
   * blank line from causing a NULL dereference: strtok() returns NULL
   * when the line contains nothing but the newline.
   */
  do {
    if (fgets(buffer, sizeof(buffer), fp) == NULL) 
      goto fail;
  } while (buffer[0] == '#');
  
  /* there is a hack here. This relies on having one extra line
     between the commented lines and the actual data that you do not 
     care about: the line just read above is discarded.
     ~/qrna/scripts/codon_codon.pl adds a line of ------------- to do that.
  */

  /* Parse the rest of the file: one matrix row per line.
   */
  for (row = 0; row < nsymbols; row++)
    {
      if (fgets(buffer, sizeof(buffer), fp) == NULL) 
        goto fail;
      
      sptr = strtok(buffer, " \t\n");

      for (col = 0; col < nsymbols; col++)
        {
          if (sptr == NULL) goto fail;   /* row has fewer than 64 tokens */

          codon_joint[row*nsymbols+col] = atof(sptr);

          sptr = strtok(NULL, " \t\n");
        }
    }

  /* Return
   */
  *ret_codon_joint = codon_joint;
  return 1;

  /* Common failure exit: release the matrix so it does not leak.
   */
 fail:
  free(codon_joint);
  squid_errno = SQERR_NODATA;
  return 0;
}

/* Function: ParseTargetFreqFile()
 * 
 * Purpose:  Given a pointer to an open file containing a vector of
 *           target frequencies, parse the file and allocate and fill
 *           an array of 4 doubles with those frequencies.
 *
 *           File layout expected by the parser:
 *             - zero or more leading lines starting with '#' (skipped);
 *             - one more line that is read and discarded (see the
 *               note about the separator line in the body);
 *             - one line holding at least 4 tokens separated by
 *               spaces/tabs. Tokens are converted with atof(), so a
 *               non-numeric token silently becomes 0.0; tokens past
 *               the 4th are ignored.
 *
 *           If fp is NULL, an all-zero vector is returned and
 *           CheckSingleProb() is not called.
 *           
 * Args:     fp             - open target frequencies file, or NULL
 *           ret_targetfreq - RETURN: vector of 4 target frequencies
 * 
 * Returns:  1 on success; 0 on failure, with squid_errno set to
 *           SQERR_NODATA (premature end of file or a short line).
 *           On success *ret_targetfreq is allocated here and must be
 *           released by the caller with free(). On failure the vector
 *           is released here and *ret_targetfreq is left untouched.
 *           Die() is called if the allocation itself fails.
 */
int
ParseTargetFreqFile(FILE *fp, double **ret_targetfreq)
{
  double  *targetfreq;
  char     buffer[5000];    /* input buffer from fp                  */
  int      nsymbols = 4;    /* number of frequencies in the vector   */
  char    *sptr;
  int      col;
  int      verbose = FALSE; /* set to TRUE to dump the parsed vector */
  
  /* Allocate the vector of target frequencies.
   */
  if ((targetfreq = (double *) calloc ((size_t) nsymbols, sizeof(double))) == NULL)
    Die("calloc failed");

  /* No file: hand back a vector of zeros. The entries are assigned
   * explicitly rather than relying on calloc's zero bytes.
   */
  if (fp == NULL) { 
    for (col = 0; col < nsymbols; col++) 
      targetfreq[col] = 0.0;

    *ret_targetfreq = targetfreq;
    return 1;
  }

  /* Skip the leading comment lines; the loop stops after reading the
   * first line that does not start with '#'. Testing buffer[0]
   * directly (instead of dereferencing the result of strtok) keeps a
   * blank line from causing a NULL dereference: strtok() returns NULL
   * when the line contains nothing but the newline.
   */
  do {
    if (fgets(buffer, sizeof(buffer), fp) == NULL) 
      goto fail;
  } while (buffer[0] == '#');
  
  /* there is a hack here. This relies on having one extra line
     between the commented lines and the actual data that you do not 
     care about: the line just read above is discarded.
     ~/qrna/scripts/codon_codon.pl adds a line of ------------- to do that.
  */

  /* Parse the single data line.
   */
  if (fgets(buffer, sizeof(buffer), fp) == NULL) 
    goto fail;

  sptr = strtok(buffer, " \t\n");

  for (col = 0; col < nsymbols; col++)
    {
      if (sptr == NULL) goto fail;   /* line has fewer than 4 tokens */

      targetfreq[col] = atof(sptr);

      sptr = strtok(NULL, " \t\n");
    }

  if (verbose) {
    printf("Target frequencies\n");
    PrintVectorProbs(stdout, targetfreq, nsymbols);
  }
  /* CheckSingleProb() is defined elsewhere; presumably it validates
   * that the vector is a proper probability distribution -- confirm
   * against its definition.
   */
  CheckSingleProb(targetfreq, nsymbols); /* paranoia */
  
  /* Return
   */
  *ret_targetfreq = targetfreq;

  return 1;

  /* Common failure exit: release the vector so it does not leak.
   */
 fail:
  free(targetfreq);
  squid_errno = SQERR_NODATA;
  return 0;
}
