/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* hexamer.c
 * 
 * Routines for dealing with hexamer frequencies.
 * 
 * Includes:
 *    ParseHexamerFile()  -- read a Hexamer matrix from disk.
 *    
 *    
 * ER - Thu Sep 14 15:53:26 CDT 2000
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#include "squid.h"

#ifdef MEMDEBUG
#include "dbmalloc.h"
#endif


/* Function: ParseHexamerFile()
 * 
 * Purpose:  Given a pointer to an open file containing a Hexamer matrix,
 *           parse the file and allocate and fill a 2D array of
 *           doubles containing the matrix. 
 *          
 *           The Hexamer matrix is a 64x64 matrix.
 *                                   COD(x1,x2,x3)=x1+x2*4+x3*16
 *
 *           Expected file layout:
 *             - zero or more leading lines starting with '#' (skipped);
 *             - exactly one further line that is read and discarded
 *               (a blank line is accepted here too);
 *             - 64 data lines, each holding at least 64 numbers
 *               separated by spaces and/or tabs. Tokens beyond the
 *               64th on a line are ignored.
 *
 *           If fp is NULL, an all-zero 64x64 matrix is returned.
 *           
 * Args:     fp        - open Hexamer frequencies file (may be NULL)
 *           ret_hexap - RETURN: hexamer matrix                   
 * 
 * Returns:  1 on success; 0 on failure and sets squid_errno to
 *           SQERR_NODATA. On success ret_hexap is allocated here and
 *           must be freed by the caller (use FreeHexap). On failure
 *           the partially filled matrix is freed here and *ret_hexap
 *           is left untouched.
 */
int
ParseHexamerFile(FILE *fp, double ***ret_hexap)
{
  double   **hexap;
  char       buffer[5000];    /* input buffer from fp                  */
  const int  nsymbols = 64;   /* total number of symbols in matrix     */
  char      *sptr;
  int        row, col;
  
  /* Allocate the nsymbols x nsymbols hexamer matrix.
   */
  if ((hexap = calloc(nsymbols, sizeof(double *))) == NULL)
    Die("calloc failed");
  for (row = 0; row < nsymbols; row++)
    if ((hexap[row] = calloc(nsymbols, sizeof(double))) == NULL)
      Die("calloc failed");

  /* No file: hand back an all-zero matrix. The explicit assignment
   * avoids relying on all-bits-zero being the representation of 0.0.
   */
  if (fp == NULL) { 
    for (row = 0; row < nsymbols; row++) 
      for (col = 0; col < nsymbols; col++) 
	hexap[row][col] = 0.0;

    *ret_hexap = hexap;
    return 1;
  }

  /* Skip the leading '#' comment lines. The first line that is not
   * a comment is consumed as well and ignored.
   *
   * there is a hack here. This relies on having one extra line
   * between the commented lines and the actual data that you do not 
   * care about.
   * ~/qrna/scripts/hexamer.pl adds a line of ------------- to do that.
   *
   * Only the first character is inspected, so a blank separator line
   * is handled without tokenizing it.
   */
  do {
    if (fgets(buffer, sizeof buffer, fp) == NULL) 
      goto FAILURE;
  } while (buffer[0] == '#');

  /* Parse the rest of the file: one matrix row per line.
   */
  for (row = 0; row < nsymbols; row++)
    {
      if (fgets(buffer, sizeof buffer, fp) == NULL) 
	goto FAILURE;
      
      /* Same delimiter set for every field, so rows separated by
       * spaces, tabs, or a mix of both are all accepted.
       */
      sptr = strtok(buffer, " \t\n");

      for (col = 0; col < nsymbols; col++)
	{
	  if (sptr == NULL)   /* fewer than nsymbols fields on this line */
	    goto FAILURE;

	  /* atof() yields 0.0 for a token that is not a number;
	   * such tokens are not reported as errors.
	   */
	  hexap[row][col] = atof(sptr);

	  sptr = strtok(NULL, " \t\n");
	}
    }
  
  /* Return
   */
  *ret_hexap = hexap;
  return 1;

  /* Failure: release everything allocated above so nothing leaks;
   * the caller never sees the pointer and could not free it.
   */
 FAILURE:
  for (row = 0; row < nsymbols; row++)
    free(hexap[row]);
  free(hexap);
  squid_errno = SQERR_NODATA;
  return 0;
}
