/*
    squidsites - A simple tool to extract statistics from the squid access file.
    Version 1.01    
    ChangeLog: see file ChangeLog for details.
    
    Copyright (C) 1999 Stefano Passiglia

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * One node of the doubly linked list of visited sites.
 *
 * Every distinct site name seen in the log gets exactly one node, which
 * accumulates the totals for that site.
 */
struct _site_descr
{
   struct _site_descr *next_site;   /* Following node, NULL at the tail.        */
   struct _site_descr *prev_site;   /* Preceding node, NULL at the head.        */
   char               *site_name;   /* Heap-allocated copy of the site name.    */
   unsigned int        hit_count;   /* Number of log records seen for the site. */
   unsigned int        bytes;       /* Sum of the byte values of those records. */
};

/* Shorthand for the node type and for a pointer to a node. */
typedef struct _site_descr  SITE_DESCR;
typedef struct _site_descr *PSITE_DESCR;


/*
 * Global variables.
 */

/* Head of the doubly linked site list; NULL while the list is empty. */
static PSITE_DESCR site_list = NULL;

/*
 * Sort/print mode. Non-zero (set by the -b option) ranks sites by their
 * byte total; zero ranks them by hit count.
 */
static unsigned char sort_by_bytes = 0;

/*
 * Usage text written by print_help(). It is never modified, hence const.
 * It is emitted verbatim, so it must not be treated as a printf format.
 */
static const char help_message[] = "Usage:\n"
                                   "squidsites [-b] [< log_file] [> output_file]\n\n"
                                   "  -b       Sort sites with respect to downloaded bytes\n"
                                   "  -h       Print this help message\n"
                                   "  log_file Squid log file. If omitted, read from stdin\n\n";




/*
 * insert_in_list()
 *
 * Inserts informations into the list.
 */
static
void insert_in_list( int   bytes,
                     char *browser,
                     char *site )
{
   PSITE_DESCR    new_site,
                  curr_site;
   int            cmp_res;

   /* Try to find the site, and increase its hit count. */
   curr_site = site_list;
   while ( curr_site != NULL )
   {
      cmp_res = strcmp( curr_site->site_name, site );
      if ( cmp_res == 0 )
      {
         /* Found a match. */
         break;
      }

      curr_site = curr_site->next_site;
   }

   if ( curr_site == NULL )
   {
      /* No site found, add a new one at the head of the list. */
      new_site = (PSITE_DESCR)calloc( 1, sizeof(SITE_DESCR) );
      new_site->site_name = (char *)strdup( site );
      new_site->next_site = site_list;
      if ( site_list != NULL )
      {
         site_list->prev_site = new_site;
      }
      site_list = new_site;
      curr_site = new_site;
   }

   curr_site->hit_count++;
   curr_site->bytes += bytes;

} /* insert_in_list */


/*
 * sort_list()
 *
 * Sorts the global site list in descending order of the active key:
 * the byte total when sort_by_bytes is set, the hit count otherwise.
 *
 * The sort is a stable merge sort, so sites with equal keys keep their
 * current relative order (the same result the former bubble sort gave),
 * but it needs O(n log n) comparisons instead of O(n^2).
 */

/* Returns non-zero when candidate must be listed before reference. */
static
int site_ranks_higher( PSITE_DESCR candidate, PSITE_DESCR reference )
{
   return sort_by_bytes ? candidate->bytes > reference->bytes
                        : candidate->hit_count > reference->hit_count;
}

/*
 * Merges two already sorted next_site chains into one and returns its
 * head. Only next_site links are maintained here.
 */
static
PSITE_DESCR merge_site_chains( PSITE_DESCR left, PSITE_DESCR right )
{
   SITE_DESCR  anchor;
   PSITE_DESCR tail = &anchor;

   anchor.next_site = NULL;

   while ( left != NULL && right != NULL )
   {
      /* Take from the right chain only on a strictly greater key, so
         that ties keep their original order (stability). */
      if ( site_ranks_higher( right, left ) )
      {
         tail->next_site = right;
         right = right->next_site;
      }
      else
      {
         tail->next_site = left;
         left = left->next_site;
      }
      tail = tail->next_site;
   }

   /* Append whatever is left of the chain that did not run out. */
   tail->next_site = ( left != NULL ) ? left : right;

   return anchor.next_site;
}

/* Recursively merge-sorts a next_site chain and returns its new head. */
static
PSITE_DESCR merge_sort_sites( PSITE_DESCR head )
{
   PSITE_DESCR slow,
               fast,
               second_half;

   if ( head == NULL || head->next_site == NULL )
   {
      return head;
   }

   /* Find the middle: slow advances one node while fast advances two. */
   slow = head;
   fast = head->next_site;
   while ( fast != NULL && fast->next_site != NULL )
   {
      slow = slow->next_site;
      fast = fast->next_site->next_site;
   }

   /* Cut the chain in two after the middle node. */
   second_half = slow->next_site;
   slow->next_site = NULL;

   head = merge_sort_sites( head );
   second_half = merge_sort_sites( second_half );

   return merge_site_chains( head, second_half );
}

static
void sort_list( void )
{
   PSITE_DESCR curr_site,
               previous = NULL;

   site_list = merge_sort_sites( site_list );

   /* The merge only tracked next_site; rebuild the prev_site links. */
   for ( curr_site = site_list; curr_site != NULL; curr_site = curr_site->next_site )
   {
      curr_site->prev_site = previous;
      previous = curr_site;
   }
} /* sort_list */



/*
 * print_list()
 *
 * Writes one line per site to stdout, walking the list from its head.
 *
 * The column order follows the sort mode: with sort_by_bytes set the
 * byte total comes first and the hit count is shown in parentheses,
 * otherwise the hit count comes first.
 *
 * Both counters are unsigned int and are therefore printed with %u, so
 * totals above INT_MAX are not shown as negative numbers.
 */
static
void print_list( void )
{
   PSITE_DESCR curr_site;

   for ( curr_site = site_list; curr_site != NULL; curr_site = curr_site->next_site )
   {
      if ( sort_by_bytes )
      {
         printf( "%-30s: %10u bytes\t(%u hits)\n", curr_site->site_name,
                                                  curr_site->bytes,
                                                  curr_site->hit_count );
      }
      else
      {
         printf( "%-30s: %5u hit(s)\t(%u bytes)\n", curr_site->site_name,
                                                  curr_site->hit_count,
                                                  curr_site->bytes );
      }
   }
} /* print_list() */



/*
 * parse_access_file()
 *
 * Reads the access log from stdin line by line. Each line is split on
 * spaces and three of its fields are used:
 *
 *    2nd field  converted with atoi() and used as the byte value
 *    3rd field  the client ("browser") host
 *    9th field  the text after its first '/' is the site name
 *
 * Every usable record is passed to insert_in_list(). A record is
 * skipped when it has fewer than nine fields, when its 9th field
 * contains no '/', or when the site name starts with '-'.
 *
 * A line longer than the buffer is parsed from the part that fits; the
 * rest of it is discarded rather than being read as a new record.
 *
 * stdin is closed before returning.
 *
 * NOTE(review): in Squid's native access.log layout the 2nd field is
 * the elapsed time and the 5th is the reply size -- confirm which log
 * format this tool is meant to read.
 */
static
void parse_access_file()
{
   FILE *fpLog;

   char  line_buffer[8192];
   int   bytes;
   char *browser_name;
   char *site_name;

   /* Current strtok() token. */
   char *pbuffer;
   int   field;
   int   ch;

#ifdef _DEBUG
   /* Number of the line being processed; the first line is line 1. */
   int line = 0;
#endif

   fpLog = stdin;

   /* Read every line and extract informations. */
   while ( fgets( line_buffer, sizeof(line_buffer), fpLog ) != NULL )
   {
#ifdef _DEBUG
      line++;
#endif
      /* No newline in the buffer means the line did not fit (or is the
         unterminated last line): drop the remainder of it. This must be
         done before strtok() starts writing NULs into the buffer. */
      if ( strchr( line_buffer, '\n' ) == NULL )
      {
         while ( (ch = fgetc( fpLog )) != EOF && ch != '\n' )
         {
            /* discard */
         }
      }

      /* Field 1: time. Discarded. */
      if ( strtok( line_buffer, " " ) == NULL )
      {
         continue;
      }

      /* Field 2: bytes. Used. */
      pbuffer = strtok( NULL, " " );
      if ( pbuffer == NULL )
      {
         continue;
      }
      bytes = atoi( pbuffer );

      /* Field 3: host name. Used. */
      browser_name = strtok( NULL, " " );
      if ( browser_name == NULL )
      {
         continue;
      }

      /* Fields 4 to 8 are skipped; field 9 is kept in pbuffer. */
      pbuffer = browser_name;
      for ( field = 4; field <= 9 && pbuffer != NULL; field++ )
      {
         pbuffer = strtok( NULL, " " );
      }
      if ( pbuffer == NULL )
      {
         continue;
      }

      /* The site name starts right after the first '/'. */
      site_name = strchr( pbuffer, '/' );
      if ( site_name == NULL )
      {
         continue;
      }
      site_name++;
      if ( *site_name == '-' )
      {
         continue;
      }

#ifdef _DEBUG
      printf ( "%d\n", line );
      printf( "%d\t%s\t%s\n", bytes, browser_name, site_name );
#endif

      /* Insert informations. */
      insert_in_list( bytes, browser_name, site_name );
   }

   fclose( fpLog );
} /* parse_access_file */




/*
 * print_help
 *
 * Writes the usage message to stderr.
 *
 * The text is written with fputs() so that it is emitted verbatim and
 * never interpreted as a printf format string.
 */
static
void print_help()
{
   fputs( help_message, stderr );
} /* print_help */



/*
 * parse_options
 *
 * Parses the command line options with getopt().
 *
 *    -b      sets sort_by_bytes
 *    other   (including -h and unknown options) prints the help
 *            message and exits with status 1
 *
 * The getopt() result is kept in an int: stored in a plain char, the
 * end-of-options value -1 would never compare equal on platforms where
 * char is unsigned.
 */
static
void parse_options( int argc, char **argv )
{
   int opt;

   /* With no options at all getopt() returns -1 on the first call. */
   while ( (opt = getopt( argc, argv, "hb" )) != -1 )
   {
      switch ( opt )
      {
         case 'b':

            sort_by_bytes = 1;
            break;

         default:

            print_help();
            exit( 1 );
      }
   }
} /* parse_options */




/*
 * main()
 *
 * Runs the four stages of the program in order: read the command line
 * options, load the log from stdin into the site list, sort the list
 * and print it to stdout.
 *
 * Returns 0 when all stages complete. parse_options() ends the process
 * itself, with status 1, when it prints the help message.
 */
int main( int argc, char **argv )
{
   /* Options first: they select the key used to sort and print. */
   parse_options( argc, argv );

   /* Build the list, order it, then report it. */
   parse_access_file();
   sort_list();
   print_list();

   return 0;
} /* main */
