/*
** portmon.c -- Main procedure for the portmon daemon
** Copyright (C) 2002 Nik Reiman <nik@aboleo.net>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
** 
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
** 
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA
*/

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <ctype.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "portmon.h"
#include "config.h"

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

// Print the program name and version (PACKAGE and VERSION) followed by
// the author credit on stdout.
void version(void)
{
 printf("This is %s, version %s\n"
        "By Nik Reiman, nik@aboleo.net\n", PACKAGE, VERSION);
}

// Signal handler installed by main(): formats an "Exiting on signal N"
// line into the global err_msg buffer, hands it to log_write() and then
// ends the process with a success status.
//
// NOTE(review): snprintf, the logging path and exit are not on the POSIX
// list of async-signal-safe functions -- confirm this is acceptable here.
void portmon_exit(int signum)
{
 snprintf(err_msg, STRLARGE, "Exiting on signal %d\n", signum);
 log_write(err_msg);
 exit(EXIT_SUCCESS);
}

// Append one timestamped entry, formatted as "(<time>) - <msg>", to the
// file named by the global logfile.
//
// msg: text of the entry; it is written as-is, so the caller supplies the
//      trailing newline.  A NULL msg falls back to the global err_msg
//      buffer, which is what this function used to write unconditionally.
//
// Returns 0 on success, 1 if the log file could not be opened or written.
int log_write(char *msg)
{
 FILE *log_fp;
 char *stamp;
 const char *text = (msg != NULL) ? msg : err_msg;
 int rc = 0;

 if((log_fp = fopen(logfile, "a")) == NULL) {
  perror("fopen");
  return (1);
 }
 // get_time() hands back a heap buffer that we own and must release
 stamp = get_time(time(NULL));
 if(fprintf(log_fp, "(%s) - %s", stamp != NULL ? stamp : "unknown time",
            text != NULL ? text : "") < 0) {
  rc = 1;
 }
 free(stamp);
 // fclose flushes the buffered entry, so a failure here means data loss
 if(fclose(log_fp) != 0) {
  perror("fclose");
  rc = 1;
 }
 return (rc);
}

// Format a timestamp in ctime() style, without the trailing newline.
//
// cur_time: the time to format
//
// Returns a newly malloc'ed, NUL-terminated string of at most
// STRSMALL - 1 characters that the caller must free(), or NULL if the
// allocation fails.  If ctime() cannot convert the value the string is
// empty.
char *get_time(time_t cur_time)
{
 char *fmt_time = malloc(STRSMALL);
 const char *raw;

 if(fmt_time == NULL) {
  return (NULL);
 }
 raw = ctime(&cur_time);
 // snprintf always terminates the buffer, unlike strncpy
 snprintf(fmt_time, STRSMALL, "%s", raw != NULL ? raw : "");
 // cut at the newline ctime appends (no-op when there is none)
 fmt_time[strcspn(fmt_time, "\n")] = '\0';
 return (fmt_time);
}

// Print the version banner followed by a usage summary of every
// command-line option on stdout.  Long option names are listed only
// when the build defines HAVE_GETOPT_LONG.
void help(void)
{
 static const char *const usage_lines[] = {
  "Usage: portmon [options]\n",
  "Where options include:\n",
#ifdef HAVE_GETOPT_LONG
  " -n, --sleep=<time>\tMinutes between checks (1 hour)\n",
  " -c, --config=<file>\tHost file (./hosts)\n",
  " -l, --logfile=<file>\tlogfile location (/dev/null)\n",
  " -s, --severe=<number>\tHow many times to wait before severe notification (12)\n",
  " -e, --error=<command>\tCommand to run in case of error (built-in)\n",
  " -t, --timeout=<time>\tNumber of seconds for connect timeout (3)\n",
  " -g, --lag=<time>\tNumber of milliseconds for lagout notification\n",
  " -p, --probes=<number>\tNumber of probes to send out (3)\n",
  " -r, --report=[format]\tRun in report mode, with optional format\n",
  " -v, --verbose\t\tVerbose mode\n",
  " -d, --daemon\t\tDaemonize portmon\n",
  " -V, --version\t\tVersion\n",
  " -h, --help\t\tThis screen\n",
#else
  " -n <time>\tMinutes between checks (1 hour)\n",
  " -c <file>\tHost file (./hosts)\n",
  " -l <file>\tlogfile location (/dev/null)\n",
  " -s <number>\tHow many times to wait before severe notification (12)\n",
  " -e <command>\tCommand to run in case of error (built-in)\n",
  " -t <time>\tNumber of seconds for connect timeout (3)\n",
  " -g <time>\tNumber of milliseconds for lagout notification\n",
  " -p <number>\tNumber of probes to send out (3)\n",
  " -r [format]\tRun in report mode\n",
  " -v\t\tVerbose mode\n",
  " -d\t\tDaemonize portmon\n",
  " -V\t\tVersion\n",
  " -h\t\tThis screen\n",
#endif
 };
 size_t idx;

 version();
 for(idx = 0; idx < sizeof usage_lines / sizeof usage_lines[0]; idx++) {
  fputs(usage_lines[idx], stdout);
 }
}

int main(int argc, char *argv[])
{
 // 128 bytes for the name of the filename containing our hosts
 char *host_file = malloc(STRMED * sizeof(char));
 // default sleep time is 1 hour
 int sleep_time = 60 * 60;
 // severe being when to send another notification
 int severe = 12;
 // where we go when we die
 void portmon_exit(int);
 // options to parse for on command line
#ifdef HAVE_GETOPT_LONG
 int option_index;
 static struct option long_options[] = {
  {"sleep", 1, 0, 'n'},
  {"error", 1, 0, 'e'},
  {"config", 1, 0, 'c'},
  {"severe", 1, 0, 's'},
  {"logfile", 1, 0, 'l'},
  {"timeout", 1, 0, 't'},
  {"probes", 1, 0, 'p'},
  {"lag", 1, 0, 'g'},
  {"daemon", 0, 0, 'd'},
  {"report", 2, 0, 'r'},
  {"verbose", 0, 0, 'v'},
  {"version", 0, 0, 'V'},
  {"help", 0, 0, 'h'},
  {0, 0, 0, 0}
 };
#endif

 int i, j, k, num_hosts = 0, total_milli, num_good_probes;
 int host_down, ret = 0, num_probes = 3, lag_timeout = MAXINT, avg_time;
 pid_t pid = getpid();
 time_t tmp_time;
 char *host_msg = (char *)malloc(STRLARGE * sizeof(char));
 int days, hrs, mins;
 char run_script[STRLARGE];
 char *output_fmt = NULL, opt;

 err_msg = (char *)malloc(STRLARGE * sizeof(char));
 // default file for our hosts
 strncpy(host_file, "hosts", STRMED);
 // default place to log. ;)
 strncpy(logfile, "/dev/null", STRLARGE);
 // default timeout
 timeout = 3;
 run_script[0] = '\0';
 verbose = 0;
 daemonize_flag = 0;
 report_flag = 0;

 // read args off of the command line
 // getopt_long seems to be a linux extension, so all those bsd lamers
 // get to miss out on the fun!
#ifdef HAVE_GETOPT_LONG
 while((opt =
        getopt_long(argc, argv, "hvVdre:c:n:s:l:t:p:0", long_options,
                    &option_index)) != -1) {
#else
 while((opt = getopt(argc, argv, "hvVdre:c:n:s:l:t:p:")) != -1) {
#endif
  switch (opt) {
   case 'n':
    // -n <number> ... will be the new number of minutes to sleep for
    sleep_time = atoi(optarg) * 60;
    break;
   case 'e':
    // -e <file> ... run file as an error handler instead of err_action
    strncpy(run_script, optarg, 256);
    break;
   case 'c':
    // -c <file> ... will be an alternate host file
    strncpy(host_file, optarg, 128);
    break;
   case 's':
    // -s <time> ... severe warning (optional)
    severe = atoi(optarg);
    break;
   case 'l':
    // -l <logfile> ... where to log to
    strncpy(logfile, optarg, 256);
    break;
   case 't':
    // -t <seconds> ... set the connect timeout
    timeout = atoi(optarg);
    break;
   case 'p':
    // -p <number> ... number of probes to send out
    num_probes = atoi(optarg);
    break;
   case 'g':
    // -g <milliseconds> ... number of milliseconds to wait before lagging out
    lag_timeout = atoi(optarg);
    break;
   case 'd':
    // daemonize
    daemonize_flag = 1;
    break;
   case 'r':
    // report mode
    report_flag = 1;
    if(optarg) {
     output_fmt = (char *)malloc(STRMED * sizeof(char));
     strcpy(output_fmt, optarg);
    }
    break;
   case 'v':
    verbose = 1;
    break;
   case 'V':
    version();
    return (0);
    break;
   case 'h':
    // help me!
    help();
    return (-1);
    break;
   case '?':
    help();
    return (-1);
    break;
   default:
    printf("Unknown error parsing command line...exiting.\n");
    return (-1);
    break;
  }
 }

 // trap some signals to quit on
 if(signal(SIGQUIT, portmon_exit) == SIG_ERR) {
  perror("signal");
  exit(1);
 }
 if(signal(SIGINT, portmon_exit) == SIG_ERR) {
  perror("signal");
  exit(1);
 }
 if(signal(SIGTERM, portmon_exit) == SIG_ERR) {
  perror("signal");
  exit(1);
 }

 snprintf(err_msg, STRLARGE, "Portmon started by user %s\n", getenv("USER"));
 log_write(err_msg);

 // read in the configuration file
 if((num_hosts = read_config(host_file)) == -1) {
  printf("Failed reading config file %s\n", host_file);
  return (1);
 }

 if(report_flag) {
  report(num_hosts, num_probes, lag_timeout, output_fmt);
  return (0);
 }

 // fork into the background if we are told to daemonize
 if(daemonize_flag) {
  pid = fork();
  if(pid < 0) {
   perror("fork");
   return (1);
  }
 }

 // either go here if being run in the foreground, or as the child proc
 if(pid == 0 || daemonize_flag == 0) {
  // steps needed for proper daemonization
  if(daemonize_flag) {
   setsid();
   chdir("/");
   umask(0);
  }
  // main loop
  while(1) {
   total_milli = 0;
   num_good_probes = 0;
   // go through the array of hosts, and try to connect to each one
   for(i = 0; i < num_hosts; i++) {
    host_down = 0;
    host_msg[0] = '\0';
    for(j = 0; j < hosts[i].num_ports; j++) {
     num_good_probes = 0;
     avg_time = 0;
     if(hosts[i].ports[j].port == 0) {
      for(k = 0; k < num_probes; k++) {
       ret = icmp_ping(hosts[i].ports[j].addr);
       if(ret >= 0) {
        total_milli += ret;
        num_good_probes++;
       }
      }
     }
     else {
      for(k = 0; k < num_probes; k++) {
       if(verbose) {
        printf("Attempting to contact %s:%d -> ", hosts[i].name, hosts[i].ports[j].port);
       }
       ret = tcp_ping(hosts[i].ports[j].addr, hosts[i].ports[j].port);
       if(ret >= 0) {
        total_milli += ret;
        num_good_probes++;
       }
      }
     }

     if(ret < 0) {
      // host is first found to be down
      if((hosts[i].ports[j].is_down == 0
          && hosts[i].ports[j].downtime == 0) || severe < 1) {
       strcat(host_msg, err_msg);
       host_down = 1;
       hosts[i].ports[j].is_down = 1;
       hosts[i].ports[j].downtime = time(NULL);
      }
      else {
       hosts[i].ports[j].is_down++;
       // severe notification.  Only applies when the host
       // has been down for <severe> passes, and severe is
       // set to a number greater than 0
       if((hosts[i].ports[j].is_down % severe == 0) && severe >= 1) {
        tmp_time = time(NULL);
        tmp_time -= hosts[i].ports[j].downtime;

        days = (int)(tmp_time / (60 * 60 * 24));
	tmp_time -= days * (60 * 60 * 24);

        hrs = (int)(tmp_time / (60 * 60));
	tmp_time -= hrs * (60 * 60);

        mins = (int)(tmp_time / 60);
        snprintf(err_msg, STRLARGE,
                 "%s:%d is STILL down (down: %dd,%dh,%dm)\n",
                 hosts[i].name, hosts[i].ports[j].port, days, hrs, mins);
        log_write(err_msg);
        strcat(host_msg, err_msg);
        host_down = 1;
        hosts[i].ports[j].is_down = 0;
       }
      }
     }
     // the host is up
     else {
      // calculate average lag
      avg_time = total_milli / num_good_probes;
      // see if the host was down last time, and send notification
      // that it is back up
      if(hosts[i].ports[j].is_down) {
       tmp_time = time(NULL);
       tmp_time -= hosts[i].ports[j].downtime;
       days = (int)(tmp_time / (60 * 60 * 24));
       tmp_time -= days * (60 * 60 * 24);

       hrs = (int)(tmp_time / (60 * 60));
       tmp_time -= hrs * (60 * 60);

       mins = (int)(tmp_time / 60);
       if(avg_time > lag_timeout) {
        snprintf(err_msg, STRLARGE,
                 "%s:%d appears to be back up, but is above the lag threshold (down: %dd:%dh:%dm, %dms lag)\n",
                 hosts[i].name, hosts[i].ports[j].port, days, hrs, mins,
                 avg_time);
       }
       else {
        snprintf(err_msg, STRLARGE,
                 "%s:%d appears to be back up (down: %dd:%dh:%dm)\n",
                 hosts[i].name, hosts[i].ports[j].port, days, hrs, mins);
       }
       log_write(err_msg);
       strncat(host_msg, err_msg, STRLARGE);
       host_down = 1;
       hosts[i].ports[j].is_down = 0;
       hosts[i].ports[j].downtime = 0;
      }
      else if(avg_time > lag_timeout) {
       // since lag_timeout is defined to be INT_MAX, this will only
       // be entered if it was explicitly defined on the command line.
       snprintf(err_msg, STRLARGE,
                "%s:%d is up, but is above the lag threshold (%dms lag)\n",
                hosts[i].name, hosts[i].ports[j].port, avg_time);
       log_write(err_msg);
       strncat(host_msg, err_msg, STRLARGE);
       host_down = 1;
      }
     }
    }
    if(host_down == 1) {
     // run a script?
     if(strlen(run_script)) {
      exec_proc(run_script, time(NULL), hosts[i].name, host_msg);
     }
     // run the compiled in module
     else
      err_action(hosts[i].name, host_msg);
    }
   }
   // precious sleep
   sleep(sleep_time);
  }
 }
 // parent exits
 else if(daemonize_flag) {
  exit(0);
 }
 else {
 }

 return (0);
}
