#!/usr/bin/perl
#
# mirror - a FTP mirror program for Perl5
#
# Last edited: Mon Aug 21 17:26:02 1995 by nicholas@tao.binary9.com $

$perlstamp1="/usr/homes/nicholas/distrib/mirror/mirror";
$perlstamp2="2.5.46";
$perlstamp3="nicholas\@tao.binary9.com";
$perlstamp4="Mon Aug 21 17:26:02 1995";  # end;

# Usage:
# mirror [dir/filespec]

# if no parameters are passed, mirror looks for ".mirror" in the current directory
# if a directory is passed, mirror looks for ".mirror"  in that directory
# if a filename is passed, mirror uses that file as the ".mirror"

# the format of ".mirror" is
# host         [full address of host]
# remotedir    [directory on host to begin mirroring]
# localdir     [directory to put files]
# skip         [regular express of files to skip.]

# You can have multiple skip statements
# example

# host                 sunsite.unc.edu
# remotedir            /pub/Linux/Incoming
# localdir             /mirrors/sunsite.incoming
# skip                 ^\.
# skip                 ^README


# need mftp & timelocal
require "mftp.pl";
require "timelocal.pl";

$toggle=0;


# getCallback uses two globals $getfile,$getfilesize
sub getCallback {
    local($amt)=@_;

    if ($toggle!=5) {
	$toggle++;
	return;
    }

    print "\r"." " x 77 ."\r";
    printf "$getfile %s/%s (%.0f%% done)",niceSize($amt),niceSize($getfilesize),($amt/$getfilesize)*100;

    $toggle=0;
}

# dbg - my own debug routines
#  levels are roughly 
#     0 - none
#     1 - status messages (Connect,Error)
#     2 - detailed status messages
#     3 - file statuses
#     4 - full blown debugging
sub dbg {
    print ' ' x (($_[0]-1)*2) . $_[1]."\n" if $DEBUG >= $_[0];
}

# choak - my DIE routine
sub choak {
    ftp::close();
    print @_;
    print "\n";
    exit 1;
}

# normalizeDir - a simple routine to ensure that directories have the trailing slash removed
sub normalizeDir {
    ($_[0]=~/(.*)\/$/?$1:$_[0]);
}

# niceSize - a nice print routine for file sizes
sub niceSize {
    local($meg,$gig)=(1024*1024,1024*1024*1024);

    if ($_[0] >= $gig ) {
	sprintf("%.2fG",$_[0],$gig);
    }
    elsif ($_[0] >= $meg) {
	sprintf("%.2fM",$_[0]/$meg);
    }
    elsif ($_[0] >= 1024) {
	sprintf("%.2fK",$_[0]/1024);
    }
    else {
        sprintf("%d",$_[0]);
    }
}

# getDate - gets the date from ascii to seconds since ...

@mons=("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec");

sub getDate {
    local($m,$d,$t)=@_;
    local($a,$mon,$time);

    for $a (0..11) {
	if ($m eq $mons[$a]) {
	    $mon=$a;
	    last;
	}
    }

    if ($t=~/(.*):(.*)/) {
	$time=timelocal(0,$2,$1,$d,$mon,(localtime(time))[5]);
    } else {
	$t=$t-1900 if $t>1900;
	$time=timelocal(0,0,0,$d,$mon,$t);
    }

    $time;
}

# stripDir - makes a relative path out of an absolute one
#  params: $whattostrip,$fromwhat
sub stripDir {
    local($s,$d)=(normalizeDir($_[0]),normalizeDir($_[1]));

    ($d=~/$s\/(.*)/ ? $1 : $_[1]);
}

# getRemoteDir - routine to get the remote directory listing (recursive). We'll just use
#  a stack and depth-first searching (just like we learned in school).
#  the resultant list is stored in a relative path into %thelist
sub getRemoteDir {
    # our only option, the directory. We assume that we have a connection.
    local($topdir)=normalizeDir($_[0]);

    # our other local variables
    local(%thelist);
    local(@dirstack,@files,$file);
    local($perms,$links,$uid,$gid,$size,$mon,$day,$time,$filename,$full,$skip);
    local($filecount,$dircount,$bytes)=(0,0,0);
    local($filedate);

    # prime our pump
    push @dirstack,$topdir;
    $dircount++;

    # and switch to ascii
    ftp::ascii();

    dbg 1,"Reading Directories from $topdir";

    # and do the loop thingy
    while ($dir=pop @dirstack) {
	# make sure our @files is reset
	undef @files;

	# and try to read the directory
	dbg 2,"Getting $dir";
	choak $ftp::Error unless @files=ftp::dir($dir);

	# ok, we got each line of the dir listing in @files. Go thru
	# and parse each one to figure our what type of file this is.
	forfile: for $file (@files) {
	    ($perms,$links,$uid,$gid,$size,$mon,$day,$time,$filename)=split /\s+/,$file;

	    # skip the stupid ones
	    next if $filename eq ".";
	    next if $filename eq "..";

	    # and skip our global @skiplist
	    for $skip (@skiplist) {
		if ($filename=~/$skip/) {
		    dbg 3,"Skipping $filename";
		    next forfile;
		}
	    }

	    # go figure it out from $perms
	    if ($perms=~/^-/) {
		# its a file
		# make sure it doesn't have an "*" at the end of it
		chop $filename if $filename=~/\*$/;

		# make the fullpath out of it
		$full=stripDir($topdir,$dir."/".$filename);

		# get the date
		$filedate=getDate($mon,$day,$time);

		# add it to our list
		$thelist{$full}=$size.":".$filedate;

		$filecount++;
		$bytes=$bytes+$size;
	    }
	    elsif ($perms=~/^d/) {
		# its a directory
		# make sure it doesn't have an "/" at the end of it
		# and make a full path of it
		$full=normalizeDir($dir."/".$filename);

		# and add it to our list
		push @dirstack,$full;

		$dircount++;
	    }
	    elsif ($perms=~/^l/) {
		# its a symlink (what to do ... what to do ... ignore for now.)
		# test and see if we can CD into, if so, then its a directory else
		# is a file
		if (ftp::cwd($dir."/".$filename)) {
		    # a directory
		    $full=normalizeDir($dir."/".$filename)."/.";

		    # and add it to our list
		    push @dirstack,$full;

		    $dircount++;
		} else {
		    # a file
		    # make the fullpath out of it
		    $full=stripDir($topdir,$dir."/".$filename);

		    # get the date
		    $filedate=getDate($mon,$day,$time);

		    # add it to our list
		    $thelist{$full}=$size.":".$filedate;

		    $filecount++;
		    $bytes=$bytes+$size;
		}
	    }
	}

	# now, we've parsed all of @files, time for the next dir
	next;
    }

    # hey! No more directories. That means that %thelist is now full of the files
    # with relative pathnames.

    # We're done. return the list
    ($filecount,$dircount,$bytes,%thelist);
}

# getLocalDir - routine to get a local directory. The sister of getRemoteDir but
# using perl's dir i/o instead of FTP
sub getLocalDir {
    # the directory to scan
    local($topdir)=normalizeDir($_[0]);

    # our locals
    local(%thelist);
    local(@dirstack,$file,@files,$full);
    local($filecount,$dircount,$bytes)=(0,0,0);

    # prime the pump
    push @dirstack,$topdir;
    $dircount++;

    dbg 1,"Getting local directory tree starting at $topdir";

    # loop til we're out of directories
    while ($dir=pop @dirstack) {
	# clear @files
	undef @files;

	# read the directory
	dbg 2,"Getting $dir";
	choak "Error reading directory: $!" unless opendir DIR,$dir;
	choak "Error reading directory: $!" unless @files=readdir DIR;
	closedir DIR;

	# ok, lets go thru this
	for $file (@files) {
	    next if $file eq ".";
	    next if $file eq "..";

	    next if $file eq ".mirror";

	    # make fullpath of it
	    $full=$dir."/".$file;

	    # lets see what kind it is
	    if (-l $full) {
		# eeeuuuuwww... a symlink ... ignore for now
	    }
	    elsif (-d _) {
		# its a dir
		# add it to our list
		push @dirstack,$full;

		$dircount++;
	    }
	    elsif (-f _) {
		# its a regular file
		local($size)=-s $full;

		# strip it (to relative)
		$full=stripDir($topdir,$full);

		# add it to our list
		$thelist{$full}=$size;

		$filecount++;
		$bytes=$bytes+$size;
	    }
	}

	# done with this dir
	next;
    }

    # no more directories, we're done
    ($filecount,$dircount,$bytes,%thelist);
}

# compareDirs - routine to compare the two directories
#  should get the two %arrays via passby reference, but until then we'll
#  use the globals %rlist and %llist
#  this generates @getlist and @dellist
#  @getlist is "filename:size", @dellist is just "filename"
sub compareDirs {
    local($file,$size,$filedate);

    # initialize some vars
    $keepcount=0;	       
    $keepsize=0;
    $updatecount=0;
    $updatesize=0;
    $delcount=0;
    $delsize=0;
    $getcount=0;
    $getsize=0;

    # loop thru all the remote files
    for $file (sort keys %rlist) {

	($size,$filedate)=split /:/,$rlist{$file};

	# do we have this file locally?
	if (defined($llist{$file})) {
	    # yes, check the size

	    if ($llist{$file}==$size) {
		# same size, assume its OK
		dbg 3,"Need to keep $file";

		$keepcount++;
		$keepsize=$keepsize+$size;
	    } else {
		# not same size, we need to update
		# add to both lists
		push @getlist,"$file:".$rlist{$file};
		push @dellist,$file;
		dbg 3,"Need to update $file";

		$updatecount++;
		$updatesize=$updatesize+$size;
	    }

	    # tag it from the local list
	    delete $llist{$file};
	} else {
	    # nope, we need to get it
	    push @getlist,"$file:".$rlist{$file};
	    dbg 3,"Need to get $file";

	    $getcount++;
	    $getsize=$getsize+$size;
	}
    }

    # all the remote files have been gotten
    # anything remaining in %llist are extra files that need to be
    # deleted
    for $file (keys %llist) {
	    push @dellist,$file;
	    dbg 3,"Removing $file";

	    $delcount++;
	    $delsize=$delsize+$llist{$file};
    }

    # all done, at this point %rlist & %llist are unreliable so
    # kill them
    undef %rlist;
    undef %llist;
}

# createDirs - create all the subdirs in a pathspec
sub createDirs {
    local(@dirs)=split /\//,$_[0];
    local($dr,$d)=("/");

    # get rid of filename
    pop @dirs;
    shift @dirs;

    dbg 4,"\@dirs: '".join(" ",@dirs)."'";

    for $d (@dirs) {
	$dr=$dr.$d;

        if (! -d $dr) {
            dbg 3,"Creating $dr";

            `mkdir $dr`;
            `chmod 755 $dr`;
        }
	$dr=$dr."/";
    }
}


# deleteFiles - removes files from @dellist
sub deleteFiles {
    local($topdir)=normalizeDir($_[0]);
    local($file);
    
    for $file (@dellist) {
	unlink $topdir."/".$file;
    }
}

# getFiles - gets the files from the remote host and stores them
#  locally. Assumes a global @getlist
sub getFiles {
    # top of the local & remote dirs
    local($ltopdir,$rtopdir)=(normalizeDir($_[0]),normalizeDir($_[1]));

    # local vars
    local($file,$filename,$rfull,$lfull,$size,$tstart,$tend,$bps);
    local($start,$end,$filedate);
    local($bsize)=0;

    # go into binary
    ftp::binary();

    dbg 1,sprintf("Getting %d (%s) files, keeping %d (%s), updating %d (%s) and deleting %d (%s).",
		  $getcount,niceSize($getsize),$keepcount,niceSize($keepsize),
		  $updatecount,niceSize($updatesize),$delcount,niceSize($delsize));

    # start the timer
    $start=time;

    # loop thru all the files
    for $file (sort @getlist) {
	# extract the info
	($filename,$size,$filedate)=split(/:/,$file);

	# make the  local full and remote full names
	$lfull=$ltopdir."/".$filename;
	$rfull=$rtopdir."/".$filename;
       
	# ensure the destination directories exist
	createDirs($lfull);

	dbg 3,"Getting $rfull";

	# start the timer
	$tstart=time;

#	print "\r$filename (".niceSize($size).")".' ' x 40 if $DEBUG==2;

	$getfile=$rfull;
	$getfilesize=$size;

	# get the file
	if ($size == 0) {
	    `touch $lfull`;
	} else {
	    choak $ftp::Error unless ftp::get($rfull,$lfull);
	}

	# stop the timer
	$tend=time;

	print "\r"." " x 77 . "\r";

	# set the time/date
	utime $filedate,$filedate,$lfull;

#	print "." if $DEBUG==2;

	$bps=$size/(($tend-$tstart)+1);
	$bsize=$bsize+$size;
	dbg 3,"   at ".niceSize($bps)." bytes per second";
    }

    # stop the timer
    $end=time;

    print "\n" if $DEBUG==2;

    $bps=$bsize/(($end-$start)+1);
    dbg 1,"Read data at ".niceSize($bps)." bytes per second";
}

# readRC - reads the rc file
sub readRC {
    local($rcfile)=$_[0];
    local($rhost,$rdir,$ldir,@skip);

    # do some logic checking for the rc file;
    # is it a dir?
    if (-d $rcfile) {
	# make a file
	$rcfile=normalizeDir($rcfile)."/.mirror";
    }

    open(rcfile) || choak "Couldn't open $rcfile for reading: $!";

    while (<rcfile>) {
	next if /^\s*#/ || /^$/;

        if (/\s*host\s*(.*)/) {
            $rhost=$1;
            next;
        }
        if (/\s*remotedir\s*(.*)/) {
            $rdir=$1;
            next;
        }
        if (/\s*localdir\s*(.*)/) {
            $ldir=$1;
            next;
        }
        if (/\s*skip\s*(.*)/) {
            push @skip,$1;
            next;
        }
    }
    close rcfile;

    dbg 4,"rhost $rhost, rdir $rdir, ldir $ldir, skip = ".join(",",@skip);

    ($rhost,$rdir,$ldir,@skip);
}

# openHost - attempts to open remote host
sub openHost {
    local($host)=shift @_;
    local($count)=(@_?shift @_:10);

    # loop thru count reps
    while ($count--) {
	dbg 1,"Trying to open $host";

	return if ftp::open($host,"anonymous","mirror\@binary9.com");

	ftp::close();
	dbg 2,"Retrying to open $host in 10 seconds";
	sleep 10;
    }

    choak "Couldn't log into remote: ".$ftp::Error;
}

# ----------------------------------------------
$DEBUG=1;
$|=1;

print STDERR "Mirror v$perlstamp2 - $perlstamp3 ($perlstamp4)\n\n";

$rc="./.mirror";
$rc=shift @ARGV if @ARGV;

($remotehost,$remotedir,$localdir,@skiplist)=readRC($rc);

openHost($remotehost);

($rfiles,$rdirs,$rbytes,%rlist)=getRemoteDir($remotedir);  
($lfiles,$ldirs,$lbytes,%llist)=getLocalDir($localdir);

dbg 1,"Mirroring $rfiles files in $rdirs dirs for a total of ".niceSize($rbytes)." bytes.";

compareDirs();
deleteFiles($localdir);

getFiles($localdir,$remotedir);

ftp::close();
exit;




