#!/bin/perl
#============================================================= -*-perl-*-
#
# BackupPC_compressPool: Compress existing pool
#
# DESCRIPTION
#
#   Usage: BackupPC_compressPool [-t] [-r] <host>
#
#   Flags:
#     -t     test mode: do everything except actually replace the pool files.
#            Useful for estimating total run time without making any real
#            changes.
#     -r     read check: re-read the compressed file and compare it against
#            the original uncompressed file.  Can only be used in test mode.
#     -c #   number of children to fork.  BackupPC_compressPool can take
#            a long time to run, so to speed things up it spawns four children,
#            each working on a different part of the pool.  You can change
#            the number of children with the -c option.
#
#   BackupPC_compressPool is used to convert an uncompressed pool to
#   a compressed pool.  If BackupPC compression is enabled after
#   uncompressed backups already exist, BackupPC_compressPool can
#   be used to compress all the old uncompressed backups.
#
#   It is important that BackupPC not run while BackupPC_compressPool
#   runs.  Also, BackupPC_compressPool must run to completion before
#   BackupPC is restarted.
#
# AUTHOR
#   Craig Barratt  <cbarratt@users.sourceforge.net>
#
# COPYRIGHT
#   Copyright (C) 2001-2007  Craig Barratt
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#========================================================================
#
# Version 3.1.0, released 25 Nov 2007.
#
# See http://backuppc.sourceforge.net.
#
#========================================================================

use strict;
no  utf8;

use File::Find;
use File::Path;
use Compress::Zlib;
use Getopt::Std;
use lib "__INSTALLDIR__/lib";
use BackupPC::Lib;
use BackupPC::FileZIO;

die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
$bpc->ChildInit();
my $TopDir   = $bpc->TopDir();
my $LogDir   = $bpc->LogDir();
my $BinDir   = $bpc->BinDir();
my %Conf     = $bpc->Conf();
my $PoolDir  = "$TopDir/pool";
my $CPoolDir = "$TopDir/cpool";
my $Compress = $Conf{CompressLevel};
my %opts;
my $SigName = "";

#
# Catch various signals
#
foreach my $sig ( qw(INT BUS SEGV PIPE TERM ALRM HUP) ) {
    $SIG{$sig} = \&catch_signal;
}

$| = 1;

my $CompMaxRead  = 131072;          # 128K
my $CompMaxWrite = 6291456;         # 6MB

if ( !getopts("trc:", \%opts) || @ARGV != 0 ) {
    print("usage: $0 [-c nChild] [-r] [-t]\n");
    exit(1);
}
my $TestMode  = $opts{t};
my $ReadCheck = $opts{r};
my $nChild    = $opts{c} || 4;
if ( $ReadCheck && !$TestMode ) {
    print(STDERR "$0: -r (read check) option must have -t (test)\n");
    exit(1);
}
if ( $nChild < 1 || $nChild >= 16 ) {
    print(STDERR "$0: number of children (-c option) must be from 1 to 16\n");
    exit(1);
}
if ( !BackupPC::FileZIO->compOk ) {
    print STDERR <<EOF;
$0: Compress::Zlib is not installed.   You need to install it
before running this script.
EOF
    exit(1);
}
if ( $Compress <= 0 ) {
    print STDERR <<EOF;
$0: compression is not enabled. \%Conf{CompressLevel} needs
to be set to a value from 1 to 9.  Please edit the config.pl file and
re-start $0.
EOF
    exit(1);
}

my $Errors     = 0;
my $SubDirDone = 0;
my $SubDirCnt  = 0;
my $SubDirCurr = 0;
my $FileCnt    = 0;
my $FileOrigSz = 0;
my $FileCompressSz = 0;

my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
if ( $err eq "" ) {
    print <<EOF;
BackupPC is running on $Conf{ServerHost}.  You need to stop BackupPC
before you can upgrade the code.  Depending upon your installation,
you could run "/etc/init.d/backuppc stop".
EOF
    exit(1);
}

umask($Conf{UmaskMode});

sub cpoolFileName
{
    my($new) = @_;
    if ( $new !~ m{/(\w/\w/\w)/(\w{32})(_\d+)?$} ) {
        print("Error: Can't parse filename from $new\n");
        $Errors++;
        return;
    }
    my $dir = "$CPoolDir/$1";
    $new = "$dir/$2";
    mkpath($dir, 0, 0777) if ( !-d $dir );
    return $new if ( !-f $new );
    for ( my $i = 0 ; ; $i++ ) {
        return "${new}_$i" if ( !-f "${new}_$i" );
    }
}

sub doCompress
{
    my $file = ($File::Find::name =~ /(.*)/ && $1);
    local(*FH, *OUT);
    my(@s) = stat($file);
    my($n, $dataIn, $dataOut, $flush, $copy);

    if ( $SigName ) {
        print("Child got signal $SigName; quitting\n");
        reportStats();
        exit(0);
    }
    return if ( !-f $file );
    my $defl = deflateInit(
                -Bufsize => 65536,
                -Level   => $Compress,
           );
    if ( !open(FH, $TestMode ? "<" : "+<", $file) ) {
        print("Error: Can't open $file for read/write\n");
        $Errors++;
        return;
    }
    binmode(FH);
    while ( sysread(FH, $dataIn, $CompMaxWrite) > 0 ) {
        $flush = 0;
        $FileOrigSz += length($dataIn);
        my $fragOut = $defl->deflate($dataIn);
        if ( length($fragOut) < $CompMaxRead ) {
            #
            # Compression is too high: to avoid huge memory requirements
            # on read we need to flush().
            #
            $fragOut .= $defl->flush();
            $flush = 1;
            $defl = deflateInit(
                        -Bufsize => 65536,
                        -Level   => $Compress,
                   );
        }
        $dataOut .= $fragOut;
        if ( !$copy && length($dataOut) > $CompMaxWrite ) {
            if ( !open(OUT, "+>", "$file.__z") ) {
                print("Error: Can't open $file.__z for write\n");
                $Errors++;
                close(FH);
                return;
            }
	    binmode(OUT);
            $copy = 1;
        }
        if ( $copy && $dataOut ne "" ) {
            if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
                printf("Error: Can't write %d bytes to %s\n",
                                    length($dataOut), "$file.__z");
                $Errors++;
                close(OUT);
                close(FH);
                unlink("$file.__z");
                return;
            }
            $FileCompressSz += length($dataOut);
            $dataOut = undef;
        }
    }
    if ( !$flush ) {
        $dataOut .= $defl->flush();
        if ( $copy && $dataOut ne "" ) {
            if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
                printf("Error: Can't write %d bytes to %s\n",
                                    length($dataOut), "$file.__z");
                $Errors++;
                close(OUT);
                close(FH);
                unlink("$file.__z");
                return;
            }
            $FileCompressSz += length($dataOut);
            $dataOut = undef;
        }
    }
    my $newFile = cpoolFileName($file);
    if ( $TestMode ) {
        close(FH);
        if ( !open(FH, ">", $newFile) ) {
            print("Error: Can't open $newFile for write\n");
            $Errors++;
            close(FH);
            unlink("$file.__z");
            return;
        }
	binmode(FH);
    }
    if ( $copy ) {
        if ( !sysseek(OUT, 0, 0) ) {
            print("Error: Can't seek $file.__z to 0\n");
            $Errors++;
        }
        if ( !sysseek(FH, 0, 0) ) {
            print("Error: Can't seek $newFile to 0\n");
            $Errors++;
        }
        while ( sysread(OUT, $dataIn, $CompMaxWrite) > 0 ) {
            if ( syswrite(FH, $dataIn) != length($dataIn) ) {
                printf("Error: Can't write %d bytes to %s\n",
                                        length($dataIn), $file);
                $Errors++;
            }
        }
        if ( !truncate(FH, sysseek(OUT, 0, 1)) ) {
            printf("Error: Can't truncate %s to %d\n",
                                        $file, sysseek(OUT, 0, 1));
            $Errors++;
        }
        close(OUT);
        close(FH);
        unlink("$file.__z");
    } else {
        if ( !sysseek(FH, 0, 0) ) {
            print("Error: Can't seek $file to 0\n");
            $Errors++;
        }
        if ( syswrite(FH, $dataOut) != length($dataOut) ) {
            printf("Error: Can't write %d bytes to %s\n",
                                        length($dataOut), $file);
            $Errors++;
        }
        $FileCompressSz += length($dataOut);
        if ( !truncate(FH, length($dataOut)) ) {
            printf("Error: Can't truncate %s to %d\n", $file, length($dataOut));
            $Errors++;
        }
        close(FH);
    }
    if ( $TestMode ) {
        if ( $ReadCheck ) {
            checkRead($file, $newFile);
        }
        unlink($newFile);
    } else {
        rename($file, $newFile);
        my $atime = $s[8] =~ /(.*)/ && $1;
        my $mtime = $s[9] =~ /(.*)/ && $1;
        utime($atime, $mtime, $newFile);
    }
    (my $dir = $file) =~ s{/[^/]*$}{};
    $FileCnt++;
    if ( $SubDirCurr ne "" && $SubDirCurr ne $dir ) {
        $SubDirDone++;
        $SubDirCurr = $dir;
        reportStats();
    } elsif ( $SubDirCurr eq "" ) {
        $SubDirCurr = $dir;
    }
}

sub reportStats
{
    print("stats: $SubDirDone $SubDirCnt $FileCnt $FileOrigSz"
                . " $FileCompressSz $Errors\n");
}

sub checkRead
{
    my($file, $cfile) = @_;
    return if ( !-f $file || !-f $cfile );
    my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
                                || die("can't open $cfile for read\n");
    my($n, $nd, $r, $d, $d0);
    local(*FH);

    if ( !open(FH, "<", $file) ) {
        print("can't open $file for check\n");
        $Errors++;
        $f->close();
        return;
    }
    binmode(FH);
    #print("comparing $file to $cfile\n");
    while ( 1 ) {
        $n = 1 + int(rand($CompMaxRead) + rand(100));
        $r = $f->read(\$d, $n);
        sysread(FH, $d0, $n);
        if ( $d ne $d0 ) {
            print("Botch read data on $cfile\n");
        }
        last if ( length($d) == 0 );
    }
    if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
        printf("Botch at EOF on $cfile got $r (%d,%d)\n",
                        sysseek(FH, 0, 1), $n);
        $Errors++;
    }
    $f->close;
    close(FH);
}

sub checkReadLine
{
    my($file, $cfile) = @_;
    return if ( !-f $file || !-f $cfile );
    my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
                                || die("can't open $cfile for read\n");
    my($n, $nd, $r, $d, $d0);
    local(*FH);

    if ( !open(FH, "<", $file) ) {
        print("can't open $file for check\n");
        $Errors++;
        $f->close();
        return;
    }
    binmode(FH);
    while ( 1 ) {
        $d0 = <FH>;
        $d  = $f->readLine();
        if ( $d ne $d0 ) {
            print("Botch read data on $cfile\n");
        }
        last if ( length($d) == 0 );
    }
    if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
        printf("Botch at EOF on $cfile got $r (%d,%d)\n",
                        sysseek(FH, 0, 1), $n);
        $Errors++;
    }
    $f->close;
    close(FH);
}

sub catch_signal
{
    $SigName = shift;
}

sub compressHostFiles
{
    my($host) = @_;
    my(@Files, @Backups, $fh, $data);
    local(*FH);

    if ( !defined($host) ) {
        for ( my $i = 0 ; ; $i++ ) {
            last if ( !-f "$LogDir/LOG.$i" );
            push(@Files, "$LogDir/LOG.$i");
        }
    } else {
        @Backups = $bpc->BackupInfoRead($host);
        for ( my $i = 0 ; $i < @Backups ; $i++ ) {
            next if ( $Backups[$i]{compress} );
            push(@Files, "$TopDir/pc/$host/SmbLOG.$Backups[$i]{num}");
            push(@Files, "$TopDir/pc/$host/XferLOG.$Backups[$i]{num}");
        }
        push(@Files, "$TopDir/pc/$host/SmbLOG.bad");
        push(@Files, "$TopDir/pc/$host/XferLOG.bad");
        for ( my $i = 0 ; ; $i++ ) {
            last if ( !-f "$TopDir/pc/$host/LOG.$i" );
            push(@Files, "$TopDir/pc/$host/LOG.$i");
        }
    }
    foreach my $file ( @Files ) {
        if ( $SigName ) {
            print("Child got signal $SigName; quitting\n");
            reportStats();
            exit(0);
        }
        next if ( !-f $file );
        if ( !BackupPC::FileZIO->compressCopy($file, "$file.z", undef,
                                        $Compress, !$TestMode) ) {
            print("compressCopy($file, $file.z, $Compress, !$TestMode)"
                . " failed\n");
            $Errors++;
        } elsif ( $TestMode ) {
            checkReadLine($file, "$file.z") if ( $ReadCheck );
            unlink("$file.z");
        }
    }
}

sub updateHostBackupInfo
{
    my($host) = @_;
    if ( !$TestMode ) {
        my @Backups = $bpc->BackupInfoRead($host);
        for ( my $i = 0 ; $i < @Backups ; $i++ ) {
            $Backups[$i]{compress} = $Compress;
        }
        $bpc->BackupInfoWrite($host, @Backups);
    }
}

my @Dirs = split(//, "0123456789abcdef");
my @Hosts = sort(keys(%{$bpc->HostInfoRead()}));
my $FDread;
my @Jobs;

#
# First make sure there are no existing compressed backups
#
my(%compHosts, $compCnt);
for ( my $j = 0 ; $j < @Hosts ; $j++ ) {
    my $host = $Hosts[$j];
    my @Backups = $bpc->BackupInfoRead($host);
    for ( my $i = 0 ; $i < @Backups ; $i++ ) {
        next if ( !$Backups[$i]{compress} );
        $compHosts{$host}++;
        $compCnt++;
    }
}
if ( $compCnt ) {
    my $compHostStr = join("\n  + ", sort(keys(%compHosts)));
    print STDERR <<EOF;
BackupPC_compressPool: there are $compCnt compressed backups.
BackupPC_compressPool can only be run when there are no existing
compressed backups. The following hosts have compressed backups:

  + $compHostStr

If you really want to run BackupPC_compressPool you will need to remove
all the existing compressed backups (and /home/pcbackup/data/cpool).
Think carefully before you do this. Otherwise, you can just let new
compressed backups run and the old uncompressed backups and pool will
steadily expire.
EOF
    exit(0);
}

#
# Next spawn $nChild children that actually do all the work.
#
for ( my $i = 0 ; $i < $nChild ; $i++ ) {
    local(*CHILD);
    my $pid;
    if ( !defined($pid = open(CHILD, "-|")) ) {
        print("Can't fork\n");
        next;
    }
    my $nDirs  = @Dirs  / ($nChild - $i);
    my $nHosts = @Hosts / ($nChild - $i);
    if ( !$pid ) {
        #
        # This is the child.
        # First process each of the hosts (compress per-pc log files etc).
        #
        for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
            compressHostFiles($Hosts[$j]);
        }
        #
        # Count the total number of directories so we can estimate the
        # completion time.  We ignore empty directories by reading each
        # directory and making sure it has at least 3 entries (ie, ".",
        # ".." and a file).
        #
        for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
            my $thisDir = $Dirs[$j];
            next if ( !-d "$PoolDir/$thisDir" );
            foreach my $dir ( <$PoolDir/$thisDir/*/*> ) {
                next if ( !opendir(DIR, $dir) );
                my @files = readdir(DIR);
                closedir(DIR);
                $SubDirCnt++ if ( @files > 2 );
            }
        }
        #
        # Now process each of the directories
        #
        for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
            my $thisDir = shift(@Dirs);
            next if ( !-d "$PoolDir/$thisDir" );
            find({wanted => sub { doCompress($File::Find::name); },
                                   no_chdir => 1}, "$PoolDir/$thisDir");
        }
        #
        # Last, update the backup info file for each of the hosts
        #
        for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
            updateHostBackupInfo($Hosts[$j]);
        }
        $SubDirDone = $SubDirCnt;
        reportStats();
        exit(0);
    }
    #
    # This is the parent.  Peel off $nDirs directories, $nHosts hosts,
    # and continue
    #
    $Jobs[$i]{fh}  = *CHILD;
    $Jobs[$i]{pid} = $pid;
    vec($FDread, fileno($Jobs[$i]{fh}), 1) = 1;
    splice(@Dirs,  0, $nDirs);
    splice(@Hosts, 0, $nHosts);
}

#
# compress the main log files (in the parents)
#
compressHostFiles(undef);

#
# Now wait for all the children to report results and finish up
#
my $TimeStart = time;
my $DonePct   = 0;
my $GotSignal = "";
while ( $FDread !~ /^\0*$/ ) {
    my $ein = $FDread;
    select(my $rout = $FDread, undef, $ein, undef);
    if ( $SigName ne $GotSignal ) {
        print("Got signal $SigName; waiting for $nChild children to cleanup\n");
        $GotSignal = $SigName;
    }
    for ( my $i = 0 ; $i < $nChild ; $i++ ) {
        next if ( !vec($rout, fileno($Jobs[$i]{fh}), 1) );
        my $data;
        if ( sysread($Jobs[$i]{fh}, $data, 1024) <= 0 ) {
            vec($FDread, fileno($Jobs[$i]{fh}), 1) = 0;
            close($Jobs[$i]{fh});
            next;
        }
        $Jobs[$i]{mesg} .= $data;
        while ( $Jobs[$i]{mesg} =~ /(.*?)[\n\r]+(.*)/s ) {
            my $mesg = $1;
            $Jobs[$i]{mesg} = $2;
            if ( $mesg =~ /^stats: (\d+) (\d+) (\d+) (\d+) (\d+) (\d+)/ ) {
                $Jobs[$i]{SubDirDone}     = $1;
                $Jobs[$i]{SubDirCnt}      = $2;
                $Jobs[$i]{FileCnt}        = $3;
                $Jobs[$i]{FileOrigSz}     = $4;
                $Jobs[$i]{FileCompressSz} = $5;
                $Jobs[$i]{Errors}         = $6;
                $SubDirDone = $SubDirCnt = $FileCnt = $FileOrigSz = 0;
                $FileCompressSz = $Errors = 0;
                my $numReports = 0;
                for ( my $j = 0 ; $j < $nChild ; $j++ ) {
                    next if ( !defined($Jobs[$j]{SubDirDone}) );
                    $SubDirDone     += $Jobs[$j]{SubDirDone};
                    $SubDirCnt      += $Jobs[$j]{SubDirCnt};
                    $FileCnt        += $Jobs[$j]{FileCnt};
                    $FileOrigSz     += $Jobs[$j]{FileOrigSz};
                    $FileCompressSz += $Jobs[$j]{FileCompressSz};
                    $Errors         += $Jobs[$j]{Errors};
                    $numReports++;
                }
                $SubDirCnt  ||= 1;
                $FileOrigSz ||= 1;
                my $pctDone = 100 * $SubDirDone / $SubDirCnt;
                if ( $numReports == $nChild && $pctDone >= $DonePct + 1 ) {
                    $DonePct = int($pctDone);
                    my $estSecLeft = 1.2 * (time - $TimeStart)
                                         * (100 / $pctDone - 1);
                    my $timeStamp = $bpc->timeStamp;
                    printf("%sDone %2.0f%% (%d of %d dirs, %d files,"
                            . " %.2fGB raw, %.1f%% reduce, %d errors)\n",
                                $timeStamp,
                                $pctDone, $SubDirDone, $SubDirCnt, $FileCnt,
                                $FileOrigSz / (1024 * 1024 * 1000),
                                100 * (1 - $FileCompressSz / $FileOrigSz));
                    printf("%s    Est complete in %.1f hours (around %s)\n",
                                $timeStamp, $estSecLeft / 3600,
                                $bpc->timeStamp(time + $estSecLeft, 1))
                                            if ( $DonePct < 100 );
                }
            } else {
                print($mesg, "\n");
            }
        }
    }
}
if ( $Errors ) {
    print("Finished with $Errors errors!!!!\n");
    exit(1);
}
