#!/usr/bin/perl -w 
#############################################################
#  This script takes a path as input and traverses recursively to verify flac and shn by examining ffp and md5 checksums 
#  - run with --help for the manual
#  - relies on several command line tools: flac, metaflac, md5sum and shntool
#  - originally written by Dr. Unclear, distributed under GPL license
#############################################################

use strict;
use warnings;
use Cwd;
use File::Find;
use Getopt::Long;
use File::Basename;
use File::Spec;

# Script-wide state.  Everything is declared up front as file-scoped lexicals
# because the main loop and the subroutines further down the file share
# these names.
my ($wdir, @dirs, $absdir, @flac_files, @shn_files,
    %md5s, %ffps, %st5s, %cfps,
    $md5sum_cmd, $metaflac_cmd, $flac_cmd, $shntool_cmd,
    $options, $opt_md5, $opt_ffp, $opt_st5, $opt_flac_test, $opt_cfp,
    $opt_pattern, $ffp_regex, $ffp_arguments, $st5_regex, $st5_arguments,
    $cfp_arguments, $cfp_regex, $single_dir, $help_me, $opt_case);

##########################
# SET PATHS TO YOUR COMMANDLINE PROGRAMS, spaces in paths are acceptable
# Note if you're running perl from cygwin on Win, you'll need to use cygdrive paths:
# '/cygdrive/c/path/to/md5sum' OR '/bin/md5sum'
# On windows (activestate perl) use: 'c:\path to\your\md5sum.exe'
# TODO : make it work with files in your path by default
$md5sum_cmd = 'C:\path to\md5sum.exe';
$metaflac_cmd = 'C:\path to\metaflac.exe';
$flac_cmd = 'C:\path to\flac.exe';
$shntool_cmd = 'C:\path to\shntool.exe';
##########################


# Defaults: every check is enabled and coverage matching is case sensitive;
# recursion is on and help is off.  The negatable options below switch
# individual checks off (--no-md5, --no-ffp, ...).
$opt_flac_test = $opt_md5 = $opt_ffp = $opt_st5 = $opt_cfp = $opt_case = 1;
$single_dir = $help_me = 0;
$options = GetOptions(
   "test|t!"     => \$opt_flac_test,
   "md5|m!"      => \$opt_md5,
   "ffp|f!"      => \$opt_ffp,
   "st5!"        => \$opt_st5,
   "cfp!"        => \$opt_cfp,
   "case|c!"     => \$opt_case,
   "single|s"    => \$single_dir,
   "help|h"      => \$help_me,
   "pattern|p=s" => \$opt_pattern,
);

# GetOptions has already warned about whatever it could not parse; stop here
# instead of silently running with defaults the user did not ask for.
unless ($options) {
   die "Invalid command line options.\nUse $0 --help for more detailed help.\n";
}

# if someone asks for help, give it to them (helpme prints and exits)
helpme() if $help_me == 1;

# Get path from the command line argument
my $path = shift
   or die "usage: $0 [options] (/)path/to/verify/\nUse $0 --help for more detailed help.";

# every external tool must exist as a plain file before we start
foreach my $tool ($md5sum_cmd, $metaflac_cmd, $flac_cmd, $shntool_cmd) {
   next if -f $tool;
   die "Paths not set properly.  Please see --help for more information.";
}

# set up manual verification regex stuff: for each checksum flavour, the
# regex recognises a checksum line in the checksum file and the arguments
# are handed to the tool that recomputes the value.
# st5 regex and arguments
$st5_regex = '\s{2}\[shntool\]\s{2}';
$st5_arguments = 'hash -q';
# cfp regex and arguments
$cfp_regex = '\s{2}\[shntool\]\s{2}composite';
$cfp_arguments = 'hash -q -c';
# ffp regex and arguments
$ffp_regex = '^\w.+?\:\w.+';
$ffp_arguments = '--show-md5sum --with-filename';

# if directory match pattern is not set as option, use a default
if ($opt_pattern) {
   print "Looking for directories matching pattern: $opt_pattern\n\n";
} else {
   $opt_pattern = '^gd\d{2,4}';
}

# Set the original working dir so we can find our way home
$wdir = getcwd;

# Unless we've opted to check with a single directory, read standard gd directories into an array
if ($single_dir == 0) {
   # inside the callback $_ is the entry's base name, so the pattern is
   # matched against the directory name only, not the whole path
   find( sub { push @dirs, $File::Find::name if -d && /$opt_pattern/ }, $path );
   print "No matching directories found!\nUse the -s flag if you're trying to verify a single directory.\n" if (!@dirs);
} else {
   # we've opted to test just one directory, so the array will contain just the path we fed the script
   push (@dirs, $path);
}

# Loop through each matching directory, collect its checksum and music files
# and run every verification that is both enabled and applicable.
foreach my $dir (@dirs) {
   # set the absolute path from the relative path, for use in command line functions
   $absdir = File::Spec->rel2abs($dir, $wdir);
   print "=============================================\nWorking with $absdir:\n\n";
   # "next" takes a loop label, not a message, so report the problem
   # explicitly before moving on to the next directory
   unless (chdir $absdir) {
      print ">>> Can't chdir to $absdir:$!\n";
      next;
   }

   # empty the containers left over from the previous directory
   %ffps = ();
   %md5s = ();
   %st5s = ();
   %cfps = ();
   @shn_files = ();
   @flac_files = ();

   # One traversal collects everything: checksum files are stored as
   # name => containing directory, music files as full paths.
   find( sub {
      return unless -f $_;
      $md5s{$_} = $File::Find::dir if /\.md5$/;
      $ffps{$_} = $File::Find::dir if /\.ffp$|\.ffp\.txt$/;
      $st5s{$_} = $File::Find::dir if /\.st5$/;
      $cfps{$_} = $File::Find::dir if /\.cfp$/;
      push @flac_files, $File::Find::name if /\.flac$/;
      push @shn_files, $File::Find::name if /\.shn$/;
   }, $absdir );

   ######################################
   # Main logic determining which tests to run
   #### if we have FLAC FILES:
   if (@flac_files) {
      print "FLAC files found:\n";
      # check whether md5s exist, test the checksum files
      if (%md5s && $opt_md5 == 1) {
         print "Starting md5 verification:\n";
         validatemd5s(\%md5s, $md5sum_cmd);
         # make sure there aren't extra files not covered by the md5s
         checksumcoverage(\%md5s, \@flac_files, $opt_case);
      } elsif ($opt_md5 == 1) {
         print "No md5 files found.\n";
      }
      if (%ffps && $opt_ffp == 1) {
         print "Starting ffp verification:\n";
         manualverify(\%ffps, \@flac_files, $metaflac_cmd, $ffp_arguments, $ffp_regex);
         # make sure there aren't extra files not covered by the ffps
         checksumcoverage(\%ffps, \@flac_files, $opt_case);
      } elsif ($opt_ffp == 1) {
         print "No ffp files found.\n";
      }
      # if st5 files exist, check them
      if (%st5s && $opt_st5 == 1) {
         print "Starting st5 verification:\n";
         manualverify(\%st5s, \@flac_files, $shntool_cmd, $st5_arguments, $st5_regex);
         checksumcoverage(\%st5s, \@flac_files, $opt_case);
      } elsif ($opt_st5 == 1) {
         print "No st5 files found.\n";
      }
      # if cfp files exist, check them
      if (%cfps && $opt_cfp == 1) {
         print "Starting cfp verification:\n";
         cfpverify(\%cfps, '*.flac', $shntool_cmd, $cfp_arguments, $cfp_regex);
      } elsif ($opt_cfp == 1) {
         print "No cfp files found.\n";
      }
      # unless the flac tests were turned off, test the flac files internally
      if ($opt_flac_test == 1) {
         testflacfiles(\@flac_files, $flac_cmd);
      }
   }
   #### if we have SHN Files:
   if (@shn_files) {
      print "SHN files found:\n";
      if (%md5s && $opt_md5 == 1) {
         print "Starting md5 verification:\n";
         validatemd5s(\%md5s, $md5sum_cmd);
         # make sure there aren't extra files not covered by the md5s
         checksumcoverage(\%md5s, \@shn_files, $opt_case);
      } elsif ($opt_md5 == 1) {
         print "No md5 files found.\n";
      }
      # if st5 files exist, check them
      if (%st5s && $opt_st5 == 1) {
         print "Starting st5 verification:\n";
         manualverify(\%st5s, \@shn_files, $shntool_cmd, $st5_arguments, $st5_regex);
         checksumcoverage(\%st5s, \@shn_files, $opt_case);
      } elsif ($opt_st5 == 1) {
         print "No st5 files found.\n";
      }
      # if cfp files exist, check them
      if (%cfps && $opt_cfp == 1) {
         print "Starting cfp verification:\n";
         cfpverify(\%cfps, '*.shn', $shntool_cmd, $cfp_arguments, $cfp_regex);
      } elsif ($opt_cfp == 1) {
         print "No cfp files found.\n";
      }
   }
   # if we don't have any SHN or FLAC files
   if (!@shn_files && !@flac_files) {
      print ">>>FAILURE: Cannot find .shn or .flac files in or below this directory!\n";
   }

   # if we don't have any checksum files, spout an error
   # NOTE(review): .cfp files are not counted here, so a directory that only
   # holds a .cfp is still reported as having no checksums -- confirm intended.
   if (!%md5s && !%ffps && !%st5s) {
      print ">>>FAILURE: Cannot find checksums in or below this directory!\n";
   }
}

#### subroutines!
# Run an md5sum verification for every .md5 file that was found.
#   $md5s       - hash ref mapping each md5 file name to the directory it is in
#   $md5sum_cmd - path to the md5sum executable
# Prints one "found" line per file; md5verify prints the actual result.
sub validatemd5s {
   my ($md5s, $md5sum_cmd) = @_;
   foreach my $md5file (keys %$md5s) {
      # Look the directory up through the reference we were given rather than
      # through a same-named hash in the enclosing file scope, so the sub
      # works for whatever hash the caller passes in.
      my $md5path = $md5s->{$md5file};
      print "  found \"$md5file\"\n";
      md5verify($md5file, $md5path, $md5sum_cmd);
   }
}

# Verify one md5 file by running "<md5sum> -c <file>" inside its directory.
#   $md5file    - name of the .md5 file (relative to $md5path)
#   $md5path    - directory containing the .md5 file
#   $md5sum_cmd - path to the md5sum executable
# Prints a SUCCESS or FAILURE line; the tool's output is shown on failure.
# Side effect: leaves the process in $md5path.
sub md5verify {
   my ($md5file, $md5path, $md5sum_cmd) = @_;

   # Without the right working directory md5sum would run wherever we happen
   # to be, and a same-named .md5 there could be reported as a success for
   # this one.  Treat the chdir problem as a failed verification instead.
   unless (chdir $md5path) {
      print ">>> Cannot change directories to: $md5path: $!\n";
      print "  >>>FAILURE: MD5 failed for $md5file\n";
      return;
   }

   my $md5_result = `"$md5sum_cmd" -c "$md5file"`;
   my $status = $?;
   # backticks yield undef when the command could not be started at all
   $md5_result = '' unless defined $md5_result;

   # if the return code is anything other than 0, we have an error
   if ($status != 0) {
      print "  >>>FAILURE: MD5 failed for $md5file\n$md5_result\n";
   } else {
      # otherwise print a success message, no output
      print "  [SUCCESS: $md5file verified]\n";
   }
}

# Make sure every music file is mentioned in at least one checksum file.
#   $_[0] - hash ref mapping checksum file name => directory containing it
#   $_[1] - array ref of music file paths (only the base name is looked up)
#   $_[2] - true for case sensitive matching, 0 for case insensitive
# A file counts as covered when the combined checksum text contains one of:
#   "*name" followed by whitespace or the end   (md5sum binary mode)
#   "name:"                                     (ffp)
#   two whitespace characters then "name"       (md5sum text mode, st5)
# Prints one FAILURE line per uncovered file, or a single SUCCESS line.
# Side effect: leaves the process in the last checksum directory entered.
sub checksumcoverage {
  my %checksumfiles = %{$_[0]};
  my $files = $_[1];
  my $opt_case = $_[2];
  # start with an empty string so matching never sees an undefined value
  # when no checksum file could be read
  my $checksumcontent = '';

  print "Checking coverage of checksum file: ";
  # loop through each checksum file and collect its contents
  foreach my $checksum (keys %checksumfiles) {
     my $checksumpath = $checksumfiles{$checksum};
     print "  ...$checksum";
     unless (chdir $checksumpath) {
        print "\n>>> Cannot change directories to $checksumpath: $!\n";
        next;
     }
     my $chks;
     unless (open $chks, '<', $checksum) {
        print "\n>>> Cannot open $checksum for read :$!\n";
        next;
     }
     while (my $line = <$chks>) {
        $checksumcontent .= $line;
     }
     close $chks;
  }
  print "\n";

  # loop through each music file and check whether it is represented in a checksum file
  my $missing = 0;
  foreach my $musicpath (@$files) {
     my $musfile = basename($musicpath);
     # file names are data, not patterns: escape them so characters such as
     # ( ) [ ] + in a name are matched literally
     my $name = quotemeta $musfile;
     my $covered = "(?:\\*$name(?:\$|\\s))|(?:$name:)|(?:\\s{2}$name)";

     if ($opt_case) {
        next if $checksumcontent =~ /$covered/;
        print "  >>>FAILURE: The file $musfile is not represented in checksum file!\n";
     } else {
        next if $checksumcontent =~ /$covered/i;
        print "  >>>FAILURE: The file $musfile is not represented in checksum file! (Case insensitive mode)\n";
     }
     $missing++;
  }
  if ($missing == 0) {
     print "  [SUCCESS: all music files covered]\n";
  }
}

# Run flac's built-in integrity test ("--test --silent") on every file.
#   $files    - array ref of paths to the flac files
#   $flac_cmd - path to the flac executable
# Prints one FAILURE line per file whose test exits non-zero, followed by a
# summary line (FAILURE with the count, or SUCCESS).
sub testflacfiles {
   my ($files, $flac_cmd) = @_;
   my $failed = 0;
   print "Testing FLAC files internally...\n";
   foreach my $flacfile (@$files) {
      # List-form system hands the file name to flac untouched.  Going
      # through the shell would expand "$", backticks and quotes that appear
      # in file names, so the wrong (or no) file would be tested.
      system($flac_cmd, '--test', '--silent', $flacfile);
      # if the return code is anything other than 0, we have an error
      if ($? != 0) {
         print "  >>>FAILURE: flac test fails for $flacfile.\n";
         $failed++;
      }
   }
   if ($failed > 0) {
      print "  >>>FAILURE: Internal FLAC test failed for $failed file(s)!\n";
   } else {
      print "\n  [SUCCESS: Internal FLAC test passed]\n";
   }
}

# Helper shared by cfpverify() and manualverify(): read every checksum file
# and return the checksum lines it expects to see in the tool output.
#   $chksums - hash ref mapping checksum file name => directory containing it
#   $regex   - pattern a line must match to count as a checksum line
# Returns a list of hash refs: { text => trimmed line as written in the file,
# pattern => regex source that matches that line in the tool output }.
# Side effect: changes into each checksum file's directory and stays in the
# last one entered; the callers rely on that when they run the tool.
sub _expected_checksum_lines {
  my ($chksums, $regex) = @_;
  my @expected;

  foreach my $chksumfile (keys %$chksums) {
     my $dir = $chksums->{$chksumfile};
     print "  found \"$chksumfile\"\n";
     unless (chdir $dir) {
        print ">>> Cannot change directories to $dir: $!\n";
        next;
     }
     my $fh;
     unless (open $fh, '<', $chksumfile) {
        print ">>> Cannot open $chksumfile for read: $!\n";
        next;
     }
     # slurp the whole file; the record separator is only changed inside
     # this do-block
     my $content = do { local $/; <$fh> };
     close $fh;
     next unless defined $content;

     # split on any run of CR/LF so Unix, DOS and old Mac files all work
     foreach my $line (split /[\r\n]+/, $content) {
        # keep the checksum looking lines, drop comments, blank lines, etc.
        next unless $line =~ /$regex/;
        # trim surrounding whitespace
        $line =~ s/^\s*(\w.+?)\s*$/$1/;
        # Hashes and file names are matched literally; only the shntool
        # separator is allowed to vary in its exact whitespace.
        my @parts = split /\s{2}\[shntool\]\s{2}/, $line, 2;
        my $pattern = join '\s{2}\[shntool\]\s{2}', map { quotemeta } @parts;
        push @expected, { text => $line, pattern => $pattern };
     }
  }
  return @expected;
}

# Helper shared by cfpverify() and manualverify(): report every expected
# checksum line that does not occur in the tool output, then a summary line.
#   $actual   - concatenated output of the checksum tool
#   @expected - entries produced by _expected_checksum_lines()
sub _report_checksum_mismatches {
  my ($actual, @expected) = @_;
  my $failed = 0;

  foreach my $entry (@expected) {
     my $pattern = $entry->{pattern};
     # a line is fine when it occurs anywhere in the tool output
     next if $actual =~ /$pattern/;
     print "   >>>FAILURE: checksum verification failed, expecting: $entry->{text}\n";
     $failed++;
  }
  # if there were no errors above we're golden
  if ($failed == 0) {
     print "  [SUCCESS: checksum verified]\n";
  } else {
     print "  >>>FAILURE: checksum verification failed for $failed files!\n";
  }
}

# cfp check subroutine: verify composite fingerprints.
#   $_[0] - hash ref mapping cfp file name => directory containing it
#   $_[1] - shell glob selecting the music files, e.g. '*.flac'
#   $_[2] - path to the tool that computes the fingerprint
#   $_[3] - arguments for that tool
#   $_[4] - regex recognising a fingerprint line in the cfp file
# The tool runs once, in the directory of the last cfp file entered, and the
# glob is expanded by the shell.
sub cfpverify {
  my ($chksums, $filetype, $cmd, $arguments, $regex) = @_;

  my @expected = _expected_checksum_lines($chksums, $regex);

  # calculate the actual checksum; undef means the tool could not be run
  my $actualchksum = `"$cmd" $arguments $filetype`;
  $actualchksum = '' unless defined $actualchksum;

  _report_checksum_mismatches($actualchksum, @expected);
}

# manually check checksums line by line (used for ffp and st5 files).
#   $_[0] - hash ref mapping checksum file name => directory containing it
#   $_[1] - array ref of music file paths; the tool is run on each base name
#   $_[2] - path to the tool that computes the checksum
#   $_[3] - arguments for that tool
#   $_[4] - regex recognising a checksum line in the checksum file
# The tool runs in the directory of the last checksum file entered.
sub manualverify {
  my ($chksums, $files, $cmd, $arguments, $regex) = @_;

  my @expected = _expected_checksum_lines($chksums, $regex);

  # calculate the actual checksums, one tool invocation per music file
  my $actualchksum = '';
  foreach my $musicpath (@$files) {
     my $file = basename($musicpath);
     # NOTE(review): the name still passes through the shell inside double
     # quotes, so "$" or backticks in a file name are expanded there.
     my $output = `"$cmd" $arguments "$file"`;
     $actualchksum .= $output if defined $output;
  }

  _report_checksum_mismatches($actualchksum, @expected);
}

# Print the manual to standard output and exit with status 0.
# Takes no arguments and never returns to the caller.
# The text is an interpolating heredoc, so the doubled-up backslashes in the
# --pattern example are halved when printed.
sub helpme {
   my $help = <<HELPTEXT;
This script is designed to test FLAC and SHN files 
for conformance to checksums as well as internal 
integrity checks.  The script takes a path (absolute 
or relative) as an input.  By default, the script 
will traverse the input path looking for directories 
that start with the "gdYY*", where YY is meant to 
be a year designator (gdYY and gdYYYY directories 
will both work). 

SHN FILES:
---------------------------------------------
SHN files are have md5sums calculated and verified 
against any .md5 files in the directories.  
shntool md5s are also verified against any .st5
files in the directory.  Composite shntool 
fingerprints are verified against any .cfp files
in the directory.  The scope of the md5 and st5 
checksum files are checked as well to make sure 
that there aren't SHN files in the directory not 
covered by the checksum files.

FLAC FILES:
----------------------------------------------
FLAC files have md5sums checked (like SHN) files.  
They also have their FLAC fingerprints checked 
against any fingerprints stored in .ffp files,
and shntool md5 checksums stored in .st5 files.
Composite shntool fingerprints are verified 
against any .cfp files in the directory.    
The scope of all md5, ffp, and st5 files is checked
to make sure that there aren't FLAC files in the
directory not covered by the checksum files.  
Lastly, FLAC files are tested for internal 
integrity using flac's --test flag.  

OPTIONS:
---------------------------------------------
Option flags are taken prior to the path in 
command line usage.  Current options include:

  --single, -s: use this flag if you're looking 
  to verify just a single flac/shn fileset.  
  When this flag is set, the path inputed is not 
  traversed recursively.

  --no-test, -not:  use this flag if you wish to 
  skip the internal flac file integrity tests.
  
  --no-case, -noc: by default, coverage checking
  is case sensitive.  Use this option for case
  insensitive coverage tests.

  --no-md5, -nom:  use this flag to skip md5 checks.  
  Note, SHN files will then only be checked against 
  any st5 and cfp files.

  --no-ffp, -nof:  use this flag to skip ffp verification.
  
  --no-st5:  use this flag to skip st5 verification.
  
  --no-cfp:  use this flag to skip cfp verification.

  --pattern=regex, -p:  use this flag to override 
  the default directory pattern matching.  Uses 
  standard regular expressions.  See example below. 
  
DEPENDENCIES:
--------------------------------------------
This script relies on and serves as a wrapper for
several command line tools including md5sum, shntool,
metaflac and flac.  These tools are available under Windows 
and Unix-like systems.  Unix folks will likely know 
how to find these.  On Windows you'll need to download:
 md5sum.exe: http://etree.org/md5com.html

 flac.exe and metaflac.exe: http://flac.sourceforge.net/download.html

 shntool.exe: http://www.etree.org/shnutils/shntool/
 
Once downloaded, set the location of these files in 
the paths section of this script.

USAGE:
---------------------------------------------
Example - traverse a directory tree looking checking 
any filesets that exist under the path (relative or absolute):
  perl shnflac-verify.pl path/to/yourdir/
  
Example - find directories starting with paf followed 
by a two digit year:
  perl shnflac-verify.pl --pattern=^paf\\\\d\\\\d
  
Example - test a single seed, skipping ffp verifications 
and flac integrity checks:
  perl shnflac-verify.pl --single --no-ffp --no-test path/to/gdYYseed/
  
CREDITS:
Originally written by Dr. Unclear, distributed 
under the GPL license.
HELPTEXT
   print $help;
   exit 0;
}



