#!/usr/pkg/bin/perl

# a bit of a hack:
# Look through a set of .info files
#   - assumption is that all the files are for an identical code base
# Find places where the coverpoints in the files differ:
#    - one file contains source that is not in the other
#    - source from one file contains coverpoints not found in the other

use strict;
use warnings;
use Getopt::Long;

use lib "/usr/pkg/lib/lcov";
use lib "/usr/pkg/lib/lcov";    # path from 'support-scripts'

use lcovutil;

our ($keep_going, $dontDrop, $compact, $sortBySize, $reportAgreeBlocks,
     $verbose);

our @infoFileArray;

package Region;

sub new
{
    my ($class, $start, $finish, $in, $out) = @_;

    my $self = [$start, $finish, $in, $out];
    bless $self, $class;
    return $self;
}

sub start
{
    my $self = shift;
    return $self->[0];
}

sub finish
{
    my $self = shift;
    return $self->[1];
}

sub size
{
    my $self = shift;
    return $self->[1] - $self->[0] + 1;
}

sub consistent
{
    my $self = shift;
    return !(scalar(@{$self->[2]}) && scalar(@{$self->[3]}));
}

sub in
{
    my $self = shift;
    return $self->[2];
}

sub out
{
    my $self = shift;
    return $self->[3];
}

sub buildCodeKey
{
    my $self    = shift;
    my $code    = $self->[2];
    my $notcode = $self->[3];
    my $key     = '';
    for (my $i = 0; $i < $main::numInfoFiles; ++$i) {
        #my $name = $main::infoFileArray[$i];
        my $char = $i ~~ @{$code} ? '1' : ($i ~~ @$notcode ? '_' : '.');
        $key .= $char;
    }
    return $key;
}

sub toNames
{
    my $list = shift;
    my @rtn;
    foreach my $idx (@$list) {
        push(@rtn, $main::infoFileArray[$idx]);
    }
    return @rtn;
}

sub print
{
    my ($self, $dest) = @_;

    my ($first, $last, $code, $nonCode) = @$self;

    print($dest "$first : $last:");

    if ($main::compact) {
        print($dest '  ', $self->buildCodeKey(), '  ', $self->size(), " line",
              $self->size() == 1 ? '' : 's', "\n");
    } else {
        print($dest "  ", $self->size(), " line",
              $self->size() == 1 ? '' : 's', "\n");
        if (%$code) {
            if ($main::verbose) {
                print($dest "\tcode:\n\t\t" .
                      join("\n\t\t", toNames($code)) . "\n");
            } else {
                print($dest "\tcode:  " . join(' ', @$code) . "\n");
            }
        }
        if (%$nonCode) {
            if ($main::verbose) {
                print($dest "\tnot code:\n\t\t" .
                      join("\n\t\t", toNames($nonCode)) . "\n");
            } else {
                print($dest "\tnot code:  " . join(' ', @$nonCode) . "\n");
            }
        }
    }
}

package FileData;

sub new
{
    my ($class, $filename) = @_;

    # Data:  filename, [TraceInfo, # info for this file
    #        list of [traceInfo, infoFileIndex] for each .info file containing this file
    #        list of regions in order: [first, last, [in], [out]]
    my $self = [$filename, [], []];
    bless $self, $class;

    return $self;
}

sub name
{
    my $self = shift;
    return $self->[0];
}

sub traces
{
    # return list of [TraceInfo, infoFileIndex] for every .info file containing
    #   this source file
    my $self = shift;
    return $self->[1];
}

sub regions
{
    # return list of [first last in out] for every identified region
    my $self = shift;
    return $self->[2];
}

sub regionsBySize
{
    my $self = shift;
    return
        sort({ $b->size() <=> $a->size() or
                     $a->start() <=> $b->start() } @{$self->regions()});
}

sub totalRegionSize
{
    my $self = shift;
    my $size = 0;
    foreach my $r (@{$self->regions()}) {
        $size += $r->size();
    }
    return $size;
}

sub printCodeGroups
{
    my ($code, $nonCode, $dest) = @_;
    if ($main::compact) {
        print($dest '  ', buildCodeKey($code, $nonCode, $main::numInfoFiles),
              "\n");
    } else {
        print($dest "\n");
        if (%$code) {
            if ($main::verbose) {
                print($dest "\tcode:\n\t\t" .
                      join("\n\t\t", hashToNames($code)) . "\n");
            } else {
                print($dest "\tcode:  " . join(' ', sort keys(%$code)) . "\n");
            }
        }
        if (%$nonCode) {
            if ($main::verbose) {
                print($dest "\tnot code:\n\t\t" .
                      join("\n\t\t", hashToNames($nonCode)) . "\n");
            } else {
                print($dest "\tnot code:  " .
                      join(' ', sort keys(%$nonCode)) . "\n");
            }
        }
    }
}

sub print
{
    my ($self, $dest) = @_;
    $dest = \*STDOUT unless defined($dest);

    my $title =
        $self->name() . ":\n  total: " . $self->totalRegionSize() . "\n";

    foreach
        my $r ($main::sortBySize ? $self->regionsBySize() : @{$self->regions()})
    {
        print($dest $title);
        $r->print($dest);
        $title = '';
    }
    return $title eq '';    # return non-zero if I printed something
}

sub checkLineCoverageConsistency
{
    my $self = shift;

    my $traces = $self->traces();
    return 1
        if (scalar(@$traces) < 2);    # nothing to check..only one set of data

    # first, collect line coverage data for all the .info files
    # does everyone agree that this line is code or is not code?
    my @lineCovData;
    foreach my $t (@$traces) {
        my ($traceInfo, $idx) = @$t;
        my $d = $traceInfo->sum();
        push(@lineCovData, [$d, $idx]);
    }
    my $srcfile    = $self->name();
    my $consistent = 1;

    my $numLines;
    if (-f $srcfile) {
        ($numLines) = split(' ', `wc -l $srcfile`);
    } else {
        # hard way:  look through all the data to find the highest line number
        $numLines = 0;
        foreach my $lineData (@lineCovData) {
            my @lines   = sort $lineData->[0]->keylist();
            my $largest = $lines[-1];
            $numLines = $largest
                if $largest > $numLines;
        }
    }
    my $currentGroupStart;
    my %currentCodeGroup;
    my %currentNonCodeGroup;

    for (my $lineNo = 1; $lineNo < $numLines; ++$lineNo) {
        my %codeGroup;
        my %nonCodeGroup;
        foreach my $lineData (@lineCovData) {
            my ($d, $infoFileIdx) = @$lineData;
            if (defined($d->value($lineNo))) {
                $codeGroup{$infoFileIdx} = 1;
            } else {
                $nonCodeGroup{$infoFileIdx} = 1;
            }
        }
        if (!defined($currentGroupStart)) {
            $currentGroupStart   = $lineNo;
            %currentNonCodeGroup = %nonCodeGroup;
            %currentCodeGroup    = %codeGroup;
            next;
        }
        if (!%codeGroup ||
            !%nonCodeGroup) {
            # everyone agrees that this is is is not code..
            if (scalar(%currentCodeGroup) &&
                scalar(%currentNonCodeGroup)) {

                # there was disagreement before, but now we agree
                my $region = Region->new($currentGroupStart,
                                         $lineNo - 1,
                                         [sort(keys %currentCodeGroup)],
                                         [sort(keys %currentNonCodeGroup)]);
                push(@{$self->regions()}, $region);

                #print($title, '  ');
                #$title = '';
                #$region->print(\*STDOUT);
                $currentGroupStart = $lineNo;
                $consistent        = 0;
            }
            %currentCodeGroup    = %codeGroup;
            %currentNonCodeGroup = %nonCodeGroup;
        } else {
            # we have some disagreement about whether this is code or not
            #   - is the 'in' and 'out' group the same as it was before?
            if (!(hashIsSame(\%currentCodeGroup, \%codeGroup) &&
                  hashIsSame(\%currentNonCodeGroup, \%nonCodeGroup))) {
                if (defined($main::reportAgreeBlocks) ||
                    (%currentCodeGroup &&
                        %currentNonCodeGroup)
                ) {

                    my $region =
                        Region->new($currentGroupStart,
                                    $lineNo - 1,
                                    [sort(keys %currentCodeGroup)],
                                    [sort(keys %currentNonCodeGroup)]);
                    push(@{$self->regions()}, $region);
                    #print($title, '  ');
                    #$title = '';
                    #$region->print(\*STDOUT);
                }
                %currentNonCodeGroup = %nonCodeGroup;
                %currentCodeGroup    = %codeGroup;
                $currentGroupStart   = $lineNo;
                $consistent          = 0;
            }
        }
    }
    if (%currentCodeGroup &&
        %currentNonCodeGroup) {
        $consistent = 0;
    }
    # got to end of file..
    if ($reportAgreeBlocks || (%currentCodeGroup && %currentNonCodeGroup)) {
        my $region = Region->new($currentGroupStart,
                                 $numLines,
                                 [sort(keys %currentCodeGroup)],
                                 [sort(keys %currentNonCodeGroup)]);
        push(@{$self->regions()}, $region);
        #print($title, "  ");
        #$region->print(\*STDOUT);
    }
    return $consistent;
}

sub hashIsSame
{
    my ($hash1, $hash2) = @_;

    while (my ($k, $v) = each(%$hash1)) {
        return 0 unless exists($hash2->{$k});
    }
    while (my ($k, $v) = each(%$hash2)) {
        return 0 unless exists($hash1->{$k});
    }
    return 1;
}

package main;

my $help;

my $err = !GetOptions("verbose|v"    => \$verbose,
                      'substitute=s' => \@lcovutil::file_subst_patterns,
                      'exclude=s'    => \@lcovutil::exclude_file_patterns,
                      'include=s'    => \@lcovutil::include_file_patterns,
                      "keep-going"   => \$keep_going,
                      "drop"         => \$dontDrop,
                      "all"          => \$reportAgreeBlocks,
                      "compact"      => \$compact,
                      "sort"         => \$sortBySize,
                      "help|h"       => \$help,);

if ($err) {
    print(STDERR "$0: invalid argument:\n");
}
if ($err || $help) {
    if (!$err) {
        print("Check for consistency in set of .info files\n");
    }
    my $dest = $help ? \*STDOUT : \*STDERR;
    print($dest <<EOF);
Usage:
  $0 \\
     [--include glob] [--exclude glob] [--substitute regexp] \\
     [--all] [--drop] [--keep-going] [--compact] [--sort]    \\
     [--verbose] [--help] infoFile infoFile ...              \\

  infoFile            : .info file (which must end in ".info") or data file
                        containing names of .info files.
                        Data file comment character is '#'
  --include glob      : glob match source filename to check
  --exclude glob      : glob match source filename to skip
  --substitute regexp : munge source file path when reading .info files
  --keep-going        : do not stop after mismatch found
  --all               : print all regions (not just regions conflicting votes)
  --drop              : ignore .info file if it does not contain some source
                        file - continue to check consistency in the .info files
                        which do contain the file
  --compact           : compact printing of source code region
  --sort              : sort by region size
  --help              : print this message
  --verbose           : be chatty

Check for:
 - Is every source file present in every .info file ?
   i.e. was some source file simply not present/not processed
   in some .info files?
     -  No further properties are checked for this file, if the
        --drop flag is present.
     -  if --keep-going flag not set, stop after printing names of
        missing files.
  - For every source file that is not dropped:
     - Is the version ID the same in all the .info files?
     - Do all the .info files agree about the status of every line?
        - Do they all say the line is code or is not code?
          Or do some .info files say one thing and some another?
        - For every contiguous block with the same "code/not code" votes,
          Print the start/end of the block and the vote
        - If the --all flag is set:  print the start/end of blocks
          with unanimous votes as well.
     - Note: these checks require that the script be able to access
       the source code
         - If it cannot find the source file, then it prints a message.
           Die unless --keep-going flag is set.

EOF
    exit($err);
}
lcovutil::munge_file_patterns();    # used for exclude/include

foreach my $infoFile (@ARGV) {
    if ($infoFile =~ /\.info(\.gz)?$/) {
        push(@infoFileArray, $infoFile);
    } else {
        open(INPUT, "<", $infoFile) or
            die("Error: unable to opend $infoFile: $!\n");
        while (<INPUT>) {
            chomp($_);
            next
                if ($_ eq '' ||
                    $_ =~ /\s*#/);    # comment character
            push(@infoFileArray, $_);
        }
        close(INPUT);
    }
}

my %infoFiles;
my %sourceFiles;

our $numInfoFiles = scalar(@infoFileArray);

my $status = 0;

my $idx = -1;
print("Info file mapping:\n");
foreach my $f (@infoFileArray) {
    $idx++;
    print("  $idx: $f\n");
}
print("\n\n");

$idx = -1;
foreach my $f (@infoFileArray) {
    my $info = TraceFile->load($f);
    $infoFiles{$f} = [$info, ++$idx];
    # and collect list of source files that appear in each
    foreach my $src ($info->files()) {
        my $sourceFileData;
        if (!exists($sourceFiles{$src})) {
            $sourceFileData = FileData->new($src);
            $sourceFiles{$src} = $sourceFileData;
        } else {
            $sourceFileData = $sourceFiles{$src};
        }

        my $traces = $sourceFileData->traces();
        my $fInfo  = $info->data($src)
            ;    # the TraceInfo for this source, in this .info file
        if (@$traces) {
            # this isn't the first time we see this source file - check for version mismatch
            my $fv = $fInfo->version();
            my $v  = $traces->[0]->[0]->version();
            if ((defined($fv) && !defined($v)) ||
                (!defined($fv) && defined($v)) ||
                (defined($fv) && defined($v) && $fv ne $v)) {
                # versions don't match - so don't bother to check for matching
                #  line coverage data
                print("Error: version mismatch for $src between:\n\t" .
                      $traces->[0]->[1] .
                      ": " . ($v ? $v : 'undef') . "\n\t" . $f . ": " .
                      ($fv ? $fv : 'undef') . "\n");
                $status = 1;
                # version mismatch - so don't bother checking data for this file
                next;
            }
        }
        push(@$traces, [$fInfo, $idx]);
    }
}

# we have a list of all the source files now...
#  check that all of them appear in every .info file we read
my %missing;
foreach my $filename (sort keys %sourceFiles) {
    my $data   = $sourceFiles{$filename};
    my $traces = $data->traces();
    if (scalar(@$traces) != $numInfoFiles) {
        $status = 1;
        my %names;    # which info files DO contain this source file?
        foreach my $d (@$traces) {
            $names{$d->[1]} = 1;
        }
        print("Error: Source file '$filename' missing from:\n")
            if $verbose;
        my $idx = -1;
        foreach my $f (@infoFileArray) {
            ++$idx;
            next if exists($names{$idx});
            print("\t$f\n")
                if $verbose;
            if (exists($missing{$f})) {
                push(@{$missing{$f}}, $filename);
            } else {
                $missing{$f} = [$filename];
            }
        }
    }
}

if (%missing) {
    print("Files missing from .info data:\n");

    foreach my $f (sort keys %missing) {
        print("\t$f:\n");
        foreach my $src (@{$missing{$f}}) {
            print("\t\t$src\n");
            # and don't look at this file
            delete $sourceFiles{$src} unless $dontDrop;
        }
    }
}

exit($status) unless defined($keep_going) || $status == 0;

# now go through the source files to check that line coverpoints are the same
#  in all of them
foreach my $srcfile (sort keys %sourceFiles) {

    my $srcData = $sourceFiles{$srcfile};
    if (!$srcData->checkLineCoverageConsistency()) {
        $status = 1;
    }
}

# now sort the data and print it..
my @fileOrder =
    $sortBySize ?
    sort({ $sourceFiles{$b}->totalRegionSize()
                 <=> $sourceFiles{$a}->totalRegionSize() or
                 $a cmp $b } keys(%sourceFiles)) :
    sort(keys %sourceFiles);

foreach my $srcFile (@fileOrder) {
    my $printed = $sourceFiles{$srcFile}->print(\*STDOUT);
    print("\n") if $printed;
}

exit($status);
