#!/usr/pkg/bin/perl -w

=comment

  YamCha -- Yet Another Multipurpose CHunk Annotator
 
  $Id: mkmodel.in,v 1.15 2004/10/03 03:16:26 taku-ku Exp $;

  Copyright (C) 2000-2004 Taku Kudo <taku-ku@is.aist-nara.ac.jp>
  This is free software with ABSOLUTELY NO WARRANTY.
  
  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.
  
  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.
  
  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
=cut

$| = 1;

my $VERSION = "0.4";
my $GZIP    = "/usr/bin/gzip";

my %arg;
use File::Spec;
use Getopt::Std;
getopts("ves:m:t:",\%arg);

if (defined $arg{"v"}) { print "$VERSION\n"; exit 0; };

my $MODULE  = defined $arg{"e"} ? "PKE" : "PKI";
my $MINSUP  = $arg{"m"} || 2;
my $SIGMA   = $arg{"s"} || 0.001;
my $MODEL   = shift @ARGV;
my $OUT     = shift @ARGV;
my $TOOLDIR = $arg{"t"} || "/usr/pkg/libexec/yamcha";

die "Usage: $0 textmodel binmodel\n" if (! defined $MODEL || ! defined $OUT);

my $module =File::Spec->catfile($TOOLDIR, "$MODULE.pm");
require $module  || die "$!: $module\n";

sub initialize       { my @p = @_;  eval "${MODULE}::initialize (\@p)";           die $@ if $@; }
sub set_kernel_param { my @p = @_;  eval "${MODULE}::set_kernel_param (\@p)";     die $@ if $@; }
sub process_line     { my @p = @_;  eval "${MODULE}::process_line (\@p)";         die $@ if $@; }
sub mkmodel          { my @p = @_;  eval "${MODULE}::mkmodel  (\@p)";             die $@ if $@; }
sub get_header       { my @r = ();  eval "\@r = ${MODULE}::get_header ()";        die $@ if $@; return @r; }
sub get_type         { my $r;       eval "\$r = ${MODULE}::get_type   ()";        die $@ if $@; return $r; }
sub get_concat_files { my @r = ();  eval "\@r = ${MODULE}::get_concat_files ()";  die $@ if $@; return @r; }
sub finalize         {              eval "${MODULE}::finalize ()";                die $@ if $@; }
sub mydie            { finalize (); die $_[0]; };

sub sigclean 
{ 
    print STDERR "\nFATAL: signal caught! exit abnormally.\n"; 
    finalize ();
    exit -1; 
}

$SIG{INT}  = \&sigclean;
$SIG{KILL} = \&sigclean;
$SIG{TERM} = \&sigclean;
$SIG{STOP} = \&sigclean;
$SIG{QUIT} = \&sigclean;
$SIG{HUP}  = \&sigclean;

###############################################################################################
#
# main routine
#
initialize ($OUT, $TOOLDIR, $SIGMA, $MINSUP);

if ($MODEL =~/\.gz$/) {
    open(F, "$GZIP -dc $MODEL |") ||  mydie "$! $MODEL";
} else {
    open(F, $MODEL) ||  mydie "$! $MODEL";
}

my $param = "";
while(<F>) {
    chomp;
    next if (/^\s*\#/);
    last if (/^$/);
    my ($key, $value) = split /:/, $_, 2;
    $value =~ s/^\s*//; $key   =~ s/\s*$//;
    $param .= (lc($key) . pack("x") . $value . pack("x"));
}

my @dic = ();
while(<F>) {
    chomp;
    last if (/^$/);
    push @dic, [ split ];
}

my $line  = 0;
my $msize = 0;
print "Reading TEXT model: $MODEL\n";
while(<F>) {
    chomp;
    print "."  if ($line++ % 1000 == 0) ;
    if (/^(\S+) \# kernel type/) {
	if ($1 eq "1") {
	    $kernel_type = 'polynomial';
        } elsif ($1 eq "0") {
	    $kernel_type = 'linear';
	} else {
	    mydie "FATAL: cannot support Kernel Func: type $1\n";
	}
    } elsif (/^(\d+) \# kernel parameter -d/) {
	$param_degree = $1;
    } elsif (/^(\S+) \# kernel parameter -g/) {
	$param_g = $1;
    } elsif (/^(\S+) \# kernel parameter -s/) {
	$param_s = $1;
    } elsif (/^(\S+) \# kernel parameter -r/) {
	$param_r = $1;
    } elsif (/^(\S+) \# threshold b/) {
	$bias = $1;
	if ($kernel_type eq "linear") {
	    $kernel_type = "polynomial";	    
	    $param_degree  = 1;
	    $param_g = 1;	    
	    $param_s = 1;
	    $param_r = 0;
	}
	set_kernel_param ($kernel_type, $param_degree, $param_g, $param_s, $param_r);
    } elsif (/MULTI_CLASS (\S+) (\S+)/) {
	$poslabel = $1; $neglabel = $2;
    } elsif (/^$/) {
	$model[$msize] = [$poslabel, $neglabel, $bias];
	++$msize;
    } elsif (/^SVM-light/ || /^TinySVM/) {
	# do nothing
    } elsif (! /\#/) {
	process_line ($_, $msize);
    }
}
close (F);

mydie "FATAL: Invalid model file\n" if (@dic == 0 || @model == 0);

print "\nWriting BINARY model: $OUT\n";
mkmodel (\@dic, \@model);

my %class = ();
my $cid   = 0;
my $isonevsrest = 0;
for (@model) {
    my ($p, $n, $b) = @{$_};
    $class{$p} = ($cid++) if (! defined $class{$p});
    # ignore if dummy class for one vs rest
    if ($n eq "___OTHER___") {
        $isonevsrest = 1;
        next;
    }
    $class{$n} = ($cid++) if (! defined $class{$n});
}

# give largest id to dummy class
$class{"___OTHER___"} = $cid if ($isonevsrest);

my $csize = scalar (keys %class);
mydie "FATAL: class size == 1, OK?\n" if ($csize <= 1);
mydie "FATAL: Use pair wise when binary class ificaion\n" if ($isonevsrest && $csize == 3);

my @hlist = get_header ();
my $type  = get_type   ();

mydie "FATAL: size of header is invalid\n" if (@hlist != 8);

open (S,"> $OUT") || mydie "$! $OUT\n"; binmode S;

my $header = pack("a32IIa32IdddIIIIIIIIII",
             $VERSION,
             $type,                                 # PKB:0, PKI:1, PKE:2
             $isonevsrest,                          # pair-wise:0 one.vs.rest:1
	     $kernel_type,$param_degree,$param_g,$param_s,$param_r, # kernel param
	     $msize,      # model size, if binary, modelSize equal to 1.
	     $csize,      # class size, if binary, csize equal to 2. 
	     @hlist);

print S $header;

my $l1 = length ($header);
my $l2 = length ($param);
my $n = 8 * ( int(($l1 + $l2 + 4)/ 8) + 8)  - $l1 - 4; # tricky
print S pack("I",  $n); # size of parameter
print S pack("a*", $param); # contents of parameter
print S pack("a*", "\0" x ($n - $l2));

for (sort { $class{$a} <=> $class{$b} } keys %class) {
    print S pack("a32",$_);
}

for (@model) {
    my ($p,$n,$b) = @{$_};
    print S pack("IId", $class{$p}, $class{$n}, $b);
}

for my $file (get_concat_files ()) {
    my $buf ;
    open (F, $file) || mydie "$!: $file\n"; binmode F;
    while (1) {
	my $ret = read (F, $buf, 8192);
	print S $buf;
	last if ($ret < 8192);
    }
    close (F);
}
close (S);

finalize ();

print "Done!\n";
