#!/usr/bin/perl
# An ugly parser to use tknamazu + w3m
# by Jun Nishii <jun@vinelinux.org>
# Time-stamp: <00/07/15 13:32:36 kazuhiko>

# parsedoc - copy an HTML document from TARGET to W3M, numbering its links.
#
# Reads the TARGET handle one paragraph at a time.  Every <a href=...> tag
# is prefixed with "[N]" (N counts from 1), and in the paragraph that holds
# </body> a "References" list mapping each N to its link target is inserted
# in front of the closing tag.
#
# Link targets that are neither absolute paths ("/...") nor carry a URI
# scheme ("http:", "https:", "mailto:", ...) are resolved against the global
# $dirname: "$dirname/target" when $dirname is an http(s) URL, otherwise
# "file://localhost$dirname/target".
#
# Takes no arguments; the return value is not meaningful.
# Globals read: $dirname.  Globals written: $index, @url, $list.
sub parsedoc(){
    # Paragraph mode, localised so the caller's record separator is
    # restored on return.
    local $/ = "";

    # The historical "$* = 1" (multi-line anchors) is gone on purpose: the
    # variable was removed from Perl and assigning to it is now a fatal
    # error.  No pattern below relies on multi-line anchoring.

    $index = 0;

    while (<TARGET>) {
        # Re-join anchors whose tag name and attributes were split by a
        # line break so the href pattern below can see them.
        s/<a ?\n/<a /ig;

        # Remember each opening anchor tag and put its 1-based number in
        # front of it ($index has already been incremented when the
        # replacement string is built).
        s{(<a href="?[^">]+"?>)}{$url[$index++] = $1, "[$index]$1"}ieg;

        if (m|</body>|i) {
            $list = "<p>References<br>\n";
            for my $n (0 .. $index - 1) {
                # Reduce the stored tag to the bare link target.
                $url[$n] =~ s/<a href="?//i;
                $url[$n] =~ s/"?>.*//;

                # Only relative references need a base.  Anything starting
                # with "/" or with a URI scheme is left untouched.
                unless ($url[$n] =~ m{^/}
                        || $url[$n] =~ m{^[A-Za-z][A-Za-z0-9+.\-]*:}) {
                    if ($dirname =~ m{^https?://}i) {
                        $url[$n] = "$dirname/$url[$n]";
                    } else {
                        $url[$n] = "file://localhost$dirname/$url[$n]";
                    }
                }
                $list .= sprintf(" %d. %s<br>\n", $n + 1, $url[$n]);
            }
        }

        # Insert the reference list just before the closing body tag; this
        # is a no-op for paragraphs without </body>.
        s|(</body>)|$list\n$1|ig;

        print W3M;
    }
}

# parseftp - copy a dumped FTP directory listing from TARGET to W3M,
# numbering its entries.
#
# Each line that starts with a space-free token ending in "/" and each
# "[Upper Directory]" marker is prefixed with "[N]" (N counts from 1).  After
# the listing a "References" list is printed that maps each N to a URL:
# entries not already starting with "ftp:" are resolved against the global
# $dirname, and the upper-directory entry points at the parent of $dirname.
#
# Takes no arguments; the return value is not meaningful.
# Globals read: $dirname.  Globals written: $index, $upperdir, @url, $list.
sub parseftp(){
    # The listing is processed line by line regardless of what the caller
    # left in the record separator.
    local $/ = "\n";

    $index=0;

    # Parent directory of $dirname; never climb above the server root.
    $upperdir=$dirname;
    if ($upperdir =~ m|^(ftp://[^/]*)/?$|) { $upperdir = "$1/"; }
    elsif ($upperdir =~ m|/$|) { $upperdir =~ s|/[^/]*/$|/|; }
    else { $upperdir =~ s|/[^/]*$|/|; }

    while(<TARGET>) {
        s|^([^ ]*/)|$url[$index++]=$1,"[$index]$1"|e;
        s|\[Upper Directory\]|$url[$index++]=$upperdir,"[$index]Upper Directory"|e;
        # Emit the rewritten listing line itself.  (This used to print
        # $list, which is still unset here, so the listing was lost.)
        # NOTE(review): the line goes out verbatim; if W3M renders its input
        # as HTML the line breaks may collapse - confirm whether a <pre>
        # wrapper is wanted.
        print W3M $_;
    }

    # Base for relative entries, without trailing slashes so that joining
    # with "/" does not produce "//".
    (my $base = $dirname) =~ s|/+$||;

    $list="\n<p>References<br>\n";
    for my $n (0 .. $index - 1) {
        if ($url[$n] !~ /^ftp:/) {
            $url[$n] = "$base/$url[$n]";
        }
        $list .= sprintf(" %d. %s<br>\n", $n + 1, $url[$n]);
    }

    print W3M $list;
}

# parse arguments
#
# Usage: <this script> [w3m options ...] <URL or file>
# Leading "-..." arguments are collected verbatim in $args and handed to the
# rendering w3m process below; "-cols" takes a value, which is forwarded too.

$args="";

while((defined($ARGV[0]) && $ARGV[0] =~ /^-/)){
    $args .= $ARGV[0]." ";
    if ($ARGV[0] =~ /-cols/) {$args .= $ARGV[1]." "; shift @ARGV}
    shift @ARGV;
}

unless (defined($ARGV[0]) && length($ARGV[0])) {
    die "usage: $0 [w3m-options] URL-or-file\n";
}

# parse input source
#
# The source is fetched or decompressed by a child process.  The list form
# of open() is used so that the URL or filename reaches the command as one
# argument and is never interpreted by a shell ("&", ";", "?" and spaces are
# common in URLs and filenames).
$_=$ARGV[0];
$mode="text";

if (m|^http://|) {
    $mode="html";
    open(TARGET, '-|', 'w3m', '-dump_source', $ARGV[0])
        or warn "$0: cannot run w3m: $!\n";
    # get pathname from filename: drop query/fragment first, then the last
    # path component - but keep a bare "http://host" intact.
    ($dirname=$_) =~ s|[?#].*||;
    $dirname =~ s|/[^/]*$|| if $dirname =~ m|^http://[^/]+/|;
} elsif (m|^ftp://|) {
    $mode="ftp";
    open(TARGET, '-|', 'w3m', '-dump', $ARGV[0])
        or warn "$0: cannot run w3m: $!\n";
    # get pathname from filename
    $dirname=$_;
} else {
    $mode="text";
    s|^file://localhost||;
    s|#.*||;
    $filename=$_;
    if (m|\.gz$|) {
        open(TARGET, '-|', 'gzip', '-dc', $filename)
            or warn "$0: cannot run gzip: $!\n";
    } elsif (m|\.bz2$|) {
        # This branch used to test for ".gz" again and ran bzip2 without -d.
        open(TARGET, '-|', 'bzip2', '-dc', $filename)
            or warn "$0: cannot run bzip2: $!\n";
    } elsif ( -f $filename) {
        open(TARGET, '<', $filename)
            or warn "$0: cannot open $filename: $!\n";
    } elsif (-f "$filename.gz") {
        open(TARGET, '-|', 'gzip', '-dc', "$filename.gz")
            or warn "$0: cannot run gzip: $!\n";
    } elsif (-f "$filename.bz2") {
        open(TARGET, '-|', 'bzip2', '-dc', "$filename.bz2")
            or warn "$0: cannot run bzip2: $!\n";
    } else {
        warn "$0: cannot find $filename (also tried .gz and .bz2)\n";
    }

    # get pathname from filename
    # A name without any "/" lives in the current directory; a name directly
    # under "/" yields the empty string, which joins correctly as "/name".
    if ($filename =~ m|/|) { ($dirname=$filename) =~ s|/[^/]*$||; }
    else { $dirname = '.'; }
    # Relative paths are anchored at the current working directory.
    if ($filename !~ m|^/|) {
        chomp(my $cwd = `pwd`);
        $dirname = ($dirname eq '.') ? $cwd : "$cwd/$dirname";
    }
}

# set output
# The collected options are passed through exactly as given on the command
# line, so this one still goes through the shell.
open(W3M,"|w3m -dump -T text/html $args")
    or warn "$0: cannot run w3m: $!\n";

#################### main routine #################### 

if ($mode eq "ftp") { parseftp(); }
else { parsedoc(); }

close(TARGET);
close(W3M);
