#!/usr/bin/perl

# ams2bib: convert AmSTeX bibliography into BibTeX
# usage:   ams2bib [-o file.bib] file[.tex]
#
# author:  A. G. Werschulz <agw@cs.columbia.edu>
# date:    10 September 2000
# version: 1.0
#
# Copyright (C) Arthur G. Werschulz 2000 (agw@cs.columbia.edu)
#
# May be freely redistributed under the Gnu Public License.
# Send me mail if you like it or if you have any questions or suggestions.

# modules I use:

use strict;
use vars qw($opt_h $opt_o);
use Getopt::Std;

my($prog, $usage, $bibfile, $still_more);

# I should really clean this up, given the unpleasantness of 
# global variables ...

my $current_author;
my %keys = ();

# ye olde error msg

$prog = $0;
$usage="Usage: $prog [-o file.bib] file[.tex]";

# handle options

if (!getopts('ho:')) {
    print STDERR $usage, "\n";
    exit 1;
}

if ($opt_h) {
    print STDERR $usage, "\n";
    exit 0;
}

$bibfile = $opt_o if ($opt_o);

# there should be exactly one input filename at this point
# default extension is ".tex" if no extension is explicitly given

die $usage if ($#ARGV != 0);

# set up input (AmSTeX) and output (BibTeX) files

my $dotspot = index($ARGV[0], ".");
if ($dotspot > 0) {
    my $root = substr($ARGV[0], 0, $dotspot);
    $bibfile = $root . ".bib" unless defined $bibfile;
}
else {
    $bibfile = $ARGV[0] . ".bib";
    $ARGV[0] .= ".tex";
}

open (INFILE, $ARGV[0]) || die "$prog: can't read $ARGV[0]:  $!\n";
open (OUTFILE, ">$bibfile") || 
  die "$prog: can't open $bibfile for output: $!\n";

# main processing loop

do {
    $still_more = process_one_ref();
} while $still_more;

# bring 'em all home, now

close INFILE;
close OUTFILE;

exit 0;

#
# process a single entry from the INFILE filehandle
# an entry has the form
#    \ref
#    \keyword blah blah blah
#    \endref
#
# keywords are listed below
# assumption: a keyword can only appear at the beginning of a line
#             (possibly preceded by whitespac)
# blank lines are ignored, as are instances of \pointer
# returns false when eof occurs on INFILE
#
# this subroutine does no error checking ... it assumes that the entry 
# has the stated form
#

sub process_one_ref() 
{
    my $keywords = 
       "\\book \\bookinfo \\by \\bysame \\endref \\inbook \\jour
        \\manyby \\pages \\paper \\paperinfo \\publ \\publaddr 
        \\toappear \\vol \\yr";  

    my (%bibentry, $currentword, $firstword, @remainder,
        $current_keyword, $rhs, $keyword);

    # if there's nothing there, we're done

    return 0 if (eof(INFILE));

    # first, go past anything that doesn't match \ref
    # (blank lines, \\pointer, etc.)

    while (<INFILE>) {
        s/%.+//;                # strip out comments
        last if index($_, "\\ref") >= 0;
    }
    return 0 if (eof(INFILE));

    # store contents of entry into a hash, indexed by the keywords

    while (<INFILE>) {
        s/%.+//;                # again with the comments??
        last if index($_, "\\endref") >= 0;
        ($firstword, @remainder) = split;
        if (index($keywords, $firstword) >= 0) {
            $firstword  = substr($firstword, 1); # strip off leading slash
            $current_keyword = $firstword;
            $rhs = join(" ", @remainder);
        # 
        # handle the various different fields
        #
        # here we deal with the \manyby ... \bysame construction
            if ($current_keyword eq "manyby") {
                $bibentry{"by"} = $rhs;
                $current_author = $rhs;
            } elsif ($current_keyword eq "bysame") {
                $bibentry{"by"} = $current_author;
            } else {
        # handle all remaining keywords
                $bibentry{$current_keyword} = $rhs;
            }
        } else {  # firstword not a keyword, so this is a continuation line
            $bibentry{$current_keyword} .= 
                " " . join(" ", $firstword, @remainder);
        }
    }

    if (exists $bibentry{"book"}) {
        handle_book(%bibentry);
    } elsif (exists $bibentry{"inbook"}) {
        handle_inbook(%bibentry);
    } elsif (exists $bibentry{"paper"}) {
        handle_paper(%bibentry);
    } else {
        print STDERR "$prog: unknown bibliographic entry\n";
        foreach $keyword (keys(%bibentry)) {
            print STDERR "$keyword: ", $bibentry{$keyword}, "\n";
        }
    }

    return 1;

}

#
# a book appearing in the bibliography
#

sub handle_book 
{
    my %bibentry = @_;
    my $author = adjust_author($bibentry{"by"});
    my $bibkey = bib_key($author, $bibentry{"yr"});
    print OUTFILE '@Book{', $bibkey, ",\n",
        "   author    = {$author},\n",
        "   title     = {", $bibentry{"book"}, "},\n",
        "   publisher = {", $bibentry{"publ"}, "},\n",
        "   address   = {", $bibentry{"publaddr"}, "},\n",
        "   year      = {", $bibentry{"yr"}, "},\n";
    if (exists $bibentry{"bookinfo"}) {
        print OUTFILE "   note      = {", $bibentry{"bookinfo"}, "},\n";
    }
    print OUTFILE "}\n\n";
}

#
# part of a book (e.g., a contributed paper or a chapter) appearing
# in the bibliography
#

sub handle_inbook 
{
    my %bibentry = @_;
    my $author = adjust_author($bibentry{"by"});
    my $bibkey = bib_key($author, $bibentry{"yr"});
    print OUTFILE '@InCollection{', $bibkey, ",\n",
        "   author        = {$author},\n",
        "   title         = {", $bibentry{"paper"}, "},\n",
        "   booktitle     = {", $bibentry{"inbook"}, "},\n",
        "   pages         = {", $bibentry{"pages"}, "},\n",
        "   publisher     = {", $bibentry{"publ"}, "},\n",
        "   address       = {", $bibentry{"publaddr"}, "},\n",
        "   year          = {", $bibentry{"yr"}, "},\n";
    if (exists $bibentry{"bookinfo"}) {
        print OUTFILE "   note          = {", $bibentry{"bookinfo"}, "},\n";
    }
    print OUTFILE "}\n\n";
}

#
# a paper appearing in the bibliography
#

sub handle_paper
{
    my %bibentry = @_;
    my $author = adjust_author($bibentry{"by"});
    my $bibkey = bib_key($author, $bibentry{"yr"});
    print OUTFILE '@Article{', $bibkey, ",\n",
        "   author    = {$author},\n",
        "   title     = {", $bibentry{"paper"}, "},\n",
        "   journal   = {", $bibentry{"jour"}, "},\n",
        "   volume    = {", $bibentry{"vol"}, "},\n",
        "   year      = {", $bibentry{"yr"}, "},\n",
        "   pages     = {", $bibentry{"pages"}, "},\n";
    if (exists $bibentry{"paperinfo"}) {
        print OUTFILE "   note      = {", $bibentry{"paperinfo"}, "},\n";
    }
    print OUTFILE "}\n\n";
}

#
# fix the author
#
# If a reference has more than two authors, then the AmSTeX file will
# have a comma separating the authors (with the exception of the last two).
# BibTeX requires "and" to separate the authors.
#
# Example:
#     Aziz, A. K., Kellogg, R. B., and Stephens, A. B. 
# becomes
#     Aziz, A. K. and Kellogg, R. B. and Stephens, A. B. 
#

sub adjust_author
{
    my ($orig_author) = @_;
    my @parsed_list = split(",", $orig_author);
    my $length = scalar(@parsed_list);
    return $orig_author if $length < 4;
    my $author = "";
    for (my $i = 0; $i < $length - 2; $i += 2) {
        $author .= $parsed_list[$i] . "," . $parsed_list[$i+1]. " and";
    }
    $author .=  $parsed_list[$length - 2] . "," . $parsed_list[$length - 1];
    $author =~ s/and and/and/;

    return $author;
}

#
# generate a key from the author(s) and publication year
#

sub bib_key 
{
    my ($author, $year) = @_;
    $author =~ s/(.+),.+$/$1/;  # clean out stuff that's not in last names
    $author =~ s/,.+ and//g;    # clean out separators: comma, " and"
    $author =~ s/\W//g;
    $author =~ s/\\[^\s{]+([\s{])/$1/g; # clean out TeX control sequences
    my $key_sofar = $author . $year ;
#
# has this key already appeared?
#
    if (! exists $keys{$key_sofar}) {
        $keys{$key_sofar} = 1;
        return $key_sofar;
    }
    my $key;
    my $addon = $keys{$key_sofar}++;
    $addon =~ tr/1-9/b-j/;
    return $key_sofar . $addon;
}
