#!/usr/pkg/bin/python3.12 -u
#
# Run doclifter against an entire manual tree.
# Sees all files in section 1 through 9 by default.

import sys, os, getopt, signal, time, re, commands, cStringIO, stat, sets

# Root of the manual hierarchy that pages are read from.
mandir = "/usr/share/man"
# Directory searched for "<page>.<section>.patch" files.
patchdir = os.path.abspath("prepatch")
# Output directory; doclifter_driver() picks one when this is still None.
outdir = None
# Count of pages encountered for which a patch file exists.
patched = 0
# True when HTML is to be generated from the XML.
makehtml = False
# Path of the XSL fragment handed to the formatter, once one has been written.
xslfragment = None
# Patch names not yet matched to a page; singlerun() discards them as it goes.
processed = sets.Set([])

def manfile(section, basename=""):
    "Map a section (and optionally a page name) to a path under mandir."
    if basename:
        if basename[0] == '/':
            # Absolute paths are handed back untouched
            return basename
        if basename.endswith((".gz", ".bz2", ".Z")):
            # Already a real (compressed) file name within the section
            return "%s/man%s/%s" % (mandir, section, basename)
        # A bare page name: the directory uses only the first character
        # of the section, the file name carries the full section
        return "%s/man%s/%s.%s.gz" % (mandir, section[:1], basename, section)
    # No page name: the caller wants the section directory itself
    return "%s/man%s/" % (mandir, section)

def analyze_manpage(manpage):
    """Provide log annotations based on content.

    Only the first line of manpage is examined.  Returns a string with
    one newline-terminated note per recognized generator signature,
    plus a note if the line starts with "<html>"; empty when nothing
    matched.  Raises IOError if the file cannot be opened or read.
    """
    exclusions = (
        ("auto-generated by docbook2man-spec", "SGML DocBook"),
        ("Generated by db2man.xsl", "XML DocBook"),
        ("Automatically generated by Pod::Man", "Pod::Man"),
        )
    # 'with' guarantees the handle is released even if the read fails
    with open(manpage) as fp:
        firstline = fp.readline()
    output = ""
    # Check to see if it was generated from some other master format
    for (pattern, generator) in exclusions:
        if pattern in firstline:
            output += "Generated from %s\n" % generator
    if firstline.startswith("<html>"):
        output += "This page is HTML.\n"
    return output

def fetch_page(file, localcopy, patch):
    """Grab a local copy of a man page, patching if needed.

    file      -- source page; .gz, .bz2 and .Z files are decompressed,
                 anything else is copied
    localcopy -- where the plain-text copy is written
    patch     -- patch file to apply (from inside outdir) if it exists

    Returns (0, output) on success, where output is whatever the patch
    program printed, or (1, message) when the copy step failed.
    """
    output = ""
    if file[-3:] == ".gz":
        fetcher = "gunzip <%s >%s"
    elif file[-4:] == ".bz2":
        fetcher = "bunzip2 <%s >%s"
    elif file[-2:] == ".Z":
        fetcher = "uncompress <%s >%s"
    else:
        fetcher = "cp %s %s"
    cstat = os.system(fetcher % (file, localcopy))
    if os.WIFSIGNALED(cstat) or os.WEXITSTATUS(cstat):
        # The status must be formatted into the message: callers unpack
        # exactly two values from the result.
        return (1, output + "testjig: copy failed, status %d" % cstat)
    if os.path.exists(patch):
        here = os.getcwd()
        os.chdir(outdir)
        try:
            patch_log = commands.getoutput("patch --version-control=never <%s" % (patch,))
            stem = os.path.basename(localcopy)
            # Don't leave patch's backup and reject files lying around
            os.system("rm -f %s.orig %s.rej" % (stem, stem))
        finally:
            # Always return to where we started, even if patching blew up
            os.chdir(here)
        if patch_log:
            output += patch_log + "\n"
    return (0, output)

def getstem(file):
    "Strip a trailing .xml (if any) and then the final dotted component."
    name = file[:-4] if file.endswith(".xml") else file
    # Everything before the last dot; empty when there is no dot at all
    return name.rpartition(".")[0]

def make_xml(source, options, batchmode):
    "Run doclifter on source; return (status, .so target XML or None, output)."
    capture = cStringIO.StringIO()
    doclifter.stdout = doclifter.stderr = capture
    argv = ["-I", mandir] + options.split() + [source]
    status = doclifter.main(argv, capture, capture)
    transcript = capture.getvalue()
    capture.close()
    if status != 2:
        return (status, None, transcript)
    # Status 2: look for a .so request in the page and work out where
    # the XML for the included page would live in the output directory.
    fp = open(source)
    text = fp.read()
    fp.close()
    found = re.search(r"\.so\s+(.*)", text)
    target = None
    if found:
        target = os.path.join(outdir, getstem(found.group(1)) + ".xml")
    return (2, target, transcript)

def validate(translation):
    "Check a generated XML file with xmllint; return (status, output)."
    report = ""
    # A file with entity inclusions won't validate, so don't even try.
    # Pages that have these are usually trivial wrappers like builtins.1,
    # so skipping them loses little.
    try:
        fp = open(translation)
        entity = re.search("<!ENTITY.*SYSTEM '(.*)'>", fp.read())
        fp.close()
        if entity:
            report += "Won't validate due to entity inclusion of %s\n" % entity.group(1)
            return (0, report)
    except IOError:
        report += "%s is missing.\n" % translation
    # Hand the file to the validation checker
    (bstat, lint_out) = commands.getstatusoutput("xmllint --xinclude --postvalid %s >/dev/null" % translation)
    if lint_out:
        report += lint_out + "\n"
    if os.WIFSIGNALED(bstat):
        report += "Bailing out of xmllint...\n"
        return (-1, report)
    exit_code = os.WEXITSTATUS(bstat)
    if not exit_code:
        return (0, report)
    report += "xmllint error status:%s\n" % exit_code
    return (6, report)

def format(translation, fmt, xslfragment):
    """Format an XML file to a specified format.

    translation -- path of the XML file; xmlto is run from its directory
    fmt         -- xmlto output format name
    xslfragment -- XSL customization passed with -m, or a false value
                   to run xmlto without one

    Returns (0, output) on success, (6, output) when xmlto exits with a
    nonzero status, or (-1, output) when it was killed by a signal.
    """
    output = ""
    page = os.path.basename(translation)
    # Pass -m only when there actually is a fragment to pass
    if xslfragment:
        command = "xmlto -m %s %s %s" % (xslfragment, fmt, page)
    else:
        command = "xmlto %s %s" % (fmt, page)
    here = os.getcwd()
    # A bare file name has an empty dirname, which chdir() would reject
    os.chdir(os.path.dirname(translation) or os.curdir)
    try:
        (bstat, format_out) = commands.getstatusoutput(command)
    finally:
        os.chdir(here)
    if format_out:
        output += format_out + "\n"
    if os.WIFSIGNALED(bstat):
        output += "Bailing out of %s formatting...\n" % fmt
        return (-1, output)
    format_error_status = os.WEXITSTATUS(bstat)
    if format_error_status:
        output += "format error status:%s\n" % format_error_status
        return (6, output)
    return (0, output)

def deploy(source, target):
    """Move source to target.

    Returns (0, "") on success, or (3, message) naming both paths and
    the errno when the rename fails.
    """
    try:
        os.rename(source, target)
    except OSError as e:
        return (3, "Rename of %s to %s failed, errno = %d" % (source, target, e.errno))
    return (0, "")

def makelink(source, target):
    "Symlink target to source, both made absolute; OS errors are ignored."
    try:
        origin, alias = os.path.abspath(source), os.path.abspath(target)
        os.symlink(origin, alias)
    except OSError:
        # Best effort only (for instance, the link may already be there)
        pass

def singlerun(file, options, tmpstem="foo"+str(os.getpid()), batchmode=False):
    """Test-format a single file.

    file      -- path of the (possibly compressed) manual page
    options   -- option string handed on to doclifter
    tmpstem   -- base name, inside outdir, for the working copy
    batchmode -- when true, results are filed under outdir/man<section>/,
                 conversions that are already up to date are skipped and
                 the working copy is removed afterwards

    Returns a (status, output) pair.  Status is 0 when nothing failed
    (or when file does not exist); a nonzero status is the one reported
    by the step that gave up.  Raises ValueError if the page name has no
    dot from which to take a section.
    """
    global patched, processed
    if not os.path.exists(file):
        return (0, "")
    output = ""
    # Drop any compression suffix, leaving "name.section"
    withsect = os.path.basename(file)
    for suffix in (".gz", ".bz2", ".Z"):
        if file.endswith(suffix):
            withsect = withsect[:-len(suffix)]
            break
    dot = withsect.rindex(".")
    # Only the first character of the section selects the subdirectory
    section = withsect[dot+1:dot+2]
    subdir = os.path.join(outdir, "man" + section)
    stem = getstem(withsect)
    xmlloc = os.path.join(subdir, stem + ".xml")
    htmlloc = os.path.join(subdir, stem + ".html")
    # Count patches here so our stats won't be off
    patch = os.path.join(patchdir, withsect + ".patch")
    if os.path.exists(patch):
        patched += 1
    # Computed outside the try so the cleanup code can always see 'source'
    tmpstem = os.path.join(outdir, tmpstem)
    source = tmpstem + ".man"
    try:
        # Save work by doing conversions only as needed
        rebuild_xml = True
        if batchmode and os.path.exists(xmlloc):
            if os.stat(file).st_mtime < os.lstat(xmlloc).st_mtime:
                output += "XML conversion is up to date\n"
                processed.discard(withsect)
                rebuild_xml = False
        # When the XML is not rebuilt, the existing deployed file is what
        # any HTML conversion below has to work from.
        translation = xmlloc
        if rebuild_xml:
            # Grab the actual manual page
            localcopy = os.path.join(outdir, withsect)
            (status, output) = fetch_page(file, localcopy, patch)
            if status:
                return (status, output)
            # Note that the patch was used
            processed.discard(withsect)
            # Add any annotations
            output += analyze_manpage(localcopy)
            # Move the source file into the output directory
            os.rename(localcopy, source)
            # Run the translator
            (doclifter_status, lxmlloc, note) = make_xml(source, options, batchmode)
            output += note
            if doclifter_status not in (0, 2):
                if not batchmode:
                    output += "doclifter error status: %s\n" % doclifter_status
                return (doclifter_status, output)
            translation = tmpstem + ".man.xml"
            # Warn about FIX-ME problems
            output += commands.getoutput("grep FIX-ME " + translation + " 2>/dev/null")
            # If the translation went through, cleaning up consists
            # of putting this in its permanent location.
            try:
                # This will foo up if we ever have to symlink between dirs
                if batchmode and not os.path.exists(subdir):
                    os.mkdir(subdir)
            except OSError as e:
                return (3, output + "Creation of %s failed, errno = %d\n" % (subdir, e.errno))
            if doclifter_status == 2:
                # A .so page becomes a link to the page it includes
                makelink(lxmlloc, xmlloc)
            if doclifter_status == 0:
                if not makehtml:
                    (status, more) = validate(translation)
                    output += more
                    if batchmode and status:
                        os.remove(translation)
                        try:
                            os.remove(htmlloc)
                        except OSError:
                            pass
                        return (status, output)
                if batchmode:
                    (status, more) = deploy(translation, xmlloc)
                    translation = xmlloc
                    output += more
                    if status:
                        return (status, output)
        # Save work by doing HTML conversions only as needed
        rebuild_html = makehtml
        if batchmode and os.path.exists(htmlloc):
            if os.stat(xmlloc).st_mtime < os.lstat(htmlloc).st_mtime:
                output += "HTML conversion is up to date\n"
                rebuild_html = False
        if rebuild_html:
            if not batchmode:
                # Outside batch mode the HTML is named relative to the
                # current directory rather than the section subdirectory
                htmlloc = stem + ".html"
            if batchmode and stat.S_ISLNK(os.lstat(xmlloc).st_mode):
                # Mirror an XML symlink with a matching HTML symlink
                makelink(os.readlink(xmlloc)[:-4]+".html", htmlloc)
            else:
                (status, more) = format(translation, "xhtml-nochunks", xslfragment)
                output += more
                if status:
                    if batchmode:
                        os.remove(xmlloc)
                        try:
                            os.remove(htmlloc)
                        except OSError:
                            pass
                    return (status, output)
    finally:
        # Clean up
        if batchmode:
            if os.path.exists(source):
                os.remove(source)
    return (0, output)

def sectionfiles(sections):
    """Generate files corresponding to a list of sections.

    Returns the sorted list of page paths found in the directory of
    each given section.  Sections whose directory does not exist are
    skipped, so a default section list can include sections that are
    not installed.
    """
    files = []
    for section in sections:
        directory = manfile(section)
        if not os.path.isdir(directory):
            continue
        files.extend([manfile(section, name) for name in os.listdir(directory)])
    files.sort()
    return files

# Progress counters for the current mass run (see massrun)
total = eligible = starttime = 0

def massrun(files, options):
    """Test against all files in specified sections.

    files   -- list of manual page paths to convert, in order
    options -- option string passed through to singlerun()

    Each page gets a "! <file>=<status> (<seconds>)" log entry on
    standard output, followed by its output.  SIGUSR2 writes a progress
    report to standard error; SIGHUP, SIGINT, SIGTERM (and SIGPWR where
    the platform has it) abandon the run.  A summary report is written
    to standard output at the end.
    """
    global total, eligible, starttime
    def bailout(signum, frame):
        sys.stdout.write("\nBailing out with signal %d...\n" % signum)
        os.system("rm -f doclifter_test%s.py doclifter_test%s.py[co]" % (os.getpid(), os.getpid()))
        sys.exit(0)
    starttime = int(time.time())
    eligible = len(files)
    doclifter_error_count = xmllint_error_count = total = 0
    def report(sig, frame, out=sys.stderr):
        elapsed = int(time.time()) - starttime
        failures = doclifter_error_count + xmllint_error_count
        # A report may be requested before any file has been finished
        if total:
            good = (float(total) - failures - patched) * 100.0 / total
        else:
            good = 0.0
        out.write("\n%%%d of %d files in %02d:%02d:%02d, %d OK, %d patched, %d doclifter errors, %d validation failures, %2.2f%% good.\n" % \
              (total, eligible,
               elapsed // 3600, (elapsed % 3600) // 60, elapsed % 60,
               total - failures,
               patched,
               doclifter_error_count,
               xmllint_error_count,
               good))
    def test(file, options):
        before = time.time()
        (status, output) = singlerun(file=file, options=options, batchmode=True)
        after = time.time()
        sys.stdout.write("! %s=%d (%2.2f)\n%s\n" % (file, status, after-before, output))
        return (status, output)
    signal.signal(signal.SIGUSR2, report)
    signal.signal(signal.SIGHUP, bailout)
    signal.signal(signal.SIGINT, bailout)
    # Not every platform defines SIGPWR
    if hasattr(signal, "SIGPWR"):
        signal.signal(signal.SIGPWR, bailout)
    signal.signal(signal.SIGTERM, bailout)
    sys.stdout.write("%Test started " + time.ctime() + " \n\n")
    try:
        for file in files:
            (status, output) = test(file=file, options=options)
            if status == -1:
                break
            elif status in (1, 4):	# Doclifter parse or internal error
                # Pages with nothing in them are not counted as failures
                if output.find("is empty") == -1 and output.find("has no text") == -1 and output.find("has no content") == -1:
                    doclifter_error_count += 1
            elif status == 2:		# .so inclusion
                pass
            elif status in (3, 5):	# File I/O error or keyboard interrupt
                pass
            elif status == 6:
                xmllint_error_count += 1
            total = total + 1
    except KeyboardInterrupt:
        pass
    # The closing summary belongs in the log on standard output; the
    # second positional argument of report() is the (unused) frame.
    report(0, None, sys.stdout)

# The XML declaration has to be the very first thing in the document,
# so the literal must not begin with a newline.
htmlheader = '''<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Manlifter contents page</title>
</head>
<body>
'''
htmltrailer = "</body>\n</html>\n"

def genindex(ofp):
    """Write an HTML contents page for the tree under 'xmlman' to ofp.

    Every file in a directory whose path contains "/man<section>" gets
    an entry made of a link to the file, the file name without its
    extension, and the text of its first <refpurpose> element (or
    "(no description)").  Entries are grouped under one heading per
    section, inside a single HTML document.  Unreadable files are
    skipped.
    """
    # Collect all section/name/description triples
    filelist = []
    section_re = re.compile("/man([^/]*)")
    extract_re = re.compile("<refpurpose>([^<]*)</refpurpose>")
    section_dict = {}
    for (root, dirs, files) in os.walk('xmlman'):
        # Extract the manual section; directories outside one are ignored
        m = section_re.search(root)
        if not m:
            continue
        section = m.group(1)
        for file in files:
            section_dict.setdefault(section, [])
            # Extract the manual page name
            name = os.path.splitext(file)[0]
            # Extract the description
            path = os.path.join(root, file)
            try:
                fp = open(path)
                try:
                    contents = fp.read()
                finally:
                    fp.close()
            except IOError:
                continue
            m = extract_re.search(contents)
            if m:
                description = m.group(1)
            else:
                description = "(no description)"
            # Build an index entry
            filelist.append((section, path, name, description))
    filelist.sort()	# In case the directory was pieced together by several runs
    for (section, path, name, description) in filelist:
        section_dict[section].append((path, name, description))

    # One document: header first, a heading and list per section, then
    # the trailer.
    ofp.write(htmlheader)
    for section in sorted(section_dict):
        ofp.write("<h1>%s:</h1>\n<dl>\n" % section)
        for (path, name, description) in section_dict[section]:
            ofp.write("<dt><a href='%s'>%s</a></dt><dd>%s</dd>\n" \
                  % (path, name, description))
        ofp.write("</dl>\n")
    ofp.write(htmltrailer)

def statistics():
    """Summarize a test log read from standard input.

    Counts the "! <file>=<status> ..." entries by status, treats a
    status-1 page whose output says it "is empty" or "has no text" as
    OK (printing its name), picks up the patched-page count from the
    run's summary line, and prints per-status counts and percentages.
    Entries whose status has no legend (such as -1) are ignored.
    """
    legends = (
        "OK ",
        "???",
        ".so",
        "I/O",
        "!!!",
        "^C ",
        "XML",
        )
    counts = [0] * len(legends)

    def percent(part, whole):
        # An empty log has nothing to take a percentage of
        if not whole:
            return 0.0
        return part * 100 / float(whole)

    patchcount = re.compile("([0-9]+) patched")
    patched = 0		# stays 0 if the log carries no summary line
    totalcount = 0
    page = None
    last_status = None
    while True:
        line = sys.stdin.readline()
        if not line:
            break
        m = patchcount.search(line)
        if m:
            patched = int(m.group(1))
        if line[0] != '!':
            # Reclassify an empty page at most once, and only when the
            # entry it belongs to was actually counted as status 1.
            if last_status == 1 and ("is empty" in line or "has no text" in line):
                sys.stdout.write(page + "\n")
                counts[1] -= 1
                counts[0] += 1
                last_status = 0
            continue
        line = line[2:]
        rcolon = line.rindex("=")
        page = line[:rcolon]
        retval = int(line[rcolon+1:].split()[0])
        for suffix in (".gz", ".bz2", ".Z"):
            if page.endswith(suffix):
                page = page[:-len(suffix)]
                break
        page = os.path.basename(page)
        if not 0 <= retval < len(counts):
            # A negative status would otherwise index from the end
            last_status = None
            continue
        last_status = retval
        counts[retval] += 1
        totalcount += 1

    total = sum(counts)
    for (i, count) in enumerate(counts):
        sys.stdout.write("%d = %s: %5d\t%2.2f%%\n" % (i, legends[i], count, percent(count, total)))
    sys.stdout.write("Total: %d\n" % totalcount)
    sys.stdout.write("Patched: %d (%2.2f%%)\n" % (patched, percent(patched, totalcount)))
    sys.stdout.write("With patches: %2.2f%%\n" % percent(counts[0]+counts[2], totalcount))
    sys.stdout.write("Without patches: %2.2f%%\n" % percent(counts[0]+counts[2]-patched, totalcount))

def errorclean(error_only, pattern):
    """Filter a test log on standard input down to the interesting entries.

    error_only -- when true, print only the names of failing pages
                  rather than their full log entries
    pattern    -- optional regular expression; when given, print the
                  name of every page whose output matches it, whatever
                  its status

    Without a pattern, entries with status 0 or 2 are dropped, as are
    status-1 entries for pages reported empty or without text.
    """
    if pattern:
        pattern = re.compile(pattern)
    pagename = re.compile(r"! (.*)=([0-9]+)")
    while True:
        header = sys.stdin.readline()
        if not header:
            break
        # Look for a log leader
        m = pagename.search(header)
        if not m:
            continue
        subject = m.group(1)
        status = int(m.group(2))
        # Collect following error messages up to a blank line
        trailer = ''
        while True:
            line = sys.stdin.readline()
            trailer += line
            if not line or line == '\n':
                break
        if pattern:
            # Emit by pattern
            if pattern.search(trailer):
                sys.stdout.write(subject + "\n")
        else:
            # Emit some of them by status
            if status == 0 or status == 2:
                continue
            if status == 1 and (trailer.find("page is empty") > -1 or trailer.find("page has no text") > -1):
                continue
            # Otherwise, emit
            if error_only:
                sys.stdout.write(subject + "\n")
            else:
                sys.stdout.write(header + trailer)

# XSL customization written to citerefentry.xsl in the output directory
# by doclifter_driver().  It sets the citerefentry.link parameter to 1
# and overrides generate.citerefentry.link so that each citerefentry
# links to <prefix>/man<manvolnum>/<refentrytitle>.html; the single %s
# is the prefix, which doclifter_driver() fills in with the output
# directory.
citereftemplate = '''
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                version="1.0">

<xsl:param name="citerefentry.link" select="1"/>

<xsl:template name="generate.citerefentry.link">
  <xsl:text>%s</xsl:text>
  <xsl:text>/man</xsl:text>
  <xsl:value-of select="manvolnum"/>
  <xsl:text>/</xsl:text>
  <xsl:value-of select="refentrytitle"/>
  <xsl:text>.html</xsl:text>
</xsl:template>

</xsl:stylesheet>
'''

def doclifter_driver(options, arguments):
    """Lift old markup to new.

    options   -- (switch, value) pairs as returned by getopt
    arguments -- page names to convert individually; when empty, the
                 run works on a log (-e), a file list (-f) or whole
                 sections

    Switches: -d passes a debug option to doclifter, -e filters an
    error log from standard input, -f names a file list (or, with -e,
    a pattern), -h turns on HTML generation, -I sets the manual root,
    -p sets the patch directory, -q/-v are passed to doclifter, -s adds
    a section to search, -S prints statistics from a log and exits.
    """
    global makehtml, outdir, xslfragment, patchdir, mandir, processed
    filelist = []
    sections = []
    callopts = ""
    makehtml = False
    errorfilter = False
    quiet = False
    fval = None
    for (switch, val) in options:
        if switch == '-d':
            callopts += " -d " + val
        elif switch == '-e':
            errorfilter = True
        elif switch == '-f':	# Translate files in the specified list
            fval = val
        elif switch == '-h':
            makehtml = True
        elif switch == '-I':	# Specify the root of the manual hierarchy
            # mandir is declared global above; without that this would
            # only set a local and the option would do nothing.
            mandir = val
        elif switch == '-p':	# Specify patch directory
            patchdir = os.path.abspath(val)
        elif switch in ("-q", '-v'):	# Set verbosity level
            # NOTE(review): -v also sets quiet, as it always has; confirm
            # whether that is intended.
            quiet = True
            callopts += " " + switch
        elif switch == '-s':	# Specify search list of sections
            sections.append(val)
        elif switch == '-S':	# Generate statistics from log on stdin
            statistics()
            sys.exit(0)
    if not sections:
        sections = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
    if not outdir:
        if not arguments:
            outdir = 'xmlman'
        else:
            outdir = '.'
    # Create the output directory if need be
    if not arguments:
        if not os.path.exists(outdir):
            os.mkdir(outdir)
        # Create XSL fragment for making refentries into links.  This is
        # done on every run, not just the one that creates the directory,
        # so later runs into an existing tree still have it.
        xslfragment = os.path.abspath(os.path.join(outdir, "citerefentry.xsl"))
        with open(xslfragment, "w") as fp:
            fp.write(citereftemplate % outdir)
    # Process args, if present
    if arguments:
        for file in arguments:
            for section in sections:
                manpage = manfile(section, file)
                sys.stdout.write("Trying %s\n" % manpage)
                if os.path.exists(manpage):
                    (status, output) = singlerun(manpage, callopts, "foobar", batchmode=False)
                    sys.stdout.write(output + "\n")
                    break
    elif errorfilter:
        errorclean(quiet, fval)
    elif fval:
        with open(fval) as fp:
            filelist = [line.rstrip() for line in fp]
        massrun(filelist, callopts)
    else:
        # Start from every patch on hand; singlerun() discards the ones
        # it meets, so what is left afterwards was never used.
        if os.path.isdir(patchdir):
            patchnames = os.listdir(patchdir)
        else:
            patchnames = []
        processed = set([name.replace(".patch", "").replace(".correction", "")
                         for name in patchnames])
        massrun(sectionfiles(sections), callopts)
        if processed:
            sys.stdout.write("# Patches not used:\n")
            for file in sorted(processed):
                sys.stdout.write(file + "\n")
    # Now, rebuild the index page
    if makehtml:
        with open(os.path.join(outdir, "index.html"), "w") as fp:
            genindex(fp)

if __name__ == "__main__":
    import imp
    # doclifter is an executable script, not an importable module, so
    # it is loaded from its installed path under the name 'doclifter'.
    doclifter_path = '/usr/pkg/bin/doclifter'
    try:
        fp = open(doclifter_path)
    except IOError:
        # A missing or unreadable script shows up here, not as ImportError
        sys.stderr.write("manlifter: can't find doclifter!\n")
        sys.exit(1)
    try:
        try:
            doclifter = imp.load_module('doclifter', fp, doclifter_path, ('.py', 'U', 1))
        except ImportError:
            sys.stderr.write("manlifter: can't find doclifter!\n")
            sys.exit(1)
    finally:
        fp.close()
    # Gather options
    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "d:ef:hI:p:qs:Sv")
    except getopt.GetoptError as e:
        sys.stderr.write("manlifter: %s\n" % e)
        sys.exit(2)
    # Do the real work
    doclifter_driver(options, arguments)
# End
