#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
require 'kramdown-rfc2629'
require 'kramdown-rfc/parameterset'
require 'kramdown-rfc/refxml'
require 'yaml'
require 'kramdown-rfc/erb'
require 'date'

# try to get this from gemspec.
KDRFC_VERSION=Gem.loaded_specs["kramdown-rfc2629"].version rescue "unknown-version"

Encoding.default_external = "UTF-8" # wake up, smell the coffee

def boilerplate(key)
  case key.downcase
  when /\Abcp14(\+)?\z/i
    ret = <<RFC8174
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL
NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED",
"MAY", and "OPTIONAL" in this document are to be interpreted as
described in BCP 14 {{!RFC2119}} {{!RFC8174}} when, and only when, they
appear in all capitals, as shown here.
RFC8174
    if $1
      ret << <<PLUS
These words may also appear in this document in
lower case as plain English words, absent their normative meanings.
PLUS
    end
    ret
  else
    warn "** Unknwon boilerplate key: #{key}"
    "{::boilerplate #{key}}"
  end
end

def do_the_tls_dance
  begin
    require 'openssl'
    File.open(OpenSSL::X509::DEFAULT_CERT_FILE) do end
    # This guards against having an unreadable cert file (yes, that appears to happen a lot).
  rescue
    if Dir[File.join(OpenSSL::X509::DEFAULT_CERT_DIR, "*.pem")].empty?
      # This guards against having no certs at all, not against missing the right one for IETF.
      # Oh well.
      warn "** Configuration problem with OpenSSL certificate store."
      warn "**   You may want to examine #{OpenSSL::X509::DEFAULT_CERT_FILE}"
      warn "**    and #{OpenSSL::X509::DEFAULT_CERT_DIR}."
      warn "**   Activating suboptimal workaround."
      warn "**   Occasionally run `certified-update` to maintain that workaround."
      require 'certified'
    end
  end
end

RE_NL = /(?:\n|\r|\r\n)/
RE_SECTION = /---(?:\s+(\w+)(-?))?\s*#{RE_NL}(.*?#{RE_NL})(?=---(?:\s+\w+-?)?\s*#{RE_NL}|\Z)/m

NMDTAGS = ["{:/nomarkdown}\n\n", "\n\n{::nomarkdown}\n"]

NORMINFORM = { "!" => :normative, "?" => :informative }

def yaml_load(input, *args)
  if YAML.respond_to?(:safe_load)
    YAML.safe_load(input, *args)
  else
    YAML.load(input)
 end
end

def xml_from_sections(input)

  unless ENV["KRAMDOWN_NO_SOURCE"]
    require 'kramdown-rfc/gzip-clone'
    require 'base64'
    compressed_input = Gzip.compress(input)
    $source = Base64.encode64(compressed_input)
  end

  sections = input.scan(RE_SECTION)
  # resulting in an array; each section is [section-label, nomarkdown-flag, section-text]

  # the first section is a YAML with front matter parameters (don't put a label here)
  # We put back the "---" plus gratuitous blank lines to hack the line number in errors
  yaml_in = input[/---\s*/] << sections.shift[2]
  ps = KramdownRFC::ParameterSet.new(yaml_load(yaml_in, [Date], [], true))
  coding_override = ps.has(:coding)
  smart_quotes = ps[:smart_quotes]
  typographic_symbols = ps[:typographic_symbols]
  kramdown_options = ps[:kramdown_options]

  # all the other sections are put in a Hash, possibly concatenated from parts there
  sechash = Hash.new{ |h,k| h[k] = ""}
  snames = []                   # a stack of section names
  sections.each do |sname, nmdflag, text|
    nmdin, nmdout = {
      "-" => ["", ""],          # stay in nomarkdown
      "" => NMDTAGS, # pop out temporarily
    }[nmdflag || ""]
    if sname
      snames << sname           # "--- label" -> push label (now current)
    else
      snames.pop                # just "---" -> pop label (previous now current)
    end
    sechash[snames.last] << "#{nmdin}#{text}#{nmdout}"
  end

  ref_replacements = { }
  anchor_to_bibref = { }

  [:ref, :normative, :informative].each do |sn|
    if refs = ps.has(sn)
      warn "*** bad section #{sn}: #{refs.inspect}" unless refs.respond_to? :each
      refs.each do |k, v|
        if v.respond_to? :to_str
          if bibtagsys(v)       # enable "foo: RFC4711" as a custom anchor definition
            anchor_to_bibref[k] = v.to_str
          end
          ref_replacements[v.to_str] = k
        end
        if Hash === v
          if aliasname = v.delete("-")
            ref_replacements[aliasname] = k
          end
          if bibref = v.delete("=")
            anchor_to_bibref[k] = bibref
          end
        end
      end
    end
  end
  open_refs = ps[:ref] || { }       # consumed

  norm_ref = { }

  # convenience replacement of {{-coap}} with {{I-D.ietf-core-coap}}
  # collect normative/informative tagging {{!RFC2119}} {{?RFC4711}}
  sechash.each do |k, v|
    next if k == "fluff"
    v.gsub!(/{{(?:([?!])(-)?|(-))([\w._\-]+)(?:=([\w.\/_\-]+))?}}/) do |match|
      norminform = $1
      replacing = $2 || $3
      word = $4
      bibref = $5
      if replacing
        if new = ref_replacements[word]
          word = new
        else
          warn "*** no alias replacement for {{-#{word}}}"
          word = "-#{word}"
        end
      end       # now, word is the anchor
      if bibref
        if old = anchor_to_bibref[word]
          if bibref != old
            warn "*** conflicting definitions for xref #{word}: #{old} != #{bibref}"
          end
        else
          anchor_to_bibref[word] = bibref
        end
      end

      # things can be normative in one place and informative in another -> normative
      # collect norm/inform above and assign it by priority here
      if norminform
        norm_ref[word] ||= norminform == '!' # one normative ref is enough
      end
      "{{#{word}}}"
    end
  end

  [:normative, :informative].each do |k|
    ps.rest[k.to_s] ||= { }
  end

  norm_ref.each do |k, v|
    # could check bibtagsys here: needed if open_refs is nil or string
    target = ps.has(v ? :normative : :informative)
    warn "*** overwriting #{k}" if target.has_key?(k)
    target[k] = open_refs[k] # add reference to normative/informative
  end
  # note that unused items from ref are considered OK, therefore no check for that here

  # also should allow norm/inform check of other references
  # {{?coap}} vs. {{!coap}} vs. {{-coap}} (undecided)
  # or {{?-coap}} vs. {{!-coap}} vs. {{-coap}} (undecided)
  # could require all references to be decided by a global flag
  overlap = [:normative, :informative].map { |s| (ps.has(s) || { }).keys }.reduce(:&)
  unless overlap.empty?
    warn "*** #{overlap.join(', ')}: both normative and informative"
  end

  stand_alone = ps[:stand_alone]
  link_defs = {}

  [:normative, :informative].each do |sn|
    if refs = ps[sn]
      refs.each do |k, v|
        href = k.gsub(/\A[0-9]/) { "_#{$&}" } # can't start an IDREF with a number
        link_defs[k] = ["##{href}", nil]            # allow [RFC2119] in addition to {{RFC2119}}

        bibref = anchor_to_bibref[k] || k
        bts, url = bibtagsys(bibref, k, stand_alone)
        if bts && (!v || v == {} || v.respond_to?(:to_str))
          if stand_alone
            a = %{{: anchor="#{k}"}}
            sechash[sn.to_s] << %{\n#{NMDTAGS[0]}\n![:include:](#{bts})#{a}\n#{NMDTAGS[1]}\n}
          else
            bts.gsub!('/', '_')
            (ps.rest["bibxml"] ||= []) << [bts, url]
            sechash[sn.to_s] << %{&#{bts};\n} # ???
          end
        else
          unless v && Hash === v
            warn "*** don't know how to expand ref #{k}"
            next
          end
          if bts && !v.delete("override")
            warn "*** warning: explicit settings completely override canned bibxml in reference #{k}"
          end
          options = {input: 'RFC2629Kramdown', entity_output: coding_override, link_defs: link_defs}
          $global_markdown_options = options # For recursive calls in bibref annotation processing.
          sechash[sn.to_s] << KramdownRFC::ref_to_xml(k, v)
        end
      end
    end
  end

  erbfilename = File.expand_path '../../data/kramdown-rfc2629.erb', __FILE__
  erbfile = File.read(erbfilename, coding: "UTF-8")
  erb = ERB.trim_new(erbfile, '-')
  # remove redundant nomarkdown pop outs/pop ins as they confuse kramdown
  input = erb.result(binding).gsub(%r"{::nomarkdown}\s*{:/nomarkdown}"m, "")
  ps.warn_if_leftovers
  sechash.delete("fluff")       # fluff is a "commented out" section
  if !sechash.empty?            # any sections unused by the ERb file?
    warn "*** sections left #{sechash.keys.inspect}!"
  end

  [input, coding_override, link_defs, smart_quotes, typographic_symbols, kramdown_options]
end

XML_RESOURCE_ORG_PREFIX = Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_PREFIX

# return XML entity name, url, rewrite_anchor flag
def bibtagsys(bib, anchor=nil, stand_alone=true)
  if bib =~ /\Arfc(\d+)/i
    rfc4d = "%04d" % $1.to_i
    [bib.upcase,
     "#{XML_RESOURCE_ORG_PREFIX}/bibxml/reference.RFC.#{rfc4d}.xml"]
  elsif bib =~ /\A([-A-Z0-9]+)\./ &&
        (xro = Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_MAP[$1])
    dir, _ttl, rewrite_anchor = xro
    bib1 = bib.gsub(/\A[0-9]/) { "_#{$&}" } # can't start an ID with a number
    if anchor && bib1 != anchor
      if rewrite_anchor
        a = %{?anchor=#{anchor}}
      else
        if !stand_alone
          warn "*** selecting a custom anchor '#{anchor}' for '#{bib1}' requires stand_alone mode"
          warn "    the output will need manual editing to correct this"
        end
      end
    end
    [bib1,
     "#{XML_RESOURCE_ORG_PREFIX}/#{dir}/reference.#{bib}.xml#{a}"]
  end
end

def read_encodings
  encfilename = File.expand_path '../../data/encoding-fallbacks.txt', __FILE__
  encfile = File.read(encfilename, coding: "UTF-8")
  Hash[encfile.lines.map{|l|
         l.chomp!;
         x, s = l.split(" ", 2)
         [x.hex.chr(Encoding::UTF_8), s || " "]}]
end

FALLBACK = read_encodings

def expand_tabs(s, tab_stops = 8)
  s.gsub(/([^\t\n]*)\t/) do
    $1 + " " * (tab_stops - ($1.size % tab_stops))
  end
end


require 'optparse'
require 'ostruct'

$options = OpenStruct.new
op = OptionParser.new do |opts|
  opts.banner = <<BANNER
Usage: kramdown-rfc2629 [options] file.md|file.mkd > file.xml
Version: #{KDRFC_VERSION}
BANNER
  opts.on("-V", "--version", "Show version and exit") do |v|
    puts "kramdown-rfc2629 #{KDRFC_VERSION}"
    exit
  end
  opts.on("-H", "--help", "Show option summary and exit") do |v|
    puts opts
    exit
  end
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    $options.verbose = v
  end
  opts.on("-3", "--[no-]v3", "Use RFCXML v3 processing rules") do |v|
    $options.v3 = v
  end
end
op.parse!

if $options.verbose && $options.v3
  warn "*** not much RFCXMLv3 stuff implemented yet"
end

coding_override = :as_char
input = ARGF.read
if input[0] == "\uFEFF"
   warn "*** There is a leading byte order mark. Ignored."
   input[0..0] = ''
end
if input[-1] != "\n"
  # warn "*** added missing newline at end"
  input << "\n"                 # fix #26
end
input.gsub!(/^\{::include\s+(.*?)\}/) {
  File.read($1).chomp
} unless ENV["KRAMDOWN_SAFE"]
input.gsub!(/^\{::boilerplate\s+(.*?)\}/) {
  boilerplate($1)
}
if input =~ /[\t]/
   warn "*** Input contains HT (\"tab\") characters. Undefined behavior will ensue."
   input = expand_tabs(input)
end

link_defs = {}
if input =~ /\A---/        # this is a sectionized file
  do_the_tls_dance unless ENV["KRAMDOWN_DONT_VERIFY_HTTPS"]
  input, target_coding, link_defs, smart_quotes, typographic_symbols, kramdown_options = xml_from_sections(input)
end
if input =~ /\A<\?xml/          # if this is a whole XML file, protect it
  input = "{::nomarkdown}\n#{input}\n{:/nomarkdown}\n"
end
options = {input: 'RFC2629Kramdown', entity_output: coding_override, link_defs: link_defs}
if smart_quotes.nil?
  if (target_coding && target_coding =~ /ascii/) || $options.v3
     smart_quotes = false
  end
end
if smart_quotes == false
  smart_quotes = ["'".ord, "'".ord, '"'.ord, '"'.ord]
end
case smart_quotes
when Array
  options[:smart_quotes] = smart_quotes
when nil, true
  # nothin
else
  warn "*** Can't deal with smart_quotes value #{smart_quotes.inspect}"
end
if typographic_symbols.nil?
  if (target_coding && target_coding =~ /ascii/) || $options.v3
     typographic_symbols = false
  end
end
if typographic_symbols == false
  typographic_symbols = Hash[::Kramdown::Parser::Kramdown::TYPOGRAPHIC_SYMS.map { |k, v|
                               if Symbol === v
                                 [v.intern, k]
                               end
                             }.compact]
end
# warn [:TYPOGRAPHIC_SYMBOLS, typographic_symbols].to_yaml
case typographic_symbols
when Hash
  options[:typographic_symbols] = typographic_symbols
when nil, true
  # nothin
else
  warn "*** Can't deal with typographic_symbols value #{typographic_symbols.inspect}"
end

if kramdown_options
  options.merge! kramdown_options
end

if target_coding
  input = input.encode(Encoding.find(target_coding), fallback: FALLBACK)
end

# warn "options: #{options.inspect}"
doc = Kramdown::Document.new(input, options)
$stderr.puts doc.warnings.to_yaml unless doc.warnings.empty?
puts doc.to_rfc2629
