use ammonia;
use failure;

use html5ever::rcdom::Node;
use html5ever::rcdom::NodeData;
use html5ever::rcdom::RcDom;
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::TreeBuilderOpts;
use html5ever::{parse_document, ParseOpts};

use lazy_static::lazy_static;
use linkify::{LinkFinder, LinkKind};
use maplit::{hashmap, hashset};

use regex::Regex;

// Regex fragments that `markup_from_raw` assembles into its URL pattern
// (`https?://{DOMAIN}{params}{HASH}`).

// An already-escaped ampersand (`&amp;`), as a capture group; used as the
// optional separator in front of a query parameter.
const AMP: &str = "(&amp;)";
// One or more characters that are not whitespace, `,`, `)`, `(` or `"`.
// It is the only fragment between the scheme and the query string, so despite
// its name it has to match the path as well as the host.
const DOMAIN: &str = "[^\\s,)(\"]+";
// Optional fragment: `#` followed by word characters, `.`, `_` or `-`.
const HASH: &str = "(#[\\w._-]+)?";

pub mod block;

/// Sanitize `s` with [`ammonia`][ammonia] (default settings, except that no
/// `rel` attribute is added to links) and convert the result to `pango`
/// markup, turning URLs into `pango` markup links.
///
/// Use `markup_from_raw` instead when the input has already been sanitized.
///
/// The conversion itself is done by `markup_from_raw`, which currently
/// handles the following `html` constructs:
/// * `<p>` and `</p>` => `""`
/// * `<i>`, `<em>` and `</i>`, `</em>` => `<i>` and `</i>`
/// * `<b>`, `<strong>` and `</b>`, `</strong>` => `<b>` and `</b>`
/// * `<code>` and `</code>` => `<tt>` and `</tt>`
/// * `<br>` => `\n`
/// * any other `<` => `&lt;`
/// * any other `>` => `&gt;`
/// * `&nbsp;` => ` `
///
/// # Examples
///
/// ```rust
/// # use html2pango::markup;
///
/// let m = markup("this is parsed");
/// assert_eq!(&m, "this is parsed");
///
/// let m = markup("<b>this <i>is &ssd<f;</i></b>");
/// assert_eq!(&m, "<b>this <i>is &amp;ssd</i></b>");
///
/// let m = markup("this is <span>parsed</span>");
/// assert_eq!(&m, "this is &lt;span&gt;parsed&lt;/span&gt;");
///
/// let m = markup("with links: http://gnome.org");
/// assert_eq!(&m, "with links: <a href=\"http://gnome.org\">http://gnome.org</a>");
/// ```
///
/// [ammonia]: https://docs.rs/ammonia/1.1.0/ammonia/fn.clean.html
pub fn markup(s: &str) -> String {
    let mut sanitizer = ammonia::Builder::new();
    // Do not let ammonia add a `rel` attribute to the links it keeps.
    sanitizer.link_rel(None);

    let cleaned = sanitizer.clean(s).to_string();
    markup_from_raw(&cleaned)
}

/// Same as `markup` but without sanitizing the input.
///
/// The input is trimmed, the known tags are rewritten (see `markup` for the
/// list), every remaining `<` / `>` is escaped, and finally every
/// `http://` / `https://` URL is wrapped in an `<a href="...">` element.
/// Note that `&` is *not* escaped here: the input is expected to carry
/// already-escaped entities.
///
/// # Examples
///
/// ```
/// # use html2pango::markup_from_raw;
///
/// let m = markup_from_raw("this is parsed");
/// assert_eq!(&m, "this is parsed");
///
/// let m = markup_from_raw("<b>this <i>is &ssd<f;</i></b>");
/// assert_eq!(&m, "<b>this <i>is &ssd&lt;f;</i></b>");
///
/// let m = markup_from_raw("this is <span>parsed</span>");
/// assert_eq!(&m, "this is &lt;span&gt;parsed&lt;/span&gt;");
///
/// let m = markup_from_raw("with links: http://gnome.org");
/// assert_eq!(&m, "with links: <a href=\"http://gnome.org\">http://gnome.org</a>");
/// ```
pub fn markup_from_raw(s: &str) -> String {
    lazy_static! {
        // URL pattern, assembled once from the module-level fragments:
        // scheme, DOMAIN, optional `?param&amp;param=value...` groups, optional `#hash`.
        static ref PARAM: String = format!("({amp}?\\w+(=[\\w._-]+)?)", amp = AMP);
        static ref PARAMS: String = format!("(\\?{param}*)*", param = *PARAM);
        static ref REURL: String = format!(
            "(https?://{domain}{params}{hash})",
            domain = DOMAIN,
            params = *PARAMS,
            hash = HASH
        );
        static ref RE: Regex = Regex::new(&REURL).unwrap();
        // Every token that gets rewritten below. The specific tags come before
        // the bare `<` / `>` alternatives so that they win at the same position.
        static ref MATCH: Regex = Regex::new(
            r"<p>|</p>|<br>|<b>|</b>|<strong>|</strong>|<code>|</code>|<i>|</i>|<em>|</em>|<|>|&nbsp;"
        ).unwrap();
    }

    let s = s.trim();

    // Write straight into the output buffer: copy the text between two matches
    // verbatim, then the replacement for the match itself. This avoids a
    // scratch vector of slices sized by the input's byte length.
    let mut out = String::with_capacity(s.len());
    let mut previous_end = 0;
    for mat in MATCH.find_iter(s) {
        out.push_str(&s[previous_end..mat.start()]);
        // Keep in sync with the `MATCH` regex or it will panic
        out.push_str(match mat.as_str() {
            "<p>" | "</p>" => "",
            "<br>" => "\n",
            "<b>" | "<strong>" => "<b>",
            "</b>" | "</strong>" => "</b>",
            "<code>" => "<tt>",
            "</code>" => "</tt>",
            "<i>" | "<em>" => "<i>",
            "</i>" | "</em>" => "</i>",
            "<" => "&lt;",
            ">" => "&gt;",
            "&nbsp;" => " ",
            _ => unreachable!(),
        });
        previous_end = mat.end();
    }
    out.push_str(&s[previous_end..]);

    // Dropping `<p>` / turning `<br>` into `\n` can leave whitespace at the
    // edges again, hence the second trim before linkifying.
    RE.replace_all(out.trim(), "<a href=\"$0\">$0</a>")
        .into_owned()
}

/// Convert a matrix-formatted `html` message to `pango` markup.
///
/// The input is first sanitized with `ammonia`, restricted to the tags,
/// attributes and URL schemes listed below, and then converted with
/// `markup_html`; the converted text is trimmed. If `markup_html` fails, the
/// sanitized input is converted with `markup_from_raw` instead.
///
/// WIP: only allow the html subset that matrix uses.
pub fn matrix_html_to_markup(s: &str) -> String {
    // https://github.com/matrix-org/matrix-react-sdk/blob/4bf5e44b2043bbe95faa66943878acad23dfb823/src/HtmlUtils.js#L178-L184
    #[cfg_attr(rustfmt, rustfmt_skip)]
    let tags = hashset![
        "font", // custom to matrix for IRC-style font coloring
        "del", // for markdown
        "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "p", "a", "ul", "ol", "sup", "sub",
        "nl", "li", "b", "i", "u", "strong", "em", "strike", "code", "hr", "br", "div",
        "table", "thead", "caption", "tbody", "tr", "th", "td", "pre", "span", "img",
    ];

    // https://github.com/matrix-org/matrix-react-sdk/blob/4bf5e44b2043bbe95faa66943878acad23dfb823/src/HtmlUtils.js#L185-L193
    let attributes = hashmap![
        // custom ones first:
        "font" => hashset!["color", "data-mx-bg-color", "data-mx-color", "style"],
        "span" => hashset!["data-mx-bg-color", "data-mx-color", "style"],
        // remote target: custom to matrix
        "a" => hashset!["href", "name", "target", "rel"],
        "img" => hashset!["src", "width", "height", "alt", "title"],
        "ol" => hashset!["start"],
        // We don't actually allow all classes, TODO: we should filter them afterwards
        "code" => hashset!["class"],
    ];

    // https://github.com/matrix-org/matrix-react-sdk/blob/4bf5e44b2043bbe95faa66943878acad23dfb823/src/HtmlUtils.js#L48
    let schemes = hashset!["http", "https", "ftp", "mailto", "magnet"];

    let mut sanitizer = ammonia::Builder::new();
    sanitizer
        .url_schemes(schemes)
        .tags(tags)
        .tag_attributes(attributes)
        .link_rel(None);
    let cleaned = sanitizer.clean(s).to_string();

    match markup_html(&cleaned) {
        Ok(converted) => converted.trim().to_string(),
        // The DOM based conversion failed: fall back to the regex based one.
        Err(_) => markup_from_raw(&cleaned),
    }
}

/// Escape the html entities of `s`: `&`, `<`, `>` and `"` are replaced by
/// `&amp;`, `&lt;`, `&gt;` and `&quot;`; every other character is copied as is.
pub fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());

    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }

    escaped
}

/// Converts links to <a href="LINK">LINK</a>
///
/// `s` is expected to be already escaped (see `html_escape`). Every URL found
/// by `linkify` is wrapped in an anchor; all other text is copied unchanged.
///
/// Because the input is escaped, a character such as `&`, `<`, `>` or `"`
/// that directly followed a link in the original text shows up as an entity
/// glued to the end of the detected URL (the closing `;` is the next span).
/// Those trailing entities are cut off the link and written right after the
/// anchor, so they are never lost, even when the link is the last thing in
/// the input.
pub fn markup_links(s: &str) -> String {
    let mut parsed = String::with_capacity(s.len());
    let finder = LinkFinder::new();

    for span in finder.spans(s) {
        let text = span.as_str();
        match span.kind() {
            Some(&LinkKind::Url) => {
                let mut url = text;
                // Entities removed from the end of the link, in their original order.
                let mut suffix = String::new();

                for entity in ["&amp", "&lt", "&gt", "&quot"].iter() {
                    if url.ends_with(*entity) {
                        url = &url[..url.len() - entity.len()];
                        suffix.insert_str(0, entity);
                    }
                }
                // This is to manage "> or "< or "&
                if url.ends_with("&quot;") {
                    url = &url[..url.len() - "&quot;".len()];
                    suffix.insert_str(0, "&quot;");
                }

                parsed.push_str(&format!("<a href=\"{0}\">{0}</a>", url));
                parsed.push_str(&suffix);
            }
            _ => parsed.push_str(text),
        };
    }

    parsed
}

/// Map the html tag name `t` to the opening and closing text that replaces it
/// in the output.
///
/// Returns `None` only for `a`, which is recognised but has no fixed
/// replacement here. Tags that are not recognised at all map to a pair of
/// empty strings, i.e. the tag is dropped and only its content is kept.
fn convert_tag<'a>(t: &'a str) -> Option<(&'a str, &'a str)> {
    let replacement = match t {
        "i" | "em" | "blockquote" => ("<i>", "</i>"),
        "b" | "strong" => ("<b>", "</b>"),
        "code" => ("<tt>", "</tt>"),
        "p" => ("\n", "\n"),
        "br" => ("\n", ""),
        "a" => return None,
        _ => ("", ""),
    };

    Some(replacement)
}

/// Recursively convert `node` and its descendants to markup.
///
/// * Text nodes: newlines become spaces and the text is escaped with
///   `html_escape`; when `autolinks` is `true`, URLs in it are additionally
///   turned into anchors with `markup_links`.
/// * `body`: only the converted children are kept.
/// * `a`: becomes `<a href="...">` with the escaped `href` value. Nothing
///   below an anchor is auto-linked, so that anchors never end up nested.
/// * `font`: becomes `<span foreground="...">` with the escaped `color` value.
/// * Any other element: handled according to `convert_tag`.
/// * Every other kind of node produces an empty string.
///
/// `autolinks` is passed down to all descendants.
fn convert_node(node: &Node, autolinks: bool) -> String {
    match node.data {
        NodeData::Text { contents: ref c } => {
            let escaped = html_escape(&c.borrow().replace("\n", " "));
            if autolinks {
                markup_links(&escaped)
            } else {
                escaped
            }
        }
        NodeData::Element {
            name: ref n,
            attrs: ref a,
            ..
        } => {
            // Converted children, concatenated in document order.
            let children = |links: bool| -> String {
                node.children
                    .borrow()
                    .iter()
                    .map(|child| convert_node(child, links))
                    .collect()
            };
            // Value of the attribute called `wanted`, if present (the last
            // one wins if there are several).
            let attr_value = |wanted: &str| -> Option<String> {
                a.borrow()
                    .iter()
                    .rev()
                    .find(|attr| &*attr.name.local == wanted)
                    .map(|attr| attr.value.to_string())
            };

            let tag = n.local.to_string();
            match &tag[..] {
                "body" => children(autolinks),
                "a" => {
                    let link = attr_value("href").unwrap_or_default();
                    format!("<a href=\"{}\">{}</a>", html_escape(&link), children(false))
                }
                "font" => {
                    let content = children(autolinks);
                    match attr_value("color") {
                        // The value comes from the document: escape it before
                        // placing it inside an attribute.
                        Some(ref color) if !color.is_empty() => format!(
                            "<span foreground=\"{}\">{}</span>",
                            html_escape(color),
                            content
                        ),
                        // No colour given: an empty `foreground` is not a
                        // valid colour, so keep only the content.
                        _ => content,
                    }
                }
                _ => {
                    let content = children(autolinks);
                    match convert_tag(&tag) {
                        Some((open, close)) => format!("{}{}{}", open, content, close),
                        None => format!("<{0}>{1}</{0}>", tag, content),
                    }
                }
            }
        }
        _ => String::new(),
    }
}

pub fn markup_html(s: &str) -> Result<String, failure::Error> {
    let opts = ParseOpts {
        tree_builder: TreeBuilderOpts {
            drop_doctype: true,
            ..Default::default()
        },
        ..Default::default()
    };
    let dom = parse_document(RcDom::default(), opts)
        .from_utf8()
        .read_from(&mut s.as_bytes())?;

    let document = &dom.document;
    let html = &document.children.borrow()[0];
    let body = &html.children.borrow()[1];

    Ok(convert_node(body, true))
}

// Unit tests for the public conversion functions of this module.
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    // `markup`: sanitizing + conversion of plain text, unsupported tags,
    // unknown entities, supported tags and bare URLs.
    #[test]
    fn test_markup() {
        let m = markup("this is parsed");
        assert_eq!(&m, "this is parsed");

        let m = markup("this is <span>parsed</span>");
        assert_eq!(&m, "this is &lt;span&gt;parsed&lt;/span&gt;");

        let m = markup("this is &ssdf;");
        assert_eq!(&m, "this is &amp;ssdf;");

        // TODO: add more tests
        let m = markup("<p>this <br>is &ssdf;</p>");
        assert_eq!(&m, "this \nis &amp;ssdf;");

        let m = markup("<b>this <i>is &ssd<f;</i></b>");
        assert_eq!(&m, "<b>this <i>is &amp;ssd</i></b>");

        // A URL with a query string and a fragment: the `&` are escaped both
        // in the `href` and in the link text.
        let url = "http://url.com/test?foo1&foo2=test&foo3#hashing";
        let m = markup(&format!("this is &ssdf; {}", url));
        assert_eq!(
            &m,
            &format!(
                "this is &amp;ssdf; <a href=\"{0}\">{0}</a>",
                url.replace('&', "&amp;")
            )
        );

        // (input, url expected to be linkified) pairs; the text following the
        // link must stay outside of it.
        for l in &[
            ("with links: http://gnome.org :D", "http://gnome.org"),
            (
                "with links: http://url.com/test.html&stuff :D",
                "http://url.com/test.html&stuff",
            ),
        ] {
            let m = markup(l.0);
            assert_eq!(
                &m,
                &format!(
                    "with links: <a href=\"{0}\">{0}</a> :D",
                    l.1.replace('&', "&amp;")
                )
            );
        }
    }

    // `matrix_html_to_markup`: same inputs as `test_markup`, plus `<font>`
    // colouring. Unlike `markup`, an unsupported tag such as `<span>` is
    // dropped and only its content is kept.
    #[test]
    // FIXME: Write specific tests instead of copying the above
    fn test_matrix() {
        let markup = matrix_html_to_markup;
        let m = markup("this is parsed");
        assert_eq!(&m, "this is parsed");

        let m = markup("this is <span>parsed</span>");
        assert_eq!(&m, "this is parsed");

        let m = markup("this is &ssdf;");
        assert_eq!(&m, "this is &amp;ssdf;");

        // TODO: add more tests
        let m = markup("<p>this <br>is &ssdf;</p>");
        assert_eq!(&m, "this \nis &amp;ssdf;");

        let m = markup("<b>this <i>is &ssd<f;</i></b>");
        assert_eq!(&m, "<b>this <i>is &amp;ssd</i></b>");

        // `<font color>` becomes `<span foreground>`.
        let m = markup("hello <font color=\"#112233\">world</font>");
        assert_eq!(&m, "hello <span foreground=\"#112233\">world</span>");

        // URLs nested inside other elements are still linkified.
        let m = markup("hello <em><font color=\"#112233\">http://gnome.org</font></em>");
        assert_eq!(&m, "hello <i><span foreground=\"#112233\"><a href=\"http://gnome.org\">http://gnome.org</a></span></i>");

        let url = "http://url.com/test?foo1&foo2=test&foo3#hashing";
        let m = markup(&format!("this is &ssdf; {}", url));
        assert_eq!(
            &m,
            &format!(
                "this is &amp;ssdf; <a href=\"{0}\">{0}</a>",
                url.replace('&', "&amp;")
            )
        );

        for l in &[
            ("with links: http://gnome.org :D", "http://gnome.org"),
            (
                "with links: http://url.com/test.html&stuff :D",
                "http://url.com/test.html&stuff",
            ),
        ] {
            let m = markup(l.0);
            assert_eq!(
                &m,
                &format!(
                    "with links: <a href=\"{0}\">{0}</a> :D",
                    l.1.replace('&', "&amp;")
                )
            );
        }
    }

    // `markup_links` applied to escaped text: (raw input, expected output)
    // pairs. The last three cases check that a special character directly
    // following a link ends up after the anchor, not inside it.
    #[test]
    fn test_links() {
        let strings = [
            ("clean string without markup",
             "clean string without markup"),

            ("clean string with a <b>markup</b>",
             "clean string with a &lt;b&gt;markup&lt;/b&gt;"),

            ("clean string with a <b>markup</b> and link http://gnome.org/?p=1&q#hash",
             "clean string with a &lt;b&gt;markup&lt;/b&gt; and link <a href=\"http://gnome.org/?p=1&amp;q#hash\">http://gnome.org/?p=1&amp;q#hash</a>"),

            ("report-bug is: please report bugs with parabola packages on the packaging bug tracker at: https://labs.parabola.nu/projects/issue-tracker/issues?set_filter=1&tracker_id=1",
             "report-bug is: please report bugs with parabola packages on the packaging bug tracker at: <a href=\"https://labs.parabola.nu/projects/issue-tracker/issues?set_filter=1&amp;tracker_id=1\">https://labs.parabola.nu/projects/issue-tracker/issues?set_filter=1&amp;tracker_id=1</a>"),

             (
             "bill-auger, isacdaavid: there are two major issues I see with gnome-software. The first issue is that flathub, the largest repo for flatpaks, has nonfree software. If flathub isn't included by default, I think this is fine. The second is archlinux-appstream-data. The [PKGBUILD](https://git.archlinux.org/svntogit/packages.git/tree/trunk/PKGBUILD?h=packages/archlinux-appstream-data) does not use appstream-generator at all. However, it does require grabbing files from sources.archlinux.org",
             "bill-auger, isacdaavid: there are two major issues I see with gnome-software. The first issue is that flathub, the largest repo for flatpaks, has nonfree software. If flathub isn't included by default, I think this is fine. The second is archlinux-appstream-data. The [PKGBUILD](<a href=\"https://git.archlinux.org/svntogit/packages.git/tree/trunk/PKGBUILD?h=packages/archlinux-appstream-data\">https://git.archlinux.org/svntogit/packages.git/tree/trunk/PKGBUILD?h=packages/archlinux-appstream-data</a>) does not use appstream-generator at all. However, it does require grabbing files from sources.archlinux.org",
             ),

             ("links with problems: http://gnome.org/?p=1&",
              "links with problems: <a href=\"http://gnome.org/?p=1\">http://gnome.org/?p=1</a>&amp;"),
             ("links with problems: http://gnome.org/?p=1>",
              "links with problems: <a href=\"http://gnome.org/?p=1\">http://gnome.org/?p=1</a>&gt;"),
             ("links with problems: http://gnome.org/?p=1<",
              "links with problems: <a href=\"http://gnome.org/?p=1\">http://gnome.org/?p=1</a>&lt;"),
        ];

        for &(s, e) in strings.iter() {
            let m = markup_links(&html_escape(s));
            assert_eq!(&m, e);
        }
    }

    // `markup_html`: an explicit `<a>` element is kept as is while a bare URL
    // in the text is linkified.
    #[test]
    fn test_markup_links() {
        let strings = [
            ("This is a test message with <em>markdown</em><br /><a href=\"http://gnome.org\">gnome</a><br />and other link http://gnome.org",
             "This is a test message with <i>markdown</i>\n<a href=\"http://gnome.org\">gnome</a>\nand other link <a href=\"http://gnome.org\">http://gnome.org</a>"),
        ];

        for &(s, e) in strings.iter() {
            let m = markup_html(s).unwrap();
            assert_eq!(&m, e);
        }
    }

    // `markup_links`: a link directly followed by `">` keeps both characters
    // (escaped) outside of the anchor.
    #[test]
    fn test_ending_quote_link() {
        let strings = [
            ("<boxes:gnome-boxes xmlns:boxes=\"https://wiki.gnome.org/Apps/Boxes\">",
             "&lt;boxes:gnome-boxes xmlns:boxes=&quot;<a href=\"https://wiki.gnome.org/Apps/Boxes\">https://wiki.gnome.org/Apps/Boxes</a>&quot;&gt;"),
        ];

        for &(s, e) in strings.iter() {
            let m = markup_links(&html_escape(s));
            assert_eq!(&m, e);
        }
    }

    // `markup_html`: `&` inside a link is escaped exactly once, both in the
    // `href` and in the link text, whether or not the input had it escaped.
    #[test]
    fn test_link_scape() {
        let strings = [
            ("<a href=\"https://forums.transbian.love/?page=thread&id=69\">https://forums.transbian.love/?page=thread&id=69</a>",
             "<a href=\"https://forums.transbian.love/?page=thread&amp;id=69\">https://forums.transbian.love/?page=thread&amp;id=69</a>"),
            ("<a href=\"https://forums.transbian.love/?page=thread&id=69\">https://forums.transbian.love/?page=thread&amp;id=69</a>",
             "<a href=\"https://forums.transbian.love/?page=thread&amp;id=69\">https://forums.transbian.love/?page=thread&amp;id=69</a>"),
            ("https://forums.transbian.love/?page=thread&id=69",
             "<a href=\"https://forums.transbian.love/?page=thread&amp;id=69\">https://forums.transbian.love/?page=thread&amp;id=69</a>"),
        ];

        for &(s, e) in strings.iter() {
            let m = markup_html(s).unwrap();
            assert_eq!(&m, e);
        }
    }
}
