import { isBlockLevelNode, isVoidElement } from "./domUtils";
import { MATHML_SOURCE_ATTRIBUTE } from "../constants";

/**
 * Classification of the text found inside a node.
 * The values are distinct bits, so results of several nodes can be OR-ed together
 * and individual flags tested with a bitwise AND.
 */
export const TEXT_CONTENT = {
    // nothing but empty text was found
    IS_EMPTY: 1,
    // only collapsible white-space was found
    HAS_WHITESPACE: 2,
    // significant content (text, non-breaking space, or an accepted element) was found
    HAS_CONTENT: 4,
};

// Upper-cased tag names of elements that count as content on their own.
// They are never treated as empty and the normalisation does not descend into them.
const ALLOWED_EMPTY_NODES = [
    "SVG",
    "MATH",
];

// Upper-cased tag names of elements that have to remain in the DOM even though they have no content.
// Their descendants are still traversed, but their direct children are never merged
// into a single placeholder node.
const ALLOWED_EMPTY_CONTENT = [
    "TD",
    "TH",
    "TR",
];

/**
 * Tells whether the node is one of the elements listed in ALLOWED_EMPTY_NODES.
 * The tag name is compared case-insensitively.
 *
 * @param {Node} node
 * @returns {boolean|undefined|string} falsy `tagName` is returned as is (e.g. `undefined` for nodes without a tag name)
 */
const isAllowedEmptyNode = (node) => {
    const { tagName } = node;

    return tagName && ALLOWED_EMPTY_NODES.includes(tagName.toUpperCase());
};

/**
 * Tells whether the node is one of the elements listed in ALLOWED_EMPTY_CONTENT.
 * The tag name is compared case-insensitively.
 *
 * @param {Node} node
 * @returns {boolean|undefined|string} falsy `tagName` is returned as is (e.g. `undefined` for nodes without a tag name)
 */
const isAllowedEmptyContent = (node) => {
    const { tagName } = node;

    return tagName && ALLOWED_EMPTY_CONTENT.includes(tagName.toUpperCase());
};

/**
 * Collapses every run of two or more space characters into a single space:
 * "      A       B    C  " => " A B C "
 *
 * Only the plain space character is affected; other white-space is left untouched.
 *
 * @param {string} text
 * @returns {string}
 */
const reduceSpaces = (text) => text.replace(/[ ]{2,}/g, " ");

/**
 * Reduces all groups of white-space chars (CR, LF, TAB, space) in the text parts of the given HTML
 * to a single space. Spaces inside tags (e.g. in attribute values) are not touched.
 *
 * During Copy&Paste, Chrome does that by default in a quite sophisticated way.
 * Firefox and Edge seem to pass HTML in the original form, including formatting spaces and newlines.
 *
 * Example:
 *    # html source (five lines)
 *      <p>
 *        <span>A</span>
 *        <span>B</span>
 *        <span>C</span>
 *      </p>
 *
 *    # Clipboard data when copying from Chrome (simplified).
 *    # Notice that the outer Paragraph disappeared, white-spaces between SPANs replaced by extra SPAN
 *      with NON-BREAK-SPACE character inside:
 *      <span>A</span><span>&nbsp;</span><span>B</span><span>&nbsp;</span><span>C</span>
 *
 *    # Clipboard data when copying from Firefox:
 *      "<p>\n\t<span>A</span>\n\t<span>B</span>\n\t<span>C</span>\n</p>\n"
 *
 * Notes:
 *  - Input without any tag is reduced as well ("A    B" => "A B").
 *  - Only plain spaces are collapsed. A literal non-breaking space (U+00A0) is kept,
 *    because it is a significant character (`String.prototype.trim` and friends would strip it).
 *
 * @param {string} html
 * @returns {string}
 */
export const reduceWhitespace = (html) => {

    // convert CR/LF/TAB to space
    const flattened = html.replace(/[\r\n\t]/g, " ");

    // A text run starts at the very beginning of the input or right after ">"
    // and ends before the next "<" (or at the end of the input). This single pass covers
    // the text before the first tag, between tags, after the last tag and tag-less input.
    const reduced = flattened.replace(/(^|>)([^<]+)/g, (match, boundary, textRun) => {
        return boundary + reduceSpaces(textRun);
    });

    // Trailing spaces that are not part of a text run (input ending inside an unclosed tag)
    // are still reduced, so the result never ends with more than one space.
    return reduced.replace(/ {2,}$/, " ");
};

/**
 * Collects DOM nodes below (and matching) the given criteria into a plain array.
 * The nodes are gathered through a NodeIterator created on the global `document`.
 *
 * @param {Node} rootNode Node where the iteration starts
 * @param {number} nodeFilter `NodeFilter.SHOW_*` bit mask, see
 *        https://www.w3.org/TR/DOM-Level-2-Traversal-Range/traversal.html#Traversal-NodeFilter
 * @param {Function} filter Optional callback for finer filtering of the visited nodes
 * @returns {Array<Node>} nodes in the order in which the iterator returned them
 */
const getNodesByFilter = (rootNode, nodeFilter, filter) => {
    const iterator = document.createNodeIterator(rootNode, nodeFilter, filter);
    const collected = [];

    for (let current = iterator.nextNode(); current; current = iterator.nextNode()) {
        collected.push(current);
    }

    return collected;
};

/**
 * Removes all comment nodes from the document (in place).
 *
 * After a comment is removed, its parent is normalized so that text nodes which were
 * separated by the comment are joined. When the parent ends up with a single text child,
 * groups of spaces in that text are reduced to one space.
 *
 * @param {Document} doc
 */
const removeHtmlComments = (doc) => {
    for (const comment of getNodesByFilter(doc, NodeFilter.SHOW_COMMENT)) {
        const parent = comment.parentNode;

        parent.removeChild(comment);
        parent.normalize();

        const { firstChild, childNodes } = parent;
        const hasSingleTextChild = childNodes.length === 1 && firstChild.nodeType === Node.TEXT_NODE;

        if (hasSingleTextChild) {
            firstChild.textContent = reduceSpaces(firstChild.textContent);
        }
    }
};

/**
 * Node filter callback used when the text content of a node is checked.
 *
 * Non-element nodes are always accepted. An element is accepted only when it is a void element
 * or one of the elements allowed to be empty; any other element is skipped.
 *
 * @param {Node} node
 * @returns {number} `NodeFilter.FILTER_ACCEPT` or `NodeFilter.FILTER_SKIP`
 */
const filterTextNode = (node) => {
    if (node.nodeType !== Node.ELEMENT_NODE) {
        return NodeFilter.FILTER_ACCEPT;
    }

    const countsAsContent = isVoidElement(node) || isAllowedEmptyNode(node) || isAllowedEmptyContent(node);

    if (countsAsContent) {
        return NodeFilter.FILTER_ACCEPT;
    }

    return NodeFilter.FILTER_SKIP;
};

/**
 * Classifies the content of the given node.
 *
 * The nodes delivered by `getNodesByFilter` (text nodes and the elements accepted by `filterTextNode`)
 * are inspected one by one:
 *  - an element, a text with a non-white-space character, or a text containing
 *    a non-breaking space means `TEXT_CONTENT.HAS_CONTENT` (returned immediately),
 *  - a non-empty text made of white-space only means `TEXT_CONTENT.HAS_WHITESPACE`,
 *  - otherwise the result is `TEXT_CONTENT.IS_EMPTY`.
 *
 * @param {Node} node
 * @returns {number} one of the `TEXT_CONTENT` values
 */
export const checkTextContent = (node) => {
    const candidates = getNodesByFilter(
        node,
        NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT,
        filterTextNode
    );
    let sawWhitespace = false;

    for (const candidate of candidates) {

        if (candidate.nodeType === Node.ELEMENT_NODE) {
            return TEXT_CONTENT.HAS_CONTENT;
        }

        const value = candidate.nodeValue;

        if (!value) {
            continue;
        }

        // `trim()` also strips the non-breaking space, which is a significant char,
        // hence the extra check.
        if (value.trim() || value.includes("\xA0")) {
            return TEXT_CONTENT.HAS_CONTENT;
        }

        sawWhitespace = true;
    }

    return sawWhitespace ? TEXT_CONTENT.HAS_WHITESPACE : TEXT_CONTENT.IS_EMPTY;
};

/**
 * Replaces the given group of children of `rootNode` with a single new node:
 *      a. a text node, when `isBlock` is falsy,
 *      b. a DIV element, when `isBlock` is truthy.
 *  The new node contains a single space when `hasSpace` is truthy, otherwise it is empty.
 *  `<div>` is used for compressed block elements to avoid problems with nesting,
 *  so a group of block nodes is replaced by `<div>` even if the group contains only `<p>` nodes.
 *
 * Nothing happens when `rootNode` is one of the elements listed in ALLOWED_EMPTY_CONTENT
 * (typically table cells).
 *
 * Note: the `children` array is modified - its last item is popped (and replaced by the new node),
 * the remaining items are removed from `rootNode`.
 *
 * @param {Node} rootNode
 * @param {Array<Node>} children
 * @param {boolean} isBlock
 * @param {boolean} hasSpace
 */
const compressEmptyNodes = (rootNode, children, isBlock, hasSpace) => {
    const ownerDoc = rootNode.ownerDocument;

    if (isAllowedEmptyContent(rootNode)) {
        return;
    }

    let replacement;

    if (!isBlock) {
        replacement = ownerDoc.createTextNode(hasSpace ? " " : "");
    }
    else {
        replacement = ownerDoc.createElement("div");
        if (hasSpace) {
            replacement.innerHTML = " ";
        }
    }

    const lastEmptyNode = children.pop();
    rootNode.replaceChild(replacement, lastEmptyNode);

    for (const leftover of children) {
        rootNode.removeChild(leftover);
    }
};

/**
 * Joins each group of adjacent empty siblings (containing space or nothing) into one node
 * containing space or nothing (according to the content of the merged siblings).
 * Children are processed recursively before their own content is evaluated.
 *
 * A group of empty siblings is terminated by:
 *  - a sibling with content,
 *  - a void element or an element listed in ALLOWED_EMPTY_NODES (these count as content and
 *    are not descended into),
 *  - the end of the sibling list.
 * Terminating the group at void/SVG/MATH elements keeps white-space on the correct side of them,
 * e.g. `A<span> </span><img><span> </span>B` keeps a space both before and after the `<img>`.
 *
 * @param {Node} rootNode
 */
const normalizeEmptyNodes = (rootNode) => {
    const treeWalker = document.createTreeWalker(
        rootNode,
        NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT
    );

    const emptySiblings = [];
    let siblingContent = 0;
    let containsBlockNode = false;

    // Compresses the currently collected group (if any) and starts a new one.
    const flushEmptySiblings = () => {
        if (!emptySiblings.length) {
            return;
        }
        compressEmptyNodes(rootNode, emptySiblings, containsBlockNode, siblingContent & TEXT_CONTENT.HAS_WHITESPACE);
        emptySiblings.length = 0;
        siblingContent = 0;
        containsBlockNode = false;
    };

    for (let node = treeWalker.firstChild(); node; node = treeWalker.nextSibling()) {

        if (isVoidElement(node) || isAllowedEmptyNode(node)) {
            // Such a node is content on its own; empty siblings before and after it
            // must not be merged across it.
            flushEmptySiblings();
            continue;
        }

        normalizeEmptyNodes(node);
        const nodeContent = checkTextContent(node);

        if (nodeContent === TEXT_CONTENT.HAS_CONTENT) {
            // The walker currently points to `node`, which stays in the tree,
            // so removing the collected previous siblings is safe here.
            flushEmptySiblings();
            continue;
        }

        emptySiblings.push(node);
        siblingContent |= nodeContent;
        containsBlockNode = containsBlockNode || isBlockLevelNode(node);
    }

    flushEmptySiblings();
};

/**
 * Stores the given (already trimmed) text into the text node,
 * or removes the node from its parent when no text is left.
 *
 * The text is written as is: trimming is the responsibility of the caller, so that
 * a significant leading space is not lost when only the trailing space is removed.
 *
 * @param {Node} node
 * @param {string} text
 */
const compressTextNode = (node, text) => {
    if (text) {
        node.nodeValue = text;
    }
    else {
        node.parentNode.removeChild(node);
    }
};

/**
 * When text node has space at its beginning and its previous sibling is block level node
 * (or parent node if the text node doesn't have previous sibling), the initial space is useless
 * from rendering point of view and can be removed.
 * The same is applied on ending space (with the next sibling, or the parent node).
 *
 * Only ASCII white-space is removed. The non-breaking space (U+00A0) is a significant
 * character and is kept (`trimLeft`/`trimRight` would strip it as well).
 *
 * @param {Node} rootNode
 */
const removeSurroundingSpaces = (rootNode) => {
    const textNodes = getNodesByFilter(rootNode, NodeFilter.SHOW_TEXT);

    for (const node of textNodes) {
        const prevNode = node.previousSibling || node.parentNode;
        const nextNode = node.nextSibling || node.parentNode;
        let text = node.nodeValue;

        if (text[0] === " " && isBlockLevelNode(prevNode)) {
            text = text.replace(/^[ \t\n\f\r]+/, "");
            compressTextNode(node, text);
        }

        // `text` is empty when the node has just been removed above
        if (text && text[text.length - 1] === " " && isBlockLevelNode(nextNode)) {
            compressTextNode(node, text.replace(/[ \t\n\f\r]+$/, ""));
        }
    }
};

/**
 * This module is used by pastedTextConvertor which uses the `html-to-draftjs` package.
 * Before we can apply our functions (`customChunkRenderer`) for conversion of specific HTML elements
 * into DraftJS entities, the HTML provided by this module (htmlPreProcessor) is processed
 * by `html-to-draftjs`, which replaces `&nbsp;` with spaces
 * (see https://github.com/jpuri/html-to-draftjs/blob/3da7a604761f519caf25a6cdd7bbfe5e1d9f8974/src/library/index.js#L161).
 * But that damages MathML expressions with `&nbsp;` inside.
 *
 * Therefore we store the original MathML into a custom attribute (dataset is not supported by <math> tag)
 * and use it in `customChunkRenderer` instead of the <math> tag itself.
 *
 * A <math> element whose attribute already holds a non-empty value is left untouched.
 *
 * @param {Element} rootNode
 */
const addMathMLDataSet = (rootNode) => {
    Array.from(rootNode.getElementsByTagName("math")).forEach((mathNode) => {
        if (mathNode.getAttribute(MATHML_SOURCE_ATTRIBUTE)) {
            return;
        }

        mathNode.setAttribute(MATHML_SOURCE_ATTRIBUTE, mathNode.outerHTML);
    });
};

/**
 * Runs all in-place normalisation steps on the parsed document:
 * comments are removed from the whole document first, then empty nodes are joined,
 * useless surrounding spaces are removed and MathML sources are stored - all on `doc.body`.
 * The order of the steps is significant.
 *
 * @param {Document} doc
 */
const normalizeDocument = (doc) => {
    removeHtmlComments(doc);

    const bodySteps = [normalizeEmptyNodes, removeSurroundingSpaces, addMathMLDataSet];

    bodySteps.forEach((step) => {
        step(doc.body);
    });
};

/**
 * Replaces every element matching `oldTag` with a newly created `newTag` element.
 * The child nodes are moved into the new element; attributes of the old element are not copied.
 *
 * @param {Document} document Document to be modified (in place)
 * @param {string} oldTag Selector (tag name) of the elements to be replaced
 * @param {string} newTag Tag name of the replacing elements
 */
const replaceElement = (document, oldTag, newTag) => {
    // `querySelectorAll` returns a static list, so the replacements done below
    // do not change the list we are iterating over.
    const staleElements = Array.from(document.querySelectorAll(oldTag));

    staleElements.forEach((staleElement) => {
        const substitute = document.createElement(newTag);

        substitute.append(...staleElement.childNodes);
        staleElement.parentNode.replaceChild(substitute, staleElement);
    });
};

/**
 * Removes the `style` attribute from every element of the document that has one (in place).
 *
 * @param {Document} document
 */
const removeAllStyles = document => {
    // `querySelectorAll` returns a static list, so it is safe to modify the elements while iterating.
    Array.from(document.querySelectorAll("[style]")).forEach((styledElement) => {
        styledElement.removeAttribute("style");
    });
};

/**
 * Pre-processes pasted HTML: white-space is reduced, the HTML is parsed, inline styles are removed,
 * the document is normalized and definition lists are converted (`dd` -> `li`, `dl` -> `ul`).
 *
 * Note: <p> cannot contain block-level elements (including P itself).
 * https://stackoverflow.com/questions/8397852/why-p-tag-cant-contain-div-tag-inside-it
 * Should we check it? E.g.:
 * `<p><div>A</div></p>` is converted by DOMParser to `<p></p><div>A</div><p></p>`, or
 * `<p><ul><li>aaa</li></ul></p>` -> `<p></p><ul><li>aaa</li></ul>`.
 *
 * @param {string} html
 * @returns {string|null} inner HTML of the processed document body, or `null` for empty input
 */
export const preprocessHtml = html => {
    if (!html) {
        return null;
    }

    const sanitizedHtml = reduceWhitespace(html);
    const doc = new DOMParser().parseFromString(sanitizedHtml, "text/html");

    // order of particular transformation is significant, `doc` is modified via in-place functions.
    removeAllStyles(doc);
    normalizeDocument(doc);

    const tagReplacements = [
        ["dd", "li"],
        ["dl", "ul"],
    ];

    tagReplacements.forEach(([oldTag, newTag]) => {
        replaceElement(doc, oldTag, newTag);
    });

    return doc.body.innerHTML;
};
