// See https://www.w3.org/TR/css-text-3/#white-space-rules

/**
 * Upper-case tag names (the form `Node.nodeName` takes for HTML elements) of the
 * text-level elements that take part in white-space collapsing together with
 * their neighbouring text nodes. Every element that is not listed here is treated
 * as a boundary at which collapsible white space is trimmed.
 */
export const TextPhrasingElements = [
    'A',
    'ABBR',
    'B',
    'BDI',
    'BDO',
    'BR',
    'CITE',
    'CODE',
    'DATA',
    'DEL',
    'DFN',
    'EM',
    'I',
    'INS',
    'KBD',
    'MARK',
    'Q',
    'RP',
    'RT',
    'RUBY',
    'S',
    'SAMP',
    'SMALL',
    'SPAN',
    'STRONG',
    'SUB',
    // `SUP` is inline text-level content just like `SUB`; without it the space next
    // to a superscript would be trimmed as if `<sup>` were a block boundary.
    'SUP',
    'TIME',
    'U',
    'VAR',
    'WBR',
];

// Patterns applied, in this order, by `collapseText`.

// A run of segment breaks (CR, LF or FF) together with any spaces/tabs directly
// before or after it; `collapseText` replaces each match with a single "\n".
const CollapsibleAroundSegmentBreakRegex = /[ \t]*[\r\n\f]+[ \t]*/g;
// One or more consecutive segment breaks; replaced with a single "\n".
const ConsecutiveSegmentBreaksRegex = /[\r\n\f]+/g;
// A "\n" that has a zero-width space (U+200B) on at least one side; the whole
// match is replaced with a single zero-width space.
const ZeroWidthSpacesAroundSegmentBreaksRegex = /(\u200b\n\u200b?|\u200b?\n\u200b)/g;
// Any remaining "\n"; replaced with a single space.
const SegmentBreakRegex = /\n/g;
// A run of spaces and/or tabs; replaced with a single space.
const SubsequentWhitespaceRegex = /[ \t]+/g;

/** Flags passed along while walking the tree in `collapseNode` and its helpers. */
type Options = {
    /**
     * Set to `true` by `collapsePhrasing` for every child it recurses into. When it
     * is set, `collapsePhrasing` does not trim the leading and trailing space of the
     * run it is processing.
     */
    isInPhrasing?: boolean;
};

/**
 * Detaches `node` from its parent, unless it is a `BR` or `WBR` element, which is
 * always left in place.
 *
 * @param node - The node to detach.
 */
const removeNode = (node: ChildNode) => {
    const isBreakElement = ['BR', 'WBR'].includes(node.nodeName);
    if (isBreakElement) {
        return;
    }
    node.remove();
};

/**
 * Tells whether `node` carries no content.
 *
 * A text node is empty when its value has zero length. An `IMG` element is never
 * empty. Any other node is empty when all of its children are empty, which
 * includes the case of having no children at all.
 *
 * NOTE(review): `IMG` is the only element treated as having content of its own;
 * other child-less elements (e.g. `HR`, `INPUT`) are reported as empty — confirm
 * that this is intended.
 *
 * @param node - The node to inspect.
 * @returns `true` when the node is considered empty.
 */
const isEmpty = (node: ChildNode): boolean => {
    if (node.nodeType === Node.TEXT_NODE) {
        return node.nodeValue.length === 0;
    }
    if (node.nodeName === 'IMG') {
        return false;
    }
    return Array.from(node.childNodes).every((child) => isEmpty(child));
};

/**
 * Tells whether the leading edge of `node` is collapsible.
 *
 * - A text node qualifies when its value starts with a space.
 * - An element that is not listed in `TextPhrasingElements` always qualifies.
 * - Any other node qualifies when it has no children, or when its first child
 *   qualifies (checked recursively).
 *
 * @param node - The node whose leading edge is inspected.
 * @returns `true` when the node starts at a collapsible position.
 */
const isCollapsibleStart = (node: ChildNode): boolean => {
    if (node.nodeType === Node.TEXT_NODE) {
        return node.nodeValue.startsWith(' ');
    }
    const isNonPhrasingElement =
        node.nodeType === Node.ELEMENT_NODE && !TextPhrasingElements.includes(node.nodeName);
    if (isNonPhrasingElement || node.firstChild == null) {
        return true;
    }
    return isCollapsibleStart(node.firstChild);
};

/**
 * Tells whether the trailing edge of `node` is collapsible.
 *
 * - A text node qualifies when its value ends with a space.
 * - An element that is not listed in `TextPhrasingElements` always qualifies.
 * - Any other node qualifies when it has no children, or when its last child
 *   qualifies (checked recursively).
 *
 * @param node - The node whose trailing edge is inspected.
 * @returns `true` when the node ends at a collapsible position.
 */
const isCollapsibleEnd = (node: ChildNode): boolean => {
    if (node.nodeType === Node.TEXT_NODE) {
        return node.nodeValue.endsWith(' ');
    }
    const isNonPhrasingElement =
        node.nodeType === Node.ELEMENT_NODE && !TextPhrasingElements.includes(node.nodeName);
    if (isNonPhrasingElement || node.lastChild == null) {
        return true;
    }
    return isCollapsibleEnd(node.lastChild);
};

/**
 * Removes a single leading space from `node`, in place.
 *
 * For a text node the first character is dropped when it is a space. For any other
 * node the operation descends into the first child; if that child ends up empty it
 * is detached via `removeNode`.
 *
 * @param node - The node to trim.
 */
const trimStart = (node: ChildNode) => {
    if (node.nodeType !== Node.TEXT_NODE) {
        const first = node.firstChild;
        if (first == null) {
            return;
        }
        trimStart(first);
        if (isEmpty(first)) {
            removeNode(first);
        }
        return;
    }
    if (node.nodeValue.startsWith(' ')) {
        node.nodeValue = node.nodeValue.substring(1);
    }
};

/**
 * Removes a single trailing space from `node`, in place.
 *
 * For a text node the last character is dropped when it is a space. For any other
 * node the operation descends into the last child; if that child ends up empty it
 * is detached via `removeNode`.
 *
 * @param node - The node to trim.
 */
const trimEnd = (node: ChildNode) => {
    if (node.nodeType !== Node.TEXT_NODE) {
        const last = node.lastChild;
        if (last == null) {
            return;
        }
        trimEnd(last);
        if (isEmpty(last)) {
            removeNode(last);
        }
        return;
    }
    if (node.nodeValue.endsWith(' ')) {
        node.nodeValue = node.nodeValue.slice(0, -1);
    }
};

/**
 * Rewrites the value of a text node so that its white space is collapsed.
 *
 * @param node - The text node whose `nodeValue` is rewritten in place.
 */
const collapseText = (node: ChildNode) => {
    const original = node.nodeValue;
    // Spaces/tabs around segment breaks disappear; each such run becomes one "\n".
    const breaksIsolated = original.replace(CollapsibleAroundSegmentBreakRegex, '\n');
    // Consecutive segment breaks are merged into one.
    const breaksMerged = breaksIsolated.replace(ConsecutiveSegmentBreaksRegex, '\n');
    // A break next to a zero-width space is dropped, keeping one zero-width space.
    const zeroWidthResolved = breaksMerged.replace(ZeroWidthSpacesAroundSegmentBreaksRegex, '\u200b');
    // Every remaining break turns into a space.
    const breaksAsSpaces = zeroWidthResolved.replace(SegmentBreakRegex, ' ');
    // Runs of spaces/tabs shrink to a single space.
    node.nodeValue = breaksAsSpaces.replace(SubsequentWhitespaceRegex, ' ');
};

/**
 * Collapses white space across the children of `node`, treating them as one run of
 * inline content.
 *
 * Each child is collapsed first (with `isInPhrasing` set). When the previous
 * sibling ends at a collapsible position and the child starts at one, the
 * duplicate space is dropped: from the end of the previous sibling if the child is
 * a `BR`, otherwise from the start of the child. Unless `options.isInPhrasing` is
 * set, the run itself is finally trimmed at both ends.
 *
 * @param node - Container exposing `childNodes`, `firstChild` and `lastChild`.
 * @param options - Flags inherited from the caller.
 */
const collapsePhrasing = (node: ChildNode, options?: Options) => {
    const children = Array.from(node.childNodes);
    children.forEach((current) => {
        collapseNode(current, { isInPhrasing: true });
        const previous = current.previousSibling;
        if (previous == null || !isCollapsibleEnd(previous) || !isCollapsibleStart(current)) {
            return;
        }
        if (current.nodeName === 'BR') {
            trimEnd(previous);
        } else {
            trimStart(current);
        }
    });
    if (options?.isInPhrasing) {
        return;
    }
    trimStart(node);
    trimEnd(node);
};

/**
 * Collapses white space inside an element.
 *
 * Consecutive child text nodes and text-level elements (see `TextPhrasingElements`)
 * are gathered into runs, and each run is collapsed as a unit by `collapsePhrasing`.
 * Any other element ends the current run and is collapsed on its own. Children that
 * are neither text nor elements (e.g. comments) are removed without ending the run,
 * so that the white space on both sides of them is collapsed together instead of
 * being trimmed away separately (`foo <!-- c --> bar` keeps one space).
 *
 * @param node - The element whose children are processed in place.
 * @param options - Flags inherited from the caller; forwarded to `collapsePhrasing`.
 */
const collapseElement = (node: ChildNode, options?: Options) => {
    let collapsableNodes: Array<ChildNode> = [];
    const tryCollapse = () => {
        if (collapsableNodes.length === 0) {
            return;
        }
        // Stand-in parent exposing only the members that `collapsePhrasing`,
        // `trimStart`, `trimEnd` and `isEmpty` read, limited to the current run.
        const phrasing = {
            firstChild: collapsableNodes[0],
            lastChild: collapsableNodes[collapsableNodes.length - 1],
            childNodes: collapsableNodes,
        } as unknown as ChildNode;
        collapsePhrasing(phrasing, options);
        if (isEmpty(phrasing)) {
            collapsableNodes.forEach(removeNode);
        }
        collapsableNodes = [];
    };
    // Iterate over a snapshot: children are removed while walking.
    for (const childNode of Array.from(node.childNodes)) {
        const isText = childNode.nodeType === Node.TEXT_NODE;
        if (!isText && childNode.nodeType !== Node.ELEMENT_NODE) {
            // Remove right away so the surrounding text nodes are already siblings
            // by the time the run is collapsed.
            childNode.remove();
            continue;
        }
        if (isText || TextPhrasingElements.includes(childNode.nodeName)) {
            collapsableNodes.push(childNode);
        } else {
            tryCollapse();
            collapseNode(childNode);
        }
    }
    tryCollapse();
};

/**
 * Collapses white space in `node` according to its node type: text nodes go
 * through `collapseText`, elements through `collapseElement`, and every other kind
 * of node is removed from the tree.
 *
 * @param node - The node to process in place.
 * @param options - Flags forwarded to `collapseElement`.
 */
const collapseNode = (node: ChildNode, options?: Options) => {
    switch (node.nodeType) {
        case Node.TEXT_NODE:
            collapseText(node);
            break;
        case Node.ELEMENT_NODE:
            collapseElement(node, options);
            break;
        default:
            node.remove();
    }
};

/**
 * Collapses white space throughout `document.body`, then trims the start and end
 * of the body where they are collapsible. The document is modified in place.
 *
 * @param document - The document to process.
 * @returns The same `document` instance that was passed in.
 */
export const collapseHtmlDocument = (document: Document) => {
    const { body } = document;
    collapseNode(body);
    const startsCollapsible = isCollapsibleStart(body);
    if (startsCollapsible) {
        trimStart(body);
    }
    // Evaluated only after the start has been trimmed, as trimming may remove nodes.
    const endsCollapsible = isCollapsibleEnd(body);
    if (endsCollapsible) {
        trimEnd(body);
    }
    return document;
};
