import { removeNonStandardCharactersFromString } from "@rivial-security/func-utils";

/**
 * Removes non-standard characters from the text of an html string while leaving the markup in place.
 *
 * The content is parsed as an html document, every text node reachable from the body through
 * element nodes is passed through `removeNonStandardCharactersFromString`, and the body is
 * serialized back into a string. Only `body.innerHTML` is returned, so anything the parser
 * places outside of the body is not part of the result.
 *
 * @param {String} htmlContent - html markup to clean up
 * @return {string} the cleaned html, or an empty string when `htmlContent` is null or undefined
 */
export const removeNonStandardCharacters = (htmlContent) => {
  // DOMParser stringifies its argument, so a missing value would otherwise be parsed
  // (and returned) as the literal text "null" / "undefined"
  if (htmlContent === null || htmlContent === undefined) {
    return "";
  }

  // Parse HTML string
  const doc = new DOMParser().parseFromString(htmlContent, "text/html");

  // Recursive function to traverse the DOM tree
  const processDOMNodes = (node) => {
    switch (node.nodeType) {
      case Node.ELEMENT_NODE:
        // Elements carry no text of their own, descend into their children
        for (const child of node.childNodes) {
          processDOMNodes(child);
        }
        break;
      case Node.TEXT_NODE:
        // The flags ask the helper to strip non-basic and null characters
        // while leaving unicode control characters alone
        node.textContent = removeNonStandardCharactersFromString({
          stringContent: node.textContent,
          removeAllNonBasicCharacters: true,
          removeUnicodeControlCharacters: false,
          removeNullCharacters: true,
        });
        break;
      default:
        // Any other node type (e.g. comments) is left untouched
        break;
    }
  };

  processDOMNodes(doc.body);

  return doc.body.innerHTML;
};
