// processDocument.js
/**
 * Rewrites an HTML document in place into a simplified, mostly div-based
 * structure.
 *
 * Steps, in execution order:
 *  1. Unwrap `<o:p>` elements (children are kept, the tag is dropped).
 *  2. Replace `<h1>`..`<h6>` with `<div heading="...">`. This runs before the
 *     tag clean-up so that the clean-up can carry the `heading` attribute
 *     over; otherwise headings sitting directly under `<body>` (or inside an
 *     allowed tag) would be turned into anonymous divs and lose their role.
 *  3. Replace every non-allowed element reachable from `<body>` through
 *     allowed elements with a `<div>`, wrap text nodes in `<div>`s, and
 *     unwrap non-table elements found inside tables.
 *  4. Drop nested tables and fold `<thead>` rows into `<tbody>` as the first
 *     rows, converting `<th>` cells to `<td>`.
 *  5. Wrap each `<img>` in `<div caption="true">`.
 *  6. Remove `<Latext>` elements, all `style` attributes, wrap each `<p>` in a
 *     `<div>`, remove empty divs, and remove all `id` attributes.
 *
 * All new nodes are created through `doc`, so the function does not depend on
 * a global `document` or `Node`.
 *
 * @param {Document} doc - Document to normalize; it is mutated in place.
 * @returns {void}
 */
export function processDocument(doc) {
  // Numeric nodeType values defined by the DOM standard (Node.ELEMENT_NODE and
  // Node.TEXT_NODE); spelled out so no global `Node` is required.
  const ELEMENT_NODE = 1;
  const TEXT_NODE = 3;

  // Value of the "heading" attribute assigned to the div replacing each tag.
  const HEADING_ROLES = new Map([
    ["h1", "title"],
    ["h2", "section"],
    ["h3", "subsection"],
    ["h4", "subsubsection"],
    ["h5", "abstract"],
    ["h6", "keywords"],
  ]);

  // Elements that survive the clean-up pass untouched.
  const ALLOWED_TAGS = new Set([
    "a",
    "b",
    "i",
    "code",
    "ul",
    "li",
    "libomath",
    "libo_space",
    "references",
    "ref",
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
  ]);

  // Elements that may remain inside a table; anything else is unwrapped.
  const TABLE_TAGS = new Set([
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "td",
    "th",
  ]);

  const tagOf = (element) => element.tagName.toLowerCase();

  // Moves every child of `source` to the end of `target`, preserving order.
  function moveChildren(source, target) {
    while (source.firstChild) {
      target.appendChild(source.firstChild);
    }
  }

  // Replaces `element` with its own children.
  function unwrap(element) {
    const parent = element.parentNode;
    while (element.firstChild) {
      parent.insertBefore(element.firstChild, element);
    }
    parent.removeChild(element);
  }

  // Puts a new div (with the given attributes) where `node` was and moves
  // `node` inside it. Moving instead of cloning keeps the original node.
  function wrapInDiv(node, attributes = {}) {
    const wrapper = doc.createElement("div");
    for (const [name, value] of Object.entries(attributes)) {
      wrapper.setAttribute(name, value);
    }
    node.parentNode.replaceChild(wrapper, node);
    wrapper.appendChild(node);
    return wrapper;
  }

  function normalizeTableStructure() {
    doc.querySelectorAll("table").forEach((table) => {
      table.querySelectorAll("table").forEach((nestedTable) => {
        nestedTable.parentNode.removeChild(nestedTable);
      });

      const thead = table.querySelector("thead");
      const existingBody = table.querySelector("tbody");
      const tbody = existingBody || doc.createElement("tbody");

      if (thead) {
        // Header rows are inserted in front of whatever the body already
        // holds so they stay at the top of the table.
        const firstBodyChild = tbody.firstChild;

        Array.from(thead.querySelectorAll("tr")).forEach((row) => {
          Array.from(row.querySelectorAll("th")).forEach((headerCell) => {
            const td = doc.createElement("td");
            moveChildren(headerCell, td);
            for (const attr of Array.from(headerCell.attributes)) {
              td.setAttribute(attr.name, attr.value);
            }
            headerCell.parentNode.replaceChild(td, headerCell);
          });
          tbody.insertBefore(row, firstBodyChild);
        });

        thead.parentNode.removeChild(thead);
      }

      if (!existingBody) {
        table.appendChild(tbody);
      }
    });
  }

  function convertHeadingsToDivsWithAttributes() {
    doc.querySelectorAll("h1, h2, h3, h4, h5, h6").forEach((heading) => {
      const div = doc.createElement("div");
      const role = HEADING_ROLES.get(tagOf(heading));
      if (role) {
        div.setAttribute("heading", role);
      }
      moveChildren(heading, div);
      heading.parentNode.replaceChild(div, heading);
    });
  }

  // Not part of the pipeline at the moment; see the disabled call below.
  function wrapTablesInDivsWithCaption() {
    doc.querySelectorAll("table").forEach((table) => {
      wrapInDiv(table, { tablecaption: "true" });
    });
  }

  function wrapImagesInDivsWithCaption() {
    doc.querySelectorAll("img").forEach((img) => {
      wrapInDiv(img, { caption: "true" });
    });
  }

  function removeLatexFormulasFromDocument() {
    // NOTE(review): this matches <Latext> elements; confirm the tag name is
    // what the producer of these documents actually emits.
    doc.querySelectorAll("Latext").forEach((formula) => {
      formula.parentNode.removeChild(formula);
    });
  }

  function removeStylesFromDocument() {
    doc.querySelectorAll("*").forEach((node) => {
      node.removeAttribute("style");
    });
  }

  function wrapParagraphsInDivs() {
    doc.querySelectorAll("p").forEach((paragraph) => {
      wrapInDiv(paragraph);
    });
  }

  function removeAllTagsExcept() {
    function cleanTable(table) {
      Array.from(table.querySelectorAll("*")).forEach((element) => {
        if (!TABLE_TAGS.has(tagOf(element))) {
          unwrap(element);
        }
      });
    }

    function clean(node) {
      // Iterate over a snapshot because children are replaced while walking.
      Array.from(node.childNodes).forEach((child) => {
        if (child.nodeType === TEXT_NODE) {
          wrapInDiv(child);
          return;
        }
        if (child.nodeType !== ELEMENT_NODE) {
          return;
        }

        const tag = tagOf(child);
        if (!ALLOWED_TAGS.has(tag)) {
          // Swap the element for a plain div; only the "heading" attribute
          // is carried over. The moved children are not visited further.
          const div = doc.createElement("div");
          if (child.hasAttribute("heading")) {
            div.setAttribute("heading", child.getAttribute("heading"));
          }
          moveChildren(child, div);
          child.parentNode.replaceChild(div, child);
        } else if (tag === "table") {
          cleanTable(child);
        } else {
          clean(child);
        }
      });
    }

    clean(doc.body);
  }

  function removeOPTags() {
    doc.querySelectorAll("o\\:p").forEach(unwrap);
  }

  function removeEmptyDivs() {
    // Reverse document order visits descendants before their ancestors, so a
    // div that only becomes empty once its empty children are gone is
    // removed as well.
    Array.from(doc.querySelectorAll("div"))
      .reverse()
      .forEach((div) => {
        if (!div.innerHTML.trim()) {
          div.parentNode.removeChild(div);
        }
      });
  }

  function removeSpecificAttributes() {
    doc.querySelectorAll("*").forEach((element) => {
      element.removeAttribute("id");
    });
  }

  removeOPTags();
  // Headings must be tagged before the clean-up replaces them with divs.
  convertHeadingsToDivsWithAttributes();
  removeAllTagsExcept();
  normalizeTableStructure();
  // wrapTablesInDivsWithCaption(); // currently disabled
  wrapImagesInDivsWithCaption();
  removeLatexFormulasFromDocument();
  removeStylesFromDocument();
  wrapParagraphsInDivs();
  removeEmptyDivs();
  removeSpecificAttributes();
}

/**
 * Moves every element sibling that follows the `[lineindex="2"]` element into
 * a new `<div id="extracted-divs-container">` and inserts that container
 * directly after the `[heading="title"]` element.
 *
 * The starting element itself is not moved. Both anchors are looked up before
 * anything is moved, so when either is missing the function only logs a
 * warning and leaves the DOM untouched. A following sibling that is, or
 * contains, the title element stays in place, because the container cannot be
 * inserted inside itself.
 *
 * @param {Document} [root=document] - Document to search and modify.
 * @returns {void}
 */
export function extractFollowingDivs(root = document) {
  const startingDiv = root.querySelector('[lineindex="2"]');
  if (!startingDiv) {
    console.warn("Starting div not found");
    return;
  }

  // Resolve the insertion point first: moving nodes into a container that
  // never gets attached would silently drop them from the page.
  const titleDiv = root.querySelector('[heading="title"]');
  if (!titleDiv || !titleDiv.parentNode) {
    console.warn("Title div not found or has no parent");
    return;
  }

  const newContainer = root.createElement("div");
  newContainer.id = "extracted-divs-container";

  // Collect the element siblings after the starting div (text nodes between
  // them are skipped by nextElementSibling and stay where they are).
  let current = startingDiv.nextElementSibling;
  while (current) {
    // Read the next sibling before moving, since moving detaches `current`.
    const next = current.nextElementSibling;
    if (!current.contains(titleDiv)) {
      newContainer.appendChild(current);
    }
    current = next;
  }

  titleDiv.parentNode.insertBefore(newContainer, titleDiv.nextSibling);
}
