import type { Expression } from "./types";

/** DOM `nodeType` values, spelled out so the code does not rely on the `Node` global. */
const ELEMENT_NODE = 1;
const TEXT_NODE = 3;

/** Lower-case tag names that end the section being collected. */
const SECTION_END_TAGS: readonly string[] = ["hr", "h2"];

/** Class names that exclude a top-level element from the collected section. */
const EXCLUDED_CLASSES: readonly string[] = ["noprint", "inflection-table"];

/** Class names whose elements are dropped entirely while cloning. */
const DROPPED_CLASSES: readonly string[] = ["mw-editsection", "nyms-toggle"];

/**
 * Extracts the "Finnish" section from a Wiktionary HTML page and returns it
 * as a sanitized HTML fragment.
 *
 * The section starts after the parent of the element with id `Finnish` and
 * runs until the next `hr`/`h2` sibling or a sibling whose subtree contains an
 * id with "References". Within that window, `style` elements, elements with
 * the `noprint` or `inflection-table` class, and elements whose subtree
 * contains a "Conjugation"/"Declension" id are left out.
 *
 * @param prompt - Value copied verbatim into the returned expression.
 * @param data - HTML source of the page to parse.
 * @returns The expression with the sanitized section as `description`, or
 *   `null` when there is no `window` (server side) or no Finnish section.
 */
export function parseWiktionaryData(
  prompt: string,
  data: string
): Expression | null {
  // Prevent next.js SSR from crashing here
  if (typeof window === "undefined") return null;

  const parsed = new DOMParser().parseFromString(data, "text/html");

  // TODO settings-based language selection
  const header = parsed.getElementById("Finnish")?.parentElement;
  if (!header) return null;

  const content = parsed.createElement("div");
  content.className = "wiktionary-content";

  // Walk the siblings after the header until the section ends, keeping only
  // the wanted elements.
  // NOTE(review): newer MediaWiki output reportedly wraps headings in a
  // `div.mw-heading` — confirm the `h2` stop condition still fires there.
  for (
    let current = header.nextElementSibling;
    current;
    current = current.nextElementSibling
  ) {
    const element = current;
    const tag = element.tagName.toLowerCase();
    if (SECTION_END_TAGS.includes(tag) || isReferences(element)) break;

    const excluded =
      tag === "style" ||
      EXCLUDED_CLASSES.some((name) => element.classList.contains(name)) ||
      isDeclension(element);
    if (excluded) continue;

    // safeTreeClone returns null for elements it drops; appending null would
    // insert the literal text "null", so skip those.
    const cloned = safeTreeClone(element);
    if (cloned) content.append(cloned);
  }

  return {
    prompt,
    description: content.outerHTML,
  };
}

/**
 * Reports whether `element` or any descendant element has an id containing
 * one of `fragments`.
 */
function subtreeHasId(element: Element, fragments: readonly string[]): boolean {
  if (fragments.some((fragment) => element.id.includes(fragment))) return true;
  for (
    let child = element.firstElementChild;
    child;
    child = child.nextElementSibling
  ) {
    if (subtreeHasId(child, fragments)) return true;
  }
  return false;
}

/**
 * Returns true when `element` or a descendant has an id containing
 * "Conjugation" or "Declension"; false otherwise (including for a missing
 * element).
 */
function isDeclension(element?: Element): boolean {
  if (!element) return false;
  return subtreeHasId(element, ["Conjugation", "Declension"]);
}

/**
 * Returns true when `element` or a descendant has an id containing
 * "References"; false otherwise (including for a missing element).
 */
function isReferences(element?: Element): boolean {
  if (!element) return false;
  return subtreeHasId(element, ["References"]);
}

/**
 * Builds a stripped-down copy of `element` using the global `document`.
 *
 * - A string becomes a text node.
 * - An `a` element is replaced by a text node holding its text content.
 * - `sup` elements and elements with the `mw-editsection` or `nyms-toggle`
 *   class are dropped (`null`).
 * - Any other element is recreated by tag name only (no attributes are
 *   copied), with its text children cloned and its element children processed
 *   recursively. Other node kinds, such as comments, are dropped.
 *
 * @returns The copy, or `null` when the input is missing/empty or dropped.
 */
function safeTreeClone(element?: string | Element): Text | HTMLElement | null {
  if (!element) return null;
  if (typeof element === "string") return document.createTextNode(element);

  const tag = element.tagName.toLowerCase();
  if (tag === "a") return document.createTextNode(element.textContent || "");
  if (DROPPED_CLASSES.some((name) => element.classList.contains(name))) {
    return null;
  }
  if (tag === "sup") return null;

  const result = document.createElement(element.tagName);
  for (const child of Array.from(element.childNodes)) {
    if (child.nodeType === TEXT_NODE) {
      result.append(child.cloneNode());
    } else if (child.nodeType === ELEMENT_NODE) {
      const cloned = safeTreeClone(child as Element);
      if (cloned) result.append(cloned);
    }
    // Comments and other non-element nodes have no tagName/classList and are
    // intentionally left out of the copy.
  }
  return result;
}