Spaces:
Runtime error
Runtime error
| import { JSDOM } from "jsdom"; | |
/**
 * Strips an HTML document down to its content-bearing markup.
 *
 * The input is parsed with JSDOM and then processed in three passes:
 *  1. every element whose tag is blacklisted is removed together with its
 *     subtree;
 *  2. every attribute whose full name matches a blacklisted pattern is
 *     removed from the remaining elements;
 *  3. every remaining element that has no attributes left and whose text
 *     content is empty or whitespace-only is removed.
 *
 * @param html - Raw HTML source to clean.
 * @returns The serialized `<html>` element after cleaning, or an empty string
 *   when nothing survives (the root element itself was removed as empty).
 */
export const cleanHtml = (html: string): string => {
  const blacklistedElements = [
    "head",
    "title",
    "meta",
    "script",
    "style",
    "path",
    "svg",
    "br",
    "hr",
    "link",
    "object",
    "embed",
  ];

  // Each entry is a regular-expression fragment matched against the whole
  // attribute name, so "aria.*" covers every attribute starting with "aria".
  const blacklistedAttributes = [
    "style",
    "ping",
    "src",
    "item.*",
    "aria.*",
    "js.*",
    "data-.*",
    "role",
    "tabindex",
    "onerror",
  ];

  // Compile a single anchored alternation once instead of building one regex
  // per pattern for every element. It accepts exactly the same names.
  const blacklistedAttributePattern = new RegExp(
    `^(?:${blacklistedAttributes.join("|")})$`,
  );

  const dom = new JSDOM(html);
  const document = dom.window.document;

  // Remove blacklisted elements. A selector list matches all tags in one query;
  // removing a node whose ancestor was already removed is harmless.
  const unwanted = Array.from(
    document.querySelectorAll(blacklistedElements.join(",")),
  );
  for (const element of unwanted) {
    element.remove();
  }

  // Snapshot taken after the removals above, so it only holds surviving nodes.
  const elements = Array.from(document.querySelectorAll("*"));

  // Remove blacklisted attributes. `element.attributes` is live, so iterate
  // over a copy while removing entries from it.
  for (const element of elements) {
    for (const attr of Array.from(element.attributes)) {
      if (blacklistedAttributePattern.test(attr.name)) {
        element.removeAttribute(attr.name);
      }
    }
  }

  // Remove empty elements. `textContent` includes descendant text, so an
  // attribute-less ancestor whose whole subtree is blank is dropped as a unit.
  for (const element of elements) {
    if (!element.hasAttributes() && element.textContent?.trim() === "") {
      element.remove();
    }
  }

  // The pass above may have removed <html> itself (e.g. empty input, or input
  // consisting only of stripped elements); the document then has no root.
  const root: Element | null = document.documentElement;
  return root === null ? "" : root.outerHTML;
};