File size: 1,385 Bytes
fb38ec5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import { JSDOM } from "jsdom";

/** Tag names whose elements are removed together with their whole subtree. */
const BLACKLISTED_ELEMENTS: readonly string[] = [
  "head",
  "title",
  "meta",
  "script",
  "style",
  "path",
  "svg",
  "br",
  "hr",
  "link",
  "object",
  "embed",
];

/**
 * Attribute-name patterns to strip. Each source string is anchored with
 * `^…$`, so an entry must match the full attribute name (`item.*` matches
 * `itemprop`, `itemscope`, …). Compiled once here rather than once per
 * element per pattern.
 *
 * NOTE: `onerror` is the only event-handler attribute listed; other `on*`
 * attributes are left in place.
 */
const BLACKLISTED_ATTRIBUTE_PATTERNS: readonly RegExp[] = [
  "style",
  "ping",
  "src",
  "item.*",
  "aria.*",
  "js.*",
  "data-.*",
  "role",
  "tabindex",
  "onerror",
].map((pattern) => new RegExp(`^${pattern}$`));

/**
 * Strips an HTML string down to its textual structure.
 *
 * The input is parsed with JSDOM as a document, then processed in three
 * passes:
 * 1. every element whose tag is in {@link BLACKLISTED_ELEMENTS} is removed;
 * 2. every attribute whose name fully matches one of
 *    {@link BLACKLISTED_ATTRIBUTE_PATTERNS} is removed;
 * 3. every element that is left with no attributes and only whitespace
 *    text content is removed.
 *
 * @param html - Raw HTML markup to clean.
 * @returns The `outerHTML` of the document's root element after cleaning.
 *   When nothing survives the passes (e.g. empty input), the bare root
 *   element is still serialized instead of throwing.
 */
export const cleanHtml = (html: string): string => {
  const dom = new JSDOM(html);
  const document = dom.window.document;
  const root = document.documentElement;

  // Pass 1: drop blacklisted elements (a selector list matches any of the tags).
  document
    .querySelectorAll(BLACKLISTED_ELEMENTS.join(","))
    .forEach((element) => {
      element.remove();
    });

  // querySelectorAll returns a static list, so removing nodes while walking
  // it (pass 3) does not disturb the iteration.
  const elements = document.querySelectorAll("*");

  // Pass 2: drop blacklisted attributes. The attribute map is live, so it is
  // snapshotted with Array.from before any attribute is removed.
  elements.forEach((element) => {
    Array.from(element.attributes).forEach((attr) => {
      if (BLACKLISTED_ATTRIBUTE_PATTERNS.some((re) => re.test(attr.name))) {
        element.removeAttribute(attr.name);
      }
    });
  });

  // Pass 3: drop elements that carry neither attributes nor visible text.
  elements.forEach((element) => {
    // Never remove the root: an attribute-less <html> in a text-free document
    // would otherwise be removed, leaving document.documentElement null and
    // making the serialization below throw.
    if (element === root) {
      return;
    }
    if (!element.hasAttributes() && element.textContent?.trim() === "") {
      element.remove();
    }
  });

  return root.outerHTML;
};