Spaces:
Running
Running
File size: 772 Bytes
a67789e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | const { cleanHTML } = require("../../data_extraction/clean_html");
/**
 * Jest suite for `cleanHTML`.
 *
 * Two expectations are pinned here:
 *  - marker text placed inside a header, a script, an "ads" container and a
 *    footer is absent from the output, while ordinary paragraph text remains;
 *  - two paragraphs separated by blank lines come back as "Text More".
 */
describe("cleanHTML", () => {
  test("removes scripts, styles, headers, footers, ads", () => {
    // One array entry per document line. The empty first and last entries
    // preserve the newline that opens and closes the fixture.
    const page = [
      "",
      "<html>",
      "<head><style>.x{}</style></head>",
      "<body>",
      "<header>HEADER</header>",
      '<script>alert("x")</script>',
      '<div class="ads">Buy NOW</div>',
      "<p>Hello world</p>",
      "<footer>FOOTER</footer>",
      "</body>",
      "</html>",
      "",
    ].join("\n");

    const cleaned = cleanHTML(page);

    // The real content has to survive the cleaning pass...
    expect(cleaned).toContain("Hello world");
    // ...and none of the boilerplate marker strings may appear.
    // NOTE(review): the <style> rule text (".x{}") is not part of this
    // pattern even though the title mentions styles; confirm that is intended.
    expect(cleaned).not.toMatch(/HEADER|FOOTER|Buy NOW|alert/);
  });

  test("collapses whitespace", () => {
    // Blank lines between the two paragraphs must not show up in the result.
    const cleaned = cleanHTML("<p>Text</p>\n\n<p>More</p>");
    expect(cleaned).toBe("Text More");
  });
});
|