File size: 1,102 Bytes
from docsifer.core.html_cleaner import clean_html
def test_removes_style_script_noscript() -> None:
    """Verify that style, script and noscript tags are absent after cleaning.

    The document places a ``<style>`` and a ``<script>`` in the head and a
    ``<noscript>`` in the body next to a plain paragraph; the paragraph text
    must still be present in the cleaned output.
    """
    markup = "".join(
        [
            "<html><head><style>body{}</style><script>x()</script></head>",
            "<body><noscript>nope</noscript><p>hi</p></body></html>",
        ]
    )
    cleaned = clean_html(markup)

    # Compare case-insensitively so an upper-cased tag would also be caught.
    lowered = cleaned.lower()
    for tag_prefix in ("<style", "<script", "<noscript"):
        assert tag_prefix not in lowered

    assert "hi" in cleaned
def test_removes_hidden_attribute_and_inline_styles() -> None:
    """Verify that text inside hidden elements is absent after cleaning.

    Four ways of hiding a ``<div>`` are exercised: the ``hidden`` attribute,
    an inline ``display:none`` style, the same style with a space and a
    trailing semicolon, and ``aria-hidden="true"``. The text of the ordinary
    paragraph must still be present.
    """
    fragments = [
        "<div hidden>secret</div>",
        '<div style="display:none">also-hidden</div>',
        '<div style="display: none;">spaced</div>',
        '<div aria-hidden="true">aria</div>',
        "<p>visible</p>",
    ]
    cleaned = clean_html("".join(fragments))

    for concealed_text in ("secret", "also-hidden", "spaced", "aria"):
        assert concealed_text not in cleaned

    assert "visible" in cleaned
def test_empty_input_returns_empty() -> None:
    """Verify that cleaning an empty string yields an empty string."""
    result = clean_html("")
    assert result == ""
def test_malformed_html_does_not_raise() -> None:
    """Verify that unbalanced angle brackets are handled without an exception.

    The only requirement on the result is that it is a ``str``.
    """
    garbage = "<<<<><><>"
    result = clean_html(garbage)
    assert isinstance(result, str)
|