import sys
from pathlib import Path
# Resolve the directory two levels above this file's folder and put it at the
# front of the import search path (only once), so the `ingestion` imports that
# follow can be resolved regardless of the current working directory.
ROOT = Path(__file__).resolve().parents[2]
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
from ingestion.parsers.readme_parser import parse_readme_bytes
from ingestion.parsers.text_sanitizer import strip_html_tags
def test_strip_html_tags_removes_img_and_comments() -> None:
    """Check that `strip_html_tags` drops image tags and HTML comments.

    The fixture mixes plain words with an ``<img>`` tag, an HTML comment and
    a ``<b>`` element; the surrounding plain text must survive the cleanup.

    NOTE(review): the HTML inside the string literals was reconstructed from
    the test name because the visible literals were broken across lines and
    had lost their markup -- confirm against the intended fixture.
    """
    text = 'Hello <img src="logo.png" alt="logo"> <!-- hidden note --> world <b>bold</b>'
    cleaned = strip_html_tags(text)

    # Neither the image tag nor the comment opener may leak into the output.
    assert "<img" not in cleaned
    assert "<!--" not in cleaned

    # Plain text around the markup must be preserved.
    assert "Hello" in cleaned and "world" in cleaned
def test_parse_readme_bytes_removes_raw_html() -> None:
readme = b"# Repo\n\n
\n\nSome content"
parsed = parse_readme_bytes(readme, repo_name="1337Xcode/demo")
assert "![]()