import sys from pathlib import Path ROOT = Path(__file__).resolve().parents[2] if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) from ingestion.parsers.readme_parser import parse_readme_bytes from ingestion.parsers.text_sanitizer import strip_html_tags def test_strip_html_tags_removes_img_and_comments() -> None: text = "Hello world bold" cleaned = strip_html_tags(text) assert "" not in cleaned assert "Hello" in cleaned and "world" in cleaned def test_parse_readme_bytes_removes_raw_html() -> None: readme = b"# Repo\n\n\n\nSome content" parsed = parse_readme_bytes(readme, repo_name="1337Xcode/demo") assert "