Spaces:
Running
Running
| import sys | |
| from pathlib import Path | |
# ROOT is the third ancestor of this file's resolved path (parents[2]);
# presumably the repository root -- confirm against the project layout.
# It is placed at the front of sys.path, once, ahead of the project imports
# that follow.
ROOT = Path(__file__).resolve().parents[2]
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
| from ingestion.parsers.readme_parser import parse_readme_bytes | |
| from ingestion.parsers.text_sanitizer import strip_html_tags | |
def test_strip_html_tags_removes_img_and_comments() -> None:
    """Check that strip_html_tags drops markup while keeping plain words.

    The sample mixes an HTML comment, an <img> tag and a <b> element with
    ordinary text; the opening markers must be gone from the result and the
    words outside the markup must remain.
    """
    raw = "Hello <!-- comment --> <img src='x'> world <b>bold</b>"
    result = strip_html_tags(raw)

    # None of the markup openers may survive sanitising.
    for marker in ("<img", "<!--", "<b>"):
        assert marker not in result

    # Text that sat outside the markup has to be preserved.
    assert all(word in result for word in ("Hello", "world"))
def test_parse_readme_bytes_removes_raw_html() -> None:
    """Check that parse_readme_bytes removes raw HTML from "clean_content".

    A small README with an inline <img> tag is parsed; the tag must not
    appear in the "clean_content" entry, while the body text must.
    """
    payload = b"# Repo\n\n<img src='banner.png'/>\n\nSome content"
    result = parse_readme_bytes(payload, repo_name="1337Xcode/demo")
    clean_content = result["clean_content"]

    assert "<img" not in clean_content
    assert "Some content" in clean_content