File size: 882 Bytes
9563e4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import sys
from pathlib import Path

# ROOT is the directory two levels above the folder containing this file
# (parents[0] is that folder itself) — presumably the repository root;
# confirm against the project layout.
ROOT = Path(__file__).resolve().parents[2]
# Prepend ROOT to the import search path, presumably so the project-local
# `ingestion` package resolves; the membership check avoids adding a
# duplicate entry if the path is already present.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from ingestion.parsers.readme_parser import parse_readme_bytes
from ingestion.parsers.text_sanitizer import strip_html_tags


def test_strip_html_tags_removes_img_and_comments() -> None:
    """Check that ``strip_html_tags`` drops markup but keeps surrounding words."""
    raw_markup = "Hello <!-- comment --> <img src='x'> world <b>bold</b>"
    result = strip_html_tags(raw_markup)

    # Opening markers of the image tag, the comment, and the bold tag
    # must all be gone from the output.
    for marker in ("<img", "<!--", "<b>"):
        assert marker not in result

    # The plain words that sit outside the markup must survive.
    for word in ("Hello", "world"):
        assert word in result


def test_parse_readme_bytes_removes_raw_html() -> None:
    """Check that ``parse_readme_bytes`` omits a raw ``<img>`` tag from ``clean_content``."""
    raw_readme = b"# Repo\n\n<img src='banner.png'/>\n\nSome content"
    result = parse_readme_bytes(raw_readme, repo_name="1337Xcode/demo")

    # Look the field up once and run both checks against the same value.
    body = result["clean_content"]
    assert "<img" not in body
    assert "Some content" in body