lesson-agent-dev/libs/researchmind/tests/test_url_validate.py
MSG
Feat/research tab agent skills (#5)
e7fd66f
Raw
History Blame Contribute Delete
1.83 kB
from __future__ import annotations
from researchmind.url_validate import (
filter_valid_urls,
is_well_formed,
normalize_url,
validate_url,
)
def test_rejects_truncated_and_bad_arxiv():
    """Check that a malformed arXiv path and an ellipsis-truncated link are rejected.

    The arXiv case must also mention "arxiv" in its rejection reason; the
    truncated IEEE link only has to be flagged as not well formed.
    """
    arxiv_ok, arxiv_reason = is_well_formed(
        "https://arxiv.org/abs/quantcomm/2021/10.0"
    )
    assert not arxiv_ok
    assert "arxiv" in arxiv_reason

    # The reason text is not pinned for the truncated URL, only the verdict.
    truncated_ok, _ = is_well_formed("https://ieeexplore.ieee.org/document/...")
    assert not truncated_ok
def test_accepts_valid_arxiv():
    """Check that a new-style arXiv abstract URL is reported as well formed."""
    accepted, _reason = is_well_formed("https://arxiv.org/abs/2301.00001")
    assert accepted
def test_normalize_adds_scheme():
    """Check that normalizing a scheme-less URL yields one starting with https://."""
    normalized = normalize_url("en.wikipedia.org/wiki/AI_agent")
    assert normalized.startswith("https://")
def test_validate_url_does_not_shadow_probe(monkeypatch):
    """Regression: check_reachable=True must not call the bool parameter.

    ``researchmind.url_validate.probe_url_reachable`` is replaced with a stub
    that always returns ``(True, "ok")``; the test then checks the verdict,
    reason, and normalized URL returned by ``validate_url``.
    """

    def always_reachable(url, *, timeout=12.0):
        return True, "ok"

    monkeypatch.setattr(
        "researchmind.url_validate.probe_url_reachable",
        always_reachable,
    )

    target = "https://en.wikipedia.org/wiki/Agent"
    verdict, why, cleaned = validate_url(target, check_reachable=True)

    assert verdict
    assert why == "ok"
    assert "wikipedia" in cleaned
def test_rejects_bing_tracking_links():
    """Check that a bing.com/aclick URL is rejected with a reason mentioning "tracking"."""
    tracking_link = "https://www.bing.com/aclick?id=abc&u=aHR0cHM6Ly9leGFtcGxlLmNvbQ"
    accepted, why = is_well_formed(tracking_link)
    assert not accepted
    assert "tracking" in why
def test_filter_valid_urls_skips_bad(monkeypatch):
    """Check that filter_valid_urls keeps only the URL the stubbed validator accepts.

    ``researchmind.url_validate.validate_url`` is replaced with a stub that
    rejects any URL containing "bad" and accepts everything else.
    """

    def stub_validate(url, *, check_reachable=True):
        if "bad" not in url:
            return True, "ok", url
        return False, "bad", url

    monkeypatch.setattr("researchmind.url_validate.validate_url", stub_validate)

    candidates = ["https://good.example/a", "https://bad.example/b"]
    kept = filter_valid_urls(candidates, check_reachable=False, max_results=5)

    assert kept == ["https://good.example/a"]