Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from researchmind.url_validate import ( | |
| filter_valid_urls, | |
| is_well_formed, | |
| normalize_url, | |
| validate_url, | |
| ) | |
def test_rejects_truncated_and_bad_arxiv():
    """Check that a bad arXiv path and a "..."-truncated link are both rejected."""
    # The arXiv path below is not a plain identifier; the reason must name arxiv.
    bad_arxiv = "https://arxiv.org/abs/quantcomm/2021/10.0"
    accepted, why = is_well_formed(bad_arxiv)
    assert not accepted
    assert "arxiv" in why

    # A link cut off with an ellipsis; only the verdict is checked here.
    truncated = "https://ieeexplore.ieee.org/document/..."
    accepted, _ = is_well_formed(truncated)
    assert not accepted
def test_accepts_valid_arxiv():
    """Check that an arXiv abstract URL with a numeric id is well formed."""
    arxiv_url = "https://arxiv.org/abs/2301.00001"
    accepted, _ = is_well_formed(arxiv_url)
    assert accepted
def test_normalize_adds_scheme():
    """Check that normalize_url returns an https:// URL for a scheme-less input."""
    bare = "en.wikipedia.org/wiki/AI_agent"
    normalized = normalize_url(bare)
    assert normalized.startswith("https://")
def test_validate_url_does_not_shadow_probe(monkeypatch):
    """Regression: validate_url(check_reachable=True) must not call the bool flag.

    The module-level probe is replaced with a stub that always succeeds, so
    the call must complete and report the stub's result.
    """

    def stub_probe(url, *, timeout=12.0):
        # Stand-in for the real reachability probe: no network, always succeeds.
        return True, "ok"

    monkeypatch.setattr("researchmind.url_validate.probe_url_reachable", stub_probe)

    target = "https://en.wikipedia.org/wiki/Agent"
    ok, reason, normalized = validate_url(target, check_reachable=True)

    assert ok
    assert reason == "ok"
    assert "wikipedia" in normalized
def test_rejects_bing_tracking_links():
    """Check that a bing.com/aclick link is rejected with a tracking reason."""
    tracking_url = "https://www.bing.com/aclick?id=abc&u=aHR0cHM6Ly9leGFtcGxlLmNvbQ"
    accepted, why = is_well_formed(tracking_url)
    assert not accepted
    assert "tracking" in why
def test_filter_valid_urls_skips_bad(monkeypatch):
    """Check that filter_valid_urls drops URLs the patched validator rejects."""

    def stub_validate(url, *, check_reachable=True):
        # Reject anything containing "bad"; echo the URL back as the third item.
        accepted = "bad" not in url
        return accepted, ("ok" if accepted else "bad"), url

    monkeypatch.setattr("researchmind.url_validate.validate_url", stub_validate)

    candidates = ["https://good.example/a", "https://bad.example/b"]
    kept = filter_valid_urls(candidates, check_reachable=False, max_results=5)

    assert kept == ["https://good.example/a"]