| import importlib.util |
| import sys |
| from datetime import datetime, timezone |
| from pathlib import Path |
| from types import SimpleNamespace |
|
|
| import httpx |
| import pytest |
|
|
|
|
| def _load(): |
| path = Path(__file__).parent.parent.parent / "scripts" / "prioritize_backlog.py" |
| spec = importlib.util.spec_from_file_location("prioritize_backlog", path) |
| mod = importlib.util.module_from_spec(spec) |
| sys.modules["prioritize_backlog"] = mod |
| spec.loader.exec_module(mod) |
| return mod |
|
|
|
|
| class FakeResponse: |
| def __init__(self, data, headers=None, text=None): |
| self._data = data |
| self.headers = headers or {} |
| self.text = text if text is not None else "" |
|
|
| def json(self): |
| return self._data |
|
|
| def raise_for_status(self): |
| return None |
|
|
|
|
| class RateLimitResponse(FakeResponse): |
| def __init__(self, status_code=403): |
| super().__init__({}) |
| self.status_code = status_code |
| self.request = httpx.Request("GET", "https://api.github.test/rate") |
| self.response = httpx.Response( |
| status_code, |
| headers={"x-ratelimit-reset": "123"}, |
| request=self.request, |
| ) |
|
|
| def raise_for_status(self): |
| raise httpx.HTTPStatusError( |
| "rate limited", request=self.request, response=self.response |
| ) |
|
|
|
|
| class FakeIssueClient: |
| def __init__(self): |
| self.posts = [] |
| self.closed = False |
|
|
| def post(self, url, headers=None, json=None): |
| self.posts.append({"url": url, "headers": headers or {}, "json": json or {}}) |
| return FakeResponse( |
| { |
| "number": 42, |
| "html_url": "https://github.com/owner/repo/issues/42", |
| "url": "https://api.github.com/repos/owner/repo/issues/42", |
| "title": json["title"], |
| } |
| ) |
|
|
| def close(self): |
| self.closed = True |
|
|
|
|
| class FakeGitHubClient: |
| def __init__(self): |
| self.requests = [] |
|
|
| def get(self, url, headers=None, params=None): |
| self.requests.append((url, params or {})) |
| page = (params or {}).get("page") |
|
|
| if url == "https://api.github.com/repos/owner/repo/issues": |
| if page == 1: |
| return FakeResponse( |
| [ |
| { |
| "number": 1, |
| "html_url": "https://github.com/owner/repo/issues/1", |
| "title": "Issue one", |
| "body": "broken", |
| "labels": [{"name": "bug"}], |
| "user": {"login": "alice"}, |
| "state": "open", |
| "created_at": "2026-05-01T00:00:00Z", |
| "updated_at": "2026-05-02T00:00:00Z", |
| "comments": 1, |
| "comments_url": "https://api.github.test/issues/1/comments", |
| }, |
| { |
| "number": 2, |
| "html_url": "https://github.com/owner/repo/pull/2", |
| "title": "PR two", |
| "body": "adds feature", |
| "labels": [{"name": "enhancement"}], |
| "user": {"login": "bob"}, |
| "state": "open", |
| "created_at": "2026-05-01T00:00:00Z", |
| "updated_at": "2026-05-02T00:00:00Z", |
| "comments": 0, |
| "comments_url": "https://api.github.test/issues/2/comments", |
| "pull_request": {"url": "https://api.github.test/pulls/2"}, |
| }, |
| ], |
| headers={"link": '<https://api.github.test?page=2>; rel="next"'}, |
| ) |
| return FakeResponse( |
| [ |
| { |
| "number": 3, |
| "html_url": "https://github.com/owner/repo/issues/3", |
| "title": "Issue three", |
| "body": "request", |
| "labels": [], |
| "user": {"login": "carol"}, |
| "state": "open", |
| "created_at": "2026-05-03T00:00:00Z", |
| "updated_at": "2026-05-03T00:00:00Z", |
| "comments": 0, |
| "comments_url": "https://api.github.test/issues/3/comments", |
| } |
| ] |
| ) |
|
|
| if url.endswith("/comments") and "/pulls/" not in url: |
| return FakeResponse( |
| [ |
| { |
| "body": "comment", |
| "user": {"login": "dana"}, |
| "created_at": "2026-05-02T00:00:00Z", |
| "html_url": "https://github.com/comment", |
| } |
| ] |
| ) |
|
|
| if url == "https://api.github.com/repos/owner/repo/pulls/2": |
| return FakeResponse( |
| { |
| "number": 2, |
| "html_url": "https://github.com/owner/repo/pull/2", |
| "title": "PR two", |
| "body": "adds feature", |
| "user": {"login": "bob"}, |
| "state": "open", |
| "draft": False, |
| "base": {"ref": "main"}, |
| "head": {"ref": "feature"}, |
| "commits": 2, |
| "additions": 10, |
| "deletions": 3, |
| "changed_files": 2, |
| "review_comments": 0, |
| } |
| ) |
|
|
| if url in { |
| "https://api.github.com/repos/owner/repo/pulls/2/comments", |
| "https://api.github.com/repos/owner/repo/pulls/2/reviews", |
| }: |
| return FakeResponse([]) |
|
|
| raise AssertionError(f"unexpected URL: {url}") |
|
|
|
|
| def test_github_pagination_and_issue_pr_splitting(): |
| mod = _load() |
| records = mod.collect_github_sources("owner/repo", client=FakeGitHubClient()) |
|
|
| assert [record["id"] for record in records] == [ |
| "github_issue#1", |
| "github_pr#2", |
| "github_issue#3", |
| ] |
| assert records[0]["source"] == "github_issue" |
| assert records[1]["source"] == "github_pr" |
| assert records[1]["metadata"]["base"] == "main" |
|
|
|
|
| def test_collect_github_sources_excludes_generated_report_label(): |
| mod = _load() |
|
|
| class ReportIssueClient: |
| def close(self): |
| return None |
|
|
| def get(self, url, headers=None, params=None): |
| if url == "https://api.github.com/repos/owner/repo/issues": |
| return FakeResponse( |
| [ |
| { |
| "number": 1, |
| "html_url": "https://github.com/owner/repo/issues/1", |
| "title": "Generated report", |
| "body": "report", |
| "labels": [ |
| {"name": mod.DEFAULT_GITHUB_REPORT_LABEL.upper()} |
| ], |
| "user": {"login": "bot"}, |
| "state": "open", |
| "comments": 0, |
| "comments_url": "https://api.github.test/issues/1/comments", |
| }, |
| { |
| "number": 2, |
| "html_url": "https://github.com/owner/repo/issues/2", |
| "title": "Real issue", |
| "body": "broken", |
| "labels": [{"name": "bug"}], |
| "user": {"login": "alice"}, |
| "state": "open", |
| "comments": 0, |
| "comments_url": "https://api.github.test/issues/2/comments", |
| }, |
| ] |
| ) |
| if url == "https://api.github.test/issues/2/comments": |
| return FakeResponse([]) |
| raise AssertionError(f"unexpected URL: {url}") |
|
|
| records = mod.collect_github_sources( |
| "owner/repo", |
| exclude_labels=[mod.DEFAULT_GITHUB_REPORT_LABEL], |
| client=ReportIssueClient(), |
| ) |
|
|
| assert [record["id"] for record in records] == ["github_issue#2"] |
|
|
|
|
| def test_collect_github_sources_returns_partial_results_on_rate_limit(caplog): |
| mod = _load() |
|
|
| class RateLimitedClient: |
| def close(self): |
| return None |
|
|
| def get(self, url, headers=None, params=None): |
| if url == "https://api.github.com/repos/owner/repo/issues": |
| return FakeResponse( |
| [ |
| { |
| "number": 1, |
| "html_url": "https://github.com/owner/repo/issues/1", |
| "title": "Issue one", |
| "body": "broken", |
| "labels": [], |
| "user": {"login": "alice"}, |
| "state": "open", |
| "comments": 0, |
| "comments_url": "https://api.github.test/issues/1/comments", |
| }, |
| { |
| "number": 2, |
| "html_url": "https://github.com/owner/repo/issues/2", |
| "title": "Issue two", |
| "body": "rate limited", |
| "labels": [], |
| "user": {"login": "bob"}, |
| "state": "open", |
| "comments": 0, |
| "comments_url": "https://api.github.test/issues/2/comments", |
| }, |
| ] |
| ) |
| if url == "https://api.github.test/issues/1/comments": |
| return FakeResponse([]) |
| if url == "https://api.github.test/issues/2/comments": |
| return RateLimitResponse() |
| raise AssertionError(f"unexpected URL: {url}") |
|
|
| with caplog.at_level("WARNING"): |
| records = mod.collect_github_sources("owner/repo", client=RateLimitedClient()) |
|
|
| assert [record["id"] for record in records] == ["github_issue#1"] |
| assert "GitHub rate limit" in caplog.text |
|
|
|
|
| def test_github_comment_cap_and_truncation(): |
| mod = _load() |
|
|
| class CommentClient: |
| def get(self, url, headers=None, params=None): |
| assert url == "https://api.github.test/comments" |
| return FakeResponse( |
| [ |
| {"body": "abcdef", "user": {"login": "one"}}, |
| {"body": "second", "user": {"login": "two"}}, |
| ], |
| headers={ |
| "link": '<https://api.github.test/comments?page=2>; rel="next"' |
| }, |
| ) |
|
|
| comments = mod._fetch_github_comments( |
| CommentClient(), |
| "https://api.github.test/comments", |
| {}, |
| max_comments=1, |
| max_comment_chars=5, |
| ) |
|
|
| assert len(comments) == 1 |
| assert comments[0]["author"] == "one" |
| assert comments[0]["body"].endswith("[truncated]") |
|
|
|
|
| def test_hf_discussion_event_normalization(): |
| mod = _load() |
| discussion = SimpleNamespace( |
| num=7, |
| repo_id="smolagents/ml-intern", |
| repo_type="space", |
| title="Space fails", |
| status="open", |
| author="alice", |
| created_at=datetime(2026, 5, 1, tzinfo=timezone.utc), |
| ) |
| details = SimpleNamespace( |
| title="Space fails", |
| status="open", |
| events=[ |
| SimpleNamespace( |
| type="comment", |
| content="Initial report", |
| hidden=False, |
| author="alice", |
| created_at=datetime(2026, 5, 1, tzinfo=timezone.utc), |
| ), |
| SimpleNamespace( |
| type="comment", |
| content="Hidden moderation", |
| hidden=True, |
| author="mod", |
| created_at=datetime(2026, 5, 1, tzinfo=timezone.utc), |
| ), |
| SimpleNamespace( |
| type="comment", |
| content="Maintainer reply", |
| hidden=False, |
| author="bob", |
| created_at=datetime(2026, 5, 2, tzinfo=timezone.utc), |
| ), |
| SimpleNamespace(type="status-change", new_status="open"), |
| ], |
| ) |
|
|
| record = mod.normalize_hf_discussion(discussion, details) |
|
|
| assert record["id"] == "hf_discussion#7" |
| assert record["url"] == ( |
| "https://huggingface.co/spaces/smolagents/ml-intern/discussions/7" |
| ) |
| assert record["body"] == "Initial report" |
| assert len(record["comments"]) == 1 |
| assert record["comments"][0]["body"] == "Maintainer reply" |
| assert record["engagement"]["comments_count"] == 2 |
|
|
|
|
| def test_resolution_check_marks_pr_and_linked_issue_as_closable(): |
| mod = _load() |
| records = [ |
| { |
| "id": "github_pr#2", |
| "source": "github_pr", |
| "number": 2, |
| "url": "https://github.com/owner/repo/pull/2", |
| "title": "Fix login", |
| "body": "Fixes the login flow.", |
| "comments": [], |
| }, |
| { |
| "id": "github_issue#1", |
| "source": "github_issue", |
| "number": 1, |
| "url": "https://github.com/owner/repo/issues/1", |
| "title": "Login broken", |
| "body": "Fixed by PR #2.", |
| "comments": [], |
| }, |
| { |
| "id": "github_issue#3", |
| "source": "github_issue", |
| "number": 3, |
| "url": "https://github.com/owner/repo/issues/3", |
| "title": "Direct issue", |
| "body": "", |
| "comments": [], |
| }, |
| ] |
| commits = [ |
| { |
| "commit": "abcdef1234567890", |
| "subject": "Fix login flow (#2)", |
| "body": "Also fixes #3", |
| } |
| ] |
|
|
| checked = mod.apply_resolution_checks( |
| records, |
| checked_ref="main", |
| checked_sha="abcdef1234567890", |
| commits=commits, |
| github_repo="owner/repo", |
| ) |
|
|
| by_id = {record["id"]: record for record in checked} |
| assert by_id["github_pr#2"]["resolution"]["can_close"] is True |
| assert by_id["github_pr#2"]["resolution"]["status"] == "resolved" |
| assert by_id["github_issue#1"]["resolution"]["can_close"] is True |
| assert by_id["github_issue#1"]["resolution"]["status"] == "likely_resolved" |
| assert by_id["github_issue#3"]["resolution"]["can_close"] is True |
|
|
|
|
| def test_linked_pr_numbers_require_resolution_language(): |
| mod = _load() |
|
|
| assert ( |
| mod._linked_pr_numbers( |
| "Related to PR #12, but that PR does not address this.", |
| github_repo="owner/repo", |
| ) |
| == set() |
| ) |
| assert mod._linked_pr_numbers("Fixed by PR #12.", github_repo="owner/repo") == {12} |
|
|
|
|
| def test_merge_can_be_closed_adds_local_resolution_candidates(): |
| mod = _load() |
| records = [ |
| { |
| "id": "github_pr#2", |
| "source": "github_pr", |
| "url": "https://github.com/owner/repo/pull/2", |
| "title": "Fix login", |
| "resolution": { |
| "checked_ref": "main", |
| "checked_sha": "abcdef1234567890", |
| "status": "resolved", |
| "can_close": True, |
| "confidence": 0.95, |
| "reasons": ["PR #2 appears to already be present on main."], |
| "evidence": [], |
| }, |
| } |
| ] |
|
|
| ranking = mod.merge_can_be_closed({"summary": "x"}, records) |
|
|
| assert ranking["can_be_closed"][0]["source_ids"] == ["github_pr#2"] |
| assert "already be present" in ranking["can_be_closed"][0]["reason"] |
|
|
|
|
| def test_fetch_pr_patch_matches_uses_patch_id(monkeypatch): |
| mod = _load() |
| records = [ |
| { |
| "id": "github_pr#2", |
| "source": "github_pr", |
| "number": 2, |
| "metadata": {"patch_url": "https://api.github.test/pr/2.patch"}, |
| } |
| ] |
|
|
| class PatchClient: |
| def close(self): |
| return None |
|
|
| def get(self, url, headers=None): |
| assert url == "https://api.github.test/pr/2.patch" |
| assert headers["Accept"] == "application/vnd.github.patch" |
| return FakeResponse({}, text="diff --git a/a b/a") |
|
|
| monkeypatch.setattr(mod, "_patch_id_for_text", lambda _text: "patch-id") |
|
|
| matches = mod._fetch_pr_patch_matches( |
| records, |
| github_token=None, |
| main_patch_ids={"patch-id": "abcdef1234567890"}, |
| client=PatchClient(), |
| ) |
|
|
| assert matches[2]["kind"] == "patch_id" |
| assert matches[2]["commit"] == "abcdef123456" |
|
|
|
|
| def test_fetch_pr_patch_matches_stops_on_rate_limit(caplog, monkeypatch): |
| mod = _load() |
| records = [ |
| { |
| "id": "github_pr#2", |
| "source": "github_pr", |
| "number": 2, |
| "metadata": {"patch_url": "https://api.github.test/pr/2.patch"}, |
| }, |
| { |
| "id": "github_pr#3", |
| "source": "github_pr", |
| "number": 3, |
| "metadata": {"patch_url": "https://api.github.test/pr/3.patch"}, |
| }, |
| ] |
| calls = [] |
|
|
| class RateLimitedPatchClient: |
| def close(self): |
| return None |
|
|
| def get(self, url, headers=None): |
| calls.append(url) |
| return RateLimitResponse(status_code=429) |
|
|
| monkeypatch.setattr(mod, "_patch_id_for_text", lambda _text: "patch-id") |
|
|
| with caplog.at_level("WARNING"): |
| matches = mod._fetch_pr_patch_matches( |
| records, |
| github_token=None, |
| main_patch_ids={"patch-id": "abcdef1234567890"}, |
| client=RateLimitedPatchClient(), |
| ) |
|
|
| assert matches == {} |
| assert calls == ["https://api.github.test/pr/2.patch"] |
| assert "GitHub rate limit" in caplog.text |
|
|
|
|
| def test_create_github_report_issue_posts_markdown_report(): |
| mod = _load() |
| client = FakeIssueClient() |
|
|
| issue = mod.create_github_report_issue( |
| "owner/repo", |
| title="Backlog report", |
| report="# Report\n\nBody", |
| token="gh-token", |
| labels=["pm-report, backlog", "triage"], |
| client=client, |
| ) |
|
|
| assert issue["number"] == 42 |
| assert issue["url"] == "https://github.com/owner/repo/issues/42" |
| assert client.closed is False |
| post = client.posts[0] |
| assert post["url"] == "https://api.github.com/repos/owner/repo/issues" |
| assert post["headers"]["Authorization"] == "Bearer gh-token" |
| assert post["json"]["title"] == "Backlog report" |
| assert post["json"]["body"].startswith("# Report") |
| assert "Generated by" in post["json"]["body"] |
| assert post["json"]["labels"] == ["pm-report", "backlog", "triage"] |
|
|
|
|
| def test_create_github_report_issue_requires_token(): |
| mod = _load() |
|
|
| with pytest.raises(ValueError, match="GITHUB_TOKEN"): |
| mod.create_github_report_issue( |
| "owner/repo", |
| title="Backlog report", |
| report="# Report", |
| token=None, |
| client=FakeIssueClient(), |
| ) |
|
|
|
|
| def test_github_issue_body_truncates_with_footer(): |
| mod = _load() |
| body = mod._github_issue_body("abcdef" * 100, max_chars=120) |
|
|
| assert len(body) <= 120 |
| assert "Report truncated" in body |
|
|
|
|
| def test_append_published_issue_section_adds_local_link(): |
| mod = _load() |
| report = mod.append_published_issue_section( |
| "# Report\n", |
| {"number": 42, "url": "https://github.com/owner/repo/issues/42"}, |
| ) |
|
|
| assert "## Published GitHub Issue" in report |
| assert "[#42](https://github.com/owner/repo/issues/42)" in report |
|
|
|
|
| @pytest.mark.asyncio |
| async def test_async_main_fails_early_when_issue_publish_token_missing(monkeypatch): |
| mod = _load() |
| monkeypatch.delenv("GITHUB_TOKEN", raising=False) |
|
|
| def fail_collect(*_args, **_kwargs): |
| raise AssertionError("collection should not run without a GitHub token") |
|
|
| monkeypatch.setattr(mod, "collect_sources", fail_collect) |
|
|
| result = await mod.async_main(["--create-github-issue"]) |
|
|
| assert result == 1 |
|
|
|
|
| @pytest.mark.asyncio |
| async def test_call_json_llm_retries_after_invalid_json(): |
| mod = _load() |
| calls = [] |
|
|
| async def fake_completion(**kwargs): |
| calls.append(kwargs) |
| content = "not json" if len(calls) == 1 else '{"ok": true}' |
| return {"choices": [{"message": {"content": content}}]} |
|
|
| result = await mod._call_json_llm( |
| [{"role": "user", "content": "return json"}], |
| {}, |
| completion_func=fake_completion, |
| retries=1, |
| ) |
|
|
| assert result == {"ok": True} |
| assert len(calls) == 2 |
| assert "previous response was not valid JSON" in calls[1]["messages"][-1]["content"] |
|
|
|
|
| @pytest.mark.asyncio |
| async def test_call_json_llm_uses_temperature_one_for_thinking_params(): |
| mod = _load() |
| calls = [] |
|
|
| async def fake_completion(**kwargs): |
| calls.append(kwargs) |
| return {"choices": [{"message": {"content": '{"ok": true}'}}]} |
|
|
| result = await mod._call_json_llm( |
| [{"role": "user", "content": "return json"}], |
| {"thinking": {"type": "adaptive"}, "output_config": {"effort": "high"}}, |
| completion_func=fake_completion, |
| retries=0, |
| ) |
|
|
| assert result == {"ok": True} |
| assert calls[0]["temperature"] == 1.0 |
|
|
|
|
| def test_render_markdown_report_from_sample_ranking(): |
| mod = _load() |
| records = [ |
| { |
| "id": "github_issue#1", |
| "source": "github_issue", |
| "url": "https://github.com/owner/repo/issues/1", |
| "title": "Broken login", |
| }, |
| { |
| "id": "github_pr#2", |
| "source": "github_pr", |
| "url": "https://github.com/owner/repo/pull/2", |
| "title": "Fix login", |
| }, |
| ] |
| ranking = { |
| "summary": "Fix login first.", |
| "can_be_closed": [ |
| { |
| "title": "Fix login", |
| "source_ids": ["github_pr#2"], |
| "reason": "PR already landed on main.", |
| "confidence": 0.95, |
| "close_action": "Close duplicate PR.", |
| } |
| ], |
| "highest_impact_next": [ |
| { |
| "title": "Unblock login", |
| "category": "fix", |
| "recommendation": "Review and merge the existing PR.", |
| "impact_score": 5, |
| "effort_score": 1, |
| "confidence": 0.9, |
| "source_ids": ["github_issue#1", "github_pr#2"], |
| "rationale": "It blocks onboarding.", |
| "next_action": "Review PR #2.", |
| } |
| ], |
| "features": [], |
| "fixes": [], |
| } |
|
|
| report = mod.render_markdown_report( |
| ranking, |
| records, |
| generated_at="2026-05-04T10:00:00+00:00", |
| model="openai/gpt-5.5", |
| ) |
|
|
| assert "# ML Intern Backlog Prioritization" in report |
| assert "## Can Be Closed" in report |
| assert "PR already landed on main." in report |
| assert "## Highest Impact Next" in report |
| assert "[github_issue#1](https://github.com/owner/repo/issues/1)" in report |
| assert "Review and merge the existing PR." in report |
|
|
|
|
| def test_cli_defaults_without_live_network_or_llm(): |
| mod = _load() |
| args = mod.parse_args([]) |
| out = mod.resolve_output_dir( |
| None, now=datetime(2026, 5, 4, 12, 30, tzinfo=timezone.utc) |
| ) |
|
|
| assert args.github_repo == "huggingface/ml-intern" |
| assert args.hf_space == "smolagents/ml-intern" |
| assert args.config == "configs/cli_agent_config.json" |
| assert args.resolution_ref == "main" |
| assert args.create_github_issue is False |
| assert args.github_issue_label == [] |
| assert args.github_report_label == mod.DEFAULT_GITHUB_REPORT_LABEL |
| assert args.output_dir is None |
| assert out.name == "20260504T123000Z" |
| assert "scratch/backlog-prioritization" in str(out) |
|
|