Spaces:
Sleeping
Sleeping
Deploy OpenClaw PR API
Browse files- README.md +2 -1
- src/slop_farmer/app/pr_search.py +47 -0
- src/slop_farmer/app/pr_search_api.py +13 -28
- src/slop_farmer/app/pr_search_client.py +47 -42
- src/slop_farmer/reports/pr_search_service.py +175 -0
README.md
CHANGED
|
@@ -31,5 +31,6 @@ CLI examples:
|
|
| 31 |
```bash
|
| 32 |
pr-search repo status
|
| 33 |
pr-search pr similar 67096
|
| 34 |
-
pr-search pr
|
|
|
|
| 35 |
```
|
|
|
|
| 31 |
```bash
|
| 32 |
pr-search repo status
|
| 33 |
pr-search pr similar 67096
|
| 34 |
+
pr-search pr clusters 67096
|
| 35 |
+
pr-search --json pr similar 67096
|
| 36 |
```
|
src/slop_farmer/app/pr_search.py
CHANGED
|
@@ -8,7 +8,9 @@ from slop_farmer.reports import pr_search_service
|
|
| 8 |
run_pr_search_refresh = pr_search_service.run_pr_search_refresh
|
| 9 |
get_pr_search_status = pr_search_service.get_pr_search_status
|
| 10 |
get_pr_search_similar = pr_search_service.get_pr_search_similar
|
|
|
|
| 11 |
get_pr_search_candidate_clusters = pr_search_service.get_pr_search_candidate_clusters
|
|
|
|
| 12 |
get_pr_search_cluster = pr_search_service.get_pr_search_cluster
|
| 13 |
explain_pr_search_pair = pr_search_service.explain_pr_search_pair
|
| 14 |
probe_pr_search_live = pr_search_service.probe_pr_search_live
|
|
@@ -44,6 +46,11 @@ def format_pr_search_similar(result: Mapping[str, Any]) -> str:
|
|
| 44 |
f"Active snapshot: {result['snapshot_id']}",
|
| 45 |
"",
|
| 46 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
if not result["similar_prs"]:
|
| 48 |
lines.append("No similar PRs found in the active run.")
|
| 49 |
return "\n".join(lines)
|
|
@@ -87,6 +94,46 @@ def format_pr_search_candidate_clusters(result: Mapping[str, Any]) -> str:
|
|
| 87 |
return "\n".join(lines)
|
| 88 |
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
def format_pr_search_cluster(result: Mapping[str, Any]) -> str:
|
| 91 |
cluster = result["cluster"]
|
| 92 |
lines = [
|
|
|
|
| 8 |
run_pr_search_refresh = pr_search_service.run_pr_search_refresh
|
| 9 |
get_pr_search_status = pr_search_service.get_pr_search_status
|
| 10 |
get_pr_search_similar = pr_search_service.get_pr_search_similar
|
| 11 |
+
get_pr_search_similar_lookup = pr_search_service.get_pr_search_similar_lookup
|
| 12 |
get_pr_search_candidate_clusters = pr_search_service.get_pr_search_candidate_clusters
|
| 13 |
+
get_pr_search_clusters = pr_search_service.get_pr_search_clusters
|
| 14 |
get_pr_search_cluster = pr_search_service.get_pr_search_cluster
|
| 15 |
explain_pr_search_pair = pr_search_service.explain_pr_search_pair
|
| 16 |
probe_pr_search_live = pr_search_service.probe_pr_search_live
|
|
|
|
| 46 |
f"Active snapshot: {result['snapshot_id']}",
|
| 47 |
"",
|
| 48 |
]
|
| 49 |
+
query = result.get("query") or {}
|
| 50 |
+
if query.get("mode_used") == "live":
|
| 51 |
+
source = query.get("source") or "live"
|
| 52 |
+
lines.insert(3, f"Lookup mode: live via {source}")
|
| 53 |
+
lines.insert(4, "")
|
| 54 |
if not result["similar_prs"]:
|
| 55 |
lines.append("No similar PRs found in the active run.")
|
| 56 |
return "\n".join(lines)
|
|
|
|
| 94 |
return "\n".join(lines)
|
| 95 |
|
| 96 |
|
| 97 |
+
def format_pr_search_clusters(result: Mapping[str, Any]) -> str:
    """Render a PR's cluster-context payload as human-readable text.

    Args:
        result: Payload with a ``pr`` mapping (``pr_number`` is required) and
            optional ``query``, ``assigned_clusters`` and
            ``candidate_clusters`` entries. Missing or empty optional entries
            are treated as empty.

    Returns:
        The report as a single newline-joined string: a header, an optional
        "Lookup mode" line when the query reports ``mode_used == "live"``,
        the assigned clusters, then the ranked candidate clusters.

    Raises:
        KeyError: If a required key (``pr``/``pr_number``, or a cluster's
            ``cluster_id``, ``representative_pr_number``, ``cluster_size``
            or ``candidate_score``) is absent.
    """
    lines = [
        f"PR #{result['pr']['pr_number']}: cluster context",
        "",
    ]

    # Only live lookups are called out; indexed lookups print no mode line.
    query = result.get("query") or {}
    if query.get("mode_used") == "live":
        source = query.get("source") or "live"
        lines.extend([f"Lookup mode: live via {source}", ""])

    lines.append("Assigned clusters:")
    assigned_clusters = result.get("assigned_clusters") or []
    if not assigned_clusters:
        lines.append("- none")
    for cluster in assigned_clusters:
        lines.append(
            f"- {cluster['cluster_id']} "
            f"representative=PR #{cluster['representative_pr_number']} "
            f"size={cluster['cluster_size']}"
        )
        if cluster.get("summary"):
            lines.append(f" {cluster['summary']}")

    lines.extend(["", "Candidate clusters:"])
    candidate_clusters = result.get("candidate_clusters") or []
    if not candidate_clusters:
        lines.append("- none")
    for index, row in enumerate(candidate_clusters, start=1):
        # A row without an ``assigned`` flag is reported as not assigned
        # instead of aborting the whole report with a KeyError.
        assigned = "yes" if row.get("assigned") else "no"
        lines.append(
            f"{index}. {row['cluster_id']} score={row['candidate_score']:.2f} "
            f"assigned={assigned}"
        )
        lines.append(f" representative: PR #{row['representative_pr_number']}")
        matched = row.get("matched_member_pr_numbers") or []
        if matched:
            members = ", ".join(f"#{number}" for number in matched)
            lines.append(f" matched members: {members}")
        if row.get("reason"):
            lines.append(f" reason: {row['reason']}")
    return "\n".join(lines)
|
| 135 |
+
|
| 136 |
+
|
| 137 |
def format_pr_search_cluster(result: Mapping[str, Any]) -> str:
|
| 138 |
cluster = result["cluster"]
|
| 139 |
lines = [
|
src/slop_farmer/app/pr_search_api.py
CHANGED
|
@@ -4,7 +4,7 @@ import os
|
|
| 4 |
from contextlib import asynccontextmanager
|
| 5 |
from dataclasses import dataclass
|
| 6 |
from pathlib import Path
|
| 7 |
-
from typing import Any
|
| 8 |
|
| 9 |
from fastapi import FastAPI, HTTPException, Request
|
| 10 |
from fastapi.responses import JSONResponse
|
|
@@ -12,11 +12,10 @@ from fastapi.responses import JSONResponse
|
|
| 12 |
from slop_farmer.config import PrSearchRefreshOptions
|
| 13 |
from slop_farmer.data.ghreplica_api import GhReplicaProbeUnavailableError, GhrProbeClient
|
| 14 |
from slop_farmer.reports.pr_search_service import (
|
| 15 |
-
get_pr_search_candidate_clusters,
|
| 16 |
get_pr_search_cluster,
|
| 17 |
-
|
|
|
|
| 18 |
get_pr_search_status,
|
| 19 |
-
probe_pr_search_live,
|
| 20 |
run_pr_search_refresh,
|
| 21 |
)
|
| 22 |
|
|
@@ -131,29 +130,33 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 131 |
number: int,
|
| 132 |
request: Request,
|
| 133 |
limit: int | None = None,
|
|
|
|
| 134 |
) -> dict[str, Any]:
|
| 135 |
settings = request.app.state.settings
|
| 136 |
repo_slug = _repo_slug(settings, owner, repo)
|
| 137 |
-
return
|
| 138 |
settings.index_path,
|
| 139 |
repo=repo_slug,
|
| 140 |
pr_number=number,
|
| 141 |
limit=_limit(
|
| 142 |
limit, default=settings.similar_limit_default, maximum=settings.similar_limit_max
|
| 143 |
),
|
|
|
|
|
|
|
| 144 |
)
|
| 145 |
|
| 146 |
-
@app.get("/v1/repos/{owner}/{repo}/pulls/{number}/
|
| 147 |
-
async def
|
| 148 |
owner: str,
|
| 149 |
repo: str,
|
| 150 |
number: int,
|
| 151 |
request: Request,
|
| 152 |
limit: int | None = None,
|
|
|
|
| 153 |
) -> dict[str, Any]:
|
| 154 |
settings = request.app.state.settings
|
| 155 |
repo_slug = _repo_slug(settings, owner, repo)
|
| 156 |
-
return
|
| 157 |
settings.index_path,
|
| 158 |
repo=repo_slug,
|
| 159 |
pr_number=number,
|
|
@@ -162,6 +165,8 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 162 |
default=settings.candidate_limit_default,
|
| 163 |
maximum=settings.candidate_limit_max,
|
| 164 |
),
|
|
|
|
|
|
|
| 165 |
)
|
| 166 |
|
| 167 |
@app.get("/v1/repos/{owner}/{repo}/clusters/{cluster_id}")
|
|
@@ -175,26 +180,6 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 175 |
repo_slug = _repo_slug(settings, owner, repo)
|
| 176 |
return get_pr_search_cluster(settings.index_path, repo=repo_slug, cluster_id=cluster_id)
|
| 177 |
|
| 178 |
-
@app.get("/v1/repos/{owner}/{repo}/pulls/{number}/probe")
|
| 179 |
-
async def pr_probe(
|
| 180 |
-
owner: str,
|
| 181 |
-
repo: str,
|
| 182 |
-
number: int,
|
| 183 |
-
request: Request,
|
| 184 |
-
limit: int | None = None,
|
| 185 |
-
) -> dict[str, Any]:
|
| 186 |
-
settings = request.app.state.settings
|
| 187 |
-
repo_slug = _repo_slug(settings, owner, repo)
|
| 188 |
-
return probe_pr_search_live(
|
| 189 |
-
settings.index_path,
|
| 190 |
-
repo=repo_slug,
|
| 191 |
-
pr_number=number,
|
| 192 |
-
limit=_limit(
|
| 193 |
-
limit, default=settings.probe_limit_default, maximum=settings.probe_limit_max
|
| 194 |
-
),
|
| 195 |
-
client=_probe_client(settings),
|
| 196 |
-
)
|
| 197 |
-
|
| 198 |
return app
|
| 199 |
|
| 200 |
|
|
|
|
| 4 |
from contextlib import asynccontextmanager
|
| 5 |
from dataclasses import dataclass
|
| 6 |
from pathlib import Path
|
| 7 |
+
from typing import Any, Literal
|
| 8 |
|
| 9 |
from fastapi import FastAPI, HTTPException, Request
|
| 10 |
from fastapi.responses import JSONResponse
|
|
|
|
| 12 |
from slop_farmer.config import PrSearchRefreshOptions
|
| 13 |
from slop_farmer.data.ghreplica_api import GhReplicaProbeUnavailableError, GhrProbeClient
|
| 14 |
from slop_farmer.reports.pr_search_service import (
|
|
|
|
| 15 |
get_pr_search_cluster,
|
| 16 |
+
get_pr_search_clusters,
|
| 17 |
+
get_pr_search_similar_lookup,
|
| 18 |
get_pr_search_status,
|
|
|
|
| 19 |
run_pr_search_refresh,
|
| 20 |
)
|
| 21 |
|
|
|
|
| 130 |
number: int,
|
| 131 |
request: Request,
|
| 132 |
limit: int | None = None,
|
| 133 |
+
mode: Literal["auto", "indexed", "live"] = "auto",
|
| 134 |
) -> dict[str, Any]:
|
| 135 |
settings = request.app.state.settings
|
| 136 |
repo_slug = _repo_slug(settings, owner, repo)
|
| 137 |
+
return get_pr_search_similar_lookup(
|
| 138 |
settings.index_path,
|
| 139 |
repo=repo_slug,
|
| 140 |
pr_number=number,
|
| 141 |
limit=_limit(
|
| 142 |
limit, default=settings.similar_limit_default, maximum=settings.similar_limit_max
|
| 143 |
),
|
| 144 |
+
mode=mode,
|
| 145 |
+
client=_probe_client(settings),
|
| 146 |
)
|
| 147 |
|
| 148 |
+
@app.get("/v1/repos/{owner}/{repo}/pulls/{number}/clusters")
|
| 149 |
+
async def pr_clusters(
|
| 150 |
owner: str,
|
| 151 |
repo: str,
|
| 152 |
number: int,
|
| 153 |
request: Request,
|
| 154 |
limit: int | None = None,
|
| 155 |
+
mode: Literal["auto", "indexed", "live"] = "auto",
|
| 156 |
) -> dict[str, Any]:
|
| 157 |
settings = request.app.state.settings
|
| 158 |
repo_slug = _repo_slug(settings, owner, repo)
|
| 159 |
+
return get_pr_search_clusters(
|
| 160 |
settings.index_path,
|
| 161 |
repo=repo_slug,
|
| 162 |
pr_number=number,
|
|
|
|
| 165 |
default=settings.candidate_limit_default,
|
| 166 |
maximum=settings.candidate_limit_max,
|
| 167 |
),
|
| 168 |
+
mode=mode,
|
| 169 |
+
client=_probe_client(settings),
|
| 170 |
)
|
| 171 |
|
| 172 |
@app.get("/v1/repos/{owner}/{repo}/clusters/{cluster_id}")
|
|
|
|
| 180 |
repo_slug = _repo_slug(settings, owner, repo)
|
| 181 |
return get_pr_search_cluster(settings.index_path, repo=repo_slug, cluster_id=cluster_id)
|
| 182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
return app
|
| 184 |
|
| 185 |
|
src/slop_farmer/app/pr_search_client.py
CHANGED
|
@@ -10,9 +10,8 @@ from collections.abc import Callable
|
|
| 10 |
from typing import Any
|
| 11 |
|
| 12 |
from slop_farmer.app.pr_search import (
|
| 13 |
-
format_pr_search_candidate_clusters,
|
| 14 |
format_pr_search_cluster,
|
| 15 |
-
|
| 16 |
format_pr_search_similar,
|
| 17 |
format_pr_search_status,
|
| 18 |
)
|
|
@@ -29,8 +28,8 @@ def build_parser() -> argparse.ArgumentParser:
|
|
| 29 |
"Examples:\n"
|
| 30 |
" pr-search repo status\n"
|
| 31 |
" pr-search pr similar 67096\n"
|
| 32 |
-
" pr-search pr
|
| 33 |
-
" pr-search pr
|
| 34 |
" pr-search cluster view pr-scope-123-4\n"
|
| 35 |
" pr-search -R openclaw/openclaw repo status"
|
| 36 |
),
|
|
@@ -80,27 +79,31 @@ def build_parser() -> argparse.ArgumentParser:
|
|
| 80 |
|
| 81 |
similar = pr_subparsers.add_parser(
|
| 82 |
"similar",
|
| 83 |
-
help="Show similar
|
| 84 |
-
description="Find
|
| 85 |
)
|
| 86 |
similar.add_argument("number", type=int)
|
| 87 |
similar.add_argument("--limit", type=int, default=None, help="Maximum rows to return.")
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
"
|
| 91 |
-
|
| 92 |
-
|
| 93 |
)
|
| 94 |
-
candidates.add_argument("number", type=int)
|
| 95 |
-
candidates.add_argument("--limit", type=int, default=None, help="Maximum rows to return.")
|
| 96 |
|
| 97 |
-
|
| 98 |
-
"
|
| 99 |
-
help="
|
| 100 |
-
description="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
)
|
| 102 |
-
probe.add_argument("number", type=int)
|
| 103 |
-
probe.add_argument("--limit", type=int, default=None, help="Maximum rows to return.")
|
| 104 |
|
| 105 |
cluster_parser = subparsers.add_parser(
|
| 106 |
"cluster",
|
|
@@ -133,42 +136,37 @@ class PrSearchApiClient:
|
|
| 133 |
*,
|
| 134 |
number: int,
|
| 135 |
limit: int | None,
|
|
|
|
| 136 |
) -> dict[str, Any]:
|
| 137 |
owner, name = _split_repo(repo)
|
| 138 |
return self._get_json(
|
| 139 |
f"/v1/repos/{owner}/{name}/pulls/{number}/similar",
|
| 140 |
-
params=
|
| 141 |
)
|
| 142 |
|
| 143 |
-
def
|
| 144 |
self,
|
| 145 |
repo: str,
|
| 146 |
*,
|
| 147 |
number: int,
|
| 148 |
limit: int | None,
|
|
|
|
| 149 |
) -> dict[str, Any]:
|
| 150 |
owner, name = _split_repo(repo)
|
| 151 |
return self._get_json(
|
| 152 |
-
f"/v1/repos/{owner}/{name}/pulls/{number}/
|
| 153 |
-
params=
|
| 154 |
)
|
| 155 |
|
| 156 |
def get_cluster(self, repo: str, *, cluster_id: str) -> dict[str, Any]:
|
| 157 |
owner, name = _split_repo(repo)
|
| 158 |
return self._get_json(f"/v1/repos/{owner}/{name}/clusters/{cluster_id}")
|
| 159 |
|
| 160 |
-
def probe(self, repo: str, *, number: int, limit: int | None) -> dict[str, Any]:
|
| 161 |
-
owner, name = _split_repo(repo)
|
| 162 |
-
return self._get_json(
|
| 163 |
-
f"/v1/repos/{owner}/{name}/pulls/{number}/probe",
|
| 164 |
-
params=_limit_params(limit),
|
| 165 |
-
)
|
| 166 |
-
|
| 167 |
def _get_json(
|
| 168 |
self,
|
| 169 |
path: str,
|
| 170 |
*,
|
| 171 |
-
params: dict[str, int] | None = None,
|
| 172 |
) -> dict[str, Any]:
|
| 173 |
query = f"?{urllib.parse.urlencode(params)}" if params else ""
|
| 174 |
request = urllib.request.Request(f"{self.base_url}{path}{query}")
|
|
@@ -199,18 +197,22 @@ def main(argv: list[str] | None = None) -> None:
|
|
| 199 |
|
| 200 |
if args.command == "pr":
|
| 201 |
if args.pr_command == "similar":
|
| 202 |
-
result = client.get_similar(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
_emit(result, args.json, format_pr_search_similar)
|
| 204 |
return
|
| 205 |
-
if args.pr_command == "
|
| 206 |
-
result = client.
|
| 207 |
-
args.repo,
|
|
|
|
|
|
|
|
|
|
| 208 |
)
|
| 209 |
-
_emit(result, args.json,
|
| 210 |
-
return
|
| 211 |
-
if args.pr_command == "probe":
|
| 212 |
-
result = client.probe(args.repo, number=args.number, limit=args.limit)
|
| 213 |
-
_emit(result, args.json, format_pr_search_probe)
|
| 214 |
return
|
| 215 |
|
| 216 |
if args.command == "cluster" and args.cluster_command == "view":
|
|
@@ -239,8 +241,11 @@ def _split_repo(repo: str) -> tuple[str, str]:
|
|
| 239 |
return owner, name
|
| 240 |
|
| 241 |
|
| 242 |
-
def
|
| 243 |
-
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
|
| 246 |
def _error_detail(detail: str, *, fallback: str) -> str:
|
|
|
|
| 10 |
from typing import Any
|
| 11 |
|
| 12 |
from slop_farmer.app.pr_search import (
|
|
|
|
| 13 |
format_pr_search_cluster,
|
| 14 |
+
format_pr_search_clusters,
|
| 15 |
format_pr_search_similar,
|
| 16 |
format_pr_search_status,
|
| 17 |
)
|
|
|
|
| 28 |
"Examples:\n"
|
| 29 |
" pr-search repo status\n"
|
| 30 |
" pr-search pr similar 67096\n"
|
| 31 |
+
" pr-search pr clusters 67096\n"
|
| 32 |
+
" pr-search --json pr similar 67096 --mode live\n"
|
| 33 |
" pr-search cluster view pr-scope-123-4\n"
|
| 34 |
" pr-search -R openclaw/openclaw repo status"
|
| 35 |
),
|
|
|
|
| 79 |
|
| 80 |
similar = pr_subparsers.add_parser(
|
| 81 |
"similar",
|
| 82 |
+
help="Show similar PRs.",
|
| 83 |
+
description="Find similar pull requests for one PR number.",
|
| 84 |
)
|
| 85 |
similar.add_argument("number", type=int)
|
| 86 |
similar.add_argument("--limit", type=int, default=None, help="Maximum rows to return.")
|
| 87 |
+
similar.add_argument(
|
| 88 |
+
"--mode",
|
| 89 |
+
choices=("auto", "indexed", "live"),
|
| 90 |
+
default="auto",
|
| 91 |
+
help="Lookup mode. Defaults to auto.",
|
| 92 |
)
|
|
|
|
|
|
|
| 93 |
|
| 94 |
+
clusters = pr_subparsers.add_parser(
|
| 95 |
+
"clusters",
|
| 96 |
+
help="Show cluster context for a PR.",
|
| 97 |
+
description="Show assigned and candidate clusters for one PR number.",
|
| 98 |
+
)
|
| 99 |
+
clusters.add_argument("number", type=int)
|
| 100 |
+
clusters.add_argument("--limit", type=int, default=None, help="Maximum rows to return.")
|
| 101 |
+
clusters.add_argument(
|
| 102 |
+
"--mode",
|
| 103 |
+
choices=("auto", "indexed", "live"),
|
| 104 |
+
default="auto",
|
| 105 |
+
help="Lookup mode. Defaults to auto.",
|
| 106 |
)
|
|
|
|
|
|
|
| 107 |
|
| 108 |
cluster_parser = subparsers.add_parser(
|
| 109 |
"cluster",
|
|
|
|
| 136 |
*,
|
| 137 |
number: int,
|
| 138 |
limit: int | None,
|
| 139 |
+
mode: str,
|
| 140 |
) -> dict[str, Any]:
|
| 141 |
owner, name = _split_repo(repo)
|
| 142 |
return self._get_json(
|
| 143 |
f"/v1/repos/{owner}/{name}/pulls/{number}/similar",
|
| 144 |
+
params=_lookup_params(limit, mode=mode),
|
| 145 |
)
|
| 146 |
|
| 147 |
+
def get_clusters(
    self,
    repo: str,
    *,
    number: int,
    limit: int | None,
    mode: str,
) -> dict[str, Any]:
    """Fetch assigned and candidate clusters for one PR from the API.

    Args:
        repo: Repository slug; split into owner and name by ``_split_repo``.
        number: Pull request number placed in the request path.
        limit: Optional row cap; only sent when not ``None``.
        mode: Lookup mode forwarded as the ``mode`` query parameter.

    Returns:
        The decoded JSON body returned by ``self._get_json``.
    """
    owner, name = _split_repo(repo)
    return self._get_json(
        f"/v1/repos/{owner}/{name}/pulls/{number}/clusters",
        params=_lookup_params(limit, mode=mode),
    )
|
| 160 |
|
| 161 |
def get_cluster(self, repo: str, *, cluster_id: str) -> dict[str, Any]:
    """Fetch a single cluster by id from the API.

    Args:
        repo: Repository slug; split into owner and name by ``_split_repo``.
        cluster_id: Cluster identifier placed in the request path.

    Returns:
        The decoded JSON body returned by ``self._get_json``.
    """
    owner, name = _split_repo(repo)
    return self._get_json(f"/v1/repos/{owner}/{name}/clusters/{cluster_id}")
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
def _get_json(
|
| 166 |
self,
|
| 167 |
path: str,
|
| 168 |
*,
|
| 169 |
+
params: dict[str, int | str] | None = None,
|
| 170 |
) -> dict[str, Any]:
|
| 171 |
query = f"?{urllib.parse.urlencode(params)}" if params else ""
|
| 172 |
request = urllib.request.Request(f"{self.base_url}{path}{query}")
|
|
|
|
| 197 |
|
| 198 |
if args.command == "pr":
|
| 199 |
if args.pr_command == "similar":
|
| 200 |
+
result = client.get_similar(
|
| 201 |
+
args.repo,
|
| 202 |
+
number=args.number,
|
| 203 |
+
limit=args.limit,
|
| 204 |
+
mode=args.mode,
|
| 205 |
+
)
|
| 206 |
_emit(result, args.json, format_pr_search_similar)
|
| 207 |
return
|
| 208 |
+
if args.pr_command == "clusters":
|
| 209 |
+
result = client.get_clusters(
|
| 210 |
+
args.repo,
|
| 211 |
+
number=args.number,
|
| 212 |
+
limit=args.limit,
|
| 213 |
+
mode=args.mode,
|
| 214 |
)
|
| 215 |
+
_emit(result, args.json, format_pr_search_clusters)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
return
|
| 217 |
|
| 218 |
if args.command == "cluster" and args.cluster_command == "view":
|
|
|
|
| 241 |
return owner, name
|
| 242 |
|
| 243 |
|
| 244 |
+
def _lookup_params(limit: int | None, *, mode: str) -> dict[str, int | str]:
    """Build the query-string parameters for a PR lookup request.

    Args:
        limit: Optional row cap. Omitted from the result when ``None``;
            any other value (including ``0``) is passed through.
        mode: Lookup mode, always included under the ``mode`` key.

    Returns:
        A new dict containing ``mode`` and, when given, ``limit``.
    """
    params: dict[str, int | str] = {"mode": mode}
    if limit is not None:
        params["limit"] = limit
    return params
|
| 249 |
|
| 250 |
|
| 251 |
def _error_detail(detail: str, *, fallback: str) -> str:
|
src/slop_farmer/reports/pr_search_service.py
CHANGED
|
@@ -287,6 +287,108 @@ def get_pr_search_candidate_clusters(
|
|
| 287 |
connection.close()
|
| 288 |
|
| 289 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
def get_pr_search_cluster(
|
| 291 |
db_path: Path,
|
| 292 |
*,
|
|
@@ -584,6 +686,60 @@ def _scoped_rows(rows: list[dict[str, Any]], **extra: Any) -> list[dict[str, Any
|
|
| 584 |
return [{**extra, **row} for row in rows]
|
| 585 |
|
| 586 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 587 |
def _require_document(connection: Any, *, run_id: str, pr_number: int) -> dict[str, Any]:
|
| 588 |
document = get_document(connection, run_id=run_id, pr_number=pr_number)
|
| 589 |
if document is None:
|
|
@@ -623,6 +779,25 @@ def _json_float_dict(raw: Any) -> dict[str, float]:
|
|
| 623 |
return {str(key): float(value) for key, value in payload.items()}
|
| 624 |
|
| 625 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 626 |
def _probe_source_metadata(
|
| 627 |
client: Any,
|
| 628 |
*,
|
|
|
|
| 287 |
connection.close()
|
| 288 |
|
| 289 |
|
| 290 |
+
def get_pr_search_similar_lookup(
    db_path: Path,
    *,
    pr_number: int,
    repo: str | None = None,
    limit: int = 10,
    mode: str = "auto",
    client: ProbeClientLike | None = None,
) -> dict[str, Any]:
    """Return similar PRs for one PR, from the index or from a live probe.

    Args:
        db_path: Path to the PR search database.
        pr_number: Pull request to look up.
        repo: Optional repository slug forwarded to the underlying lookups.
        limit: Maximum number of similar PRs requested.
        mode: ``"auto"``, ``"indexed"`` or ``"live"`` (validated by
            ``_normalize_lookup_mode``).
        client: Probe client forwarded to ``probe_pr_search_live``.

    Returns:
        For an indexed hit, the ``get_pr_search_similar`` result with a
        ``query`` entry added (``mode_used == "indexed"``). Otherwise a dict
        built from the live probe with ``repo``, ``snapshot_id``, ``run_id``,
        ``query`` (``mode_used == "live"``), ``pr``, ``probe_source`` and
        ``similar_prs``.

    Raises:
        ValueError: For an unsupported mode; for any indexed-lookup error in
            ``"indexed"`` mode; and in ``"auto"`` mode for indexed-lookup
            errors that ``_is_index_miss`` does not recognise.
    """
    resolved_mode = _normalize_lookup_mode(mode)
    if resolved_mode != "live":
        try:
            result = get_pr_search_similar(db_path, pr_number=pr_number, repo=repo, limit=limit)
        except ValueError as exc:
            # Only an index miss in "auto" mode may fall through to the live
            # probe below; everything else is the caller's error to handle.
            if resolved_mode == "indexed" or not _is_index_miss(exc):
                raise
        else:
            result["query"] = {
                "pr_number": pr_number,
                "mode_requested": resolved_mode,
                "mode_used": "indexed",
                "source": "active_index",
            }
            return result

    live_result = probe_pr_search_live(
        db_path,
        pr_number=pr_number,
        repo=repo,
        limit=limit,
        client=client,
    )
    return {
        "repo": live_result["repo"],
        "snapshot_id": live_result["snapshot_id"],
        "run_id": live_result["run_id"],
        "query": {
            "pr_number": pr_number,
            "mode_requested": resolved_mode,
            "mode_used": "live",
            "source": live_result["probe_source"]["provider"],
        },
        "pr": live_result["probe_pr"],
        "probe_source": live_result["probe_source"],
        "similar_prs": live_result["similar_prs"],
    }
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
def get_pr_search_clusters(
    db_path: Path,
    *,
    pr_number: int,
    repo: str | None = None,
    limit: int = 5,
    mode: str = "auto",
    client: ProbeClientLike | None = None,
) -> dict[str, Any]:
    """Return assigned and candidate clusters for one PR.

    Args:
        db_path: Path to the PR search database.
        pr_number: Pull request to look up.
        repo: Optional repository slug forwarded to the underlying lookups.
        limit: Maximum number of candidate clusters requested.
        mode: ``"auto"``, ``"indexed"`` or ``"live"`` (validated by
            ``_normalize_lookup_mode``).
        client: Probe client forwarded to ``probe_pr_search_live``.

    Returns:
        For an indexed hit, the ``_get_pr_search_clusters_indexed`` result
        with a ``query`` entry added (``mode_used == "indexed"``). Otherwise
        a dict built from the live probe; in that case ``assigned_clusters``
        is always an empty list and ``candidate_clusters`` comes from the
        probe result.

    Raises:
        ValueError: For an unsupported mode; for any indexed-lookup error in
            ``"indexed"`` mode; and in ``"auto"`` mode for indexed-lookup
            errors that ``_is_index_miss`` does not recognise.
    """
    resolved_mode = _normalize_lookup_mode(mode)
    if resolved_mode != "live":
        try:
            result = _get_pr_search_clusters_indexed(
                db_path,
                pr_number=pr_number,
                repo=repo,
                limit=limit,
            )
        except ValueError as exc:
            # Only an index miss in "auto" mode may fall through to the live
            # probe below; everything else is re-raised unchanged.
            if resolved_mode == "indexed" or not _is_index_miss(exc):
                raise
        else:
            result["query"] = {
                "pr_number": pr_number,
                "mode_requested": resolved_mode,
                "mode_used": "indexed",
                "source": "active_index",
            }
            return result

    live_result = probe_pr_search_live(
        db_path,
        pr_number=pr_number,
        repo=repo,
        limit=limit,
        client=client,
    )
    return {
        "repo": live_result["repo"],
        "snapshot_id": live_result["snapshot_id"],
        "run_id": live_result["run_id"],
        "query": {
            "pr_number": pr_number,
            "mode_requested": resolved_mode,
            "mode_used": "live",
            "source": live_result["probe_source"]["provider"],
        },
        "pr": live_result["probe_pr"],
        "probe_source": live_result["probe_source"],
        "assigned_clusters": [],
        "candidate_clusters": live_result["candidate_clusters"],
    }
|
| 390 |
+
|
| 391 |
+
|
| 392 |
def get_pr_search_cluster(
|
| 393 |
db_path: Path,
|
| 394 |
*,
|
|
|
|
| 686 |
return [{**extra, **row} for row in rows]
|
| 687 |
|
| 688 |
|
| 689 |
+
def _get_pr_search_clusters_indexed(
    db_path: Path,
    *,
    pr_number: int,
    repo: str | None = None,
    limit: int = 5,
) -> dict[str, Any]:
    """Read a PR's assigned and candidate clusters from the active indexed run.

    Opens the database read-only, resolves the active run, and closes the
    connection before returning, whether or not the lookup succeeds.

    Args:
        db_path: Path to the PR search database.
        pr_number: Pull request to look up.
        repo: Optional repository slug used to resolve the active run.
        limit: Maximum number of candidate cluster rows fetched.

    Returns:
        A dict with ``repo``, ``snapshot_id``, ``run_id``, ``pr`` (the
        indexed document), ``assigned_clusters`` and ``candidate_clusters``.
        Each candidate is the raw row plus decoded ``shared_filenames``,
        ``shared_directories`` and ``evidence``, with
        ``matched_member_pr_numbers`` and ``reason`` lifted out of the
        evidence (defaulting to ``[]`` and ``""``).
    """
    connection = connect_pr_search_db(db_path, read_only=True)
    try:
        active_run = resolve_active_run(connection, repo=repo)
        run_id = str(active_run["id"])
        # NOTE(review): presumably raises when the PR is not in the run; the
        # helper's failure path is not visible from here.
        document = _require_document(connection, run_id=run_id, pr_number=pr_number)
        candidate_rows = get_candidate_cluster_rows(
            connection,
            run_id=run_id,
            pr_number=pr_number,
            limit=limit,
        )
        assigned_cluster_ids = get_cluster_ids_for_prs(
            connection,
            run_id=run_id,
            pr_numbers=[pr_number],
        ).get(pr_number, [])

        assigned_clusters = []
        for cluster_id in assigned_cluster_ids:
            cluster = get_cluster(connection, run_id=run_id, cluster_id=cluster_id)
            # Ids without a matching cluster row are skipped silently.
            if cluster is None:
                continue
            assigned_clusters.append(_cluster_summary(cluster))

        candidates = []
        for row in candidate_rows:
            evidence = _json_dict(row.get("evidence_json"))
            candidates.append(
                {
                    **row,
                    "shared_filenames": _json_list(row.get("shared_filenames_json")),
                    "shared_directories": _json_list(row.get("shared_directories_json")),
                    "evidence": evidence,
                    "matched_member_pr_numbers": evidence.get("matched_member_pr_numbers") or [],
                    "reason": evidence.get("reason") or "",
                }
            )
        return {
            "repo": active_run["repo"],
            "snapshot_id": active_run["snapshot_id"],
            "run_id": run_id,
            "pr": document,
            "assigned_clusters": assigned_clusters,
            "candidate_clusters": candidates,
        }
    finally:
        connection.close()
|
| 741 |
+
|
| 742 |
+
|
| 743 |
def _require_document(connection: Any, *, run_id: str, pr_number: int) -> dict[str, Any]:
|
| 744 |
document = get_document(connection, run_id=run_id, pr_number=pr_number)
|
| 745 |
if document is None:
|
|
|
|
| 779 |
return {str(key): float(value) for key, value in payload.items()}
|
| 780 |
|
| 781 |
|
| 782 |
+
def _cluster_summary(cluster: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of a cluster row with its JSON list columns decoded.

    Args:
        cluster: Cluster row; ``shared_filenames_json`` and
            ``shared_directories_json`` are read with ``.get`` and may be
            absent.

    Returns:
        A new dict holding every key of ``cluster`` plus ``shared_filenames``
        and ``shared_directories`` as produced by ``_json_list``. The input
        dict is not modified.
    """
    return {
        **cluster,
        "shared_filenames": _json_list(cluster.get("shared_filenames_json")),
        "shared_directories": _json_list(cluster.get("shared_directories_json")),
    }
|
| 788 |
+
|
| 789 |
+
|
| 790 |
+
def _normalize_lookup_mode(mode: str) -> str:
    """Validate a lookup mode and return it trimmed and lower-cased.

    Args:
        mode: Requested mode; surrounding whitespace and letter case are
            ignored.

    Returns:
        One of ``"auto"``, ``"indexed"`` or ``"live"``.

    Raises:
        ValueError: If ``mode`` is not a string or does not name a supported
            mode.
    """
    # Reject non-strings with the same ValueError as any other bad mode,
    # rather than letting ``.strip()`` fail with AttributeError.
    normalized = mode.strip().lower() if isinstance(mode, str) else None
    if normalized not in {"auto", "indexed", "live"}:
        raise ValueError(f"Unsupported mode {mode!r}; expected auto, indexed, or live.")
    return normalized
|
| 795 |
+
|
| 796 |
+
|
| 797 |
+
def _is_index_miss(exc: ValueError) -> bool:
    """Report whether an error message mentions the active indexed universe.

    The check is a plain substring match on ``str(exc)``, so it depends on
    the exact wording of the message raised elsewhere.

    Args:
        exc: The error raised by an indexed lookup.

    Returns:
        ``True`` when the message contains ``"active indexed universe"``.
    """
    return "active indexed universe" in str(exc)
|
| 799 |
+
|
| 800 |
+
|
| 801 |
def _probe_source_metadata(
|
| 802 |
client: Any,
|
| 803 |
*,
|