evalstate HF Staff committed on
Commit
961b4d3
·
verified ·
1 Parent(s): 0316aff

Deploy OpenClaw PR API

Browse files
README.md CHANGED
@@ -31,5 +31,6 @@ CLI examples:
31
  ```bash
32
  pr-search repo status
33
  pr-search pr similar 67096
34
- pr-search pr probe 67096 --json
 
35
  ```
 
31
  ```bash
32
  pr-search repo status
33
  pr-search pr similar 67096
34
+ pr-search pr clusters 67096
35
+ pr-search --json pr similar 67096
36
  ```
src/slop_farmer/app/pr_search.py CHANGED
@@ -8,7 +8,9 @@ from slop_farmer.reports import pr_search_service
8
  run_pr_search_refresh = pr_search_service.run_pr_search_refresh
9
  get_pr_search_status = pr_search_service.get_pr_search_status
10
  get_pr_search_similar = pr_search_service.get_pr_search_similar
 
11
  get_pr_search_candidate_clusters = pr_search_service.get_pr_search_candidate_clusters
 
12
  get_pr_search_cluster = pr_search_service.get_pr_search_cluster
13
  explain_pr_search_pair = pr_search_service.explain_pr_search_pair
14
  probe_pr_search_live = pr_search_service.probe_pr_search_live
@@ -44,6 +46,11 @@ def format_pr_search_similar(result: Mapping[str, Any]) -> str:
44
  f"Active snapshot: {result['snapshot_id']}",
45
  "",
46
  ]
 
 
 
 
 
47
  if not result["similar_prs"]:
48
  lines.append("No similar PRs found in the active run.")
49
  return "\n".join(lines)
@@ -87,6 +94,46 @@ def format_pr_search_candidate_clusters(result: Mapping[str, Any]) -> str:
87
  return "\n".join(lines)
88
 
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def format_pr_search_cluster(result: Mapping[str, Any]) -> str:
91
  cluster = result["cluster"]
92
  lines = [
 
8
  run_pr_search_refresh = pr_search_service.run_pr_search_refresh
9
  get_pr_search_status = pr_search_service.get_pr_search_status
10
  get_pr_search_similar = pr_search_service.get_pr_search_similar
11
+ get_pr_search_similar_lookup = pr_search_service.get_pr_search_similar_lookup
12
  get_pr_search_candidate_clusters = pr_search_service.get_pr_search_candidate_clusters
13
+ get_pr_search_clusters = pr_search_service.get_pr_search_clusters
14
  get_pr_search_cluster = pr_search_service.get_pr_search_cluster
15
  explain_pr_search_pair = pr_search_service.explain_pr_search_pair
16
  probe_pr_search_live = pr_search_service.probe_pr_search_live
 
46
  f"Active snapshot: {result['snapshot_id']}",
47
  "",
48
  ]
49
+ query = result.get("query") or {}
50
+ if query.get("mode_used") == "live":
51
+ source = query.get("source") or "live"
52
+ lines.insert(3, f"Lookup mode: live via {source}")
53
+ lines.insert(4, "")
54
  if not result["similar_prs"]:
55
  lines.append("No similar PRs found in the active run.")
56
  return "\n".join(lines)
 
94
  return "\n".join(lines)
95
 
96
 
97
def format_pr_search_clusters(result: Mapping[str, Any]) -> str:
    """Render the cluster context of one PR as plain text.

    Args:
        result: Lookup payload. ``result["pr"]["pr_number"]`` is required;
            ``query``, ``assigned_clusters`` and ``candidate_clusters`` are
            optional and treated as empty when missing or falsy.

    Returns:
        The report lines joined with newlines.

    Raises:
        KeyError: If the PR number or a required cluster field
            (``cluster_id``, ``representative_pr_number``, ``cluster_size``,
            ``candidate_score``) is missing.
    """
    lines = [f"PR #{result['pr']['pr_number']}: cluster context", ""]

    query = result.get("query") or {}
    if query.get("mode_used") == "live":
        # Fall back to a generic label when the payload names no source.
        source = query.get("source") or "live"
        lines += [f"Lookup mode: live via {source}", ""]

    lines.append("Assigned clusters:")
    assigned_clusters = result.get("assigned_clusters") or []
    if not assigned_clusters:
        lines.append("- none")
    for cluster in assigned_clusters:
        lines.append(
            f"- {cluster['cluster_id']} representative=PR #{cluster['representative_pr_number']} "
            f"size={cluster['cluster_size']}"
        )
        if cluster.get("summary"):
            lines.append(f" {cluster['summary']}")

    lines += ["", "Candidate clusters:"]
    candidate_clusters = result.get("candidate_clusters") or []
    if not candidate_clusters:
        lines.append("- none")
        return "\n".join(lines)

    for index, row in enumerate(candidate_clusters, start=1):
        # Candidate rows are not guaranteed to carry ``assigned`` (like the
        # other optional fields below), so a missing flag is rendered as "no"
        # instead of aborting the report with a KeyError.
        assigned = "yes" if row.get("assigned") else "no"
        lines.append(
            f"{index}. {row['cluster_id']} score={row['candidate_score']:.2f} "
            f"assigned={assigned}"
        )
        lines.append(f" representative: PR #{row['representative_pr_number']}")
        matched = row.get("matched_member_pr_numbers") or []
        if matched:
            lines.append(f" matched members: {', '.join(f'#{number}' for number in matched)}")
        if row.get("reason"):
            lines.append(f" reason: {row['reason']}")
    return "\n".join(lines)
135
+
136
+
137
  def format_pr_search_cluster(result: Mapping[str, Any]) -> str:
138
  cluster = result["cluster"]
139
  lines = [
src/slop_farmer/app/pr_search_api.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  from contextlib import asynccontextmanager
5
  from dataclasses import dataclass
6
  from pathlib import Path
7
- from typing import Any
8
 
9
  from fastapi import FastAPI, HTTPException, Request
10
  from fastapi.responses import JSONResponse
@@ -12,11 +12,10 @@ from fastapi.responses import JSONResponse
12
  from slop_farmer.config import PrSearchRefreshOptions
13
  from slop_farmer.data.ghreplica_api import GhReplicaProbeUnavailableError, GhrProbeClient
14
  from slop_farmer.reports.pr_search_service import (
15
- get_pr_search_candidate_clusters,
16
  get_pr_search_cluster,
17
- get_pr_search_similar,
 
18
  get_pr_search_status,
19
- probe_pr_search_live,
20
  run_pr_search_refresh,
21
  )
22
 
@@ -131,29 +130,33 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
131
  number: int,
132
  request: Request,
133
  limit: int | None = None,
 
134
  ) -> dict[str, Any]:
135
  settings = request.app.state.settings
136
  repo_slug = _repo_slug(settings, owner, repo)
137
- return get_pr_search_similar(
138
  settings.index_path,
139
  repo=repo_slug,
140
  pr_number=number,
141
  limit=_limit(
142
  limit, default=settings.similar_limit_default, maximum=settings.similar_limit_max
143
  ),
 
 
144
  )
145
 
146
- @app.get("/v1/repos/{owner}/{repo}/pulls/{number}/candidate-clusters")
147
- async def pr_candidate_clusters(
148
  owner: str,
149
  repo: str,
150
  number: int,
151
  request: Request,
152
  limit: int | None = None,
 
153
  ) -> dict[str, Any]:
154
  settings = request.app.state.settings
155
  repo_slug = _repo_slug(settings, owner, repo)
156
- return get_pr_search_candidate_clusters(
157
  settings.index_path,
158
  repo=repo_slug,
159
  pr_number=number,
@@ -162,6 +165,8 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
162
  default=settings.candidate_limit_default,
163
  maximum=settings.candidate_limit_max,
164
  ),
 
 
165
  )
166
 
167
  @app.get("/v1/repos/{owner}/{repo}/clusters/{cluster_id}")
@@ -175,26 +180,6 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
175
  repo_slug = _repo_slug(settings, owner, repo)
176
  return get_pr_search_cluster(settings.index_path, repo=repo_slug, cluster_id=cluster_id)
177
 
178
- @app.get("/v1/repos/{owner}/{repo}/pulls/{number}/probe")
179
- async def pr_probe(
180
- owner: str,
181
- repo: str,
182
- number: int,
183
- request: Request,
184
- limit: int | None = None,
185
- ) -> dict[str, Any]:
186
- settings = request.app.state.settings
187
- repo_slug = _repo_slug(settings, owner, repo)
188
- return probe_pr_search_live(
189
- settings.index_path,
190
- repo=repo_slug,
191
- pr_number=number,
192
- limit=_limit(
193
- limit, default=settings.probe_limit_default, maximum=settings.probe_limit_max
194
- ),
195
- client=_probe_client(settings),
196
- )
197
-
198
  return app
199
 
200
 
 
4
  from contextlib import asynccontextmanager
5
  from dataclasses import dataclass
6
  from pathlib import Path
7
+ from typing import Any, Literal
8
 
9
  from fastapi import FastAPI, HTTPException, Request
10
  from fastapi.responses import JSONResponse
 
12
  from slop_farmer.config import PrSearchRefreshOptions
13
  from slop_farmer.data.ghreplica_api import GhReplicaProbeUnavailableError, GhrProbeClient
14
  from slop_farmer.reports.pr_search_service import (
 
15
  get_pr_search_cluster,
16
+ get_pr_search_clusters,
17
+ get_pr_search_similar_lookup,
18
  get_pr_search_status,
 
19
  run_pr_search_refresh,
20
  )
21
 
 
130
  number: int,
131
  request: Request,
132
  limit: int | None = None,
133
+ mode: Literal["auto", "indexed", "live"] = "auto",
134
  ) -> dict[str, Any]:
135
  settings = request.app.state.settings
136
  repo_slug = _repo_slug(settings, owner, repo)
137
+ return get_pr_search_similar_lookup(
138
  settings.index_path,
139
  repo=repo_slug,
140
  pr_number=number,
141
  limit=_limit(
142
  limit, default=settings.similar_limit_default, maximum=settings.similar_limit_max
143
  ),
144
+ mode=mode,
145
+ client=_probe_client(settings),
146
  )
147
 
148
+ @app.get("/v1/repos/{owner}/{repo}/pulls/{number}/clusters")
149
+ async def pr_clusters(
150
  owner: str,
151
  repo: str,
152
  number: int,
153
  request: Request,
154
  limit: int | None = None,
155
+ mode: Literal["auto", "indexed", "live"] = "auto",
156
  ) -> dict[str, Any]:
157
  settings = request.app.state.settings
158
  repo_slug = _repo_slug(settings, owner, repo)
159
+ return get_pr_search_clusters(
160
  settings.index_path,
161
  repo=repo_slug,
162
  pr_number=number,
 
165
  default=settings.candidate_limit_default,
166
  maximum=settings.candidate_limit_max,
167
  ),
168
+ mode=mode,
169
+ client=_probe_client(settings),
170
  )
171
 
172
  @app.get("/v1/repos/{owner}/{repo}/clusters/{cluster_id}")
 
180
  repo_slug = _repo_slug(settings, owner, repo)
181
  return get_pr_search_cluster(settings.index_path, repo=repo_slug, cluster_id=cluster_id)
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  return app
184
 
185
 
src/slop_farmer/app/pr_search_client.py CHANGED
@@ -10,9 +10,8 @@ from collections.abc import Callable
10
  from typing import Any
11
 
12
  from slop_farmer.app.pr_search import (
13
- format_pr_search_candidate_clusters,
14
  format_pr_search_cluster,
15
- format_pr_search_probe,
16
  format_pr_search_similar,
17
  format_pr_search_status,
18
  )
@@ -29,8 +28,8 @@ def build_parser() -> argparse.ArgumentParser:
29
  "Examples:\n"
30
  " pr-search repo status\n"
31
  " pr-search pr similar 67096\n"
32
- " pr-search pr candidates 67096 --limit 5\n"
33
- " pr-search pr probe 67096 --json\n"
34
  " pr-search cluster view pr-scope-123-4\n"
35
  " pr-search -R openclaw/openclaw repo status"
36
  ),
@@ -80,27 +79,31 @@ def build_parser() -> argparse.ArgumentParser:
80
 
81
  similar = pr_subparsers.add_parser(
82
  "similar",
83
- help="Show similar indexed PRs.",
84
- description="Find the nearest indexed pull requests for one PR number.",
85
  )
86
  similar.add_argument("number", type=int)
87
  similar.add_argument("--limit", type=int, default=None, help="Maximum rows to return.")
88
-
89
- candidates = pr_subparsers.add_parser(
90
- "candidates",
91
- help="Show candidate clusters.",
92
- description="Show candidate scope clusters for one indexed pull request.",
93
  )
94
- candidates.add_argument("number", type=int)
95
- candidates.add_argument("--limit", type=int, default=None, help="Maximum rows to return.")
96
 
97
- probe = pr_subparsers.add_parser(
98
- "probe",
99
- help="Probe a live PR against the active index.",
100
- description="Fetch one live pull request and compare it against the active index.",
 
 
 
 
 
 
 
 
101
  )
102
- probe.add_argument("number", type=int)
103
- probe.add_argument("--limit", type=int, default=None, help="Maximum rows to return.")
104
 
105
  cluster_parser = subparsers.add_parser(
106
  "cluster",
@@ -133,42 +136,37 @@ class PrSearchApiClient:
133
  *,
134
  number: int,
135
  limit: int | None,
 
136
  ) -> dict[str, Any]:
137
  owner, name = _split_repo(repo)
138
  return self._get_json(
139
  f"/v1/repos/{owner}/{name}/pulls/{number}/similar",
140
- params=_limit_params(limit),
141
  )
142
 
143
- def get_candidate_clusters(
144
  self,
145
  repo: str,
146
  *,
147
  number: int,
148
  limit: int | None,
 
149
  ) -> dict[str, Any]:
150
  owner, name = _split_repo(repo)
151
  return self._get_json(
152
- f"/v1/repos/{owner}/{name}/pulls/{number}/candidate-clusters",
153
- params=_limit_params(limit),
154
  )
155
 
156
  def get_cluster(self, repo: str, *, cluster_id: str) -> dict[str, Any]:
157
  owner, name = _split_repo(repo)
158
  return self._get_json(f"/v1/repos/{owner}/{name}/clusters/{cluster_id}")
159
 
160
- def probe(self, repo: str, *, number: int, limit: int | None) -> dict[str, Any]:
161
- owner, name = _split_repo(repo)
162
- return self._get_json(
163
- f"/v1/repos/{owner}/{name}/pulls/{number}/probe",
164
- params=_limit_params(limit),
165
- )
166
-
167
  def _get_json(
168
  self,
169
  path: str,
170
  *,
171
- params: dict[str, int] | None = None,
172
  ) -> dict[str, Any]:
173
  query = f"?{urllib.parse.urlencode(params)}" if params else ""
174
  request = urllib.request.Request(f"{self.base_url}{path}{query}")
@@ -199,18 +197,22 @@ def main(argv: list[str] | None = None) -> None:
199
 
200
  if args.command == "pr":
201
  if args.pr_command == "similar":
202
- result = client.get_similar(args.repo, number=args.number, limit=args.limit)
 
 
 
 
 
203
  _emit(result, args.json, format_pr_search_similar)
204
  return
205
- if args.pr_command == "candidates":
206
- result = client.get_candidate_clusters(
207
- args.repo, number=args.number, limit=args.limit
 
 
 
208
  )
209
- _emit(result, args.json, format_pr_search_candidate_clusters)
210
- return
211
- if args.pr_command == "probe":
212
- result = client.probe(args.repo, number=args.number, limit=args.limit)
213
- _emit(result, args.json, format_pr_search_probe)
214
  return
215
 
216
  if args.command == "cluster" and args.cluster_command == "view":
@@ -239,8 +241,11 @@ def _split_repo(repo: str) -> tuple[str, str]:
239
  return owner, name
240
 
241
 
242
- def _limit_params(limit: int | None) -> dict[str, int] | None:
243
- return None if limit is None else {"limit": limit}
 
 
 
244
 
245
 
246
  def _error_detail(detail: str, *, fallback: str) -> str:
 
10
  from typing import Any
11
 
12
  from slop_farmer.app.pr_search import (
 
13
  format_pr_search_cluster,
14
+ format_pr_search_clusters,
15
  format_pr_search_similar,
16
  format_pr_search_status,
17
  )
 
28
  "Examples:\n"
29
  " pr-search repo status\n"
30
  " pr-search pr similar 67096\n"
31
+ " pr-search pr clusters 67096\n"
32
+ " pr-search --json pr similar 67096 --mode live\n"
33
  " pr-search cluster view pr-scope-123-4\n"
34
  " pr-search -R openclaw/openclaw repo status"
35
  ),
 
79
 
80
  similar = pr_subparsers.add_parser(
81
  "similar",
82
+ help="Show similar PRs.",
83
+ description="Find similar pull requests for one PR number.",
84
  )
85
  similar.add_argument("number", type=int)
86
  similar.add_argument("--limit", type=int, default=None, help="Maximum rows to return.")
87
+ similar.add_argument(
88
+ "--mode",
89
+ choices=("auto", "indexed", "live"),
90
+ default="auto",
91
+ help="Lookup mode. Defaults to auto.",
92
  )
 
 
93
 
94
+ clusters = pr_subparsers.add_parser(
95
+ "clusters",
96
+ help="Show cluster context for a PR.",
97
+ description="Show assigned and candidate clusters for one PR number.",
98
+ )
99
+ clusters.add_argument("number", type=int)
100
+ clusters.add_argument("--limit", type=int, default=None, help="Maximum rows to return.")
101
+ clusters.add_argument(
102
+ "--mode",
103
+ choices=("auto", "indexed", "live"),
104
+ default="auto",
105
+ help="Lookup mode. Defaults to auto.",
106
  )
 
 
107
 
108
  cluster_parser = subparsers.add_parser(
109
  "cluster",
 
136
  *,
137
  number: int,
138
  limit: int | None,
139
+ mode: str,
140
  ) -> dict[str, Any]:
141
  owner, name = _split_repo(repo)
142
  return self._get_json(
143
  f"/v1/repos/{owner}/{name}/pulls/{number}/similar",
144
+ params=_lookup_params(limit, mode=mode),
145
  )
146
 
147
+ def get_clusters(
148
  self,
149
  repo: str,
150
  *,
151
  number: int,
152
  limit: int | None,
153
+ mode: str,
154
  ) -> dict[str, Any]:
155
  owner, name = _split_repo(repo)
156
  return self._get_json(
157
+ f"/v1/repos/{owner}/{name}/pulls/{number}/clusters",
158
+ params=_lookup_params(limit, mode=mode),
159
  )
160
 
161
  def get_cluster(self, repo: str, *, cluster_id: str) -> dict[str, Any]:
162
  owner, name = _split_repo(repo)
163
  return self._get_json(f"/v1/repos/{owner}/{name}/clusters/{cluster_id}")
164
 
 
 
 
 
 
 
 
165
  def _get_json(
166
  self,
167
  path: str,
168
  *,
169
+ params: dict[str, int | str] | None = None,
170
  ) -> dict[str, Any]:
171
  query = f"?{urllib.parse.urlencode(params)}" if params else ""
172
  request = urllib.request.Request(f"{self.base_url}{path}{query}")
 
197
 
198
  if args.command == "pr":
199
  if args.pr_command == "similar":
200
+ result = client.get_similar(
201
+ args.repo,
202
+ number=args.number,
203
+ limit=args.limit,
204
+ mode=args.mode,
205
+ )
206
  _emit(result, args.json, format_pr_search_similar)
207
  return
208
+ if args.pr_command == "clusters":
209
+ result = client.get_clusters(
210
+ args.repo,
211
+ number=args.number,
212
+ limit=args.limit,
213
+ mode=args.mode,
214
  )
215
+ _emit(result, args.json, format_pr_search_clusters)
 
 
 
 
216
  return
217
 
218
  if args.command == "cluster" and args.cluster_command == "view":
 
241
  return owner, name
242
 
243
 
244
+ def _lookup_params(limit: int | None, *, mode: str) -> dict[str, int | str]:
245
+ params: dict[str, int | str] = {"mode": mode}
246
+ if limit is not None:
247
+ params["limit"] = limit
248
+ return params
249
 
250
 
251
  def _error_detail(detail: str, *, fallback: str) -> str:
src/slop_farmer/reports/pr_search_service.py CHANGED
@@ -287,6 +287,108 @@ def get_pr_search_candidate_clusters(
287
  connection.close()
288
 
289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  def get_pr_search_cluster(
291
  db_path: Path,
292
  *,
@@ -584,6 +686,60 @@ def _scoped_rows(rows: list[dict[str, Any]], **extra: Any) -> list[dict[str, Any
584
  return [{**extra, **row} for row in rows]
585
 
586
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
  def _require_document(connection: Any, *, run_id: str, pr_number: int) -> dict[str, Any]:
588
  document = get_document(connection, run_id=run_id, pr_number=pr_number)
589
  if document is None:
@@ -623,6 +779,25 @@ def _json_float_dict(raw: Any) -> dict[str, float]:
623
  return {str(key): float(value) for key, value in payload.items()}
624
 
625
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
626
  def _probe_source_metadata(
627
  client: Any,
628
  *,
 
287
  connection.close()
288
 
289
 
290
def get_pr_search_similar_lookup(
    db_path: Path,
    *,
    pr_number: int,
    repo: str | None = None,
    limit: int = 10,
    mode: str = "auto",
    client: ProbeClientLike | None = None,
) -> dict[str, Any]:
    """Find PRs similar to ``pr_number`` via the index or a live probe.

    With ``mode="live"`` only the live probe runs. Otherwise the indexed
    lookup is tried first; a ``ValueError`` from it is re-raised unless the
    mode is not ``"indexed"`` and ``_is_index_miss`` recognises the error, in
    which case the live probe is used instead.

    Returns:
        The lookup payload with a ``query`` entry recording the PR number,
        the requested mode, the mode actually used and the data source.

    Raises:
        ValueError: For an unsupported ``mode`` or an indexed-lookup failure
            that does not qualify for the live fallback.
    """
    requested = _normalize_lookup_mode(mode)

    def describe_query(mode_used: str, source: Any) -> dict[str, Any]:
        # Shared shape of the ``query`` entry for both lookup paths.
        return {
            "pr_number": pr_number,
            "mode_requested": requested,
            "mode_used": mode_used,
            "source": source,
        }

    if requested != "live":
        try:
            indexed = get_pr_search_similar(db_path, pr_number=pr_number, repo=repo, limit=limit)
        except ValueError as error:
            may_fall_back = requested != "indexed" and _is_index_miss(error)
            if not may_fall_back:
                raise
        else:
            indexed["query"] = describe_query("indexed", "active_index")
            return indexed

    probed = probe_pr_search_live(
        db_path,
        pr_number=pr_number,
        repo=repo,
        limit=limit,
        client=client,
    )
    return {
        "repo": probed["repo"],
        "snapshot_id": probed["snapshot_id"],
        "run_id": probed["run_id"],
        "query": describe_query("live", probed["probe_source"]["provider"]),
        "pr": probed["probe_pr"],
        "probe_source": probed["probe_source"],
        "similar_prs": probed["similar_prs"],
    }
336
+
337
+
338
def get_pr_search_clusters(
    db_path: Path,
    *,
    pr_number: int,
    repo: str | None = None,
    limit: int = 5,
    mode: str = "auto",
    client: ProbeClientLike | None = None,
) -> dict[str, Any]:
    """Return assigned and candidate clusters for ``pr_number``.

    With ``mode="live"`` only the live probe runs. Otherwise the indexed
    lookup is tried first; a ``ValueError`` from it is re-raised unless the
    mode is not ``"indexed"`` and ``_is_index_miss`` recognises the error, in
    which case the live probe is used instead. Live results always carry an
    empty ``assigned_clusters`` list.

    Returns:
        The lookup payload with a ``query`` entry recording the PR number,
        the requested mode, the mode actually used and the data source.

    Raises:
        ValueError: For an unsupported ``mode`` or an indexed-lookup failure
            that does not qualify for the live fallback.
    """
    requested = _normalize_lookup_mode(mode)

    def describe_query(mode_used: str, source: Any) -> dict[str, Any]:
        # Shared shape of the ``query`` entry for both lookup paths.
        return {
            "pr_number": pr_number,
            "mode_requested": requested,
            "mode_used": mode_used,
            "source": source,
        }

    if requested != "live":
        try:
            indexed = _get_pr_search_clusters_indexed(
                db_path,
                pr_number=pr_number,
                repo=repo,
                limit=limit,
            )
        except ValueError as error:
            may_fall_back = requested != "indexed" and _is_index_miss(error)
            if not may_fall_back:
                raise
        else:
            indexed["query"] = describe_query("indexed", "active_index")
            return indexed

    probed = probe_pr_search_live(
        db_path,
        pr_number=pr_number,
        repo=repo,
        limit=limit,
        client=client,
    )
    return {
        "repo": probed["repo"],
        "snapshot_id": probed["snapshot_id"],
        "run_id": probed["run_id"],
        "query": describe_query("live", probed["probe_source"]["provider"]),
        "pr": probed["probe_pr"],
        "probe_source": probed["probe_source"],
        "assigned_clusters": [],
        "candidate_clusters": probed["candidate_clusters"],
    }
390
+
391
+
392
  def get_pr_search_cluster(
393
  db_path: Path,
394
  *,
 
686
  return [{**extra, **row} for row in rows]
687
 
688
 
689
def _get_pr_search_clusters_indexed(
    db_path: Path,
    *,
    pr_number: int,
    repo: str | None = None,
    limit: int = 5,
) -> dict[str, Any]:
    """Read the cluster context of an indexed PR from the active run.

    Opens the database read-only, requires the PR document to exist in the
    active run, and returns the clusters the PR is assigned to together with
    up to ``limit`` candidate-cluster rows whose JSON columns are decoded.
    The connection is always closed before returning.
    """

    def decode_candidate(row: dict[str, Any]) -> dict[str, Any]:
        # Expand the JSON columns and surface two evidence fields at top level.
        evidence = _json_dict(row.get("evidence_json"))
        return {
            **row,
            "shared_filenames": _json_list(row.get("shared_filenames_json")),
            "shared_directories": _json_list(row.get("shared_directories_json")),
            "evidence": evidence,
            "matched_member_pr_numbers": evidence.get("matched_member_pr_numbers") or [],
            "reason": evidence.get("reason") or "",
        }

    connection = connect_pr_search_db(db_path, read_only=True)
    try:
        active_run = resolve_active_run(connection, repo=repo)
        run_id = str(active_run["id"])
        document = _require_document(connection, run_id=run_id, pr_number=pr_number)
        candidate_rows = get_candidate_cluster_rows(
            connection,
            run_id=run_id,
            pr_number=pr_number,
            limit=limit,
        )
        cluster_ids_by_pr = get_cluster_ids_for_prs(
            connection,
            run_id=run_id,
            pr_numbers=[pr_number],
        )
        # Cluster ids that no longer resolve to a cluster row are skipped.
        assigned_clusters = [
            _cluster_summary(cluster)
            for cluster_id in cluster_ids_by_pr.get(pr_number, [])
            if (cluster := get_cluster(connection, run_id=run_id, cluster_id=cluster_id))
            is not None
        ]
        candidates = [decode_candidate(row) for row in candidate_rows]
        return {
            "repo": active_run["repo"],
            "snapshot_id": active_run["snapshot_id"],
            "run_id": run_id,
            "pr": document,
            "assigned_clusters": assigned_clusters,
            "candidate_clusters": candidates,
        }
    finally:
        connection.close()
741
+
742
+
743
  def _require_document(connection: Any, *, run_id: str, pr_number: int) -> dict[str, Any]:
744
  document = get_document(connection, run_id=run_id, pr_number=pr_number)
745
  if document is None:
 
779
  return {str(key): float(value) for key, value in payload.items()}
780
 
781
 
782
def _cluster_summary(cluster: dict[str, Any]) -> dict[str, Any]:
    """Copy a cluster row and add its decoded shared-path lists.

    The input mapping is left untouched; the copy gains ``shared_filenames``
    and ``shared_directories`` decoded from the matching ``*_json`` fields.
    """
    summary = dict(cluster)
    summary["shared_filenames"] = _json_list(cluster.get("shared_filenames_json"))
    summary["shared_directories"] = _json_list(cluster.get("shared_directories_json"))
    return summary
788
+
789
+
790
+ def _normalize_lookup_mode(mode: str) -> str:
791
+ normalized = mode.strip().lower()
792
+ if normalized not in {"auto", "indexed", "live"}:
793
+ raise ValueError(f"Unsupported mode {mode!r}; expected auto, indexed, or live.")
794
+ return normalized
795
+
796
+
797
+ def _is_index_miss(exc: ValueError) -> bool:
798
+ return "active indexed universe" in str(exc)
799
+
800
+
801
  def _probe_source_metadata(
802
  client: Any,
803
  *,