Spaces:

evalstate
/

openclaw-pr-api

Sleeping

App Files Files Community

evalstate HF Staff committed on Apr 17

Commit

41a8c52

verified ·

1 Parent(s): 1fb282b

Deploy OpenClaw PR API

Browse files

Files changed (4) hide show

pyproject.toml +1 -2
src/slop_farmer/app/pr_search_api.py +107 -0
src/slop_farmer/reports/analysis_service.py +318 -0
src/slop_farmer/reports/new_contributor_report.py +140 -14

pyproject.toml CHANGED Viewed

@@ -16,7 +16,7 @@ dependencies = [
   "pydantic>=2.11",
   "PyYAML>=6.0.2",
   "rank-bm25>=0.2.2",
-  "fast-agent-mcp>=0.6.16",
   "uvicorn>=0.34.0",
 ]
@@ -33,7 +33,6 @@ llm = [
 [project.scripts]
 slop-farmer = "slop_farmer.app.cli:main"
-pr-search = "slop_farmer.app.pr_search_client:main"
 [tool.setuptools]
 package-dir = {"" = "src"}

   "pydantic>=2.11",
   "PyYAML>=6.0.2",
   "rank-bm25>=0.2.2",
+  "fast-agent-mcp>=0.6.17",
   "uvicorn>=0.34.0",
 ]
 [project.scripts]
 slop-farmer = "slop_farmer.app.cli:main"
 [tool.setuptools]
 package-dir = {"" = "src"}

src/slop_farmer/app/pr_search_api.py CHANGED Viewed

@@ -11,6 +11,14 @@ from fastapi.responses import JSONResponse
 from slop_farmer.config import PrSearchRefreshOptions
 from slop_farmer.data.ghreplica_api import GhReplicaProbeUnavailableError, GhrProbeClient
 from slop_farmer.reports.pr_search_service import (
     get_pr_search_cluster,
     get_pr_search_clusters,
@@ -204,6 +212,104 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
             ),
         )
     return app
@@ -289,6 +395,7 @@ def _looks_not_found(exc: ValueError) -> bool:
     message = str(exc).lower()
     return (
         "not found" in message
         or "no active pr search run" in message
         or "was not found in the active indexed universe" in message
     )

 from slop_farmer.config import PrSearchRefreshOptions
 from slop_farmer.data.ghreplica_api import GhReplicaProbeUnavailableError, GhrProbeClient
+from slop_farmer.reports.analysis_service import (
+    get_analysis_best,
+    get_analysis_meta_bug,
+    get_analysis_status,
+    get_pr_analysis,
+    list_analysis_duplicate_prs,
+    list_analysis_meta_bugs,
+)
 from slop_farmer.reports.pr_search_service import (
     get_pr_search_cluster,
     get_pr_search_clusters,
             ),
         )
+    @app.get("/v1/repos/{owner}/{repo}/analysis/status")
+    async def analysis_status(
+        owner: str,
+        repo: str,
+        request: Request,
+        variant: Literal["auto", "hybrid", "deterministic"] = "auto",
+    ) -> dict[str, Any]:
+        # Reports whether an analysis report is available for the repo's active
+        # snapshot by delegating to get_analysis_status with the configured index path.
+        # NOTE(review): this coroutine calls a synchronous helper that opens the
+        # search index and may read a JSON report; confirm that blocking the event
+        # loop here is acceptable.
+        settings = request.app.state.settings
+        repo_slug = _repo_slug(settings, owner, repo)
+        return get_analysis_status(settings.index_path, repo=repo_slug, variant=variant)
+    @app.get("/v1/repos/{owner}/{repo}/pulls/{number}/analysis")
+    async def pr_analysis(
+        owner: str,
+        repo: str,
+        number: int,
+        request: Request,
+        variant: Literal["auto", "hybrid", "deterministic"] = "auto",
+    ) -> dict[str, Any]:
+        # Returns the analysis entries (meta-bug cluster and duplicate-PR group)
+        # that mention pull request `number`, as produced by get_pr_analysis.
+        settings = request.app.state.settings
+        repo_slug = _repo_slug(settings, owner, repo)
+        return get_pr_analysis(
+            settings.index_path,
+            repo=repo_slug,
+            pr_number=number,
+            variant=variant,
+        )
+    @app.get("/v1/repos/{owner}/{repo}/analysis/meta-bugs")
+    async def analysis_meta_bugs(
+        owner: str,
+        repo: str,
+        request: Request,
+        limit: int | None = None,
+        variant: Literal["auto", "hybrid", "deterministic"] = "auto",
+    ) -> dict[str, Any]:
+        # Lists meta-bug clusters from the analysis report via list_analysis_meta_bugs.
+        # The requested limit goes through _limit together with the cluster-list
+        # default and maximum from the settings before it reaches the service.
+        settings = request.app.state.settings
+        repo_slug = _repo_slug(settings, owner, repo)
+        return list_analysis_meta_bugs(
+            settings.index_path,
+            repo=repo_slug,
+            variant=variant,
+            limit=_limit(
+                limit,
+                default=settings.cluster_list_limit_default,
+                maximum=settings.cluster_list_limit_max,
+            ),
+        )
+    @app.get("/v1/repos/{owner}/{repo}/analysis/meta-bugs/{cluster_id}")
+    async def analysis_meta_bug(
+        owner: str,
+        repo: str,
+        cluster_id: str,
+        request: Request,
+        variant: Literal["auto", "hybrid", "deterministic"] = "auto",
+    ) -> dict[str, Any]:
+        # Returns a single meta-bug cluster selected by `cluster_id`, delegating
+        # to get_analysis_meta_bug (which raises ValueError for an unknown id).
+        settings = request.app.state.settings
+        repo_slug = _repo_slug(settings, owner, repo)
+        return get_analysis_meta_bug(
+            settings.index_path,
+            repo=repo_slug,
+            cluster_id=cluster_id,
+            variant=variant,
+        )
+    @app.get("/v1/repos/{owner}/{repo}/analysis/duplicate-prs")
+    async def analysis_duplicate_prs(
+        owner: str,
+        repo: str,
+        request: Request,
+        limit: int | None = None,
+        variant: Literal["auto", "hybrid", "deterministic"] = "auto",
+    ) -> dict[str, Any]:
+        # Lists duplicate-PR groups from the analysis report via
+        # list_analysis_duplicate_prs. The limit is processed by _limit with the
+        # same cluster-list default and maximum that the meta-bug listing uses.
+        settings = request.app.state.settings
+        repo_slug = _repo_slug(settings, owner, repo)
+        return list_analysis_duplicate_prs(
+            settings.index_path,
+            repo=repo_slug,
+            variant=variant,
+            limit=_limit(
+                limit,
+                default=settings.cluster_list_limit_default,
+                maximum=settings.cluster_list_limit_max,
+            ),
+        )
+    @app.get("/v1/repos/{owner}/{repo}/analysis/best")
+    async def analysis_best(
+        owner: str,
+        repo: str,
+        request: Request,
+        variant: Literal["auto", "hybrid", "deterministic"] = "auto",
+    ) -> dict[str, Any]:
+        # Returns the report's "best_issue" and "best_pr" entries as assembled
+        # by get_analysis_best.
+        settings = request.app.state.settings
+        repo_slug = _repo_slug(settings, owner, repo)
+        return get_analysis_best(settings.index_path, repo=repo_slug, variant=variant)
     return app
     message = str(exc).lower()
     return (
         "not found" in message
+        or "no analysis report was found" in message
         or "no active pr search run" in message
         or "was not found in the active indexed universe" in message
     )

src/slop_farmer/reports/analysis_service.py ADDED Viewed

	@@ -0,0 +1,318 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+from slop_farmer.data.parquet_io import read_json
+from slop_farmer.data.search_duckdb import connect_pr_search_db, resolve_active_run
+# Variant names accepted by the analysis helpers. "auto" is not a file of its own:
+# the resolver tries the hybrid report first and then the deterministic one.
+ANALYSIS_VARIANTS = {"auto", "deterministic", "hybrid"}
+# Report file name, relative to a snapshot directory, for each concrete variant.
+ANALYSIS_REPORT_FILENAMES = {
+    "deterministic": "analysis-report.json",
+    "hybrid": "analysis-report-hybrid.json",
+}
+@dataclass(frozen=True, slots=True)
+class ActiveSnapshotContext:
+    """Active PR-search run together with the snapshot directory it points at."""
+    # Row returned by resolve_active_run, with its keys coerced to str.
+    active_run: dict[str, Any]
+    # Resolved path built from the run's "snapshot_dir" value.
+    snapshot_dir: Path
+@dataclass(frozen=True, slots=True)
+class AnalysisContext:
+    """Loaded analysis report plus the run and variant information it was resolved from."""
+    # Active run row (keys coerced to str) the report belongs to.
+    active_run: dict[str, Any]
+    # Parsed JSON object of the analysis report.
+    report: dict[str, Any]
+    # Normalized variant the caller asked for ("auto", "hybrid" or "deterministic").
+    variant_requested: str
+    # Concrete variant whose report file was actually loaded.
+    variant_used: str
+def get_analysis_status(
+    db_path: Path,
+    *,
+    repo: str | None = None,
+    variant: str = "auto",
+) -> dict[str, Any]:
+    """Describe whether an analysis report exists for the active snapshot.
+    The result always carries the run identity, the normalized requested variant
+    and an "available" flag. When a report file is found, the variant used, the
+    enrichment flag, the generation timestamp and section counts are added.
+    """
+    snapshot = _resolve_active_snapshot_context(db_path, repo=repo)
+    located, resolved_variant = _resolve_analysis_report_path(
+        snapshot.snapshot_dir,
+        variant,
+        required=False,
+    )
+    run = snapshot.active_run
+    status: dict[str, Any] = {
+        "repo": str(run["repo"]),
+        "snapshot_id": str(run["snapshot_id"]),
+        "run_id": str(run["id"]),
+        "variant_requested": _normalize_analysis_variant(variant),
+        "available": located is not None,
+    }
+    if located is not None and resolved_variant is not None:
+        # Only read the report once we know a file is there.
+        loaded = _load_report(located)
+        status["variant_used"] = resolved_variant
+        status["llm_enrichment"] = bool(loaded.get("llm_enrichment"))
+        status["generated_at"] = loaded.get("generated_at")
+        status["counts"] = _analysis_counts(loaded)
+    return status
+def get_pr_analysis(
+    db_path: Path,
+    *,
+    pr_number: int,
+    repo: str | None = None,
+    variant: str = "auto",
+) -> dict[str, Any]:
+    """Return the meta-bug cluster and duplicate-PR group that mention a pull request.
+    "found" is true when at least one of the two lookups matched; unmatched
+    lookups are reported as None.
+    """
+    ctx = _load_analysis_context(db_path, repo=repo, variant=variant)
+    cluster, position = _find_meta_bug_for_pr(ctx.report, pr_number)
+    duplicate_group = _find_duplicate_pr_for_pr(ctx.report, pr_number)
+    result = _analysis_base_payload(ctx)
+    result["pr_number"] = pr_number
+    result["found"] = not (cluster is None and duplicate_group is None)
+    if cluster is None:
+        result["meta_bug"] = None
+    else:
+        result["meta_bug"] = _meta_bug_payload(cluster, rank=position)
+    result["duplicate_pr"] = duplicate_group
+    return result
+def list_analysis_meta_bugs(
+    db_path: Path,
+    *,
+    repo: str | None = None,
+    variant: str = "auto",
+    limit: int = 50,
+) -> dict[str, Any]:
+    """List up to `limit` meta-bug clusters from the analysis report, ranked from 1.
+    "meta_bug_count" is the number of clusters returned, not the number in the report.
+    """
+    context = _load_analysis_context(db_path, repo=repo, variant=variant)
+    # `or []` tolerates a report whose "meta_bugs" value is an explicit null,
+    # the same way _analysis_counts does.
+    clusters = context.report.get("meta_bugs") or []
+    # Clamp at zero so a negative limit cannot silently slice from the end.
+    meta_bugs = [
+        _meta_bug_payload(cluster, rank=index)
+        for index, cluster in enumerate(clusters[: max(limit, 0)], start=1)
+    ]
+    return {
+        **_analysis_base_payload(context),
+        "meta_bugs": meta_bugs,
+        "meta_bug_count": len(meta_bugs),
+    }
+def get_analysis_meta_bug(
+    db_path: Path,
+    *,
+    cluster_id: str,
+    repo: str | None = None,
+    variant: str = "auto",
+) -> dict[str, Any]:
+    """Return one meta-bug cluster by id, with its rank and matching duplicate-PR group.
+    Raises ValueError when no cluster in the report has the given id.
+    """
+    context = _load_analysis_context(db_path, repo=repo, variant=variant)
+    # `or []` tolerates a report whose "meta_bugs" value is an explicit null.
+    for index, cluster in enumerate(context.report.get("meta_bugs") or [], start=1):
+        if str(cluster.get("cluster_id")) != cluster_id:
+            continue
+        return {
+            **_analysis_base_payload(context),
+            "meta_bug": _meta_bug_payload(cluster, rank=index),
+            "duplicate_pr": _find_duplicate_pr_by_cluster_id(context.report, cluster_id),
+        }
+    raise ValueError(f"Analysis cluster {cluster_id!r} was not found in the active snapshot.")
+def list_analysis_duplicate_prs(
+    db_path: Path,
+    *,
+    repo: str | None = None,
+    variant: str = "auto",
+    limit: int = 50,
+) -> dict[str, Any]:
+    """List up to `limit` duplicate-PR groups from the analysis report, ranked from 1.
+    "duplicate_pr_count" is the number of groups returned, not the number in the report.
+    """
+    context = _load_analysis_context(db_path, repo=repo, variant=variant)
+    # `or []` tolerates a report whose "duplicate_prs" value is an explicit null.
+    entries = context.report.get("duplicate_prs") or []
+    # Clamp at zero so a negative limit cannot silently slice from the end.
+    # A "rank" key already present in an entry still overrides the computed index.
+    duplicate_prs = [
+        {"rank": index, **dict(entry)}
+        for index, entry in enumerate(entries[: max(limit, 0)], start=1)
+    ]
+    return {
+        **_analysis_base_payload(context),
+        "duplicate_prs": duplicate_prs,
+        "duplicate_pr_count": len(duplicate_prs),
+    }
+def get_analysis_best(
+    db_path: Path,
+    *,
+    repo: str | None = None,
+    variant: str = "auto",
+) -> dict[str, Any]:
+    """Return the report's "best_issue" and "best_pr" entries, tagged with a cluster id when one matches."""
+    ctx = _load_analysis_context(db_path, repo=repo, variant=variant)
+    report = ctx.report
+    result = _analysis_base_payload(ctx)
+    result["best_issue"] = _best_entry_with_cluster_id(
+        report,
+        report.get("best_issue"),
+        number_key="issue_number",
+        numbers_key="issue_numbers",
+    )
+    result["best_pr"] = _best_entry_with_cluster_id(
+        report,
+        report.get("best_pr"),
+        number_key="pr_number",
+        numbers_key="pr_numbers",
+    )
+    return result
+def _resolve_active_snapshot_context(
+    db_path: Path,
+    *,
+    repo: str | None,
+) -> ActiveSnapshotContext:
+    """Look up the active run in the search database and resolve its snapshot directory.
+    The database is opened read-only and closed again before the result is built.
+    """
+    conn = connect_pr_search_db(db_path, read_only=True)
+    try:
+        run_row = resolve_active_run(conn, repo=repo)
+    finally:
+        conn.close()
+    string_keyed_row = {str(name): cell for name, cell in run_row.items()}
+    resolved_dir = Path(str(run_row["snapshot_dir"])).resolve()
+    return ActiveSnapshotContext(active_run=string_keyed_row, snapshot_dir=resolved_dir)
+def _load_analysis_context(
+    db_path: Path,
+    *,
+    repo: str | None,
+    variant: str,
+) -> AnalysisContext:
+    """Resolve the active snapshot and load the analysis report it is required to contain.
+    ValueError from the resolver helpers propagates when the variant is unsupported
+    or no matching report file exists.
+    """
+    active = _resolve_active_snapshot_context(db_path, repo=repo)
+    report_path, variant_used = _resolve_analysis_report_path(
+        active.snapshot_dir,
+        variant,
+        required=True,
+    )
+    # required=True should never yield None. Check explicitly rather than with
+    # `assert`, because assert statements are removed when Python runs with -O.
+    if report_path is None or variant_used is None:
+        raise RuntimeError("Analysis report resolution returned no path despite required=True.")
+    return AnalysisContext(
+        active_run=active.active_run,
+        report=_load_report(report_path),
+        variant_requested=_normalize_analysis_variant(variant),
+        variant_used=variant_used,
+    )
+def _resolve_analysis_report_path(
+    snapshot_dir: Path,
+    variant: str,
+    *,
+    required: bool,
+) -> tuple[Path | None, str | None]:
+    """Find the report file for `variant` inside `snapshot_dir`.
+    Returns (path, concrete_variant). When nothing is found the result is
+    (None, None) if `required` is false, otherwise ValueError is raised.
+    ValueError is also raised for an unsupported variant name.
+    """
+    normalized = _normalize_analysis_variant(variant)
+    # "auto" prefers the hybrid report and falls back to the deterministic one.
+    candidates = ("hybrid", "deterministic") if normalized == "auto" else (normalized,)
+    for candidate in candidates:
+        report_path = snapshot_dir / ANALYSIS_REPORT_FILENAMES[candidate]
+        # is_file() rather than exists(): a directory that happens to carry the
+        # report's name cannot be loaded as a report.
+        if report_path.is_file():
+            return report_path, candidate
+    if not required:
+        return None, None
+    # The wording of these messages is matched elsewhere to map them to
+    # "not found" responses, so it is kept unchanged.
+    if normalized == "auto":
+        raise ValueError("No analysis report was found for the active snapshot.")
+    raise ValueError(
+        f"{normalized.capitalize()} analysis report was not found for the active snapshot."
+    )
+def _normalize_analysis_variant(variant: str) -> str:
+    """Return `variant` stripped and lower-cased, or raise ValueError if it is not a known variant."""
+    candidate = variant.strip().lower()
+    if candidate in ANALYSIS_VARIANTS:
+        return candidate
+    raise ValueError(
+        f"Unsupported analysis variant {variant!r}; expected auto, hybrid, or deterministic."
+    )
+def _analysis_base_payload(context: AnalysisContext) -> dict[str, Any]:
+    """Build the fields shared by every analysis response: run identity, variants and report metadata."""
+    run = context.active_run
+    report = context.report
+    return dict(
+        repo=str(run["repo"]),
+        snapshot_id=str(run["snapshot_id"]),
+        run_id=str(run["id"]),
+        variant_requested=context.variant_requested,
+        variant_used=context.variant_used,
+        llm_enrichment=bool(report.get("llm_enrichment")),
+        generated_at=report.get("generated_at"),
+    )
+def _analysis_counts(report: dict[str, Any]) -> dict[str, int]:
+    """Count the entries in each list section of the report; a missing or null section counts as 0."""
+    sections = ("meta_bugs", "duplicate_issues", "duplicate_prs")
+    return {section: len(report.get(section) or []) for section in sections}
+def _meta_bug_payload(cluster: dict[str, Any], *, rank: int | None = None) -> dict[str, Any]:
+    """Return a shallow copy of `cluster`, with "rank" set when a rank is given."""
+    if rank is None:
+        return dict(cluster)
+    return {**cluster, "rank": rank}
+def _find_meta_bug_for_pr(
+    report: dict[str, Any],
+    pr_number: int,
+) -> tuple[dict[str, Any] | None, int | None]:
+    """Find the first meta-bug cluster whose "pr_numbers" contain `pr_number`.
+    Returns (copy_of_cluster, 1_based_position), or (None, None) when no cluster matches.
+    """
+    # `or []` tolerates explicit null values for "meta_bugs" and "pr_numbers",
+    # which a plain .get(key, []) default does not cover.
+    for index, cluster in enumerate(report.get("meta_bugs") or [], start=1):
+        if any(int(number) == pr_number for number in cluster.get("pr_numbers") or []):
+            return dict(cluster), index
+    return None, None
+def _find_duplicate_pr_for_pr(report: dict[str, Any], pr_number: int) -> dict[str, Any] | None:
+    """Return a copy of the first duplicate-PR group that lists `pr_number`, else None.
+    A PR matches when it is the group's canonical PR or one of its duplicates.
+    """
+    # `or []` tolerates explicit null values for the list fields.
+    for entry in report.get("duplicate_prs") or []:
+        numbers = {int(number) for number in entry.get("duplicate_pr_numbers") or []}
+        # A group without a canonical number is still searched through its
+        # duplicates instead of aborting the whole lookup with KeyError/TypeError.
+        canonical = entry.get("canonical_pr_number")
+        if canonical is not None:
+            numbers.add(int(canonical))
+        if pr_number in numbers:
+            return dict(entry)
+    return None
+def _find_duplicate_pr_by_cluster_id(
+    report: dict[str, Any],
+    cluster_id: str,
+) -> dict[str, Any] | None:
+    """Return a copy of the first duplicate-PR group whose "cluster_id" equals `cluster_id`, else None."""
+    # `or []` tolerates an explicit null "duplicate_prs" value.
+    for entry in report.get("duplicate_prs") or []:
+        entry_id = entry.get("cluster_id")
+        # Skip entries without an id: str(None) would otherwise match the literal id "None".
+        if entry_id is not None and str(entry_id) == cluster_id:
+            return dict(entry)
+    return None
+def _best_entry_with_cluster_id(
+    report: dict[str, Any],
+    entry: Any,
+    *,
+    number_key: str,
+    numbers_key: str,
+) -> dict[str, Any] | None:
+    """Copy a "best" entry and, when possible, tag it with the id of the cluster that contains it.
+    Returns None when `entry` is not a dict. The entry's number (under `number_key`)
+    is looked up in each meta-bug cluster's `numbers_key` list; the first match
+    contributes its "cluster_id". Keys already present in the entry take precedence.
+    """
+    if not isinstance(entry, dict):
+        return None
+    number = entry.get(number_key)
+    # `or []` tolerates explicit null values for "meta_bugs" and the number lists.
+    clusters = report.get("meta_bugs") or []
+    if number is None or not clusters:
+        return dict(entry)
+    # Convert once instead of on every loop iteration.
+    target = int(number)
+    for cluster in clusters:
+        if any(int(value) == target for value in cluster.get(numbers_key) or []):
+            return {"cluster_id": cluster.get("cluster_id"), **entry}
+    return dict(entry)
+def _load_report(path: Path) -> dict[str, Any]:
+    """Read the JSON report at `path` and return it with string keys; raise ValueError if it is not an object."""
+    loaded = read_json(path)
+    if isinstance(loaded, dict):
+        return {str(name): item for name, item in loaded.items()}
+    raise ValueError(f"Analysis report at {path} must contain a JSON object.")

src/slop_farmer/reports/new_contributor_report.py CHANGED Viewed

@@ -102,6 +102,7 @@ query SearchIssues($query: String!, $cursor: String) {
   }
 }
 """.strip()
 def run_new_contributor_report(options: NewContributorReportOptions) -> Path:
@@ -167,8 +168,16 @@ def _report_contributors(
     previous_snapshot_dir = _previous_snapshot_dir(snapshot)
     previous_primary_authors = _snapshot_primary_authors(previous_snapshot_dir)
     previous_merged_pr_authors = _snapshot_merged_pr_authors(previous_snapshot_dir)
-    previous_report_contributors = _previous_report_contributors(previous_snapshot_dir)
-    cutoff = _snapshot_reference_time(snapshot) - timedelta(days=options.window_days)
     selected = []
     for row in author_rows:
         if row["pr_count"] == 0:
@@ -224,12 +233,30 @@ def _report_contributors(
     for index, row in enumerate(selected, start=1):
         first_seen_in_snapshot = row["author_login"] not in previous_primary_authors
         known_via_prior_merged_pr = row["author_login"] in previous_merged_pr_authors
         if index == 1 or index == total_selected or index % 10 == 0:
             _report_log(
                 f"Enriching contributors: {index}/{total_selected} "
                 f"(current={row['author_login']}, first_seen={str(first_seen_in_snapshot).lower()}, "
                 f"known_via_prior_merged_pr={str(known_via_prior_merged_pr).lower()})"
             )
         try:
             summary = summarize_user(row["author_login"], options.window_days, None)
             fetch_error = None
@@ -442,15 +469,21 @@ def _snapshot_merged_pr_authors(snapshot_dir: Path | None) -> set[str]:
     return authors
-def _previous_report_contributors(snapshot_dir: Path | None) -> set[str]:
     if snapshot_dir is None:
-        return set()
     path = snapshot_dir / "new-contributors-report.json"
     if not path.exists():
-        return set()
     try:
         payload = read_json(path)
     except Exception:
         return set()
     contributors = payload.get("contributors")
     if not isinstance(contributors, list):
@@ -462,15 +495,10 @@ def _previous_report_contributors(snapshot_dir: Path | None) -> set[str]:
     }
-def _previous_report_contributor_entries(snapshot_dir: Path | None) -> dict[str, dict[str, Any]]:
-    if snapshot_dir is None:
-        return {}
-    path = snapshot_dir / "new-contributors-report.json"
-    if not path.exists():
-        return {}
-    try:
-        payload = read_json(path)
-    except Exception:
         return {}
     contributors = payload.get("contributors")
     if not isinstance(contributors, list):
@@ -482,6 +510,104 @@ def _previous_report_contributor_entries(snapshot_dir: Path | None) -> dict[str,
     }
 def _contributor_entry(
     repo: str,
     row: dict[str, Any],

   }
 }
 """.strip()
+# Largest allowed gap between the previous report's "generated_at" and the current
+# snapshot's reference time for that report's contributor entries to be reused.
+PREVIOUS_REPORT_REUSE_MAX_AGE = timedelta(days=2)
 def run_new_contributor_report(options: NewContributorReportOptions) -> Path:
     previous_snapshot_dir = _previous_snapshot_dir(snapshot)
     previous_primary_authors = _snapshot_primary_authors(previous_snapshot_dir)
     previous_merged_pr_authors = _snapshot_merged_pr_authors(previous_snapshot_dir)
+    previous_report_payload = _previous_report_payload(previous_snapshot_dir)
+    previous_report_contributors = _previous_report_contributors(previous_report_payload)
+    previous_report_entries = _previous_report_contributor_entries(previous_report_payload)
+    snapshot_reference_time = _snapshot_reference_time(snapshot)
+    previous_report_reusable = _previous_report_reuse_allowed(
+        previous_report_payload,
+        window_days=options.window_days,
+        reference_time=snapshot_reference_time,
+    )
+    cutoff = snapshot_reference_time - timedelta(days=options.window_days)
     selected = []
     for row in author_rows:
         if row["pr_count"] == 0:
     for index, row in enumerate(selected, start=1):
         first_seen_in_snapshot = row["author_login"] not in previous_primary_authors
         known_via_prior_merged_pr = row["author_login"] in previous_merged_pr_authors
+        previous_entry = previous_report_entries.get(row["author_login"])
         if index == 1 or index == total_selected or index % 10 == 0:
             _report_log(
                 f"Enriching contributors: {index}/{total_selected} "
                 f"(current={row['author_login']}, first_seen={str(first_seen_in_snapshot).lower()}, "
                 f"known_via_prior_merged_pr={str(known_via_prior_merged_pr).lower()})"
             )
+        if (
+            previous_report_reusable
+            and previous_entry is not None
+            and not previous_entry.get("fetch_error")
+            and not known_via_prior_merged_pr
+        ):
+            contributors.append(
+                _reused_previous_report_entry(
+                    snapshot["repo"],
+                    row,
+                    previous_entry,
+                    first_seen_in_snapshot=first_seen_in_snapshot,
+                    known_via_prior_merged_pr=known_via_prior_merged_pr,
+                )
+            )
+            reused_previous_report += 1
+            continue
         try:
             summary = summarize_user(row["author_login"], options.window_days, None)
             fetch_error = None
     return authors
+def _previous_report_payload(snapshot_dir: Path | None) -> dict[str, Any] | None:
+    """Load the previous snapshot's new-contributors report, or None when it is unusable.
+    None is returned when there is no snapshot directory, the report file does not
+    exist, reading it raises, or its top-level value is not a dict.
+    """
     if snapshot_dir is None:
+        return None
     path = snapshot_dir / "new-contributors-report.json"
     if not path.exists():
+        return None
     try:
         payload = read_json(path)
     except Exception:
+        # Best effort: any read failure is treated like a missing report.
+        return None
+    return payload if isinstance(payload, dict) else None
+def _previous_report_contributors(payload: dict[str, Any] | None) -> set[str]:
+    if payload is None:
         return set()
     contributors = payload.get("contributors")
     if not isinstance(contributors, list):
     }
+def _previous_report_contributor_entries(
+    payload: dict[str, Any] | None,
+) -> dict[str, dict[str, Any]]:
+    if payload is None:
         return {}
     contributors = payload.get("contributors")
     if not isinstance(contributors, list):
     }
+def _previous_report_reuse_allowed(
+    payload: dict[str, Any] | None,
+    *,
+    window_days: int,
+    reference_time: datetime,
+) -> bool:
+    """Decide whether entries from the previous report may be reused.
+    Reuse requires a payload with the same "window_days" and a parseable
+    "generated_at" that lies within PREVIOUS_REPORT_REUSE_MAX_AGE of
+    `reference_time`, in either direction.
+    """
+    if payload is None or _coerce_int(payload.get("window_days")) != window_days:
+        return False
+    previous_generated_at = _coerce_datetime(payload.get("generated_at"))
+    if previous_generated_at is None:
+        return False
+    gap = abs(reference_time - previous_generated_at)
+    return gap <= PREVIOUS_REPORT_REUSE_MAX_AGE
+def _reused_previous_report_entry(
+    repo: str,
+    row: dict[str, Any],
+    previous_entry: dict[str, Any],
+    *,
+    first_seen_in_snapshot: bool,
+    known_via_prior_merged_pr: bool,
+) -> dict[str, Any]:
+    """Build a contributor entry without a live fetch, reusing profile data from the previous report.
+    Repo and snapshot fields come from `row` (the current snapshot); name, account
+    age, scores, public orgs and activity are copied from `previous_entry`. The
+    entry is marked with "enrichment_source": "previous_report" and
+    "live_refetch_skipped": True.
+    """
+    login = row["author_login"]
+    # Account age is taken as recorded in the previous report; it is not advanced
+    # to the current snapshot time.
+    age_days = _coerce_int(previous_entry.get("account_age_days"))
+    return {
+        "author_login": login,
+        "name": previous_entry.get("name"),
+        "profile_url": _profile_url(login),
+        "repo_pull_requests_url": _repo_search_url(repo, login, is_pr=True),
+        "repo_issues_url": _repo_search_url(repo, login, is_pr=False),
+        # Fields below are read from the current snapshot's author row.
+        "repo_first_seen_at": row["first_seen_at"],
+        "repo_last_seen_at": row["last_seen_at"],
+        "repo_primary_artifact_count": row["primary_artifact_count"],
+        "repo_artifact_count": row["artifact_count"],
+        "snapshot_issue_count": row["issue_count"],
+        "snapshot_pr_count": row["pr_count"],
+        "snapshot_comment_count": row["comment_count"],
+        "snapshot_review_count": row["review_count"],
+        "snapshot_review_comment_count": row["review_comment_count"],
+        "repo_association": row.get("repo_association"),
+        "new_to_repo": first_seen_in_snapshot,
+        "first_seen_in_snapshot": first_seen_in_snapshot,
+        "known_via_prior_merged_pr": known_via_prior_merged_pr,
+        "report_reason": "first_seen_in_snapshot" if first_seen_in_snapshot else None,
+        # Provenance markers so consumers can tell this entry was not re-fetched.
+        "enrichment_source": "previous_report",
+        "live_refetch_skipped": True,
+        "account_age_days": age_days,
+        "young_account": age_days is not None and age_days <= 365,
+        # Fields below are copied from the previous report's entry.
+        "follow_through_score": previous_entry.get("follow_through_score"),
+        "breadth_score": previous_entry.get("breadth_score"),
+        "automation_risk_signal": previous_entry.get("automation_risk_signal"),
+        "heuristic_note": previous_entry.get("heuristic_note"),
+        "public_orgs": _previous_report_public_orgs(previous_entry),
+        "activity": _previous_report_activity(previous_entry),
+        # Examples are rebuilt from the current snapshot's artifacts.
+        "examples": {
+            "pull_requests": [
+                _artifact_example(item, "pull_request") for item in row["pull_requests"]
+            ],
+            "issues": [_artifact_example(item, "issue") for item in row["issues"]],
+        },
+        "fetch_error": None,
+    }
+def _previous_report_public_orgs(previous_entry: dict[str, Any]) -> list[str]:
+    """Extract organization logins from a previous entry's "public_orgs" list.
+    Items may be plain strings or dicts with a "login" key; blank values and
+    items of any other type are dropped. A non-list value yields an empty list.
+    """
+    raw_orgs = previous_entry.get("public_orgs")
+    if not isinstance(raw_orgs, list):
+        return []
+    logins: list[str] = []
+    for item in raw_orgs:
+        if isinstance(item, dict):
+            candidate = str(item.get("login") or "").strip()
+        elif isinstance(item, str):
+            candidate = item.strip()
+        else:
+            continue
+        if candidate:
+            logins.append(candidate)
+    return logins
+def _previous_report_activity(previous_entry: dict[str, Any]) -> dict[str, Any]:
+    """Collect the activity metrics of a previous entry into a fixed-shape dict.
+    Metrics are read from the nested "activity" dict when there is one, otherwise
+    from the entry itself; metrics that are absent come back as None.
+    """
+    source = previous_entry.get("activity")
+    if not isinstance(source, dict):
+        source = previous_entry
+    metric_names = (
+        "visible_authored_pr_count",
+        "merged_pr_count",
+        "closed_unmerged_pr_count",
+        "open_pr_count",
+        "merged_pr_rate",
+        "closed_unmerged_pr_rate",
+        "still_open_pr_rate",
+        "distinct_repos_with_authored_prs",
+        "distinct_repos_with_open_prs",
+    )
+    return {metric: source.get(metric) for metric in metric_names}
 def _contributor_entry(
     repo: str,
     row: dict[str, Any],