evalstate's picture
evalstate HF Staff
Deploy PR search API with issues/contributors routes
114bead verified
from __future__ import annotations
import json
from datetime import UTC, datetime
from slop_farmer.app.pipeline import run_pipeline
from slop_farmer.config import (
AnalysisOptions,
DashboardDataOptions,
FullPipelineOptions,
MarkdownReportOptions,
PipelineOptions,
)
from slop_farmer.reports.analysis import render_markdown_report, run_analysis
from slop_farmer.reports.dashboard import run_dashboard_data
def run_full_pipeline(options: FullPipelineOptions) -> str:
    """Run every pipeline stage for one repository and summarise the outputs.

    The stages run strictly in sequence, each one consuming what the previous
    one returned:

    1. ``run_pipeline`` -- returns the snapshot directory.
    2. ``run_analysis`` -- returns the path of the analysis report.
    3. ``render_markdown_report`` -- returns the path of the markdown report.
    4. ``run_dashboard_data`` -- returns the dashboard output location.

    Everything is rooted at ``<resolved workspace_root>/<repo name>``: the
    ``data`` sub-directory is handed to the first two stages as their output
    directory and ``web/public/data`` to the dashboard stage.

    Args:
        options: User-facing settings. Only a subset of the per-stage options
            is exposed here; the remaining ones are pinned to the fixed
            values spelled out in the body.

    Returns:
        A JSON object (2-space indent) with the keys ``repo``, ``dataset``,
        ``workspace``, ``snapshot_dir``, ``analysis_report_json``,
        ``analysis_report_markdown``, ``dashboard_output_dir`` and
        ``generated_at`` (UTC, whole seconds, ``Z`` suffix).
    """
    repo_name = options.repo.name
    workspace = options.workspace_root.resolve() / repo_name
    snapshot_root = workspace / "data"
    dashboard_root = workspace / "web" / "public" / "data"

    # Stage 1: run the snapshot pipeline. Resume, publishing and the
    # new-contributor report are always switched on; the comment/review caps
    # are always left unset (None).
    pipeline_options = PipelineOptions(
        repo=options.repo,
        output_dir=snapshot_root,
        since=None,
        resume=True,
        http_timeout=180,
        http_max_retries=5,
        max_issues=options.max_issues,
        max_prs=options.max_prs,
        max_issue_comments=None,
        max_reviews_per_pr=None,
        max_review_comments_per_pr=None,
        fetch_timeline=options.fetch_timeline,
        publish=True,
        hf_repo_id=options.dataset,
        private_hf_repo=options.private_hf_repo,
        new_contributor_report=True,
        new_contributor_window_days=options.new_contributor_window_days,
        new_contributor_max_authors=0,
        issue_max_age_days=options.issue_max_age_days,
        pr_max_age_days=options.pr_max_age_days,
    )
    snapshot_dir = run_pipeline(pipeline_options)

    # Stage 2: analyse the snapshot that was just produced; every hf_* option
    # is passed as None, so the local snapshot directory is the only input
    # named here.
    analysis_options = AnalysisOptions(
        snapshot_dir=snapshot_dir,
        output_dir=snapshot_root,
        output=None,
        hf_repo_id=None,
        hf_revision=None,
        hf_materialize_dir=None,
        ranking_backend=options.ranking_backend,
        model=options.model,
        max_clusters=options.max_clusters,
        open_prs_only=options.open_prs_only,
    )
    analysis_report = run_analysis(analysis_options)

    # Stage 3: render the analysis report as markdown (output path left to
    # the renderer by passing None).
    markdown_options = MarkdownReportOptions(
        input=analysis_report,
        output=None,
        snapshot_dir=snapshot_dir,
    )
    markdown_report = render_markdown_report(markdown_options)

    # Stage 4: build the dashboard data from the snapshot, the analysis
    # report and two JSON files addressed inside the snapshot directory.
    dashboard_options = DashboardDataOptions(
        snapshot_dir=snapshot_dir,
        output_dir=dashboard_root,
        analysis_input=analysis_report,
        contributors_input=snapshot_dir / "new-contributors-report.json",
        pr_scope_input=snapshot_dir / "pr-scope-clusters.json",
        window_days=options.dashboard_window_days,
    )
    dashboard_result = run_dashboard_data(dashboard_options)

    summary = {
        "repo": options.repo.slug,
        "dataset": options.dataset,
        "workspace": str(workspace),
        "snapshot_dir": str(snapshot_dir),
        "analysis_report_json": str(analysis_report),
        "analysis_report_markdown": str(markdown_report),
        "dashboard_output_dir": str(dashboard_result),
    }

    # Timestamp is taken last and appended last so the key order of the
    # emitted JSON stays the same; "+00:00" is rewritten to the "Z" suffix.
    now_utc = datetime.now(tz=UTC)
    whole_seconds = now_utc.replace(microsecond=0)
    summary["generated_at"] = whole_seconds.isoformat().replace("+00:00", "Z")

    return json.dumps(summary, indent=2)