"""Flask blueprint that serves rows of the selected-tools analysis dataset as JSON.

The dataset named by ``HF_REPO`` is loaded once via ``datasets.load_dataset``,
normalized into plain dicts, and cached in-process. ``POST /reload`` rebuilds
the cache.
"""

import json
import logging

from datasets import load_dataset
from flask import Blueprint, jsonify

logger = logging.getLogger(__name__)

bp = Blueprint("selected_tools", __name__, url_prefix="/api/selected-tools")

# Dataset identifier passed to ``load_dataset`` (presumably a Hugging Face Hub
# repo id -- confirm against deployment docs).
HF_REPO = "timchen0618/browsecomp-plus-selected-tools-analysis-v1"

# Process-wide cache of normalized rows; ``None`` means "not loaded yet".
_cache: list | None = None


def _parse_tool_counts(raw) -> dict:
    """Return the ``tool_call_counts`` value of a row as a dict.

    Args:
        raw: The raw column value. May be a JSON string, an already-decoded
            dict, or a falsy value (``None``/empty string).

    Returns:
        The decoded mapping, or an empty dict when the value is missing,
        is not valid JSON, or decodes to something other than a JSON object.
    """
    if isinstance(raw, dict):
        # Already decoded; json.loads would raise TypeError on a dict and the
        # counts would be lost.
        return raw
    try:
        parsed = json.loads(raw or "{}")
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass. Stay best-effort,
        # but leave a trace instead of swallowing silently.
        logger.warning("Ignoring unparseable tool_call_counts: %r", raw)
        return {}
    if not isinstance(parsed, dict):
        # Valid JSON but not an object (e.g. a list or number); calling
        # .values() on it would fail the entire load.
        logger.warning("Ignoring non-object tool_call_counts: %r", raw)
        return {}
    return parsed


def _build_rows() -> list:
    """Load the ``train`` split of ``HF_REPO`` and normalize every row.

    Returns:
        A list of JSON-serializable dicts, one per dataset row, with
        ``tool_call_counts`` decoded and ``total_tool_calls`` set to the sum
        of its values.

    Raises:
        Exception: Whatever ``load_dataset`` raises, plus ``KeyError`` /
            ``TypeError`` / ``ValueError`` if a row lacks an expected column
            or a value cannot be coerced.
    """
    ds = load_dataset(HF_REPO, split="train")
    rows = []
    for row in ds:
        tool_counts = _parse_tool_counts(row.get("tool_call_counts"))
        rows.append({
            "query_id": str(row["query_id"]),
            "rationale": row["rationale"],
            "selected_indices": row["selected_indices"],
            "k_requested": int(row["k_requested"]),
            "k_effective": int(row["k_effective"]),
            "excerpt": row["excerpt"],
            "new_trajectory": row["new_trajectory"],
            "direct_answer": bool(row["direct_answer"]),
            "tool_call_counts": tool_counts,
            # sum() of an empty mapping's values is 0, so no special case.
            "total_tool_calls": sum(tool_counts.values()),
            "status": row["status"],
        })
    return rows


def _load():
    """Return the cached rows, building the cache on first use."""
    global _cache
    if _cache is None:
        _cache = _build_rows()
    return _cache


@bp.get("/")
def get_data():
    """Return all rows as ``{"rows": [...]}``, or ``{"error": ...}`` with 500."""
    try:
        rows = _load()
    except Exception as e:  # top-level request boundary
        logger.exception("Failed to load %s", HF_REPO)
        return jsonify({"error": str(e)}), 500
    return jsonify({"rows": rows})


@bp.post("/reload")
def reload_data():
    """Rebuild the row cache and report the new row count.

    The fresh rows are built before the cache is replaced, so a failed reload
    leaves the previously cached rows in place instead of discarding them.
    """
    global _cache
    try:
        rows = _build_rows()
    except Exception as e:  # top-level request boundary
        logger.exception("Failed to reload %s", HF_REPO)
        return jsonify({"error": str(e)}), 500
    _cache = rows
    return jsonify({"status": "ok", "count": len(rows)})