"""Flask blueprint exposing the BrowseComp-Plus benchmark rows as JSON.

Routes (all under ``/api/browsecomp``):

* ``GET  /``            -- lightweight listing of every row.
* ``GET  /<query_id>``  -- the full row for a single query id.
* ``POST /reload``      -- re-download the dataset and replace the cache.
"""

import json
import logging

from datasets import load_dataset
from flask import Blueprint, jsonify

logger = logging.getLogger(__name__)

bp = Blueprint("browsecomp", __name__, url_prefix="/api/browsecomp")

HF_REPO = "timchen0618/browsecomp-plus-benchmark"

# Rows parsed from the dataset; ``None`` until the first successful load.
_cache: list | None = None


def _fetch_rows() -> list:
    """Download the ``train`` split of ``HF_REPO`` and return it as dicts.

    ``evidence_docs`` and ``gold_docs`` are stored as JSON-encoded strings in
    the dataset, so they are decoded here once instead of on every request.

    Raises:
        Exception: whatever ``load_dataset`` or ``json.loads`` raises; the
            route handlers translate it into a 500 response.
    """
    ds = load_dataset(HF_REPO, split="train")
    return [
        {
            "query_id": row["query_id"],
            "query": row["query"],
            "answer": row["answer"],
            "evidence_docs": json.loads(row["evidence_docs"]),
            "gold_docs": json.loads(row["gold_docs"]),
        }
        for row in ds
    ]


def _load() -> list:
    """Return the cached rows, fetching them on first use."""
    global _cache
    if _cache is None:
        _cache = _fetch_rows()
    return _cache


@bp.get("/")
def get_list():
    """Return ``{"items": [...], "total": n}`` with one entry per row."""
    try:
        rows = _load()
        # Return lightweight list (just query_id + query preview)
        items = [{"query_id": r["query_id"], "query": r["query"]} for r in rows]
        return jsonify({"items": items, "total": len(items)})
    except Exception as e:
        logger.exception("browsecomp: failed to list rows")
        return jsonify({"error": str(e)}), 500


# The rule needs the ``<query_id>`` variable: without it Flask has nothing to
# pass to the view, and the rule would collide with ``get_list`` on "/".
@bp.get("/<query_id>")
def get_example(query_id: str):
    """Return the full row whose ``query_id`` matches, or a 404 JSON error."""
    try:
        rows = _load()
        # Stored ids are stringified before comparing because the URL variable
        # always arrives as ``str``.
        match = next((r for r in rows if str(r["query_id"]) == query_id), None)
        if match is None:
            return jsonify({"error": "not found"}), 404
        return jsonify(match)
    except Exception as e:
        logger.exception("browsecomp: failed to fetch row %r", query_id)
        return jsonify({"error": str(e)}), 500


@bp.post("/reload")
def reload():
    """Re-download the dataset and replace the in-memory cache.

    The fresh rows are fetched *before* the cache is touched, so a failed
    download leaves the previously loaded rows available to other requests.
    """
    global _cache
    try:
        rows = _fetch_rows()
        _cache = rows
        return jsonify({"status": "ok", "count": len(rows)})
    except Exception as e:
        logger.exception("browsecomp: reload failed")
        return jsonify({"error": str(e)}), 500