"""Flask blueprint serving rows of the SFT-diff dataset under /api/sft-diff."""

import json
import logging

from datasets import load_dataset
from flask import Blueprint, jsonify

logger = logging.getLogger(__name__)

bp = Blueprint("sft_diff", __name__, url_prefix="/api/sft-diff")

# Dataset repository identifier passed to `load_dataset`.
HF_REPO = "timchen0618/browsecomp-plus-sft-diff-v1"

# Module-level cache of the converted rows; None means "not loaded yet".
_cache: list | None = None


def _parse_json_field(row, key: str):
    """Return the JSON-decoded value of ``row[key]``, or None if missing/empty."""
    raw = row.get(key)
    return json.loads(raw) if raw else None


def _convert_row(row) -> dict:
    """Map one dataset row onto the dict shape returned by the API."""
    return {
        "query_id": str(row["query_id"]),
        "excerpt": row["excerpt"],
        "messages_gpt": _parse_json_field(row, "messages_json"),
        "messages_qwen": _parse_json_field(row, "messages_json_qwen"),
    }


def _fetch_rows() -> list:
    """Load the "train" split of HF_REPO and convert every row.

    Does not read or write the cache, so callers decide when (and whether)
    the result replaces the cached data.
    """
    ds = load_dataset(HF_REPO, split="train")
    return [_convert_row(row) for row in ds]


def _load() -> list:
    """Return the cached rows, fetching and caching them on first use.

    Raises:
        Exception: whatever `load_dataset` or `json.loads` raises; the cache
            is left unset in that case so a later call retries.
    """
    global _cache
    if _cache is None:
        _cache = _fetch_rows()
    return _cache


@bp.get("/")
def get_data():
    """Return all rows as ``{"rows": [...]}``, or ``{"error": ...}`` with 500."""
    try:
        rows = _load()
        return jsonify({"rows": rows})
    except Exception as e:  # request boundary: report instead of crashing
        logger.exception("Failed to load dataset %s", HF_REPO)
        return jsonify({"error": str(e)}), 500


@bp.post("/reload")
def reload_data():
    """Re-fetch the dataset and replace the cache.

    Returns ``{"status": "ok", "count": N}`` on success, or ``{"error": ...}``
    with status 500 on failure.
    """
    global _cache
    try:
        # Fetch BEFORE touching the cache: if the reload fails, the rows that
        # were already cached stay available to GET requests.
        rows = _fetch_rows()
        _cache = rows
        return jsonify({"status": "ok", "count": len(rows)})
    except Exception as e:  # request boundary: report instead of crashing
        logger.exception("Failed to reload dataset %s", HF_REPO)
        return jsonify({"error": str(e)}), 500