"""Flask quiz app built around pre-computed agent trajectories.

At import time the module loads ``quiz_data.json`` (a mapping from a
trajectory directory name to its task name and quiz questions).  It then
serves four routes:

* ``/``                        - landing page.
* ``/go``                      - picks a random trajectory and renders its quiz.
* ``/trajectory/<task_dir>``   - renders the step-by-step trajectory viewer.
* ``/submit``                  - grades a submitted quiz and records the result.
"""
import json
import os
import random
import time
from urllib.parse import quote

from flask import Flask, redirect, render_template, request, session, url_for, abort

app = Flask(__name__)
# NOTE(review): the fallback below is a hard-coded development key; set
# FLASK_SECRET_KEY in any real deployment.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "swebench-practice-hf-key")

# --- Load pre-computed quiz data ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
SUBMISSIONS_FILE = os.path.join(BASE_DIR, "submissions.json")

with open(os.path.join(BASE_DIR, "quiz_data.json"), encoding="utf-8") as f:
    QUIZ_DATA = json.load(f)

TRAJECTORY_LIST = list(QUIZ_DATA.keys())
print(f"Loaded quiz data for {len(TRAJECTORY_LIST)} trajectories")

HUB_BASE = "https://hub.harborframework.com/tasks/swe-bench"

# Tool output longer than this many characters is cut off in the viewer.
MAX_TOOL_OUTPUT_CHARS = 10000
# Minimum number of correct answers for a submission to count as passed.
PASSING_SCORE = 5
# Zero-based index of the quiz question (Q7) whose answer records whether
# the trajectory passed.
RESOLUTION_QUESTION_INDEX = 6


def hub_url(task_name):
    """Return the hub URL for *task_name*, percent-encoding every reserved character."""
    return f"{HUB_BASE}/{quote(task_name, safe='')}/latest"


def save_submission(name, task_name, task_dir, score, total, answers):
    """Append one graded quiz attempt to ``SUBMISSIONS_FILE``.

    The file holds a single JSON list.  It is read in full, extended with the
    new entry and rewritten.  A missing file, undecodable JSON, or JSON that
    is not a list are all treated as "no previous submissions".

    Args:
        name: Display name entered by the participant.
        task_name: Task name of the trajectory that was quizzed.
        task_dir: Key of the trajectory in ``QUIZ_DATA``.
        score: Number of correctly answered questions.
        total: Number of questions in the quiz.
        answers: Per-question result dicts as built by ``submit``.
    """
    entry = {
        "name": name,
        "task_name": task_name,
        "task_dir": task_dir,
        "score": score,
        "total": total,
        "answers": answers,
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }

    submissions = []
    if os.path.isfile(SUBMISSIONS_FILE):
        with open(SUBMISSIONS_FILE, encoding="utf-8") as f:
            try:
                loaded = json.load(f)
            except json.JSONDecodeError:
                loaded = []
        # Valid JSON that is not a list (e.g. ``{}``) cannot be appended to.
        if isinstance(loaded, list):
            submissions = loaded

    submissions.append(entry)
    with open(SUBMISSIONS_FILE, "w", encoding="utf-8") as f:
        json.dump(submissions, f, separators=(",", ":"))


# --- Trajectory viewer helpers ---
def _tool_output(results, call_id):
    """Return the display text of the first result whose ``source_call_id`` is *call_id*.

    Non-string content is JSON-encoded.  Text longer than
    ``MAX_TOOL_OUTPUT_CHARS`` is cut and marked as truncated.  Returns ``""``
    when no result matches.
    """
    for result in results:
        if result.get("source_call_id") != call_id:
            continue
        content = result.get("content", "")
        text = content if isinstance(content, str) else json.dumps(content)
        if len(text) > MAX_TOOL_OUTPUT_CHARS:
            text = text[:MAX_TOOL_OUTPUT_CHARS] + "\n... (truncated)"
        return text
    return ""


def format_steps_for_display(data):
    """Format trajectory steps for the Jinja2 template.

    Args:
        data: Parsed trajectory JSON; its ``"steps"`` list is read.

    Returns:
        A list with one dict per step containing the keys ``step_id``,
        ``timestamp`` (``HH:MM:SS`` when the raw value is long enough),
        ``source``, ``message``, ``reasoning``, ``tool_calls``,
        ``is_sidechain``, ``is_new_llm_call`` and ``llm_call_number``.
    """
    formatted = []
    prev_timestamp = None
    llm_call_number = 0

    for step in data.get("steps") or []:
        # ``or`` also covers keys that are present but null.
        ts = step.get("timestamp") or ""
        source = step.get("source", "")

        # A new LLM call starts when timestamp changes for agent steps
        is_new_llm_call = bool(source != "user" and ts and ts != prev_timestamp)
        if is_new_llm_call:
            llm_call_number += 1
        if ts:
            prev_timestamp = ts

        # The observation belongs to the step, not to an individual tool
        # call, so look it up once.
        observation = step.get("observation") or {}
        results = observation.get("results") or []

        tool_calls = [
            {
                "name": tc.get("function_name", ""),
                "args": tc.get("arguments", {}),
                "output": _tool_output(results, tc.get("tool_call_id")),
            }
            for tc in step.get("tool_calls") or []
        ]

        formatted.append({
            "step_id": step.get("step_id", 0),
            "timestamp": ts[11:19] if len(ts) >= 19 else ts,  # Show HH:MM:SS only
            "source": source,
            "message": step.get("message", ""),
            "reasoning": step.get("reasoning_content", ""),
            "tool_calls": tool_calls,
            "is_sidechain": (step.get("extra") or {}).get("is_sidechain", False),
            "is_new_llm_call": is_new_llm_call,
            "llm_call_number": llm_call_number,
        })

    return formatted


# --- Routes ---
@app.route("/")
def home():
    """Render the landing page."""
    return render_template("home.html")


@app.route("/go")
def go():
    """Render a quiz for a randomly chosen trajectory.

    Redirects to the landing page when the ``name`` query parameter is
    missing or blank.
    """
    name = request.args.get("name", "").strip()
    if not name:
        return redirect(url_for("home"))

    # Pick a random trajectory
    task_dir = random.choice(TRAJECTORY_LIST)
    entry = QUIZ_DATA[task_dir]

    task_url = hub_url(entry["task_name"])
    viewer_url = url_for("trajectory_view", task_dir=task_dir)

    return render_template(
        "quiz.html",
        name=name,
        task_dir=task_dir,
        task_name=entry["task_name"],
        viewer_url=viewer_url,
        task_url=task_url,
        quiz=entry["quiz"],
    )


# The URL variable is required: the view takes ``task_dir`` and ``go`` builds
# links with ``url_for(..., task_dir=...)``.  The ``path`` converter also
# accepts keys containing "/"; the value is validated against QUIZ_DATA
# before it is used to build a file path.
@app.route("/trajectory/<path:task_dir>")
def trajectory_view(task_dir):
    """Render the trajectory viewer for *task_dir*.

    Responds with 404 when *task_dir* is not a key of ``QUIZ_DATA`` or its
    trajectory JSON file does not exist under ``DATA_DIR``.
    """
    if task_dir not in QUIZ_DATA:
        abort(404)

    traj_path = os.path.join(DATA_DIR, f"{task_dir}.json")
    if not os.path.isfile(traj_path):
        abort(404)

    with open(traj_path, encoding="utf-8") as f:
        data = json.load(f)

    steps = format_steps_for_display(data)
    entry = QUIZ_DATA[task_dir]
    task_name = entry["task_name"]
    agent_info = data.get("agent", {})

    # Get resolution status from quiz data (Q7 answer).  Quizzes with fewer
    # questions are shown as unresolved instead of raising.
    quiz = entry["quiz"]
    resolved = (
        len(quiz) > RESOLUTION_QUESTION_INDEX
        and quiz[RESOLUTION_QUESTION_INDEX].get("answer") == "Passed"
    )

    return render_template(
        "trajectory.html",
        task_name=task_name,
        task_dir=task_dir,
        steps=steps,
        agent_info=agent_info,
        resolved=resolved,
    )


@app.route("/submit", methods=["POST"])
def submit():
    """Grade a submitted quiz, record the attempt and render the result page.

    Redirects to the landing page when the ``_task_dir`` form field is
    missing or is not a key of ``QUIZ_DATA``.
    """
    # Read from hidden form fields (no session dependency)
    task_dir = request.form.get("_task_dir", "")
    # A blank name is recorded as "Anonymous", same as a missing one.
    name = request.form.get("_name", "").strip() or "Anonymous"

    if not task_dir or task_dir not in QUIZ_DATA:
        return redirect(url_for("home"))

    entry = QUIZ_DATA[task_dir]
    quiz = entry["quiz"]
    task_name = entry["task_name"]

    results = []
    for q in quiz:
        student_answer = request.form.get(q["id"], "")
        correct = q["answer"]
        results.append({
            "text": q["text"],
            "student_answer": student_answer,
            "correct_answer": correct,
            "is_correct": student_answer == correct,
            "reason": q.get("reason", ""),
        })

    score = sum(1 for r in results if r["is_correct"])
    total = len(quiz)
    passed = score >= PASSING_SCORE

    save_submission(name, task_name, task_dir, score, total, results)

    return render_template(
        "result.html",
        name=name,
        task_name=task_name,
        score=score,
        total=total,
        passed=passed,
        results=results,
    )


if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive debugger
    # (which allows arbitrary code execution) must be opted into explicitly.
    debug = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(host="0.0.0.0", port=7860, debug=debug)