Spaces:
Running
Running
| import json | |
| import os | |
| import random | |
| import time | |
| from urllib.parse import quote | |
| from flask import Flask, redirect, render_template, request, session, url_for, abort | |
app = Flask(__name__)
# NOTE(review): the fallback below is a fixed key that ships with the source.
# Anything Flask signs with it (e.g. session cookies) can be forged whenever
# FLASK_SECRET_KEY is unset -- set the variable in any real deployment.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "swebench-practice-hf-key")

# --- Load pre-computed quiz data ---
# All paths are anchored at this file's directory, so the app does not depend
# on the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
SUBMISSIONS_FILE = os.path.join(BASE_DIR, "submissions.json")

# JSON text is UTF-8; be explicit instead of relying on the locale default.
with open(os.path.join(BASE_DIR, "quiz_data.json"), encoding="utf-8") as f:
    QUIZ_DATA = json.load(f)

# The keys of QUIZ_DATA are what the rest of this file calls ``task_dir``.
TRAJECTORY_LIST = list(QUIZ_DATA)
print(f"Loaded quiz data for {len(TRAJECTORY_LIST)} trajectories")

HUB_BASE = "https://hub.harborframework.com/tasks/swe-bench"
def hub_url(task_name):
    """Return the hub URL of the latest version of ``task_name``.

    The task name is percent-encoded with no characters treated as safe, so a
    ``/`` inside the name cannot introduce an extra path segment.
    """
    encoded_name = quote(task_name, safe="")
    return "/".join((HUB_BASE, encoded_name, "latest"))
def save_submission(name, task_name, task_dir, score, total, answers):
    """Append one quiz submission to the JSON log at ``SUBMISSIONS_FILE``.

    The log is a single JSON array. It is read in full, the new entry is
    appended, and the whole array is written back. No locking is performed,
    so concurrent writers can still lose entries.

    Args:
        name: Name the participant entered.
        task_name: Task name of the quizzed trajectory.
        task_dir: Key of the trajectory in the quiz data.
        score: Number of correct answers.
        total: Number of questions.
        answers: JSON-serializable per-question results.
    """
    entry = {
        "name": name,
        "task_name": task_name,
        "task_dir": task_dir,
        "score": score,
        "total": total,
        "answers": answers,
        # Local wall-clock time; no timezone information is recorded.
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }

    submissions = []
    if os.path.isfile(SUBMISSIONS_FILE):
        with open(SUBMISSIONS_FILE, encoding="utf-8") as f:
            try:
                loaded = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Best effort: an unreadable log is restarted, not fatal.
                loaded = []
        # A valid JSON document that is not an array cannot be appended to;
        # treat it like a corrupt log.
        if isinstance(loaded, list):
            submissions = loaded
    submissions.append(entry)

    # Write to a sibling temp file and swap it in, so a crash mid-write
    # cannot leave a truncated log behind.
    tmp_path = SUBMISSIONS_FILE + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(submissions, f, separators=(",", ":"))
    os.replace(tmp_path, SUBMISSIONS_FILE)
| # --- Trajectory viewer helpers --- | |
def _stringify_tool_output(content, max_chars):
    """Return ``content`` as display text, cut to ``max_chars`` characters."""
    if content is None:
        return ""
    text = content if isinstance(content, str) else json.dumps(content)
    if len(text) > max_chars:
        return text[:max_chars] + "\n... (truncated)"
    return text


def format_steps_for_display(data, *, max_output_chars=10000):
    """Format trajectory steps for the Jinja2 template.

    Args:
        data: Parsed trajectory document. Only its ``"steps"`` list is read;
            each step is expected to be a dict (fields are read with
            ``.get``, so most of them may be missing or ``None``).
        max_output_chars: Maximum number of characters of tool output kept
            per tool call before a truncation marker is appended.

    Returns:
        A list with one dict per step, with the keys ``step_id``,
        ``timestamp``, ``source``, ``message``, ``reasoning``,
        ``tool_calls``, ``is_sidechain``, ``is_new_llm_call`` and
        ``llm_call_number``. Each tool call is a dict with ``name``,
        ``args`` and ``output``.
    """
    formatted = []
    prev_timestamp = None
    llm_call_number = 0

    for step in data.get("steps") or []:
        ts = step.get("timestamp") or ""
        source = step.get("source", "")

        # A new LLM call starts when the timestamp changes on a non-user step.
        is_new_llm_call = bool(source != "user" and ts and ts != prev_timestamp)
        if is_new_llm_call:
            llm_call_number += 1
        # Every timestamped step, user steps included, moves the reference.
        if ts:
            prev_timestamp = ts

        # Results are per step, so look them up once rather than per tool call.
        observation = step.get("observation") or {}
        results = observation.get("results") or []

        tool_calls = []
        for tc in step.get("tool_calls") or []:
            call_id = tc.get("tool_call_id")
            # The first result that refers back to this call wins.
            match = next(
                (r for r in results if r.get("source_call_id") == call_id),
                None,
            )
            output = ""
            if match is not None:
                output = _stringify_tool_output(
                    match.get("content", ""), max_output_chars
                )
            tool_calls.append({
                "name": tc.get("function_name", ""),
                "args": tc.get("arguments", {}),
                "output": output,
            })

        formatted.append({
            "step_id": step.get("step_id", 0),
            # Characters 11..18 are kept as HH:MM:SS.
            # NOTE(review): assumes ISO-8601-style timestamps -- confirm.
            "timestamp": ts[11:19] if len(ts) >= 19 else ts,
            "source": source,
            "message": step.get("message", ""),
            "reasoning": step.get("reasoning_content", ""),
            "tool_calls": tool_calls,
            "is_sidechain": (step.get("extra") or {}).get("is_sidechain", False),
            "is_new_llm_call": is_new_llm_call,
            "llm_call_number": llm_call_number,
        })

    return formatted
| # --- Routes --- | |
def home():
    """Render the landing page (``home.html``).

    NOTE(review): no route decorator is visible on this function in this
    view, yet other handlers call ``url_for("home")`` -- confirm it is
    registered under the endpoint name ``home``.
    """
    landing_page = render_template("home.html")
    return landing_page
def go():
    """Start a quiz on a randomly chosen trajectory.

    Reads the participant's name from the ``name`` query parameter. A
    missing or blank name sends the visitor back to the home page; otherwise
    ``quiz.html`` is rendered for one trajectory picked at random from
    ``TRAJECTORY_LIST``.
    """
    participant = request.args.get("name", "").strip()
    if participant == "":
        return redirect(url_for("home"))

    # Pick a random trajectory
    chosen_dir = random.choice(TRAJECTORY_LIST)
    quiz_entry = QUIZ_DATA[chosen_dir]

    template_context = {
        "name": participant,
        "task_dir": chosen_dir,
        "task_name": quiz_entry["task_name"],
        "task_url": hub_url(quiz_entry["task_name"]),
        "viewer_url": url_for("trajectory_view", task_dir=chosen_dir),
        "quiz": quiz_entry["quiz"],
    }
    return render_template("quiz.html", **template_context)
def trajectory_view(task_dir):
    """Render the step-by-step viewer for one trajectory.

    Args:
        task_dir: Key of the trajectory in ``QUIZ_DATA``; also the stem of
            its JSON file inside ``DATA_DIR``.

    Returns:
        The rendered ``trajectory.html`` page. Aborts with 404 when
        ``task_dir`` is not a known quiz key or its data file is missing.
    """
    # Only known quiz keys ever reach the filesystem, so a request cannot
    # point the path below at an arbitrary file.
    if task_dir not in QUIZ_DATA:
        abort(404)

    traj_path = os.path.join(DATA_DIR, f"{task_dir}.json")
    if not os.path.isfile(traj_path):
        abort(404)

    with open(traj_path, encoding="utf-8") as f:
        data = json.load(f)

    quiz_entry = QUIZ_DATA[task_dir]

    # Get resolution status from quiz data (Q7 answer). A quiz with fewer
    # than seven questions, or a Q7 without an answer, counts as unresolved
    # instead of failing the request.
    q7_index = 6
    quiz = quiz_entry["quiz"]
    resolved = len(quiz) > q7_index and quiz[q7_index].get("answer") == "Passed"

    return render_template(
        "trajectory.html",
        task_name=quiz_entry["task_name"],
        task_dir=task_dir,
        steps=format_steps_for_display(data),
        agent_info=data.get("agent", {}),
        resolved=resolved,
    )
def submit():
    """Grade a submitted quiz, record the attempt and render the result page.

    The trajectory key and participant name come from the hidden form fields
    ``_task_dir`` and ``_name`` (no session dependency). Each answer is read
    from the form field named after the question's ``id``. An unknown or
    missing ``_task_dir`` redirects to the home page without recording
    anything.
    """
    task_dir = request.form.get("_task_dir", "")
    # A missing, blank or whitespace-only name is recorded as "Anonymous".
    name = request.form.get("_name", "").strip() or "Anonymous"

    if not task_dir or task_dir not in QUIZ_DATA:
        return redirect(url_for("home"))

    entry = QUIZ_DATA[task_dir]
    quiz = entry["quiz"]
    task_name = entry["task_name"]

    results = []
    for q in quiz:
        student_answer = request.form.get(q["id"], "")
        correct = q["answer"]
        results.append({
            "text": q["text"],
            "student_answer": student_answer,
            "correct_answer": correct,
            "is_correct": student_answer == correct,
            "reason": q.get("reason", ""),
        })

    score = sum(1 for r in results if r["is_correct"])
    total = len(quiz)
    # The pass mark is a fixed 5 correct answers, independent of ``total``.
    passed = score >= 5

    save_submission(name, task_name, task_dir, score, total, results)

    return render_template(
        "result.html",
        name=name,
        task_name=task_name,
        score=score,
        total=total,
        passed=passed,
        results=results,
    )
if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive debugger
    # (which lets a client run arbitrary Python) must not be on by default.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)