| import json | |
| import os | |
| import subprocess | |
| from pathlib import Path | |
| import gradio as gr | |
| import pandas as pd | |
| from src.download_swebench_leaderboard import download_leaderboard, get_leaderboard | |
# Root directory for locally cached artifacts (relative to the working directory).
DATA_DIR = Path("data")
# Trajectories for each model are stored in a sub-folder of this directory.
TRAJS_DIR = DATA_DIR.joinpath("swebench_trajs")
# Local JSON copy of the leaderboard; reused when present instead of downloading again.
LEADERBOARD_CACHE = DATA_DIR.joinpath("swebench_leaderboard_latest.json")
# S3 prefix under which each model folder keeps its "trajs/" directory.
S3_BUCKET = "s3://swe-bench-experiments/bash-only"
def load_or_download_leaderboard():
    """Return the leaderboard data, downloading it first if no cache exists.

    The file at ``LEADERBOARD_CACHE`` is used when it is already present.
    Otherwise ``download_leaderboard`` is called with ``DATA_DIR`` as its
    output directory and the file it reports is moved to the cache path
    before being parsed.

    Returns:
        The object decoded from the cached JSON file.

    Raises:
        OSError: If the cache file cannot be moved into place or read.
        json.JSONDecodeError: If the cached file is not valid JSON.
    """
    if not LEADERBOARD_CACHE.exists():
        downloaded = download_leaderboard(output_dir=str(DATA_DIR))
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises on Windows if the cache file appeared after the
        # exists() check above.
        os.replace(downloaded, LEADERBOARD_CACHE)

    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(LEADERBOARD_CACHE, encoding="utf-8") as f:
        return json.load(f)
def get_bash_only_df():
    """Build the table of "bash-only" leaderboard results.

    Returns:
        A DataFrame with one row per result and the columns ``name``,
        ``date``, ``cost``, ``instance_cost``, ``instance_calls``, ``folder``,
        ``os_model`` and ``os_system``. An empty DataFrame without columns is
        returned when there is no "bash-only" leaderboard or it has no
        results.
    """
    data = load_or_download_leaderboard()
    leaderboards = data.get("leaderboards", [])
    # .get() so that an entry without a "name" key is skipped instead of
    # aborting the search with KeyError.
    bash_only = next(
        (lb for lb in leaderboards if lb.get("name") == "bash-only"), None
    )
    if not bash_only:
        return pd.DataFrame()

    def _rounded(entry, key, digits):
        # A key that is present but null in the JSON yields None, which
        # round() rejects; treat it like a missing value.
        value = entry.get(key)
        return round(0 if value is None else value, digits)

    # NOTE(review): the two marker strings below look mis-encoded (probably
    # emoji originally) - confirm the file's encoding. Kept unchanged here.
    rows = [
        {
            "name": r.get("name", ""),
            "date": r.get("date", ""),
            "cost": _rounded(r, "cost", 2),
            "instance_cost": _rounded(r, "instance_cost", 4),
            "instance_calls": r.get("instance_calls", 0),
            "folder": r.get("folder", ""),
            "os_model": "โ " if r.get("os_model") else "โ",
            "os_system": "โ " if r.get("os_system") else "โ",
        }
        for r in bash_only.get("results") or []
    ]
    return pd.DataFrame(rows)
def get_model_details(folder: str):
    """Look up a "bash-only" leaderboard result by its folder identifier.

    Returns a ``(model, error)`` pair. On success ``model`` is the matching
    result entry and ``error`` is ``None``; otherwise ``model`` is ``None``
    and ``error`` is a message describing what is missing.
    """
    if not folder:
        return None, "Select a model from the table"

    boards = load_or_download_leaderboard().get("leaderboards", [])

    # Take the first leaderboard named "bash-only", stopping at the match.
    bash_only = None
    for board in boards:
        if board["name"] == "bash-only":
            bash_only = board
            break
    if not bash_only:
        return None, "Leaderboard not found"

    # Take the first result whose "folder" equals the requested one.
    model = None
    for entry in bash_only["results"]:
        if entry.get("folder") == folder:
            model = entry
            break
    if not model:
        return None, f"Model with folder '{folder}' not found"

    return model, None
def _count_trajectory_files(directory):
    """Count files matching ``*/*.traj.json`` under *directory*.

    Falls back to counting ``*.json`` files directly inside *directory* when
    the first pattern matches nothing.
    """
    nested = sum(1 for _ in directory.glob("*/*.traj.json"))
    if nested:
        return nested
    return sum(1 for _ in directory.glob("*.json"))


def download_trajectories_from_s3(folder: str, progress=gr.Progress()):
    """Download a model's trajectories from S3 and return a status message.

    Args:
        folder: Folder identifier of the model on the bash-only leaderboard.
        progress: Gradio progress tracker; called once before the download
            starts.

    Returns:
        A human-readable status string. Failures of the download itself
        (non-zero exit status, timeout, missing AWS CLI, other exceptions)
        are reported in the returned string rather than raised.
    """
    # NOTE(review): the leading glyph of each status string looks mis-encoded
    # (probably an emoji originally) - confirm the file's encoding. The
    # strings are kept unchanged here.
    if not folder:
        return "โ No model selected"

    model, error = get_model_details(folder)
    if error:
        return f"โ {error}"

    output_dir = TRAJS_DIR / folder
    # NOTE(review): any non-empty directory counts as "already downloaded",
    # so files left behind by a failed or timed-out run are reported as
    # complete - confirm whether that is acceptable.
    if output_dir.exists() and any(output_dir.iterdir()):
        file_count = _count_trajectory_files(output_dir)
        return f"โ Already downloaded: {output_dir}\n\n{file_count} trajectory files"

    s3_path = f"{S3_BUCKET}/{folder}/trajs/"
    output_dir.mkdir(parents=True, exist_ok=True)
    progress(0, desc="Starting S3 download...")

    try:
        # Argument list (no shell); --no-sign-request makes the request anonymous.
        result = subprocess.run(
            ["aws", "s3", "cp", "--recursive", s3_path, str(output_dir), "--no-sign-request"],
            capture_output=True,
            text=True,
            timeout=600,
        )
        if result.returncode != 0:
            return f"โ S3 download failed:\n{result.stderr}"

        file_count = _count_trajectory_files(output_dir)

        # Tolerate a missing or null "per_instance_details" entry.
        per_instance = model.get("per_instance_details") or {}
        resolved_count = sum(1 for v in per_instance.values() if v.get("resolved"))
        total_count = len(per_instance)

        summary = (
            f"โ Downloaded to {output_dir}\n\n{file_count} trajectory files\n"
            f"Resolved: {resolved_count}/{total_count}"
        )
        # Only show a percentage when there is something to divide by;
        # otherwise a successful download would be reported as an error.
        if total_count:
            summary += f" ({100 * resolved_count / total_count:.1f}%)"
        return summary
    except subprocess.TimeoutExpired:
        return "โ Download timed out (>10 min)"
    except FileNotFoundError:
        # Raised by subprocess.run when the "aws" executable is not on PATH.
        return "โ AWS CLI not found. Install with: pip install awscli"
    except Exception as e:
        return f"โ Error: {e}"
def on_row_select(evt: gr.SelectData, df: pd.DataFrame):
    """Handle a selection in the leaderboard table.

    Returns a ``(folder, name, button_update)`` triple. With no selection
    index, both strings are empty and the button is left untouched;
    otherwise the selected row's values are returned and the button is made
    interactive.
    """
    selection = evt.index
    if selection is None:
        return "", "", gr.update()

    # The index may be a sequence (its first element is the row) or a bare value.
    if isinstance(selection, (list, tuple)):
        position = selection[0]
    else:
        position = selection

    record = df.iloc[position]
    return record["folder"], record["name"], gr.update(interactive=True)
def build_app():
    """Assemble the Gradio UI for browsing the bash-only leaderboard.

    The page shows the leaderboard table next to a side panel with the
    selected model's name and folder, a download button and a status box.
    Selecting a row fills the panel and enables the button; clicking the
    button runs ``download_trajectories_from_s3`` for the selected folder.

    Returns:
        The constructed ``gr.Blocks`` application (not launched).
    """
    leaderboard_df = get_bash_only_df()

    # NOTE(review): the glyphs in the heading and button label look
    # mis-encoded (probably emoji originally) - confirm the file's encoding.
    with gr.Blocks(title="SWE-bench Routing Cost Calculator") as app:
        gr.Markdown("# ๐งฎ SWE-bench Bash-Only Leaderboard")
        gr.Markdown("Select a model to use as base for cost analysis")

        with gr.Row():
            # Wide column: the read-only leaderboard table.
            with gr.Column(scale=3):
                table = gr.Dataframe(
                    label="Bash-Only Leaderboard",
                    value=leaderboard_df,
                    wrap=True,
                    interactive=False,
                )

            # Narrow column: details of the selected model and the download action.
            with gr.Column(scale=1):
                gr.Markdown("### Selected Model")
                name_box = gr.Textbox(interactive=False, label="Model Name")
                folder_box = gr.Textbox(interactive=False, label="Folder ID")
                fetch_button = gr.Button("๐ฅ Download Trajectories", interactive=False)
                status_box = gr.Textbox(lines=3, interactive=False, label="Status")

        # Output order matches on_row_select's return value: folder, name, button update.
        table.select(
            fn=on_row_select,
            inputs=[table],
            outputs=[folder_box, name_box, fetch_button],
        )
        fetch_button.click(
            fn=download_trajectories_from_s3,
            inputs=[folder_box],
            outputs=[status_box],
        )

    return app
if __name__ == "__main__":
    # Script entry point: build the UI and launch it.
    build_app().launch()