Spaces:
Running
Running
File size: 5,352 Bytes
"""SWE-bench Lite task adapter for CodeEnv."""
from __future__ import annotations
import os
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Tuple
# Directory that contains this module.
DATASET_ROOT = Path(__file__).parent
# Default location of materialized task folders; get_swebench_task uses it
# when the SWEBENCH_TASKS_ROOT environment variable is not set.
DEFAULT_TASKS_ROOT = DATASET_ROOT / "swebench_lite_tasks"
# Difficulty labels accepted by get_swebench_task.
DIFFICULTIES = ("easy", "medium", "hard")
def _difficulty_bounds(total: int) -> Dict[str, Tuple[int, int]]:
one_third = max(total // 3, 1)
two_third = max((2 * total) // 3, one_third + 1)
return {
"easy": (0, one_third),
"medium": (one_third, two_third),
"hard": (two_third, total),
}
@lru_cache(maxsize=1)
def _load_swebench_lite_rows() -> List[Dict[str, Any]]:
    """Load the SWE-bench Lite ``test`` split as a list of plain dicts.

    The result is memoized with ``lru_cache``, so the dataset is loaded at
    most once per process and every caller receives the same list object.
    """
    # Imported inside the function, so ``datasets`` is only required when
    # this loader is actually called.
    from datasets import load_dataset

    split = load_dataset("princeton-nlp/SWE-bench_Lite", split="test")
    rows: List[Dict[str, Any]] = []
    for record in split:
        rows.append(dict(record))
    return rows
def _candidate_dirs(tasks_root: Path, instance_id: str, row_idx: int) -> List[Path]:
return [
tasks_root / instance_id,
tasks_root / f"instance_{row_idx}",
tasks_root / str(row_idx),
]
def _swebench_index() -> int:
    """Return the task offset requested via the SWEBENCH_INDEX env variable (default 0)."""
    raw = os.getenv("SWEBENCH_INDEX", "0")
    try:
        return int(raw)
    except ValueError:
        raise ValueError(f"SWEBENCH_INDEX must be an integer, got {raw!r}.") from None


def get_swebench_task(difficulty: str) -> Dict[str, Any]:
    """
    Resolve one SWE-bench Lite task into a CodeEnv-compatible task dict.

    Expected local layout:
        dataset/swebench_lite_tasks/<instance_id>/buggy.py
        dataset/swebench_lite_tasks/<instance_id>/test.py

    Resolution order:
      1. Local folders under the tasks root whose name contains the
         difficulty label and that hold both ``buggy.py`` and ``test.py``.
      2. Rows of the HuggingFace SWE-bench Lite split that fall in the
         difficulty band, matched against folders named ``<instance_id>``,
         ``instance_<row_idx>`` or ``<row_idx>`` under the tasks root.

    Environment variables:
        SWEBENCH_TASKS_ROOT: overrides the default tasks root.
        SWEBENCH_INDEX: integer offset used to pick among the candidates;
            it wraps around the number of candidates in both steps.

    Args:
        difficulty: One of ``DIFFICULTIES``; surrounding whitespace and
            letter case are ignored.

    Returns:
        A dict with the keys ``code`` (contents of ``buggy.py``), ``tests``
        (path of ``test.py`` as a string), ``metadata``, ``problem_dir`` and
        ``problem_id``.

    Raises:
        ValueError: If ``difficulty`` is not recognised, or if SWEBENCH_INDEX
            is not an integer while selecting among local folders.
        FileNotFoundError: If no task workspace could be resolved. When the
            dataset fallback itself failed, that failure is attached as
            ``__cause__``.
    """
    diff = (difficulty or "").strip().lower()
    if diff not in DIFFICULTIES:
        raise ValueError(f"Invalid difficulty '{difficulty}'. Must be one of {DIFFICULTIES}.")

    tasks_root = Path(os.getenv("SWEBENCH_TASKS_ROOT", str(DEFAULT_TASKS_ROOT)))

    # Step 1: local materialized tasks. ``is_dir`` (rather than ``exists``)
    # keeps ``iterdir`` from raising when the root points at a regular file.
    if tasks_root.is_dir():
        instance_dirs = sorted(
            (
                item
                for item in tasks_root.iterdir()
                if item.is_dir()
                and (item / "buggy.py").exists()
                and (item / "test.py").exists()
                and diff in item.name.lower()
            ),
            key=lambda path: path.name,  # sorted for deterministic selection
        )
        if instance_dirs:
            selected_dir = instance_dirs[_swebench_index() % len(instance_dirs)]
            test_file = selected_dir / "test.py"
            metadata_file = selected_dir / "metadata.json"
            code = (selected_dir / "buggy.py").read_text(encoding="utf-8")

            # A metadata.json, when present, replaces the default metadata.
            metadata = {"source": "swebench_lite", "difficulty": diff}
            if metadata_file.exists():
                import json

                metadata = json.loads(metadata_file.read_text(encoding="utf-8"))

            return {
                "code": code,
                "tests": str(test_file),
                "metadata": metadata,
                "problem_dir": str(selected_dir),
                "problem_id": selected_dir.name,
            }

    # Step 2: best-effort fallback through the HuggingFace dataset. Any
    # failure here (missing ``datasets`` package, no network, malformed row,
    # bad SWEBENCH_INDEX) is remembered and chained to the final error
    # instead of being silently discarded.
    fallback_error = None
    try:
        rows = _load_swebench_lite_rows()
        if not rows:
            raise RuntimeError("SWE-bench Lite split is empty.")

        start, end = _difficulty_bounds(len(rows))[diff]
        # Pair every row with its absolute dataset position so that rows
        # lacking an explicit index column still get distinct row indices.
        indexed_rows = list(enumerate(rows))
        # An empty or inverted band falls back to scanning every row.
        candidates = indexed_rows[start:end] or indexed_rows

        # Deterministic scan order; the offset wraps like the local path.
        offset = _swebench_index() % len(candidates)
        ordered = candidates[offset:] + candidates[:offset]

        for position, row in ordered:
            raw_idx = row.get("__index_level_0__")
            row_idx = position if raw_idx is None else int(raw_idx)
            instance_id = str(row.get("instance_id", f"row_{row_idx}"))
            for folder in _candidate_dirs(tasks_root, instance_id, row_idx):
                buggy_file = folder / "buggy.py"
                test_file = folder / "test.py"
                if buggy_file.exists() and test_file.exists():
                    code = buggy_file.read_text(encoding="utf-8")
                    metadata = {
                        "source": "swebench_lite",
                        "instance_id": instance_id,
                        "repo": row.get("repo"),
                        "base_commit": row.get("base_commit"),
                        "problem_statement": row.get("problem_statement"),
                        "difficulty": diff,
                    }
                    return {
                        "code": code,
                        "tests": str(test_file),
                        "metadata": metadata,
                        "problem_dir": str(folder),
                        "problem_id": instance_id,
                    }
    except Exception as exc:
        # Deliberately broad: the fallback must never mask the
        # "missing local files" error callers expect.
        fallback_error = exc

    raise FileNotFoundError(
        "No materialized SWE-bench task workspace found. "
        f"Expected buggy.py/test.py under '{tasks_root}'."
    ) from fallback_error
|