"""SWE-bench Lite task adapter for CodeEnv.""" from __future__ import annotations import os from functools import lru_cache from pathlib import Path from typing import Any, Dict, List, Tuple DATASET_ROOT = Path(__file__).parent DEFAULT_TASKS_ROOT = DATASET_ROOT / "swebench_lite_tasks" DIFFICULTIES = ("easy", "medium", "hard") def _difficulty_bounds(total: int) -> Dict[str, Tuple[int, int]]: one_third = max(total // 3, 1) two_third = max((2 * total) // 3, one_third + 1) return { "easy": (0, one_third), "medium": (one_third, two_third), "hard": (two_third, total), } @lru_cache(maxsize=1) def _load_swebench_lite_rows() -> List[Dict[str, Any]]: from datasets import load_dataset ds = load_dataset("princeton-nlp/SWE-bench_Lite", split="test") return [dict(row) for row in ds] def _candidate_dirs(tasks_root: Path, instance_id: str, row_idx: int) -> List[Path]: return [ tasks_root / instance_id, tasks_root / f"instance_{row_idx}", tasks_root / str(row_idx), ] def get_swebench_task(difficulty: str) -> Dict[str, Any]: """ Resolve one SWE-bench Lite task into CodeEnv-compatible task dict. Expected local layout: dataset/swebench_lite_tasks//buggy.py dataset/swebench_lite_tasks//test.py First tries to load from local files, then falls back to HuggingFace dataset. """ diff = (difficulty or "").strip().lower() if diff not in DIFFICULTIES: raise ValueError(f"Invalid difficulty '{difficulty}'. Must be one of {DIFFICULTIES}.") tasks_root = Path(os.getenv("SWEBENCH_TASKS_ROOT", str(DEFAULT_TASKS_ROOT))) # First, try to load from local materialized tasks if tasks_root.exists(): # Find all instance directories instance_dirs = [] for item in tasks_root.iterdir(): if item.is_dir() and (item / "buggy.py").exists() and (item / "test.py").exists(): # Check if this directory matches the difficulty if diff in item.name.lower(): instance_dirs.append(item) if instance_dirs: # Sort for deterministic selection instance_dirs.sort(key=lambda x: x.name) # Select based on SWEBENCH_INDEX preferred_offset = int(os.getenv("SWEBENCH_INDEX", "0")) selected_dir = instance_dirs[preferred_offset % len(instance_dirs)] buggy_file = selected_dir / "buggy.py" test_file = selected_dir / "test.py" metadata_file = selected_dir / "metadata.json" code = buggy_file.read_text(encoding="utf-8") # Load metadata if available metadata = {"source": "swebench_lite", "difficulty": diff} if metadata_file.exists(): import json metadata = json.loads(metadata_file.read_text(encoding="utf-8")) return { "code": code, "tests": str(test_file), "metadata": metadata, "problem_dir": str(selected_dir), "problem_id": selected_dir.name, } # Fallback: try to load from HuggingFace dataset try: rows = _load_swebench_lite_rows() if not rows: raise RuntimeError("SWE-bench Lite split is empty.") bounds = _difficulty_bounds(len(rows)) start, end = bounds[diff] candidates = rows[start:end] if end > start else rows preferred_offset = int(os.getenv("SWEBENCH_INDEX", "0")) # Deterministic scan order with optional offset. ordered = candidates[preferred_offset:] + candidates[:preferred_offset] for row in ordered: row_idx = int(row.get("__index_level_0__", 0)) instance_id = str(row.get("instance_id", f"row_{row_idx}")) for folder in _candidate_dirs(tasks_root, instance_id, row_idx): buggy_file = folder / "buggy.py" test_file = folder / "test.py" if buggy_file.exists() and test_file.exists(): code = buggy_file.read_text(encoding="utf-8") metadata = { "source": "swebench_lite", "instance_id": instance_id, "repo": row.get("repo"), "base_commit": row.get("base_commit"), "problem_statement": row.get("problem_statement"), "difficulty": diff, } return { "code": code, "tests": str(test_file), "metadata": metadata, "problem_dir": str(folder), "problem_id": instance_id, } except Exception as e: # If HuggingFace fails, raise the original error about missing local files pass raise FileNotFoundError( "No materialized SWE-bench task workspace found. " f"Expected buggy.py/test.py under '{tasks_root}'." )