File size: 5,352 Bytes
03a907a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""SWE-bench Lite task adapter for CodeEnv."""

from __future__ import annotations

import os
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Tuple


DATASET_ROOT = Path(__file__).parent
DEFAULT_TASKS_ROOT = DATASET_ROOT / "swebench_lite_tasks"
DIFFICULTIES = ("easy", "medium", "hard")


def _difficulty_bounds(total: int) -> Dict[str, Tuple[int, int]]:
    one_third = max(total // 3, 1)
    two_third = max((2 * total) // 3, one_third + 1)
    return {
        "easy": (0, one_third),
        "medium": (one_third, two_third),
        "hard": (two_third, total),
    }


@lru_cache(maxsize=1)
def _load_swebench_lite_rows() -> List[Dict[str, Any]]:
    """Return the ``test`` split of ``princeton-nlp/SWE-bench_Lite`` as plain dicts.

    The ``datasets`` package is imported lazily inside the function, so the
    module can be imported without it. The result is memoised with
    ``lru_cache``, so the split is loaded at most once per process.

    Returns:
        One ``dict`` per dataset row, in dataset order.
    """
    import datasets

    split = datasets.load_dataset("princeton-nlp/SWE-bench_Lite", split="test")
    # Copy every row into an ordinary dict.
    return list(map(dict, split))


def _candidate_dirs(tasks_root: Path, instance_id: str, row_idx: int) -> List[Path]:
    return [
        tasks_root / instance_id,
        tasks_root / f"instance_{row_idx}",
        tasks_root / str(row_idx),
    ]


def _matching_local_dirs(tasks_root: Path, diff: str) -> List[Path]:
    """Return task folders under ``tasks_root`` for ``diff``, sorted by name.

    A folder qualifies when it contains both ``buggy.py`` and ``test.py``
    and its lower-cased name contains ``diff`` as a substring.
    """
    return sorted(
        (
            item
            for item in tasks_root.iterdir()
            if item.is_dir()
            and diff in item.name.lower()
            and (item / "buggy.py").exists()
            and (item / "test.py").exists()
        ),
        key=lambda folder: folder.name,
    )


def _task_from_dir(folder: Path, metadata: Any, problem_id: str) -> Dict[str, Any]:
    """Build the CodeEnv task dict for a folder holding ``buggy.py``/``test.py``."""
    return {
        "code": (folder / "buggy.py").read_text(encoding="utf-8"),
        "tests": str(folder / "test.py"),
        "metadata": metadata,
        "problem_dir": str(folder),
        "problem_id": problem_id,
    }


def get_swebench_task(difficulty: str) -> Dict[str, Any]:
    """Resolve one SWE-bench Lite task into a CodeEnv-compatible task dict.

    Expected local layout (the root can be overridden with the
    ``SWEBENCH_TASKS_ROOT`` environment variable)::

        dataset/swebench_lite_tasks/<folder>/buggy.py
        dataset/swebench_lite_tasks/<folder>/test.py
        dataset/swebench_lite_tasks/<folder>/metadata.json   (optional)

    Resolution order:

    1. Local folders whose name contains the difficulty string. They are
       sorted by name and the one at ``SWEBENCH_INDEX`` (modulo the number
       of matches, default 0) is used. If ``metadata.json`` exists, its
       parsed content replaces the default metadata.
    2. Otherwise the SWE-bench Lite rows are loaded, restricted to the index
       range for the difficulty, rotated by ``SWEBENCH_INDEX``, and the first
       row that has a materialized folder (see ``_candidate_dirs``) wins.

    Args:
        difficulty: One of ``DIFFICULTIES``; surrounding whitespace and case
            are ignored.

    Returns:
        A dict with the keys ``code`` (contents of ``buggy.py``), ``tests``
        (path of ``test.py``), ``metadata``, ``problem_dir`` and
        ``problem_id``.

    Raises:
        ValueError: If ``difficulty`` is not recognised, or if
            ``SWEBENCH_INDEX`` is not an integer while selecting a local task.
        FileNotFoundError: If no materialized task can be found. When the
            dataset fallback itself failed, that failure is attached as
            ``__cause__``.
    """
    diff = (difficulty or "").strip().lower()
    if diff not in DIFFICULTIES:
        raise ValueError(f"Invalid difficulty '{difficulty}'. Must be one of {DIFFICULTIES}.")

    tasks_root = Path(os.getenv("SWEBENCH_TASKS_ROOT", str(DEFAULT_TASKS_ROOT)))
    not_found_message = (
        "No materialized SWE-bench task workspace found. "
        f"Expected buggy.py/test.py under '{tasks_root}'."
    )

    # Both lookups need files below tasks_root; without that directory nothing
    # can match, so skip the (potentially slow, networked) dataset load.
    if not tasks_root.is_dir():
        raise FileNotFoundError(not_found_message)

    # First, try the locally materialized tasks tagged with the difficulty.
    local_dirs = _matching_local_dirs(tasks_root, diff)
    if local_dirs:
        preferred_offset = int(os.getenv("SWEBENCH_INDEX", "0"))
        selected_dir = local_dirs[preferred_offset % len(local_dirs)]

        metadata: Any = {"source": "swebench_lite", "difficulty": diff}
        metadata_file = selected_dir / "metadata.json"
        if metadata_file.exists():
            import json

            metadata = json.loads(metadata_file.read_text(encoding="utf-8"))

        return _task_from_dir(selected_dir, metadata, selected_dir.name)

    # Fallback: walk the dataset rows and look for a matching materialized folder.
    try:
        rows = _load_swebench_lite_rows()
        if not rows:
            raise RuntimeError("SWE-bench Lite split is empty.")

        start, end = _difficulty_bounds(len(rows))[diff]
        # Keep each row's absolute position so it can serve as its index.
        indexed_rows = list(enumerate(rows))
        # Tiny splits can yield an empty or inverted range; use every row then.
        candidates = indexed_rows[start:end] or indexed_rows

        # Deterministic scan order, rotated by the optional offset.
        preferred_offset = int(os.getenv("SWEBENCH_INDEX", "0")) % len(candidates)
        ordered = candidates[preferred_offset:] + candidates[:preferred_offset]

        for position, row in ordered:
            raw_idx = row.get("__index_level_0__")
            # Rows without an explicit index column are identified by position;
            # defaulting to 0 would make every row probe the same folders.
            row_idx = position if raw_idx is None else int(raw_idx)
            instance_id = str(row.get("instance_id", f"row_{row_idx}"))
            for folder in _candidate_dirs(tasks_root, instance_id, row_idx):
                if (folder / "buggy.py").exists() and (folder / "test.py").exists():
                    metadata = {
                        "source": "swebench_lite",
                        "instance_id": instance_id,
                        "repo": row.get("repo"),
                        "base_commit": row.get("base_commit"),
                        "problem_statement": row.get("problem_statement"),
                        "difficulty": diff,
                    }
                    return _task_from_dir(folder, metadata, instance_id)
    except Exception as err:
        # Callers still get FileNotFoundError, but the real reason the
        # fallback failed (missing package, network, bad index, ...) is kept.
        raise FileNotFoundError(not_found_message) from err

    raise FileNotFoundError(not_found_message)