Spaces:

Viraj0112
/

rl_code_fix_env

Sleeping

File size: 8,506 Bytes

03a907a

"""

Script to download and materialize SWE-bench Lite tasks.



This script:

1. Downloads SWE-bench Lite dataset from HuggingFace

2. Writes placeholder buggy.py and test.py files plus a metadata.json for each task (the repository itself is not cloned)

3. Organizes them into the expected directory structure



Usage:

    python -m dataset.prepare_swebench [--max-tasks N] [--difficulty easy|medium|hard|all] [--output-dir DIR]

"""

import argparse
import os
import sys
from pathlib import Path

# Add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from datasets import load_dataset


def get_problem_statement(row):
    """Return the row's ``problem_statement`` value, or ``""`` if the key is absent."""
    statement = row.get("problem_statement", "")
    return statement


def get_patch(row):
    """Return the row's ``patch`` value (the fix diff), or ``""`` if the key is absent."""
    fix_diff = row.get("patch", "")
    return fix_diff


def get_instance_id(row):
    """Return the row's ``instance_id`` value, or ``""`` if the key is absent."""
    identifier = row.get("instance_id", "")
    return identifier


def create_buggy_file(instance_dir: Path, row):
    """Write a placeholder ``buggy.py`` for one task instance.

    The real pre-patch source is NOT retrieved (that would require checking
    out the repository at ``base_commit``). A stub ``solution()`` is written
    instead, preceded by header comments naming the instance, the first entry
    of the row's ``created_files`` (when that field is present and non-empty)
    and the first 200 characters of the problem statement.

    Every value embedded in a ``#`` comment is first collapsed onto a single
    line. Problem statements are usually multi-line, and interpolating them
    raw would let everything after the first newline escape the comment and
    land in ``buggy.py`` as (typically invalid) code.

    Args:
        instance_dir: Existing directory that receives ``buggy.py``.
        row: Mapping-like dataset row; read via ``row.get`` and the
            ``get_instance_id`` / ``get_problem_statement`` helpers.

    Returns:
        Path of the written ``buggy.py``.
    """

    def one_line(text):
        """Collapse every whitespace run (newlines included) to one space."""
        return " ".join(str(text or "").split())

    header = [f"# Buggy code for {one_line(get_instance_id(row))}"]

    # NOTE(review): the public SWE-bench Lite schema does not appear to have a
    # 'created_files' column, so this branch is presumably never taken for
    # that dataset - confirm against the dataset card.
    created_files = row.get("created_files", [])
    if created_files:
        header.append(f"# File: {one_line(created_files[0])}")

    problem_summary = one_line(get_problem_statement(row))[:200]
    header.append(f"# Problem: {problem_summary}...")

    # Header lines are separated by one blank line, followed by the stub.
    buggy_code = "\n\n".join(header) + (
        "\n\n\n\n"
        "def solution():\n\n"
        '    """Placeholder solution - needs to be fixed."""\n\n'
        "    pass\n\n"
    )

    buggy_file = instance_dir / "buggy.py"
    buggy_file.write_text(buggy_code, encoding="utf-8")
    return buggy_file


def create_test_file(instance_dir: Path, row):
    """Write a placeholder ``test.py`` for one task instance.

    Two layouts are produced:

    * If the row has a non-empty ``test_cases`` list, a single
      ``TestSolution`` class is emitted with one stub method per case
      (``test_case_1``, ``test_case_2``, ...). Each stub only records the
      case's ``input`` / ``output`` as comments and passes.
    * Otherwise a basic test is emitted that imports ``solution`` from
      ``buggy`` and asserts its result is not ``None``.

    All text embedded in ``#`` comments is collapsed onto one line first so
    multi-line values cannot break out of the comment and corrupt the
    generated module.

    Args:
        instance_dir: Existing directory that receives ``test.py``.
        row: Mapping-like dataset row; read via ``row.get`` and the
            ``get_instance_id`` / ``get_problem_statement`` helpers.

    Returns:
        Path of the written ``test.py``.
    """

    def one_line(text):
        """Collapse every whitespace run (newlines included) to one space."""
        return " ".join(str(text or "").split())

    instance_id = one_line(get_instance_id(row))
    problem_summary = one_line(get_problem_statement(row))[:300]

    # NOTE(review): the public SWE-bench Lite schema does not appear to have a
    # 'test_cases' column, so the fallback branch is presumably the one used
    # in practice - confirm against the dataset card.
    test_cases = row.get("test_cases", [])

    if test_cases:
        # The class header is emitted once; re-declaring the class per case
        # would make each definition shadow the previous one.
        parts = [
            "import unittest\n\n",
            "class TestSolution(unittest.TestCase):\n\n",
        ]
        for number, tc in enumerate(test_cases, start=1):
            input_str = one_line(tc.get("input", ""))
            output_str = one_line(tc.get("output", ""))
            parts.append(
                f"    def test_case_{number}(self):\n\n"
                f"        # Input: {input_str}\n\n"
                f"        # Expected: {output_str}\n\n"
                "        pass  # TODO: Add actual test\n\n"
            )
        test_code = "".join(parts)
    else:
        # Basic test derived from the problem statement only.
        test_code = (
            f'"""Test file for {instance_id}"""\n\n\n\n'
            "import unittest\n\n"
            "from buggy import solution\n\n\n\n\n\n"
            "class TestSolution(unittest.TestCase):\n\n"
            "    def test_basic(self):\n\n"
            '        """Test based on problem statement."""\n\n'
            f"        # Problem: {problem_summary}...\n\n"
            "        result = solution()\n\n"
            "        self.assertIsNotNone(result)\n\n\n\n\n\n"
            'if __name__ == "__main__":\n\n'
            "    unittest.main()\n\n"
        )

    test_file = instance_dir / "test.py"
    test_file.write_text(test_code, encoding="utf-8")
    return test_file


def create_metadata_file(instance_dir: Path, row, difficulty: str = "medium"):
    """Write ``metadata.json`` describing one task instance.

    Args:
        instance_dir: Existing directory that receives ``metadata.json``.
        row: Mapping-like dataset row; read via ``row.get`` and the
            ``get_*`` helpers.
        difficulty: Label stored under the ``"difficulty"`` key. Defaults to
            ``"medium"``, the value that used to be hard-coded, so existing
            two-argument callers are unaffected.

    Returns:
        Path of the written ``metadata.json``.
    """
    import json

    metadata = {
        "instance_id": get_instance_id(row),
        "repo": row.get("repo", ""),
        "base_commit": row.get("base_commit", ""),
        "problem_statement": get_problem_statement(row),
        "patch": get_patch(row),
        "difficulty": difficulty,
    }

    metadata_file = instance_dir / "metadata.json"
    metadata_file.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
    return metadata_file


def _difficulty_ranges(total):
    """Split ``range(total)`` into easy/medium/hard ``(start, end)`` thirds."""
    one_third = max(total // 3, 1)
    two_third = max((2 * total) // 3, one_third + 1)
    bounds = {
        "easy": (0, one_third),
        "medium": (one_third, two_third),
        "hard": (two_third, total),
    }
    # The max() floors above can push a bound past the end of a tiny dataset;
    # clamp so no bucket ever refers to a row that does not exist.
    return {
        name: (min(start, total), min(end, total))
        for name, (start, end) in bounds.items()
    }


def _select_task_indices(ranges, total, max_tasks, difficulty):
    """Return the dataset indices to materialize, at most ``max_tasks`` of them."""
    max_tasks = max(max_tasks, 0)

    if difficulty == "all":
        # Spread the budget over the buckets; the first buckets absorb the
        # remainder so that e.g. max_tasks=2 still yields two tasks.
        base, extra = divmod(max_tasks, len(ranges))
        indices = []
        for position, (start, end) in enumerate(ranges.values()):
            quota = base + (1 if position < extra else 0)
            indices.extend(range(start, min(end, start + quota)))
        return indices

    # An unrecognized label selects from the whole dataset.
    start, end = ranges.get(difficulty, (0, total))
    # The cap is relative to the bucket start: comparing against max_tasks
    # alone produced an empty range for any bucket starting at or beyond it.
    return list(range(start, min(end, start + max_tasks)))


def _difficulty_label(idx, ranges):
    """Return the name of the bucket containing ``idx`` ("medium" if none)."""
    for name, (start, end) in ranges.items():
        if start <= idx < end:
            return name
    return "medium"


def prepare_swebench_tasks(
    output_dir: Path,
    max_tasks: int = 30,
    difficulty: str = "all"
):
    """Download SWE-bench Lite and materialize a subset of its tasks on disk.

    Difficulty is assigned purely by position in the dataset: the first third
    of the rows is "easy", the second "medium", the last "hard". For each
    selected row a directory named after its instance ID is created under
    ``output_dir`` and filled with ``buggy.py``, ``test.py`` and
    ``metadata.json`` (whose ``difficulty`` reflects the row's bucket).

    A row that fails to process is reported with a warning and skipped; it
    does not abort the run.

    Args:
        output_dir: Directory to save tasks (created if missing).
        max_tasks: Maximum number of tasks to prepare. With
            ``difficulty="all"`` the budget is split across the three
            buckets; otherwise it applies to the chosen bucket.
        difficulty: "easy", "medium", "hard", or "all". Any other value
            selects from the start of the whole dataset.
    """
    print("Loading SWE-bench Lite dataset...")

    try:
        ds = load_dataset("princeton-nlp/SWE-bench_Lite", split="test")
    except Exception as e:
        # Deliberate best-effort: any failure on the primary name triggers one
        # retry under the alternative name; a second failure propagates.
        print(f"Error loading dataset: {e}")
        print("Trying alternative dataset name...")
        ds = load_dataset("swe-bench/swe-bench-lite", split="test")

    total = len(ds)
    print(f"Loaded {total} tasks")

    ranges = _difficulty_ranges(total)
    indices = _select_task_indices(ranges, total, max_tasks, difficulty)

    # Create output directory
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Preparing {len(indices)} tasks...")

    success_count = 0
    for position, idx in enumerate(indices, start=1):
        try:
            row = ds[idx]
            instance_id = get_instance_id(row)

            # An empty ID would resolve to output_dir itself and a path-like
            # ID could escape it, so reject both rather than write files.
            if (
                not instance_id
                or instance_id in (".", "..")
                or Path(instance_id).name != instance_id
            ):
                raise ValueError(f"unusable instance_id {instance_id!r}")

            # Create instance directory
            instance_dir = output_dir / instance_id
            instance_dir.mkdir(parents=True, exist_ok=True)

            # Create files
            create_buggy_file(instance_dir, row)
            create_test_file(instance_dir, row)
            create_metadata_file(
                instance_dir, row, difficulty=_difficulty_label(idx, ranges)
            )

            success_count += 1
            if position % 10 == 0:
                print(f"  Processed {position}/{len(indices)} tasks...")

        except Exception as e:
            # Keep going: one bad row should not discard the rest of the run.
            print(f"  Warning: Failed to process task {idx}: {e}")
            continue

    print(f"\nDone! Prepared {success_count}/{len(indices)} tasks in {output_dir}")
    print(f"Set SWEBENCH_TASKS_ROOT={output_dir.absolute()} to use these tasks.")


def main():
    """Command-line entry point: parse the options and run the preparation.

    Recognized options are ``--max-tasks`` (int, default 30), ``--difficulty``
    (one of easy/medium/hard/all, default all) and ``--output-dir`` (default:
    a ``swebench_lite_tasks`` directory next to this script).
    """
    cli = argparse.ArgumentParser(description="Prepare SWE-bench Lite tasks")

    max_tasks_help = "Maximum number of tasks to download (default: 30)"
    cli.add_argument("--max-tasks", type=int, default=30, help=max_tasks_help)

    difficulty_help = "Difficulty level to download (default: all)"
    cli.add_argument(
        "--difficulty",
        type=str,
        default="all",
        choices=["easy", "medium", "hard", "all"],
        help=difficulty_help,
    )

    output_dir_help = "Output directory (default: dataset/swebench_lite_tasks)"
    cli.add_argument("--output-dir", type=str, default=None, help=output_dir_help)

    options = cli.parse_args()

    # An explicit (non-empty) --output-dir wins; otherwise fall back to a
    # directory that sits beside this script.
    target_dir = (
        Path(options.output_dir)
        if options.output_dir
        else Path(__file__).parent / "swebench_lite_tasks"
    )

    prepare_swebench_tasks(
        output_dir=target_dir,
        max_tasks=options.max_tasks,
        difficulty=options.difficulty,
    )


# Run the CLI only when this module is executed as a script (including via
# ``python -m``); importing the module has no such side effect.
if __name__ == "__main__":
    main()