# rl_code_fix_env/dataset/loader.py
# Uploaded by Viraj0112 using huggingface_hub
# Commit 03a907a (verified)
"""Load static, competition-approved tasks."""
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
# Folder that holds this module; problem folders are looked up beneath it.
DATASET_ROOT = Path(__file__).parent

# The three fixed competition tasks, listed easy -> medium -> hard.
# Each row is (difficulty, problem folder name, short description).
_TASK_ROWS = (
    ("easy", "problem_1", "String reversal with space normalization"),
    ("medium", "problem_10", "Matrix 90 clockwise rotation"),
    ("hard", "problem_13", "LRU cache with correct eviction policy"),
)

# Lookup table keyed by difficulty; every entry repeats its own difficulty
# alongside the problem id and description.
STATIC_TASKS = {
    level: {
        "problem_id": folder,
        "difficulty": level,
        "description": summary,
    }
    for level, folder, summary in _TASK_ROWS
}
def load_problem(
    problem_id: str, *, dataset_root: Optional[Path] = None
) -> Dict[str, Any]:
    """
    Load a single problem from disk.

    Args:
        problem_id: Name of the problem folder, e.g. "problem_1",
            "problem_10", "problem_13".
        dataset_root: Directory containing the problem folders. When
            omitted, the module-level DATASET_ROOT is used.

    Returns:
        {
            "code": str,         # buggy.py content
            "tests": str,        # path to test.py inside the problem folder
            "metadata": dict,    # parsed metadata.json
            "problem_dir": str,  # path to the problem folder
            "problem_id": str,   # the problem_id that was passed in
        }

    Raises:
        FileNotFoundError: if the problem folder, buggy.py or
            metadata.json does not exist.
        json.JSONDecodeError: if metadata.json is not valid JSON.
    """
    root = DATASET_ROOT if dataset_root is None else Path(dataset_root)
    problem_dir = root / problem_id

    # is_dir() rather than exists(): a plain file that happens to carry the
    # problem's name is reported the same way as a missing folder.
    if not problem_dir.is_dir():
        raise FileNotFoundError(f"Problem directory not found: {problem_dir}")

    # Load buggy code
    code = (problem_dir / "buggy.py").read_text(encoding="utf-8")

    # Load metadata
    metadata = json.loads(
        (problem_dir / "metadata.json").read_text(encoding="utf-8")
    )

    # Only the path to the test file is returned; the file itself is neither
    # read nor checked for existence here.
    test_path = str(problem_dir / "test.py")

    return {
        "code": code,
        "tests": test_path,
        "metadata": metadata,
        "problem_dir": str(problem_dir),
        "problem_id": problem_id,
    }
def get_hardcoded_task(difficulty: str) -> Dict[str, Any]:
    """
    Get one of the three static competition tasks.

    Args:
        difficulty: "easy" | "medium" | "hard" (a key of STATIC_TASKS)

    Returns:
        The dict produced by load_problem() for the problem id that
        STATIC_TASKS maps this difficulty to.

    Raises:
        ValueError: if difficulty is not one of the three approved values
    """
    if difficulty not in STATIC_TASKS:
        raise ValueError(
            f"Invalid difficulty '{difficulty}'. "
            f"Must be one of: {list(STATIC_TASKS.keys())}"
        )

    # Only the problem id is needed from the table entry; everything else in
    # the returned task comes from the files on disk.
    return load_problem(STATIC_TASKS[difficulty]["problem_id"])
def get_random_tasks():
    """
    DEPRECATED shim kept for backward compatibility.

    Issues a DeprecationWarning and then hands back the "easy" task from
    get_hardcoded_task(); nothing is chosen at random.
    New code should call get_hardcoded_task() directly.
    """
    from warnings import warn

    notice = "get_random_tasks() is deprecated. Use get_hardcoded_task('easy'|'medium'|'hard')"
    # stacklevel=2 attributes the warning to whoever called this function
    # rather than to this line.
    warn(notice, category=DeprecationWarning, stacklevel=2)

    # Default to the easy task.
    fallback_difficulty = "easy"
    return get_hardcoded_task(fallback_difficulty)