---
# openenv.yaml — validated by `openenv validate`
name: code-debug-env
version: "1.0.0"
description: >
  A real-world RL environment where an AI agent repairs buggy Python
  functions. The agent receives broken code and must iteratively submit
  patches until all unit tests pass. Designed for training LLMs on code
  repair via GRPO/RLVR.
author: "luciferai-devil"
license: MIT

# Hackathon domain tag
domain: software-engineering

tasks:
  - id: task_easy
    difficulty: easy
    description: "Fix a single off-by-one error in a Kadane's algorithm implementation."
  - id: task_medium
    difficulty: medium
    description: "Fix two independent bugs in a string parsing utility."
  - id: task_hard
    difficulty: hard
    description: "Fix 3+ subtle bugs in a recursive tree function with missing edge cases."

# JSON-Schema-style description of the action an agent submits each step.
action:
  type: object
  properties:
    patch:
      type: string
      description: "Full replacement Python source for the function body."
    task_id:
      type: string
      description: "Which task this patch targets."
    think:
      type: string
      description: "Optional chain-of-thought reasoning (earns bonus reward)."
  required: [patch, task_id]

# JSON-Schema-style description of the observation returned after each step.
observation:
  type: object
  properties:
    task_id: { type: string }
    buggy_code: { type: string }
    task_description: { type: string }
    test_results: { type: array }
    passed: { type: integer }
    total: { type: integer }
    score: { type: number, minimum: 0.0, maximum: 1.0 }
    done: { type: boolean }
    error: { type: string, nullable: true }

reward:
  description: >
    Composite reward: 0.5×correctness + 0.2×valid_syntax +
    0.2×chain_of_thought + 0.1×step_efficiency − 0.3×timeout_penalty.
    Range: [0.0, 1.0].
  type: number
  minimum: 0.0
  maximum: 1.0

episode:
  max_steps: 10
  termination: "All tests pass (score=1.0) OR max_steps reached."

server:
  port: 8000
  transport: websocket  # openenv uses WebSocket for persistent sessions

huggingface:
  space_id: "luciferai-devil/code-debug-env"