code-debug-env / openenv.yaml
luciferai-devil's picture
Upload folder using huggingface_hub
cacd58c verified
# openenv.yaml — validated by `openenv validate`
# Identity and descriptive metadata for the environment.
name: code-debug-env
# Quoted so the version is always read as a string.
version: "1.0.0"
# Folded scalar: the indented lines below are joined with spaces into a single
# paragraph. The content must be indented deeper than the `description` key.
description: >
  A real-world RL environment where an AI agent repairs buggy Python functions.
  The agent receives broken code and must iteratively submit patches until all
  unit tests pass. Designed for training LLMs on code repair via GRPO/RLVR.
author: "luciferai-devil"
license: MIT
# Hackathon domain tag
domain: software-engineering
# Task catalogue: a sequence of mappings, one per task. Each entry carries an
# `id`, a `difficulty` label, and a human-readable `description`.
tasks:
  - id: task_easy
    difficulty: easy
    description: "Fix a single off-by-one error in a Kadane's algorithm implementation."
  - id: task_medium
    difficulty: medium
    description: "Fix two independent bugs in a string parsing utility."
  - id: task_hard
    difficulty: hard
    description: "Fix 3+ subtle bugs in a recursive tree function with missing edge cases."
# Action schema: the object submitted on each step, described in a
# JSON-Schema-like form (`type` / `properties` / `required`).
action:
  type: object
  properties:
    patch:
      type: string
      description: "Full replacement Python source for the function body."
    task_id:
      type: string
      description: "Which task this patch targets."
    # `think` is not listed under `required` below, so it may be omitted.
    think:
      type: string
      description: "Optional chain-of-thought reasoning (earns bonus reward)."
  # Applies to the action object itself, hence a sibling of `properties`.
  required: [patch, task_id]
# Observation schema: the fields reported back for a step, in the same
# JSON-Schema-like form as the action schema.
observation:
  type: object
  properties:
    task_id: { type: string }
    buggy_code: { type: string }
    task_description: { type: string }
    # No `items` schema is declared, so the element shape is unspecified here.
    test_results: { type: array }
    passed: { type: integer }
    total: { type: integer }
    # Bounded to the closed interval [0.0, 1.0].
    score: { type: number, minimum: 0.0, maximum: 1.0 }
    done: { type: boolean }
    # May be null (`nullable: true`).
    error: { type: string, nullable: true }
# Reward schema: a single number with declared bounds [0.0, 1.0].
reward:
  # Folded scalar: both lines below are joined into one paragraph.
  # NOTE(review): the positive weights sum to 1.0 and the timeout penalty is
  # subtracted, so if each term lies in [0, 1] the raw sum can drop below 0.0.
  # Presumably the result is clamped to the declared range — confirm against
  # the implementation.
  description: >
    Composite reward: 0.5×correctness + 0.2×valid_syntax + 0.2×chain_of_thought
    + 0.1×step_efficiency − 0.3×timeout_penalty. Range: [0.0, 1.0].
  type: number
  minimum: 0.0
  maximum: 1.0
# Episode limits: the step budget and the stated stop condition.
episode:
  # Upper bound on steps; referenced by the termination rule below.
  max_steps: 10
  termination: "All tests pass (score=1.0) OR max_steps reached."
# Server settings: port number and transport for the environment server.
server:
  port: 8000
  transport: websocket  # openenv uses WebSocket for persistent sessions
# Hugging Face settings.
huggingface:
  # Written as "<owner>/<name>"; presumably the Space that hosts this
  # environment — verify against the deployment.
  space_id: "luciferai-devil/code-debug-env"