File size: 2,077 Bytes
cacd58c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# openenv.yaml — environment manifest, validated by `openenv validate`.
# `name` equals the repository segment of `huggingface.space_id` in this file.
name: code-debug-env
# Quoted so the value stays a string rather than being type-inferred.
version: "1.0.0"
# Folded block scalar: line breaks inside the text are joined with spaces when
# parsed, and a single trailing newline is kept.
description: >
  A real-world RL environment where an AI agent repairs buggy Python functions.
  The agent receives broken code and must iteratively submit patches until all
  unit tests pass. Designed for training LLMs on code repair via GRPO/RLVR.

# `author` equals the owner segment of `huggingface.space_id` in this file.
author: "luciferai-devil"
license: MIT

# Hackathon domain tag
domain: software-engineering

# Task catalogue: three entries, one per difficulty tier (easy, medium, hard).
# NOTE(review): each `id` is presumably the value an action passes as
# `task_id` — confirm against the environment implementation.
tasks:
  - id: task_easy
    difficulty: easy
    description: "Fix a single off-by-one error in a Kadane's algorithm implementation."
  - id: task_medium
    difficulty: medium
    description: "Fix two independent bugs in a string parsing utility."
  - id: task_hard
    difficulty: hard
    description: "Fix 3+ subtle bugs in a recursive tree function with missing edge cases."

# Schema of the action object the agent submits.
# `patch` and `task_id` are mandatory (see `required`); `think` may be omitted.
action:
  type: object
  properties:
    patch:
      type: string
      description: "Full replacement Python source for the function body."
    task_id:
      type: string
      description: "Which task this patch targets."
    # Not listed under `required`, so an action without `think` is still valid.
    think:
      type: string
      description: "Optional chain-of-thought reasoning (earns bonus reward)."
  required: [patch, task_id]

# Schema of the observation object. Written in block style so every property
# occupies its own stanza; the parsed data is the same as the flow-style form.
observation:
  type: object
  properties:
    task_id:
      type: string
    buggy_code:
      type: string
    task_description:
      type: string
    # No `items` schema is declared, so element shape is unconstrained here.
    test_results:
      type: array
    # NOTE(review): presumably passing / total unit-test counts — confirm.
    passed:
      type: integer
    total:
      type: integer
    # Bounded to the closed interval [0.0, 1.0].
    score:
      type: number
      minimum: 0.0
      maximum: 1.0
    done:
      type: boolean
    # May be null as well as a string.
    error:
      type: string
      nullable: true

# Scalar reward specification.
# NOTE(review): the positive weights in the description sum to 1.0
# (0.5 + 0.2 + 0.2 + 0.1) and the timeout penalty subtracts 0.3, so the raw
# formula can reach -0.3. The declared `minimum: 0.0` suggests the result is
# clamped to [0.0, 1.0] — confirm against the reward implementation.
reward:
  description: >
    Composite reward: 0.5×correctness + 0.2×valid_syntax + 0.2×chain_of_thought
    + 0.1×step_efficiency − 0.3×timeout_penalty. Range: [0.0, 1.0].
  type: number
  minimum: 0.0
  maximum: 1.0

# Episode limits: at most `max_steps` steps, ending early on full success.
# NOTE(review): `score=1.0` here presumably refers to the observation's
# `score` field (test pass ratio) rather than the composite reward, which also
# weights chain-of-thought and step efficiency — confirm.
episode:
  max_steps: 10
  termination: "All tests pass (score=1.0) OR max_steps reached."

# Server endpoint settings: listening port and transport protocol.
server:
  port: 8000
  transport: websocket   # openenv uses WebSocket for persistent sessions

# Hugging Face Space reference in `owner/name` form; the two segments equal
# the `author` and `name` values declared at the top of this file.
# NOTE(review): presumably the Space where this environment is deployed —
# confirm.
huggingface:
  space_id: "luciferai-devil/code-debug-env"