File size: 1,471 Bytes
a5c1fa0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Identity and summary metadata for this environment definition.
name: 'codebase-nav-env'
# Version is kept as a quoted string.
version: '1.0.0'
# Folded scalar: the line breaks inside the text become single spaces and
# exactly one trailing newline is kept.
description: >
  An RL environment where an LLM agent navigates an unfamiliar Python
  codebase, finds bugs, and implements features by reading files and
  running tests. Graded by actual pytest execution — fully deterministic.

# NOTE(review): this looks like a placeholder username — confirm before use.
author: 'your-hf-username'
license: 'MIT'

# Task list. Every entry carries the same six keys: id, name, description,
# difficulty, max_steps, and reward_range. max_steps grows with difficulty
# (20 / 25 / 30) while reward_range is [0.0, 1.0] for all three.
tasks:
  # Easy: 20 steps.
  - id: 'task1'
    name: 'Single-file bug repair'
    description: 'Find and fix bugs in a Python module so all tests pass.'
    difficulty: 'easy'
    max_steps: 20
    reward_range: [0.0, 1.0]

  # Medium: 25 steps.
  - id: 'task2'
    name: 'Cross-module interface bug'
    description: 'Fix a type mismatch between two modules and add a regression test.'
    difficulty: 'medium'
    max_steps: 25
    reward_range: [0.0, 1.0]

  # Hard: 30 steps.
  - id: 'task3'
    name: 'Feature implementation from spec'
    description: 'Read FEATURE_SPEC.md and implement the feature across multiple files.'
    difficulty: 'hard'
    max_steps: 30
    reward_range: [0.0, 1.0]

# Action space: declared as type "text", with a schema that maps each field
# name to a free-form type description. The "(optional)" marker is part of
# the description string itself, not a separate flag.
action_space:
  type: 'text'
  schema:
    action_type: 'string'
    path: 'string (optional)'
    content: 'string (optional)'
    query: 'string (optional)'

# Observation space: declared as type "structured". `fields` is a sequence of
# single-key mappings, each pairing a field name with a free-form description
# of its type.
observation_space:
  type: 'structured'
  fields:
    - repo_tree: 'list of file paths'
    - task_description: 'string'
    - failing_tests: 'list of test names'
    - files_read: 'list of paths read so far'
    - last_action_result: 'string'
    - steps_remaining: 'integer'
    - current_task: 'string'

# Endpoint table: each value is written as "<HTTP method> <path>".
endpoints:
  reset: 'POST /reset'
  step: 'POST /step'
  state: 'GET /state'
  health: 'GET /health'