# Spaces:
# Sleeping
# Sleeping
# File size: 1,471 Bytes
# Top-level metadata for the environment manifest.
name: codebase-nav-env
# Quoted so the version stays a string rather than being type-inferred.
version: "1.0.0"
# Folded block scalar: the three lines below are joined into one paragraph.
description: >
  An RL environment where an LLM agent navigates an unfamiliar Python codebase,
  finds bugs, and implements features by reading files and running tests.
  Graded by actual pytest execution — fully deterministic.
# NOTE(review): looks like a placeholder value — confirm before publishing.
author: your-hf-username
license: MIT
# Task catalogue. Every entry carries the same six keys: id, name,
# description, difficulty, max_steps and reward_range.
tasks:
  - id: task1
    name: "Single-file bug repair"
    description: "Find and fix bugs in a Python module so all tests pass."
    difficulty: easy
    max_steps: 20
    reward_range: [0.0, 1.0]

  - id: task2
    name: "Cross-module interface bug"
    description: "Fix a type mismatch between two modules and add a regression test."
    difficulty: medium
    max_steps: 25
    reward_range: [0.0, 1.0]

  - id: task3
    name: "Feature implementation from spec"
    description: "Read FEATURE_SPEC.md and implement the feature across multiple files."
    difficulty: hard
    max_steps: 30
    reward_range: [0.0, 1.0]
# Shape of an action. Each schema entry maps a field name to a short type
# note; three of the four fields are marked "(optional)".
action_space:
  type: text
  schema:
    action_type: string
    path: string (optional)
    content: string (optional)
    query: string (optional)
# Shape of an observation. `fields` is a list of single-key mappings, each
# pairing a field name with a free-text description of its type.
observation_space:
  type: structured
  fields:
    - repo_tree: list of file paths
    - task_description: string
    - failing_tests: list of test names
    - files_read: list of paths read so far
    - last_action_result: string
    - steps_remaining: integer
    - current_task: string
# Endpoint table: each value is written as "<HTTP method> <path>".
endpoints:
  reset: POST /reset
  step: POST /step
  state: GET /state
  health: GET /health