# OpenEnv Environment Configuration
# Required by `openenv validate`
# Identity of this environment package: short name, release version, summary.
name: codesensei
# Kept quoted so the version is always loaded as a string.
version: '1.0.0'
description: 'GRPO-trained LLM code debugging environment — teaches models to fix Python bugs'
# Environment class: the module/class pair that implements the environment.
# The two keys must be nested under `environment`; at column 0 they would be
# read as unrelated top-level keys and clash with the `module`/`class` keys
# used by other sections.
environment:
  module: env.server.environment
  class: CodeDebugEnvironment
# Typed models: one entry each for the action, observation and state schemas.
# Every entry names a module/class pair and lists its fields; each field has a
# `type` and, where provided, a human-readable `description`.
models:
  # Payload submitted on each step.
  action:
    module: env.models
    class: CodeDebugAction
    fields:
      proposed_fix:
        type: string
        description: "The corrected Python function source code"
      session_id:
        type: string
        description: "Session identifier for the episode"

  # Result handed back after a step.
  observation:
    module: env.models
    class: CodeDebugObservation
    fields:
      buggy_code:
        type: string
        description: "The original buggy Python function"
      current_code:
        type: string
        description: "Current version of the code after applying fix"
      error_output:
        type: string
        description: "Stderr/exception output from execution"
      tests_passed:
        type: integer
        description: "Number of tests that passed"
      tests_total:
        type: integer
        description: "Total number of tests"
      reward:
        type: float
        description: "Aggregated reward signal for this step"
      done:
        type: boolean
        description: "Whether the episode is complete"
      feedback:
        type: string
        description: "Human-readable feedback for the LLM"

  # Episode bookkeeping; these fields carry no descriptions in this file.
  state:
    module: env.models
    class: CodeDebugState
    fields:
      episode_id:
        type: string
      session_id:
        type: string
      attempt:
        type: integer
      solved:
        type: boolean
# API endpoints: HTTP method and path for each operation. Each operation is a
# nested mapping so that the repeated `method`/`path` keys do not collide.
endpoints:
  reset:
    method: POST
    path: /reset
  step:
    method: POST
    path: /step
  state:
    method: GET
    path: /state
# Server config: listen address, port and web framework, nested under
# `server` so they are not read as stand-alone top-level settings.
server:
  # Quoted so the dotted address is always loaded as a string.
  host: "0.0.0.0"
  port: 7860
  framework: fastapi
# Tasks / graders
# We provide 6 tasks (3 real code debug + 3 dummy) to ensure platform validation success.
# Each list item is one task mapping; every key after `- id:` is indented to
# line up with `id` so that it belongs to that same item.
tasks:
  # Real debugging tasks.
  - id: debug-add_numbers
    name: debug-add_numbers
    description: "Fix subtraction → addition bug"
    max_steps: 6
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"
  - id: debug-find_max
    name: debug-find_max
    description: "Fix < → > comparison bug"
    max_steps: 6
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"
  - id: debug-reverse_string
    name: debug-reverse_string
    description: "Fix slice → reverse bug"
    max_steps: 6
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"
  # Dummy validation tasks.
  - id: dummy-task-alpha
    name: "Standard Debug Alpha"
    description: "Baseline validation task for model compliance"
    max_steps: 3
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"
  - id: dummy-task-beta
    name: "Standard Debug Beta"
    description: "Secondary validation task for model compliance"
    max_steps: 3
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"
  - id: dummy-task-gamma
    name: "Standard Debug Gamma"
    description: "Tertiary validation task for model compliance"
    max_steps: 3
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"