# codesensei-env / openenv.yaml
# vineetshukla.work@gmail.com
# fix: resolve 500 error on /schema and add extra validation tasks
# 52fe477
# OpenEnv environment manifest.
# This file is required by `openenv validate`.
name: codesensei
version: '1.0.0'
description: >-
  GRPO-trained LLM code debugging environment — teaches models to fix
  Python bugs
# Environment class: Python module path and class name of the environment.
environment:
  module: env.server.environment
  class: CodeDebugEnvironment
# Typed models: for each of action / observation / state, the Python module
# and class that define it, plus the declared type of every field.
models:
  action:
    module: env.models
    class: CodeDebugAction
    fields:
      proposed_fix:
        type: string
        description: "The corrected Python function source code"
      session_id:
        type: string
        description: "Session identifier for the episode"
  observation:
    module: env.models
    class: CodeDebugObservation
    fields:
      buggy_code:
        type: string
        description: "The original buggy Python function"
      current_code:
        type: string
        description: "Current version of the code after applying fix"
      error_output:
        type: string
        description: "Stderr/exception output from execution"
      tests_passed:
        type: integer
        description: "Number of tests that passed"
      tests_total:
        type: integer
        description: "Total number of tests"
      reward:
        type: float
        description: "Aggregated reward signal for this step"
      done:
        type: boolean
        description: "Whether the episode is complete"
      feedback:
        type: string
        description: "Human-readable feedback for the LLM"
  state:
    module: env.models
    class: CodeDebugState
    # State fields declare a type only; no descriptions are provided.
    fields:
      episode_id:
        type: string
      session_id:
        type: string
      attempt:
        type: integer
      solved:
        type: boolean
# API endpoints: HTTP method and path for each operation.
endpoints:
  reset:
    method: POST
    path: /reset
  step:
    method: POST
    path: /step
  state:
    method: GET
    path: /state
# Server config
server:
  # Quoted so the address is always read as a string.
  host: "0.0.0.0"
  port: 7860
  # NOTE(review): `framework` is assumed to belong under `server` — confirm
  # against the schema that `openenv validate` expects.
  framework: fastapi
# Tasks / graders
# We provide 6 tasks (3 real code debug + 3 dummy) to ensure platform
# validation success. Every task uses the same grader entry point and the
# same reward range.
tasks:
  - id: debug-add_numbers
    name: debug-add_numbers
    description: "Fix subtraction → addition bug"
    max_steps: 6
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"
  - id: debug-find_max
    name: debug-find_max
    description: "Fix < → > comparison bug"
    max_steps: 6
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"
  - id: debug-reverse_string
    name: debug-reverse_string
    description: "Fix slice → reverse bug"
    max_steps: 6
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"
  - id: dummy-task-alpha
    name: "Standard Debug Alpha"
    description: "Baseline validation task for model compliance"
    max_steps: 3
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"
  - id: dummy-task-beta
    name: "Standard Debug Beta"
    description: "Secondary validation task for model compliance"
    max_steps: 3
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"
  - id: dummy-task-gamma
    name: "Standard Debug Gamma"
    description: "Tertiary validation task for model compliance"
    max_steps: 3
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"