---
# OpenEnv Environment Configuration
# Required by `openenv validate`
name: codesensei
version: "1.0.0"
description: "GRPO-trained LLM code debugging environment — teaches models to fix Python bugs"

# Environment class
environment:
  module: env.server.environment
  class: CodeDebugEnvironment

# Typed models
# Each entry names the module/class implementing the model and lists its fields.
models:
  action:
    module: env.models
    class: CodeDebugAction
    fields:
      proposed_fix:
        type: string
        description: "The corrected Python function source code"
      session_id:
        type: string
        description: "Session identifier for the episode"

  observation:
    module: env.models
    class: CodeDebugObservation
    fields:
      buggy_code:
        type: string
        description: "The original buggy Python function"
      current_code:
        type: string
        description: "Current version of the code after applying fix"
      error_output:
        type: string
        description: "Stderr/exception output from execution"
      tests_passed:
        type: integer
        description: "Number of tests that passed"
      tests_total:
        type: integer
        description: "Total number of tests"
      reward:
        type: float
        description: "Aggregated reward signal for this step"
      done:
        type: boolean
        description: "Whether the episode is complete"
      feedback:
        type: string
        description: "Human-readable feedback for the LLM"

  state:
    module: env.models
    class: CodeDebugState
    fields:
      episode_id:
        type: string
      session_id:
        type: string
      attempt:
        type: integer
      solved:
        type: boolean

# API endpoints
endpoints:
  reset:
    method: POST
    path: /reset
  step:
    method: POST
    path: /step
  state:
    method: GET
    path: /state

# Server config
server:
  host: "0.0.0.0"
  port: 7860
  framework: fastapi

# Tasks / graders
# We provide 6 tasks (3 real code debug + 3 dummy) to ensure platform validation success.
# Task list: every entry uses the same grader entry point (`tasks.grader:grade`)
# and the same reward_range; the entries differ in id, name, description and
# max_steps.
tasks:
  - id: debug-add_numbers
    name: debug-add_numbers
    description: "Fix subtraction → addition bug"
    max_steps: 6
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"

  - id: debug-find_max
    name: debug-find_max
    description: "Fix < → > comparison bug"
    max_steps: 6
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"

  - id: debug-reverse_string
    name: debug-reverse_string
    description: "Fix slice → reverse bug"
    max_steps: 6
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"

  - id: dummy-task-alpha
    name: "Standard Debug Alpha"
    description: "Baseline validation task for model compliance"
    max_steps: 3
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"

  - id: dummy-task-beta
    name: "Standard Debug Beta"
    description: "Secondary validation task for model compliance"
    max_steps: 3
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"

  - id: dummy-task-gamma
    name: "Standard Debug Gamma"
    description: "Tertiary validation task for model compliance"
    max_steps: 3
    difficulty: "easy"
    reward_range: [0.01, 0.99]
    grader: "tasks.grader:grade"