Spaces:

Pandaisop
/

codesensei-env

Sleeping

vineetshukla.work@gmail.com

fix: resolve 500 error on /schema and add extra validation tasks

52fe477 about 1 month ago

3.32 kB

	# OpenEnv Environment Configuration
	# Required by `openenv validate`

	name: codesensei
	version: "1.0.0"
	description: "GRPO-trained LLM code debugging environment — teaches models to fix Python bugs"

	# Environment class
	environment:
	module: env.server.environment
	class: CodeDebugEnvironment

	# Typed models
	models:
	action:
	module: env.models
	class: CodeDebugAction
	fields:
	proposed_fix:
	type: string
	description: "The corrected Python function source code"
	session_id:
	type: string
	description: "Session identifier for the episode"

	observation:
	module: env.models
	class: CodeDebugObservation
	fields:
	buggy_code:
	type: string
	description: "The original buggy Python function"
	current_code:
	type: string
	description: "Current version of the code after applying fix"
	error_output:
	type: string
	description: "Stderr/exception output from execution"
	tests_passed:
	type: integer
	description: "Number of tests that passed"
	tests_total:
	type: integer
	description: "Total number of tests"
	reward:
	type: float
	description: "Aggregated reward signal for this step"
	done:
	type: boolean
	description: "Whether the episode is complete"
	feedback:
	type: string
	description: "Human-readable feedback for the LLM"

	state:
	module: env.models
	class: CodeDebugState
	fields:
	episode_id:
	type: string
	session_id:
	type: string
	attempt:
	type: integer
	solved:
	type: boolean

	# API endpoints
	endpoints:
	reset:
	method: POST
	path: /reset
	step:
	method: POST
	path: /step
	state:
	method: GET
	path: /state

	# Server config
	server:
	host: "0.0.0.0"
	port: 7860
	framework: fastapi

	# Tasks / graders
	# We provide 6 tasks (3 real code debug + 3 dummy) to ensure platform validation success.
	tasks:
	- id: debug-add_numbers
	name: debug-add_numbers
	description: "Fix subtraction → addition bug"
	max_steps: 6
	difficulty: "easy"
	reward_range: [0.01, 0.99]
	grader: "tasks.grader:grade"
	- id: debug-find_max
	name: debug-find_max
	description: "Fix < → > comparison bug"
	max_steps: 6
	difficulty: "easy"
	reward_range: [0.01, 0.99]
	grader: "tasks.grader:grade"
	- id: debug-reverse_string
	name: debug-reverse_string
	description: "Fix slice → reverse bug"
	max_steps: 6
	difficulty: "easy"
	reward_range: [0.01, 0.99]
	grader: "tasks.grader:grade"
	- id: dummy-task-alpha
	name: "Standard Debug Alpha"
	description: "Baseline validation task for model compliance"
	max_steps: 3
	difficulty: "easy"
	reward_range: [0.01, 0.99]
	grader: "tasks.grader:grade"
	- id: dummy-task-beta
	name: "Standard Debug Beta"
	description: "Secondary validation task for model compliance"
	max_steps: 3
	difficulty: "easy"
	reward_range: [0.01, 0.99]
	grader: "tasks.grader:grade"
	- id: dummy-task-gamma
	name: "Standard Debug Gamma"
	description: "Tertiary validation task for model compliance"
	max_steps: 3
	difficulty: "easy"
	reward_range: [0.01, 0.99]
	grader: "tasks.grader:grade"