Spaces:

Souravdanyal
/

code-debug-env

Running

App Files Files Community

code-debug-env / openenv.yaml

Souravdanyal

error fixing

d510c1d about 2 months ago

raw

history blame

2.91 kB

	# Manifest header: spec version, environment identity and a prose summary.
	spec_version: 1
	name: code-debug-env
	type: typed
	# Folded scalar: the body must be indented deeper than the key, otherwise
	# the text below is parsed as separate (invalid) top-level content.
	description: >
	  A real-world RL environment where an LLM agent diagnoses and fixes buggy Python
	  code across three difficulty levels (easy, medium, hard). Tasks cover real-world
	  domains: data processing, string algorithms, API validation, sorting, dynamic
	  programming, and graph algorithms. Rewards are partial and proportional to test
	  cases passed, with bonuses for correct explanations on hard tasks.

	# Quoted so the version is always read as a string, never as a number.
	version: "1.0.0"
	author: Souravdanyal

	# Keyword tags describing this environment.
	tags:
	  - code-debugging
	  - python
	  - reinforcement-learning
	  - openenv
	  - llm-agent
	  - software-engineering
	  - real-world

	# Runtime settings. The children must be nested under `runtime`; left at the
	# parent's column, `type` would collide with the manifest-level `type` key.
	runtime:
	  type: docker
	  port: 7860

	# Application server settings: entry script plus bind host and port.
	# NOTE(review): port is the same value as runtime.port (7860) - presumably
	# the two must stay in sync; confirm against the consumer.
	app:
	  entry: server/app.py
	  # Quoted so the address is always read as a string.
	  host: "0.0.0.0"
	  port: 7860

	# Episode limits. Nested under `config` so the key is a mapping, not null.
	config:
	  # NOTE(review): unit is not stated in this file - presumably seconds; confirm.
	  episode_timeout: 300
	  max_steps: 5

	# Task tiers. Each list item is one tier; its attributes must be indented
	# under the `- id:` entry so they belong to that item rather than being
	# repeated keys at the parent level.
	tasks:
	  - id: easy
	    description: "Fix a single off-by-one, operator, or return bug in a Python function"
	    difficulty: easy
	    max_steps: 5
	    reward_range: [0.0, 1.0]
	    grader: deterministic
	    num_tasks: 15

	  - id: medium
	    description: "Fix two bugs (logic bug + edge case) so all test cases pass"
	    difficulty: medium
	    max_steps: 5
	    reward_range: [0.0, 1.0]
	    grader: deterministic
	    num_tasks: 15

	  - id: hard
	    description: "Fix an algorithmic bug AND provide a correct explanation of root cause"
	    difficulty: hard
	    max_steps: 5
	    reward_range: [0.0, 1.0]
	    grader: deterministic
	    num_tasks: 15

	# Kept at the top level: the `hard` task above already defines its own
	# reward_range, so this one is presumably the environment-wide range - confirm.
	reward_range: [0.0, 1.0]

	# Action schema: the fields an agent submits. Each field's attributes are
	# nested under the field name so the repeated `type` / `required` /
	# `description` keys do not collide.
	action_space:
	  type: dict
	  description: "Agent submits fixed Python code and optional explanation"
	  fields:
	    fixed_code:
	      type: string
	      required: true
	      description: "Complete corrected Python function. Must be valid Python including imports."
	    explanation:
	      type: string
	      # Not schema-required, although the description says hard tasks need it.
	      required: false
	      description: "Required for hard tasks. Explain the bug, root cause, and fix."

	# Observation schema: the fields returned to the agent. Each field's
	# attributes are nested under the field name so the repeated `type` /
	# `description` keys do not collide.
	observation_space:
	  type: dict
	  description: "Returned after reset() and step()"
	  fields:
	    task_id:
	      type: string
	      description: "Unique task identifier e.g. easy_003"
	    difficulty:
	      type: enum
	      values: [easy, medium, hard]
	    buggy_code:
	      type: string
	      description: "The buggy Python function to fix"
	    instructions:
	      type: string
	      description: "Natural language description of what is wrong"
	    test_cases_description:
	      type: string
	      description: "What the test cases check"
	    # reward and passed_tests are described as null on reset, so consumers
	    # should treat them as nullable despite the scalar type.
	    reward:
	      type: float
	      description: "Score 0.0-1.0 (null on reset)"
	    passed_tests:
	      type: integer
	      description: "Test cases passed (null on reset)"
	    total_tests:
	      type: integer
	      description: "Total test cases (always 3)"
	    feedback:
	      type: string
	      description: "Per-test feedback showing Input, Expected, Got"
	    done:
	      type: boolean
	      description: "True when episode complete"

	# Route path for each operation. Nested under `api` so that `tasks` here
	# does not duplicate the top-level `tasks` list.
	# NOTE(review): presumably HTTP routes served by app.entry - confirm.
	api:
	  reset: /reset
	  step: /step
	  state: /state
	  health: /health
	  tasks: /tasks