# Hugging Face Space: luciferai-devil/code-debug-env (status: Sleeping)
# File: code-debug-env / openenv.yaml (2.08 kB)
# Last commit: cacd58c (verified, about 2 months ago) by luciferai-devil:
#   "Upload folder using huggingface_hub"
# Page navigation captured with the file: App | Files | Community | raw |
#   history | blame | contribute | delete

	# openenv.yaml — validated by `openenv validate`
	name: code-debug-env
	version: "1.0.0"
	description: >
	A real-world RL environment where an AI agent repairs buggy Python functions.
	The agent receives broken code and must iteratively submit patches until all
	unit tests pass. Designed for training LLMs on code repair via GRPO/RLVR.

	author: "luciferai-devil"
	license: MIT

	# Hackathon domain tag
	domain: software-engineering

	tasks:
	- id: task_easy
	difficulty: easy
	description: "Fix a single off-by-one error in a Kadane's algorithm implementation."
	- id: task_medium
	difficulty: medium
	description: "Fix two independent bugs in a string parsing utility."
	- id: task_hard
	difficulty: hard
	description: "Fix 3+ subtle bugs in a recursive tree function with missing edge cases."

	action:
	type: object
	properties:
	patch:
	type: string
	description: "Full replacement Python source for the function body."
	task_id:
	type: string
	description: "Which task this patch targets."
	think:
	type: string
	description: "Optional chain-of-thought reasoning (earns bonus reward)."
	required: [patch, task_id]

	observation:
	type: object
	properties:
	task_id: { type: string }
	buggy_code: { type: string }
	task_description: { type: string }
	test_results: { type: array }
	passed: { type: integer }
	total: { type: integer }
	score: { type: number, minimum: 0.0, maximum: 1.0 }
	done: { type: boolean }
	error: { type: string, nullable: true }

	reward:
	description: >
	Composite reward: 0.5×correctness + 0.2×valid_syntax + 0.2×chain_of_thought
	+ 0.1×step_efficiency − 0.3×timeout_penalty. Range: [0.0, 1.0].
	type: number
	minimum: 0.0
	maximum: 1.0

	episode:
	max_steps: 10
	termination: "All tests pass (score=1.0) OR max_steps reached."

	server:
	port: 8000
	transport: websocket # openenv uses WebSocket for persistent sessions

	huggingface:
	space_id: "luciferai-devil/code-debug-env"