Spaces:

Parthiban007
/

rust_coder

Sleeping

App Files Files Community

rust_coder / openenv.yaml

Parthiban007

Upload folder using huggingface_hub

8a096e2 verified 12 days ago

raw

history blame

4.94 kB

	spec_version: 1
	name: rust_coder
	description: "High-fidelity RL environment for evaluating LLM agents on Rust systems programming, including borrow checking, safe concurrency, and memory management."
	type: space
	runtime: fastapi
	app: server.app:app
	port: 8000
	dockerfile: Dockerfile
	tags:
	- openenv
	- software-engineering
	- rust
	- coding-benchmark

	# Task Definition (Easy -> Medium -> Hard)
	# Each task has a grader that scores submissions 0.0-1.0
	tasks:
	- id: "task_1"
	title: "Broken CLI Argument Parser"
	difficulty: "easy"
	description: "Fix enum variant mismatches and incomplete match arms in a CLI argument parser."
	grader:
	type: "programmatic"
	endpoint: "/grade/task_1"
	success_threshold: 0.7
	reward_range: [0.0, 1.0]
	description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

	- id: "task_2"
	title: "Conflicting Borrows in Collection Processing"
	difficulty: "easy"
	description: "Resolve mutable/immutable borrow conflicts in a string collection processor."
	grader:
	type: "programmatic"
	endpoint: "/grade/task_2"
	success_threshold: 0.7
	reward_range: [0.0, 1.0]
	description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

	- id: "task_3"
	title: "Lifetime Annotations"
	difficulty: "medium"
	description: "Add correct lifetime annotations to enable a struct holding references to work properly."
	grader:
	type: "programmatic"
	endpoint: "/grade/task_3"
	success_threshold: 0.6
	reward_range: [0.0, 1.0]
	description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

	- id: "task_4"
	title: "Business Logic Bug"
	difficulty: "medium"
	description: "Fix off-by-one errors and logic bugs in a financial calculation module."
	grader:
	type: "programmatic"
	endpoint: "/grade/task_4"
	success_threshold: 0.6
	reward_range: [0.0, 1.0]
	description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

	- id: "task_5"
	title: "Linked List Management"
	difficulty: "medium"
	description: "Implement a safe singly-linked list with push, pop, and peek operations."
	grader:
	type: "programmatic"
	endpoint: "/grade/task_5"
	success_threshold: 0.6
	reward_range: [0.0, 1.0]
	description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

	- id: "task_6"
	title: "Multi-threaded Deadlocks"
	difficulty: "hard"
	description: "Identify and fix deadlock conditions in a multi-threaded producer-consumer pattern."
	grader:
	type: "programmatic"
	endpoint: "/grade/task_6"
	success_threshold: 0.5
	reward_range: [0.0, 1.0]
	description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

	- id: "task_7"
	title: "Async Borrowing"
	difficulty: "hard"
	description: "Fix async/await borrowing conflicts in a concurrent file processor."
	grader:
	type: "programmatic"
	endpoint: "/grade/task_7"
	success_threshold: 0.5
	reward_range: [0.0, 1.0]
	description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

	- id: "task_8"
	title: "Unsafe FFI Integration"
	difficulty: "hard"
	description: "Write safe Rust wrappers around unsafe FFI calls to a C library."
	grader:
	type: "programmatic"
	endpoint: "/grade/task_8"
	success_threshold: 0.5
	reward_range: [0.0, 1.0]
	description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

	- id: "task_9"
	title: "Inefficient Data Pipelines"
	difficulty: "hard"
	description: "Optimize a data transformation pipeline using iterators and avoiding unnecessary allocations."
	grader:
	type: "programmatic"
	endpoint: "/grade/task_9"
	success_threshold: 0.5
	reward_range: [0.0, 1.0]
	description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

	- id: "task_10"
	title: "Memory Leak Prevention"
	difficulty: "hard"
	description: "Fix memory leak patterns in a custom allocator and ensure proper Drop implementations."
	grader:
	type: "programmatic"
	endpoint: "/grade/task_10"
	success_threshold: 0.4
	reward_range: [0.0, 1.0]
	description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

	# Definitions for Documentation and Graders
	action_space:
	type: "RustCoderAction"
	description: "A single string containing the fixed Rust code."

	observation_space:
	type: "RustCoderObservation"
	description: "Observation containing problem description, compilation logs, test results, and reward breakdown."