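# NOTE(review): the captured copy of this file began with the stray lines
# "Spaces:" / "Sleeping" / "Sleeping". They look like hosting-page status text
# rather than manifest content; confirm and remove this note if so.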
# Environment manifest: top-level metadata for the `rust_coder` environment.
# NOTE(review): the `openenv` tag suggests this file follows the OpenEnv
# manifest schema; confirm the key names against that spec.
spec_version: 1
name: rust_coder
description: "High-fidelity RL environment for evaluating LLM agents on Rust systems programming, including borrow checking, safe concurrency, and memory management."
type: space

# Serving configuration.
# NOTE(review): `app` is presumably a `module:attribute` import path for the
# FastAPI application; verify against the server package.
runtime: fastapi
app: server.app:app
port: 8000
dockerfile: Dockerfile

tags:
  - openenv
  - software-engineering
  - rust
  - coding-benchmark
# Task Definition (Easy -> Medium -> Hard)
# Each task has a grader that scores submissions 0.0-1.0
#
# Layout of one task entry:
#   id / title / difficulty / description  - task-level fields
#   grader                                 - nested mapping; its own
#                                            `description` is kept inside
#                                            `grader` so it does not collide
#                                            with the task-level `description`.
# The listed success_threshold values step down with difficulty:
# 0.7 (easy), 0.6 (medium), 0.5 (hard), and 0.4 for task_10.
tasks:
  - id: "task_1"
    title: "Broken CLI Argument Parser"
    difficulty: "easy"
    description: "Fix enum variant mismatches and incomplete match arms in a CLI argument parser."
    grader:
      type: "programmatic"
      endpoint: "/grade/task_1"
      success_threshold: 0.7
      reward_range: [0.0, 1.0]
      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

  - id: "task_2"
    title: "Conflicting Borrows in Collection Processing"
    difficulty: "easy"
    description: "Resolve mutable/immutable borrow conflicts in a string collection processor."
    grader:
      type: "programmatic"
      endpoint: "/grade/task_2"
      success_threshold: 0.7
      reward_range: [0.0, 1.0]
      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

  - id: "task_3"
    title: "Lifetime Annotations"
    difficulty: "medium"
    description: "Add correct lifetime annotations to enable a struct holding references to work properly."
    grader:
      type: "programmatic"
      endpoint: "/grade/task_3"
      success_threshold: 0.6
      reward_range: [0.0, 1.0]
      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

  - id: "task_4"
    title: "Business Logic Bug"
    difficulty: "medium"
    description: "Fix off-by-one errors and logic bugs in a financial calculation module."
    grader:
      type: "programmatic"
      endpoint: "/grade/task_4"
      success_threshold: 0.6
      reward_range: [0.0, 1.0]
      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

  - id: "task_5"
    title: "Linked List Management"
    difficulty: "medium"
    description: "Implement a safe singly-linked list with push, pop, and peek operations."
    grader:
      type: "programmatic"
      endpoint: "/grade/task_5"
      success_threshold: 0.6
      reward_range: [0.0, 1.0]
      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

  - id: "task_6"
    title: "Multi-threaded Deadlocks"
    difficulty: "hard"
    description: "Identify and fix deadlock conditions in a multi-threaded producer-consumer pattern."
    grader:
      type: "programmatic"
      endpoint: "/grade/task_6"
      success_threshold: 0.5
      reward_range: [0.0, 1.0]
      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

  - id: "task_7"
    title: "Async Borrowing"
    difficulty: "hard"
    description: "Fix async/await borrowing conflicts in a concurrent file processor."
    grader:
      type: "programmatic"
      endpoint: "/grade/task_7"
      success_threshold: 0.5
      reward_range: [0.0, 1.0]
      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

  - id: "task_8"
    title: "Unsafe FFI Integration"
    difficulty: "hard"
    description: "Write safe Rust wrappers around unsafe FFI calls to a C library."
    grader:
      type: "programmatic"
      endpoint: "/grade/task_8"
      success_threshold: 0.5
      reward_range: [0.0, 1.0]
      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

  - id: "task_9"
    title: "Inefficient Data Pipelines"
    difficulty: "hard"
    description: "Optimize a data transformation pipeline using iterators and avoiding unnecessary allocations."
    grader:
      type: "programmatic"
      endpoint: "/grade/task_9"
      success_threshold: 0.5
      reward_range: [0.0, 1.0]
      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"

  - id: "task_10"
    title: "Memory Leak Prevention"
    difficulty: "hard"
    description: "Fix memory leak patterns in a custom allocator and ensure proper Drop implementations."
    grader:
      type: "programmatic"
      endpoint: "/grade/task_10"
      success_threshold: 0.4
      reward_range: [0.0, 1.0]
      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# Definitions for Documentation and Graders
# `type` and `description` are nested under `action_space` so they do not
# duplicate the manifest's own top-level `type` / `description` keys.
# NOTE(review): `RustCoderAction` is presumably a model class defined in the
# environment's code; confirm the name matches.
action_space:
  type: "RustCoderAction"
  description: "A single string containing the fixed Rust code."
# Shape of what the environment reports back to the agent. `type` and
# `description` are nested here so they do not duplicate the manifest's own
# top-level keys.
# NOTE(review): `RustCoderObservation` is presumably a model class defined in
# the environment's code; confirm the name matches.
observation_space:
  type: "RustCoderObservation"
  description: "Observation containing problem description, compilation logs, test results, and reward breakdown."