# rust_coder / openenv.yaml
# Hugging Face page residue (not part of the YAML document):
#   uploader: Parthiban007; commit 8a096e2 (verified), message
#   "Upload folder using huggingface_hub"; file size 4.94 kB.
# OpenEnv manifest: identity of the rust_coder environment.
spec_version: 1
name: rust_coder
description: >-
  High-fidelity RL environment for evaluating LLM agents on Rust systems
  programming, including borrow checking, safe concurrency, and memory
  management.

# Runtime and packaging fields.
type: space
runtime: fastapi
# Quoted defensively because the value contains a colon.
app: "server.app:app"
port: 8000
dockerfile: Dockerfile

# Free-form labels attached to this environment.
tags:
- openenv
- software-engineering
- rust
- coding-benchmark
# Task definitions, listed in order of difficulty (easy -> medium -> hard).
# Each task carries a `grader` mapping whose `reward_range` is [0.0, 1.0].
tasks:
# task_1 (easy). The nested `grader.description` lists the score weights and
# is separate from the task-level `description`.
- id: "task_1"
  title: "Broken CLI Argument Parser"
  difficulty: "easy"
  description: "Fix enum variant mismatches and incomplete match arms in a CLI argument parser."
  grader:
    type: "programmatic"
    endpoint: "/grade/task_1"
    success_threshold: 0.7
    reward_range: [0.0, 1.0]
    description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# task_2 (easy). The nested `grader.description` lists the score weights and
# is separate from the task-level `description`.
- id: "task_2"
  title: "Conflicting Borrows in Collection Processing"
  difficulty: "easy"
  description: "Resolve mutable/immutable borrow conflicts in a string collection processor."
  grader:
    type: "programmatic"
    endpoint: "/grade/task_2"
    success_threshold: 0.7
    reward_range: [0.0, 1.0]
    description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# task_3 (medium). The nested `grader.description` lists the score weights and
# is separate from the task-level `description`.
- id: "task_3"
  title: "Lifetime Annotations"
  difficulty: "medium"
  description: "Add correct lifetime annotations to enable a struct holding references to work properly."
  grader:
    type: "programmatic"
    endpoint: "/grade/task_3"
    success_threshold: 0.6
    reward_range: [0.0, 1.0]
    description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# task_4 (medium). The nested `grader.description` lists the score weights and
# is separate from the task-level `description`.
- id: "task_4"
  title: "Business Logic Bug"
  difficulty: "medium"
  description: "Fix off-by-one errors and logic bugs in a financial calculation module."
  grader:
    type: "programmatic"
    endpoint: "/grade/task_4"
    success_threshold: 0.6
    reward_range: [0.0, 1.0]
    description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# task_5 (medium). The nested `grader.description` lists the score weights and
# is separate from the task-level `description`.
- id: "task_5"
  title: "Linked List Management"
  difficulty: "medium"
  description: "Implement a safe singly-linked list with push, pop, and peek operations."
  grader:
    type: "programmatic"
    endpoint: "/grade/task_5"
    success_threshold: 0.6
    reward_range: [0.0, 1.0]
    description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# task_6 (hard). The nested `grader.description` lists the score weights and
# is separate from the task-level `description`.
- id: "task_6"
  title: "Multi-threaded Deadlocks"
  difficulty: "hard"
  description: "Identify and fix deadlock conditions in a multi-threaded producer-consumer pattern."
  grader:
    type: "programmatic"
    endpoint: "/grade/task_6"
    success_threshold: 0.5
    reward_range: [0.0, 1.0]
    description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# task_7 (hard). The nested `grader.description` lists the score weights and
# is separate from the task-level `description`.
- id: "task_7"
  title: "Async Borrowing"
  difficulty: "hard"
  description: "Fix async/await borrowing conflicts in a concurrent file processor."
  grader:
    type: "programmatic"
    endpoint: "/grade/task_7"
    success_threshold: 0.5
    reward_range: [0.0, 1.0]
    description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# task_8 (hard). The nested `grader.description` lists the score weights and
# is separate from the task-level `description`.
- id: "task_8"
  title: "Unsafe FFI Integration"
  difficulty: "hard"
  description: "Write safe Rust wrappers around unsafe FFI calls to a C library."
  grader:
    type: "programmatic"
    endpoint: "/grade/task_8"
    success_threshold: 0.5
    reward_range: [0.0, 1.0]
    description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# task_9 (hard). The nested `grader.description` lists the score weights and
# is separate from the task-level `description`.
- id: "task_9"
  title: "Inefficient Data Pipelines"
  difficulty: "hard"
  description: "Optimize a data transformation pipeline using iterators and avoiding unnecessary allocations."
  grader:
    type: "programmatic"
    endpoint: "/grade/task_9"
    success_threshold: 0.5
    reward_range: [0.0, 1.0]
    description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# task_10 (hard). The nested `grader.description` lists the score weights and
# is separate from the task-level `description`.
- id: "task_10"
  title: "Memory Leak Prevention"
  difficulty: "hard"
  description: "Fix memory leak patterns in a custom allocator and ensure proper Drop implementations."
  grader:
    type: "programmatic"
    endpoint: "/grade/task_10"
    success_threshold: 0.4
    reward_range: [0.0, 1.0]
    description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
# Definitions for documentation and graders.
# `type` and `description` are nested here so they do not collide with the
# manifest's top-level `type` and `description` keys.
action_space:
  type: "RustCoderAction"
  description: "A single string containing the fixed Rust code."
# Observation type name and a summary of what it contains. The keys are nested
# so they do not collide with the manifest's top-level `type`/`description`.
observation_space:
  type: "RustCoderObservation"
  description: "Observation containing problem description, compilation logs, test results, and reward breakdown."