Spaces:

Parthiban007
/

rust_coder

Running

File size: 6,909 Bytes

"""
FastAPI application for the Rust Coder OpenEnv environment.

Entrypoint: server.app:app  (see openenv.yaml and Dockerfile CMD)

Standard OpenEnv endpoints (via create_app):
    POST /reset   — start a new episode
    POST /step    — submit an action, receive observation + reward
    GET  /state   — current episode state
    GET  /schema  — action / observation JSON schemas
    WS   /ws      — WebSocket interface

Custom endpoints:
    GET  /health               — health check
    GET  /tasks                — list all tasks with action schema
    POST /grader?task_id=X     — programmatic grader for task X
"""

import os
import logging

from dotenv import load_dotenv
from fastapi import HTTPException

from openenv.core.env_server.http_server import create_app

from models import RustCoderAction, RustCoderObservation, TaskInfo
from server.rust_coder_environment import RustCoderEnvironment

load_dotenv()

_LOG_LEVEL = (os.getenv("LOG_LEVEL") or "INFO").upper()
logging.basicConfig(
    level=getattr(logging, _LOG_LEVEL, logging.INFO),
    format="%(asctime)s %(levelname)s %(name)s - %(message)s",
)

app = create_app(
    RustCoderEnvironment,
    RustCoderAction,
    RustCoderObservation,
    env_name="rust_coder",
    max_concurrent_envs=1,
)

# ---------------------------------------------------------------------------
# Task registry
# ---------------------------------------------------------------------------

TASK_REGISTRY = {
    "task_1": {
        "index": 0,
        "difficulty": "easy",
        "description": "Fix enum variant mismatches and incomplete match arms in a CLI argument parser.",
        "success_threshold": 0.7,
    },
    "task_2": {
        "index": 1,
        "difficulty": "easy",
        "description": "Resolve mutable/immutable borrow conflicts in a string collection processor.",
        "success_threshold": 0.7,
    },
    "task_3": {
        "index": 2,
        "difficulty": "medium",
        "description": "Add correct lifetime annotations so a struct holding references compiles and works.",
        "success_threshold": 0.6,
    },
    "task_4": {
        "index": 3,
        "difficulty": "medium",
        "description": "Fix off-by-one errors and logic bugs in a financial calculation module.",
        "success_threshold": 0.6,
    },
    "task_5": {
        "index": 4,
        "difficulty": "medium",
        "description": "Implement a safe singly-linked list with push, pop, and peek operations.",
        "success_threshold": 0.6,
    },
    "task_6": {
        "index": 5,
        "difficulty": "hard",
        "description": "Identify and fix deadlock conditions in a multi-threaded producer-consumer pattern.",
        "success_threshold": 0.5,
    },
    "task_7": {
        "index": 6,
        "difficulty": "hard",
        "description": "Fix async/await borrowing conflicts in a concurrent file processor.",
        "success_threshold": 0.5,
    },
    "task_8": {
        "index": 7,
        "difficulty": "hard",
        "description": "Write safe Rust wrappers around unsafe FFI calls to a C library.",
        "success_threshold": 0.5,
    },
    "task_9": {
        "index": 8,
        "difficulty": "hard",
        "description": "Optimize a data pipeline using iterators and avoiding unnecessary allocations.",
        "success_threshold": 0.5,
    },
    "task_10": {
        "index": 9,
        "difficulty": "hard",
        "description": "Fix memory leak patterns and ensure correct Drop implementations.",
        "success_threshold": 0.4,
    },
}

TASK_IDS = list(TASK_REGISTRY.keys())


# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------

@app.get("/health")
async def health_check():
    return {"status": "healthy"}


@app.get("/tasks")
async def list_tasks():
    """
    Return all available tasks.

    The competition platform enumerates this endpoint to discover tasks.
    Each entry includes task_id, difficulty, description, and action_schema.
    """
    return [
        TaskInfo(
            task_id=task_id,
            difficulty=task["difficulty"],
            description=task["description"],
            action_schema=RustCoderAction.model_json_schema(),
        )
        for task_id, task in TASK_REGISTRY.items()
    ]


@app.post("/grader")
async def grader(task_id: str, action: RustCoderAction):
    """
    Programmatic grader for a specific task.

    Usage:  POST /grader?task_id=task_1
    Body:   {"code": "<rust source code>"}

    Scores are strictly in the open interval (0, 1):
      - Minimum 0.01  — floor for any submission (even empty/non-compiling)
      - Maximum 0.99  — ceiling so no submission hits the theoretical perfect
      - Weighted: Compilation(40%) + Correctness(20%) + Coverage(20%) +
                  Elegance(10%) + Efficiency(10%)
    """
    task_meta = TASK_REGISTRY.get(task_id)
    if task_meta is None:
        raise HTTPException(
            status_code=404,
            detail=f"Unknown task_id '{task_id}'. Valid IDs: {TASK_IDS}",
        )

    _EMPTY_BREAKDOWN = {
        "compilation": 0.0,
        "correctness": 0.0,
        "coverage":    0.0,
        "elegance":    0.0,
        "efficiency":  0.0,
    }

    # Fast path: empty code — skip compilation + avoid triggering auto-LLM
    if not action.code.strip():
        return {
            "task_id":            task_id,
            "score":              0.01,
            "passed":             0,
            "total":              1,
            "metric":             "rust_code_quality",
            "reward_breakdown":   _EMPTY_BREAKDOWN,
            "compilation_success": False,
            "compilation_output": "No code submitted.",
            "test_results":       [],
        }

    # Full evaluation path
    env = RustCoderEnvironment()
    env.reset(start_index=task_meta["index"])
    obs = env.step(action)

    # Explicit None check — 0.0 is falsy but a valid reward
    raw_score = float(obs.reward if obs.reward is not None else 0.0)
    # Enforce strictly open interval (0, 1) — never exactly 0.0 or 1.0
    score   = round(max(0.01, min(0.99, raw_score)), 4)
    success = score >= task_meta["success_threshold"]

    return {
        "task_id":             task_id,
        "score":               score,
        "passed":              1 if success else 0,
        "total":               1,
        "metric":              "rust_code_quality",
        "reward_breakdown":    obs.reward_breakdown,
        "compilation_success": obs.compilation_success,
        "compilation_output":  obs.compilation_output,
        "test_results":        obs.test_results,
    }


def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    import uvicorn
    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    main()