Spaces:

exploring-solver
/

deprec

Sleeping

File size: 19,021 Bytes

"""
Core DevOpsEnv environment logic.

Simulates a broken Linux server with:
- Task 1: Crashed Nginx service needing restart
- Task 2: Misconfigured Docker container
- Task 3: Memory leak in Python mock API

Manages episode lifecycle:
  reset() → Observation
  step(action) → StepResult
  get_state() → State
  grade() → (score, breakdown, feedback)
"""
from __future__ import annotations

import uuid
import json
import re
from typing import Any, Dict, Optional, Tuple, List

from data import TASK_META
from graders import grade_task
from models import (
    Action,
    Observation,
    Reward,
    State,
    StepResult,
    SystemState,
)

# In-memory store: episode_id → EpisodeState dict.
# reset() inserts a new entry and step()/grade() mutate it in place. Nothing in
# the visible code ever deletes an entry, so the store only grows while the
# process is alive.
_EPISODES: Dict[str, Dict[str, Any]] = {}


# ---------------------------------------------------------------------------
# Mock filesystem and system state
# ---------------------------------------------------------------------------

def _create_initial_state_task1() -> Dict[str, Any]:
    """Build the starting system state for Task 1 (nginx is down).

    A new dict is returned on every call. nginx is missing from the process
    list, its service status is "inactive" and only port 8080 is listed as
    open. The simulated filesystem holds the nginx configuration (keyed by
    ``NGINX_CONFIG_PATH``) and the nginx systemd unit file.
    """
    nginx_conf = """
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;
    sendfile on;
    keepalive_timeout 65;

    server {
        listen 80 default_server;
        server_name _;
        location / {
            return 200 "OK\\n";
        }
    }
}"""
    nginx_unit = """
[Unit]
Description=The NGINX HTTP and reverse proxy server
After=network.target

[Service]
Type=forking
PIDFile=/var/run/nginx.pid
ExecStartPre=/usr/sbin/nginx -t
ExecStart=/usr/sbin/nginx
ExecReload=/bin/kill -s HUP $MAINPID
ExecStop=/bin/kill -s QUIT $MAINPID
PrivateTmp=true

[Install]
WantedBy=multi-user.target"""

    # Only the base system daemons are up; nginx is deliberately absent.
    processes = [
        {"pid": 100, "name": "systemd"},
        {"pid": 105, "name": "sshd"},
    ]
    services = {"nginx": "inactive", "docker": "active", "mockapi": "active"}
    files = {
        NGINX_CONFIG_PATH: nginx_conf,
        "/etc/systemd/system/nginx.service": nginx_unit,
    }

    state: Dict[str, Any] = {}
    state["running_processes"] = processes
    state["service_status"] = services
    state["http_ports_open"] = [8080]  # port 80 is down
    state["docker_containers"] = []
    state["logs"] = "2026-03-29 01:30:00 nginx crashed\nCore dump detected.\n"
    state["files"] = files
    state["cpu_usage"] = 45.2
    state["memory_usage_mb"] = 256
    return state


def _create_initial_state_task2() -> Dict[str, Any]:
    """Build the starting system state for Task 2 (Docker port mapping is wrong).

    A new dict is returned on every call. The compose file stored under
    "/srv/docker-compose.yml" publishes "8000:3000", the mockapi service is
    marked "inactive", and one container is listed with the 8000->3000
    mapping.
    """
    compose_text = """
version: '3.8'
services:
  mockapi:
    image: mockapi:latest
    ports:
            - "8000:3000"
    environment:
      - PORT=3000
    volumes:
      - ./app.py:/app/app.py"""

    container = {
        "id": "abc123",
        "name": "mockapi-svc",
        "status": "running",
        "ports": "8000->3000/tcp",
    }
    process_table = [
        {"pid": pid, "name": name}
        for pid, name in ((100, "systemd"), (105, "sshd"), (200, "dockerd"))
    ]

    return dict(
        running_processes=process_table,
        service_status={"nginx": "active", "docker": "active", "mockapi": "inactive"},
        http_ports_open=[80],
        docker_containers=[container],
        logs="docker: port 3000 already in use\n",
        files={"/srv/docker-compose.yml": compose_text},
        cpu_usage=62.0,
        memory_usage_mb=1024,
    )


def _create_initial_state_task3() -> Dict[str, Any]:
    """Build the starting system state for Task 3 (mock API leaks memory).

    A new dict is returned on every call. A python3 process (pid 300) is
    listed with 2048 MB RSS, the log shows memory climbing minute by minute,
    and "/opt/mockapi/app.py" contains the unbounded ``request_cache`` list.
    """
    app_source = """
import json
from flask import Flask

app = Flask(__name__)

# BUG: This list grows unbounded
request_cache = []

@app.route('/api/data', methods=['GET'])
def get_data():
    data = {"timestamp": 123456, "value": 42}
    request_cache.append(data)  # MEMORY LEAK!
    return json.dumps(data)

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
"""
    log_lines = (
        "2026-03-29 01:45:00 mockapi started\n",
        "2026-03-29 01:46:00 memory usage: 512 MB\n",
        "2026-03-29 01:47:00 memory usage: 1024 MB\n",
        "2026-03-29 01:48:00 memory usage: 1536 MB (WARNING: HIGH)\n",
        "2026-03-29 01:49:00 memory usage: 2048 MB (CRITICAL)\n",
    )
    # The process whose resident memory has ballooned.
    leaky_process = {"pid": 300, "name": "python3", "rss_mb": 2048, "user": "appuser"}

    state: Dict[str, Any] = {
        "running_processes": [
            {"pid": 100, "name": "systemd"},
            {"pid": 105, "name": "sshd"},
            leaky_process,
        ],
        "service_status": dict(nginx="active", docker="active", mockapi="active"),
        "http_ports_open": [80, 5000],
        "docker_containers": [],
    }
    state["logs"] = "".join(log_lines)
    state["files"] = {"/opt/mockapi/app.py": app_source}
    state["cpu_usage"] = 85.5
    state["memory_usage_mb"] = 2048
    return state


# Paths of the simulated files that the task logic inspects.
# They are defined below the _create_initial_state_* factories; that works
# because the names are only looked up when those functions are called.
NGINX_CONFIG_PATH = "/etc/nginx/nginx.conf"  # key of the nginx config in the task 1 filesystem
DOCKER_COMPOSE_PATH = "/srv/docker-compose.yml"  # compose file checked by the task 2 logic
MOCK_API_PATH = "/opt/mockapi/app.py"  # mock API source checked by the task 3 logic


def _build_system_state(task_id: str, ep_state: Dict[str, Any]) -> SystemState:
    """Convert the raw ``system_state`` dict of an episode into a SystemState.

    Args:
        task_id: Identifier copied into the resulting object.
        ep_state: Episode record; only its "system_state" entry is read.

    Returns:
        A SystemState whose ``filesystem_snapshot`` is the JSON encoding of
        the simulated files. Every missing key falls back to an empty or
        zero value.
    """
    raw = ep_state["system_state"]
    files_copy = dict(raw.get("files", {}))
    snapshot = json.dumps(files_copy)
    commands = ["systemctl", "nginx", "docker", "curl", "ps", "cat", "vim"]

    return SystemState(
        task_id=task_id,
        available_commands=commands,
        filesystem_snapshot=snapshot,
        running_processes=raw.get("running_processes", []),
        service_status=raw.get("service_status", {}),
        logs=raw.get("logs", ""),
        http_ports_open=raw.get("http_ports_open", []),
        docker_containers=raw.get("docker_containers", []),
        cpu_usage=raw.get("cpu_usage", 0.0),
        memory_usage_mb=raw.get("memory_usage_mb", 0),
    )


# ---------------------------------------------------------------------------
# Dynamic execution simulation
# ---------------------------------------------------------------------------

def _simulate_bash_cmd(cmd: str, task_id: str, ep_state: Dict[str, Any]) -> str:
    """Simulate running a shell command against the episode's system state.

    Matching is done by substring on the lower-cased command, so surrounding
    flags, pipes or ``sudo`` do not prevent a command from being recognised.
    Recognised commands may mutate ``ep_state["system_state"]`` in place.

    Args:
        cmd: Command line as typed by the agent.
        task_id: Selects which task's command set is active.
        ep_state: Episode record holding the mutable "system_state" dict.

    Returns:
        The simulated command output, or a generic
        "Command '<cmd>' executed (simulated)" line when the command is not
        one the active task reacts to.
    """
    state_dict = ep_state["system_state"]
    lower_cmd = cmd.lower()

    handlers = {
        "task1": _simulate_task1_cmd,
        "task2": _simulate_task2_cmd,
        "task3": _simulate_task3_cmd,
    }
    handler = handlers.get(task_id)
    if handler is not None:
        output = handler(lower_cmd, state_dict)
        if output is not None:
            return output

    return f"Command '{cmd}' executed (simulated)"


def _simulate_task1_cmd(lower_cmd: str, state_dict: Dict[str, Any]) -> Optional[str]:
    """Handle the nginx commands of task 1; return None if unrecognised."""
    if "systemctl restart nginx" in lower_cmd or "systemctl start nginx" in lower_cmd:
        state_dict["service_status"]["nginx"] = "active"
        processes = state_dict["running_processes"]
        # Restarting an already running service must not stack up duplicate
        # nginx entries in the process table.
        if not any(proc.get("name") == "nginx" for proc in processes):
            processes.append({"pid": 999, "name": "nginx"})
        state_dict["http_ports_open"] = [80]
        return "Job for nginx.service started successfully."

    if "systemctl status nginx" in lower_cmd:
        if state_dict["service_status"]["nginx"] == "active":
            return "● nginx.service - NGINX HTTP Server\n   Loaded: loaded (/etc/systemd/system/nginx.service)\n   Active: active (running)"
        return "● nginx.service - NGINX HTTP Server\n   Active: inactive (dead)"

    if "nginx -t" in lower_cmd:
        return "nginx: the configuration file /etc/nginx/nginx.conf syntax is ok\nnginx: configuration file /etc/nginx/nginx.conf test is successful"

    # "curl http://localhost:80" is covered by this shorter prefix as well.
    if "curl http://localhost" in lower_cmd:
        if 80 in state_dict["http_ports_open"]:
            return "OK"
        return "curl: (7) Failed to connect to localhost port 80: Connection refused"

    return None


def _simulate_task2_cmd(lower_cmd: str, state_dict: Dict[str, Any]) -> Optional[str]:
    """Handle the docker commands of task 2; return None if unrecognised."""
    if "docker-compose up -d" in lower_cmd:
        if DOCKER_COMPOSE_PATH in state_dict["files"]:
            compose_content = state_dict["files"][DOCKER_COMPOSE_PATH]
            # Bring-up only succeeds once the bad mapping is gone and the
            # correct one is present.
            if "3000:3000" in compose_content and "8000:3000" not in compose_content:
                state_dict["docker_containers"] = [
                    {"id": "xyz789", "name": "mockapi-svc", "status": "running", "ports": "3000:3000/tcp"}
                ]
                state_dict["service_status"]["mockapi"] = "active"
                return "Creating mockapi ... done"
        return "ERROR: docker-compose.yml not found or invalid"

    if "docker ps" in lower_cmd:
        containers = state_dict["docker_containers"]
        if containers:
            return "\n".join(f"{c['id']} {c['name']} {c['status']}" for c in containers)
        return "No containers running"

    return None


def _simulate_task3_cmd(lower_cmd: str, state_dict: Dict[str, Any]) -> Optional[str]:
    """Handle the process/memory commands of task 3; return None if unrecognised."""
    processes = state_dict["running_processes"]
    python_procs = [proc for proc in processes if proc.get("name") == "python3"]

    if "ps aux" in lower_cmd:
        lines = [
            f"appuser {proc['pid']} 85.5 {proc.get('rss_mb', 512)} python3 /opt/mockapi/app.py\n"
            for proc in python_procs
        ]
        return "".join(lines) if lines else "No python processes found"

    if "kill" in lower_cmd:
        if "python" in lower_cmd:
            # Name-based kill (pkill/killall python...) hits every python3 process.
            targets = python_procs
        else:
            # PID-based kill: compare whole numbers against the PIDs that are
            # actually running, so the restarted app (pid 301) can be killed
            # and e.g. "kill 3000" does not hit pid 300.
            requested = {int(token) for token in re.findall(r"\d+", lower_cmd)}
            targets = [proc for proc in python_procs if proc.get("pid") in requested]
        if not targets:
            return "Process not found"

        survivors = [proc for proc in processes if proc not in targets]
        state_dict["running_processes"] = survivors
        # The service is only down once no python3 process is left.
        if not any(proc.get("name") == "python3" for proc in survivors):
            state_dict["service_status"]["mockapi"] = "inactive"
            state_dict["memory_usage_mb"] = 1100
        return "Process killed"

    # Also matches the backgrounded form "python3 /opt/mockapi/app.py &".
    if "python3 /opt/mockapi/app.py" in lower_cmd:
        app_content = state_dict.get("files", {}).get(MOCK_API_PATH, "")
        leak_fixed = "request_cache.append" not in app_content
        rss_mb = 256 if leak_fixed else 1700
        processes.append({"pid": 301, "name": "python3", "rss_mb": rss_mb, "user": "appuser"})
        state_dict["service_status"]["mockapi"] = "active"
        state_dict["http_ports_open"] = [80, 5000]
        state_dict["memory_usage_mb"] = 700 if leak_fixed else 1800
        return "Application started"

    return None


def _simulate_file_edit(file_path: str, new_content: str, ep_state: Dict[str, Any]) -> str:
    """Overwrite a file in the simulated filesystem and describe the result.

    Only files that already exist in the simulated filesystem can be edited.
    The new content is always stored; the returned message additionally
    says whether the edit looks like the task fix. The criteria are the same
    ones the command simulator and the reward logic use: the compose file
    must contain "3000:3000" and no longer contain "8000:3000", and the mock
    API source must no longer contain "request_cache.append".

    Args:
        file_path: Path of the file to overwrite.
        new_content: Complete replacement content.
        ep_state: Episode record holding the mutable "system_state" dict.

    Returns:
        "ERROR: File <path> not found" for unknown paths, otherwise one of
        "File <path> updated successfully" (compose fix),
        "File <path> patched successfully" (leak fix) or
        "File <path> edited".
    """
    files = ep_state["system_state"].get("files", {})
    if file_path not in files:
        return f"ERROR: File {file_path} not found"

    files[file_path] = new_content

    # Task 2: the port mapping counts as fixed only when the bad mapping is gone too.
    if (
        file_path == DOCKER_COMPOSE_PATH
        and "3000:3000" in new_content
        and "8000:3000" not in new_content
    ):
        return f"File {file_path} updated successfully"

    # Task 3: the leak is the append call, not the list declaration.
    if file_path == MOCK_API_PATH and "request_cache.append" not in new_content:
        return f"File {file_path} patched successfully"

    return f"File {file_path} edited"


# ---------------------------------------------------------------------------
# Reward calculation
# ---------------------------------------------------------------------------

def _calculate_step_reward(task_id: str, action: Action, ep_state: Dict[str, Any]) -> Tuple[float, str]:
    """Compute the shaped reward for the action that was just executed.

    The action is expected to be the last entry of
    ``ep_state["action_history"]`` already, with the simulator's "output" or
    "result" attached to that entry; an empty history is tolerated and
    treated as "no output".

    Reward components:
        * -0.02 flat cost for every step.
        * -0.05 when the last two history entries are identical in
          action_type, command, file_path and file_content. The content is
          part of the comparison so that two different edits to the same
          file are not mistaken for a repeat.
        * bash_cmd: +0.05, plus task-specific bonuses that are only paid
          when the recorded output shows the command achieved something
          (successful curl, compose bring-up, an actual process kill,
          restart with memory below 1024 MB). ``nginx -t`` earns +0.05
          unconditionally.
        * file_edit: +0.03, -0.12 when the edit result is an error, +0.12
          when the content contains the task's fix.
        * submit: +0.1.

    Args:
        task_id: Task the episode belongs to.
        action: The action that was executed; read for action_type, command,
            file_path and file_content.
        ep_state: Episode record ("action_history" and "system_state").

    Returns:
        ``(reward, explanation)``. When the repeat penalty was applied the
        explanation ends with " | repeated identical action penalty".
    """
    reward = -0.02  # flat per-step cost

    history = ep_state.get("action_history", [])
    latest = history[-1] if history else {}

    repeated = False
    if len(history) >= 2:
        prev, curr = history[-2], history[-1]
        repeated = all(
            prev.get(field) == curr.get(field)
            for field in ("action_type", "command", "file_path", "file_content")
        )
    if repeated:
        reward -= 0.05

    if action.action_type == "bash_cmd":
        cmd = action.command or ""
        lower_cmd = cmd.lower()
        output = str(latest.get("output", ""))
        reward += 0.05
        explanation = f"Executed: {cmd[:50]}"

        if task_id == "task1":
            if "nginx -t" in lower_cmd:
                reward += 0.05
                explanation += " | validated nginx config"
            if "curl" in lower_cmd and "OK" in output:
                reward += 0.08
                explanation += " | verified HTTP health"
        elif task_id == "task2":
            if "docker-compose up -d" in lower_cmd:
                lower_output = output.lower()
                if "done" in lower_output or "creating" in lower_output:
                    reward += 0.1
                    explanation += " | compose bring-up success"
        elif task_id == "task3":
            # Pay the bonus only when the simulator reports a real kill, so
            # kill commands aimed at nothing cannot be used to farm reward.
            if "kill" in lower_cmd and "Process killed" in output:
                reward += 0.07
                explanation += " | terminated leaky process"
            if "python3 /opt/mockapi/app.py" in lower_cmd:
                mem = ep_state["system_state"].get("memory_usage_mb", 2048)
                if mem < 1024:
                    reward += 0.12
                    explanation += " | restarted with lower memory"

    elif action.action_type == "file_edit":
        reward += 0.03
        explanation = f"Edited: {action.file_path}"

        result = str(latest.get("result", ""))
        if "ERROR" in result:
            reward -= 0.12
            explanation += " | invalid edit target"
        elif task_id == "task2" and action.file_path == DOCKER_COMPOSE_PATH:
            content = action.file_content or ""
            if "3000:3000" in content and "8000:3000" not in content:
                reward += 0.12
                explanation += " | corrected port mapping"
        elif task_id == "task3" and action.file_path == MOCK_API_PATH:
            content = action.file_content or ""
            if "request_cache.append" not in content:
                reward += 0.12
                explanation += " | removed leak pattern"

    elif action.action_type == "submit":
        reward += 0.1
        explanation = "Episode submitted for grading"

    else:
        explanation = "Step taken"

    # Keep the penalty visible instead of letting the action text replace it.
    if repeated:
        explanation += " | repeated identical action penalty"

    return reward, explanation


# ---------------------------------------------------------------------------
# Core API functions
# ---------------------------------------------------------------------------

def reset(task_id: str) -> Observation:
    """Start a fresh episode for ``task_id`` and return its first observation.

    A new episode record (step counter at zero, empty history, task-specific
    system state) is stored in ``_EPISODES`` under a random UUID.

    Raises:
        ValueError: If ``task_id`` is not a key of ``TASK_META``.
    """
    if task_id not in TASK_META:
        raise ValueError(f"Unknown task_id {task_id!r}. Valid: {list(TASK_META)}")

    meta = TASK_META[task_id]

    # Tasks without a dedicated factory start from an empty system state.
    state_factories = {
        "task1": _create_initial_state_task1,
        "task2": _create_initial_state_task2,
        "task3": _create_initial_state_task3,
    }
    make_state = state_factories.get(task_id)
    initial_sys_state = make_state() if make_state is not None else {}

    episode_id = str(uuid.uuid4())
    episode = {
        "task_id": task_id,
        "step_number": 0,
        "max_steps": meta["max_steps"],
        "done": False,
        "total_reward": 0.0,
        "action_history": [],
        "final_score": None,
        "system_state": initial_sys_state,
    }
    _EPISODES[episode_id] = episode

    return Observation(
        task_id=task_id,
        task_description=meta["description"],
        episode_id=episode_id,
        system_state=_build_system_state(task_id, episode),
        thread_history=[],
        available_actions=meta["available_actions"],
        step_number=0,
        max_steps=meta["max_steps"],
        hint="Start by diagnosing the system state with basic commands.",
    )


def step(episode_id: str, action: Action) -> StepResult:
    """Apply one agent action to an episode and report the outcome.

    The action is appended to the episode history, executed against the
    simulated system (for "bash_cmd" and "file_edit"), rewarded, and, when
    the episode ends, graded. The episode ends on a "submit" action or when
    the step budget is used up; in that case half of the grader score is
    added to the step reward.

    Raises:
        KeyError: If no episode with ``episode_id`` exists.
        ValueError: If the episode has already finished.
    """
    episode = _EPISODES.get(episode_id)
    if episode is None:
        raise KeyError(f"Episode {episode_id} not found")
    if episode["done"]:
        raise ValueError(f"Episode {episode_id} is already done.")

    task_id = episode["task_id"]
    task_meta = TASK_META[task_id]

    # The action is recorded before it is executed: the simulator result is
    # attached to this same history entry, and the reward logic reads it back.
    episode["step_number"] += 1
    entry = action.model_dump()
    episode["action_history"].append(entry)

    if action.action_type == "bash_cmd":
        entry["output"] = _simulate_bash_cmd(action.command or "", task_id, episode)
    elif action.action_type == "file_edit":
        entry["result"] = _simulate_file_edit(
            action.file_path or "", action.file_content or "", episode
        )

    finished = (
        action.action_type == "submit"
        or episode["step_number"] >= episode["max_steps"]
    )

    step_reward, explanation = _calculate_step_reward(task_id, action, episode)

    final_score = None
    if finished:
        # Terminal step: fold half of the grader score into the reward.
        final_score, _breakdown, _feedback = grade_task(task_id, episode)
        episode["final_score"] = final_score
        bonus = final_score * 0.5
        step_reward += bonus
        explanation += f" | Grader score: {final_score:.3f} (+{bonus:.3f} bonus)"

    episode["total_reward"] = round(episode["total_reward"] + step_reward, 4)
    episode["done"] = finished

    observation = Observation(
        task_id=task_id,
        task_description=task_meta["description"],
        episode_id=episode_id,
        system_state=_build_system_state(task_id, episode),
        thread_history=[
            {"role": "agent", "content": str(past)}
            for past in episode["action_history"]
        ],
        available_actions=[] if finished else task_meta["available_actions"],
        step_number=episode["step_number"],
        max_steps=episode["max_steps"],
        hint="Continue diagnosing and fixing the issue." if not finished else None,
    )

    reward = Reward(
        step_reward=round(step_reward, 4),
        total_reward=episode["total_reward"],
        explanation=explanation,
    )

    info = {"step": episode["step_number"]}
    if finished:
        info["final_score"] = final_score

    return StepResult(observation=observation, reward=reward, done=finished, info=info)


def get_state(episode_id: str) -> State:
    """Report progress, accumulated reward and history of an episode.

    Raises:
        KeyError: If no episode with ``episode_id`` exists.
    """
    episode = _EPISODES.get(episode_id)
    if episode is None:
        raise KeyError(f"Episode {episode_id} not found")

    snapshot = {
        "task_id": episode["task_id"],
        "episode_id": episode_id,
        "step_number": episode["step_number"],
        "max_steps": episode["max_steps"],
        "done": episode["done"],
        "total_reward": episode["total_reward"],
        "history": episode["action_history"],
        # Read with .get(), so a record without the key yields None.
        "final_score": episode.get("final_score"),
    }
    return State(**snapshot)


def grade(episode_id: str) -> Tuple[float, Dict[str, float], str]:
    """Run the grader on a finished episode and store the resulting score.

    Returns:
        ``(score, breakdown, feedback)`` exactly as produced by ``grade_task``.

    Raises:
        KeyError: If no episode with ``episode_id`` exists.
        ValueError: If the episode has not finished yet.
    """
    episode = _EPISODES.get(episode_id)
    if episode is None:
        raise KeyError(f"Episode {episode_id} not found")
    if not episode.get("done"):
        raise ValueError(f"Episode {episode_id} is not done yet")

    result = grade_task(episode["task_id"], episode)
    score, breakdown, feedback = result
    episode["final_score"] = score  # keep get_state() in sync with the latest grading
    return score, breakdown, feedback