Spaces:
Running
Running
Commit ·
9347ce5
1
Parent(s): 250ab26
FIX: improve some structure and add env variables
Browse files
- Dockerfile +12 -0
- server/app.py → app.py +2 -2
- server/environment.py → environment.py +1 -1
- server/graders.py → graders.py +0 -0
- inference.py +9 -6
- openenv.yaml +7 -7
- server/Dockerfile +0 -80
- server/Incident_Triage_environment.py +0 -104
- server/requirements.txt +0 -6
Dockerfile
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
COPY requirements.txt .
|
| 6 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
+
|
| 8 |
+
COPY . .
|
| 9 |
+
|
| 10 |
+
EXPOSE 7860
|
| 11 |
+
|
| 12 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
server/app.py → app.py
RENAMED
|
@@ -4,8 +4,8 @@
|
|
| 4 |
import uuid
|
| 5 |
from fastapi import FastAPI, HTTPException
|
| 6 |
from models import IncidentAction, StepResult
|
| 7 |
-
from
|
| 8 |
-
from
|
| 9 |
|
| 10 |
app = FastAPI(title="Incident Triage Environment")
|
| 11 |
|
|
|
|
| 4 |
import uuid
|
| 5 |
from fastapi import FastAPI, HTTPException
|
| 6 |
from models import IncidentAction, StepResult
|
| 7 |
+
from environment import IncidentEnv
|
| 8 |
+
from graders import GRADERS
|
| 9 |
|
| 10 |
app = FastAPI(title="Incident Triage Environment")
|
| 11 |
|
server/environment.py → environment.py
RENAMED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
import random
|
| 5 |
from models import IncidentAction, IncidentObservation, StepResult
|
| 6 |
from incidents import TICKETS
|
| 7 |
-
from
|
| 8 |
|
| 9 |
|
| 10 |
class IncidentEnv:
|
|
|
|
| 4 |
import random
|
| 5 |
from models import IncidentAction, IncidentObservation, StepResult
|
| 6 |
from incidents import TICKETS
|
| 7 |
+
from graders import GRADERS
|
| 8 |
|
| 9 |
|
| 10 |
class IncidentEnv:
|
server/graders.py → graders.py
RENAMED
|
File without changes
|
inference.py
CHANGED
|
@@ -12,8 +12,8 @@ load_dotenv()
|
|
| 12 |
|
| 13 |
BASE_URL = "http://localhost:8000"
|
| 14 |
client = OpenAI(
|
| 15 |
-
base_url=
|
| 16 |
-
api_key=os.getenv("
|
| 17 |
)
|
| 18 |
|
| 19 |
SYSTEM_PROMPT = """You are an expert SRE (Site Reliability Engineer) triaging production incidents.
|
|
@@ -67,7 +67,7 @@ def call_llm(observation: dict) -> str:
|
|
| 67 |
full_response = ""
|
| 68 |
try:
|
| 69 |
completion = client.chat.completions.create(
|
| 70 |
-
model=
|
| 71 |
messages=[
|
| 72 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 73 |
{"role": "user", "content": build_user_prompt(observation)}
|
|
@@ -133,6 +133,8 @@ def run_episode(task_type: str = None) -> dict:
|
|
| 133 |
step_response = requests.post(f"{BASE_URL}/step", json=action, params={"session_id": session_id})
|
| 134 |
step_response.raise_for_status()
|
| 135 |
result = step_response.json()
|
|
|
|
|
|
|
| 136 |
|
| 137 |
print(f"Answer : {result['agent_answer']}")
|
| 138 |
print(f"Expected : {result['ground_truth']}")
|
|
@@ -150,9 +152,10 @@ def run_episode(task_type: str = None) -> dict:
|
|
| 150 |
|
| 151 |
|
| 152 |
def run_full_eval():
|
|
|
|
| 153 |
task_types = ["task1", "task2", "task3"]
|
| 154 |
|
| 155 |
-
rounds = len(TICKETS)
|
| 156 |
scores = []
|
| 157 |
errors = 0
|
| 158 |
|
|
@@ -185,7 +188,7 @@ def run_full_eval():
|
|
| 185 |
if task_scores[task]:
|
| 186 |
acc = sum(task_scores[task]) / len(task_scores[task]) * 100
|
| 187 |
print(f"{task} Accuracy : {acc:.2f}%")
|
| 188 |
-
|
| 189 |
-
|
| 190 |
if __name__ == "__main__":
|
| 191 |
run_full_eval()
|
|
|
|
| 12 |
|
| 13 |
BASE_URL = "http://localhost:8000"
|
| 14 |
client = OpenAI(
|
| 15 |
+
base_url=os.getenv("API_BASE_URL"),
|
| 16 |
+
api_key=os.getenv("HF_TOKEN")
|
| 17 |
)
|
| 18 |
|
| 19 |
SYSTEM_PROMPT = """You are an expert SRE (Site Reliability Engineer) triaging production incidents.
|
|
|
|
| 67 |
full_response = ""
|
| 68 |
try:
|
| 69 |
completion = client.chat.completions.create(
|
| 70 |
+
model=os.getenv("MODEL_NAME"),
|
| 71 |
messages=[
|
| 72 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 73 |
{"role": "user", "content": build_user_prompt(observation)}
|
|
|
|
| 133 |
step_response = requests.post(f"{BASE_URL}/step", json=action, params={"session_id": session_id})
|
| 134 |
step_response.raise_for_status()
|
| 135 |
result = step_response.json()
|
| 136 |
+
# This needs to be kept for submission grading, so we print it in a structured way
|
| 137 |
+
print(f"[STEP] task_id={result['task_type']} action={result['agent_answer']} reward={result['reward']}")
|
| 138 |
|
| 139 |
print(f"Answer : {result['agent_answer']}")
|
| 140 |
print(f"Expected : {result['ground_truth']}")
|
|
|
|
| 152 |
|
| 153 |
|
| 154 |
def run_full_eval():
|
| 155 |
+
print("[START]")
|
| 156 |
task_types = ["task1", "task2", "task3"]
|
| 157 |
|
| 158 |
+
rounds = len(TICKETS) # 🔥 FIXED
|
| 159 |
scores = []
|
| 160 |
errors = 0
|
| 161 |
|
|
|
|
| 188 |
if task_scores[task]:
|
| 189 |
acc = sum(task_scores[task]) / len(task_scores[task]) * 100
|
| 190 |
print(f"{task} Accuracy : {acc:.2f}%")
|
| 191 |
+
print("[END]")
|
| 192 |
+
|
| 193 |
if __name__ == "__main__":
|
| 194 |
run_full_eval()
|
openenv.yaml
CHANGED
|
@@ -2,8 +2,8 @@ spec_version: 1
|
|
| 2 |
name: Incident_Triage
|
| 3 |
type: space
|
| 4 |
runtime: fastapi
|
| 5 |
-
app:
|
| 6 |
-
port:
|
| 7 |
version: "1.0.0"
|
| 8 |
description: >
|
| 9 |
RL-style environment for SRE incident triage.
|
|
@@ -11,7 +11,7 @@ description: >
|
|
| 11 |
identify root cause, or recommend remediation actions.
|
| 12 |
|
| 13 |
api:
|
| 14 |
-
base_url: http://
|
| 15 |
endpoints:
|
| 16 |
reset:
|
| 17 |
method: POST
|
|
@@ -62,11 +62,11 @@ tasks:
|
|
| 62 |
reward: binary # 1.0 correct | 0.0 incorrect
|
| 63 |
|
| 64 |
dataset:
|
| 65 |
-
total_tickets:
|
| 66 |
split:
|
| 67 |
-
task1:
|
| 68 |
-
task2:
|
| 69 |
-
task3:
|
| 70 |
|
| 71 |
reproducibility:
|
| 72 |
llm_seed: 42
|
|
|
|
| 2 |
name: Incident_Triage
|
| 3 |
type: space
|
| 4 |
runtime: fastapi
|
| 5 |
+
app: app:app
|
| 6 |
+
port: 7860
|
| 7 |
version: "1.0.0"
|
| 8 |
description: >
|
| 9 |
RL-style environment for SRE incident triage.
|
|
|
|
| 11 |
identify root cause, or recommend remediation actions.
|
| 12 |
|
| 13 |
api:
|
| 14 |
+
base_url: http://0.0.0.0:7860
|
| 15 |
endpoints:
|
| 16 |
reset:
|
| 17 |
method: POST
|
|
|
|
| 62 |
reward: binary # 1.0 correct | 0.0 incorrect
|
| 63 |
|
| 64 |
dataset:
|
| 65 |
+
total_tickets: 36
|
| 66 |
split:
|
| 67 |
+
task1: 13
|
| 68 |
+
task2: 12
|
| 69 |
+
task3: 11
|
| 70 |
|
| 71 |
reproducibility:
|
| 72 |
llm_seed: 42
|
server/Dockerfile
DELETED
|
@@ -1,80 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the BSD-style license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
# Multi-stage build using openenv-base
|
| 8 |
-
# This Dockerfile is flexible and works for both:
|
| 9 |
-
# - In-repo environments (with local OpenEnv sources)
|
| 10 |
-
# - Standalone environments (with openenv from PyPI/Git)
|
| 11 |
-
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
-
|
| 13 |
-
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 14 |
-
FROM ${BASE_IMAGE} AS builder
|
| 15 |
-
|
| 16 |
-
WORKDIR /app
|
| 17 |
-
|
| 18 |
-
# Ensure git is available (required for installing dependencies from VCS)
|
| 19 |
-
RUN apt-get update && \
|
| 20 |
-
apt-get install -y --no-install-recommends git && \
|
| 21 |
-
rm -rf /var/lib/apt/lists/*
|
| 22 |
-
|
| 23 |
-
# Build argument to control whether we're building standalone or in-repo
|
| 24 |
-
ARG BUILD_MODE=in-repo
|
| 25 |
-
ARG ENV_NAME=Incident_Triage
|
| 26 |
-
|
| 27 |
-
# Copy environment code (always at root of build context)
|
| 28 |
-
COPY . /app/env
|
| 29 |
-
|
| 30 |
-
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
-
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
-
WORKDIR /app/env
|
| 33 |
-
|
| 34 |
-
# Ensure uv is available (for local builds where base image lacks it)
|
| 35 |
-
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 36 |
-
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 37 |
-
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 38 |
-
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 39 |
-
fi
|
| 40 |
-
|
| 41 |
-
# Install dependencies using uv sync
|
| 42 |
-
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 43 |
-
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
-
if [ -f uv.lock ]; then \
|
| 45 |
-
uv sync --frozen --no-install-project --no-editable; \
|
| 46 |
-
else \
|
| 47 |
-
uv sync --no-install-project --no-editable; \
|
| 48 |
-
fi
|
| 49 |
-
|
| 50 |
-
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 51 |
-
if [ -f uv.lock ]; then \
|
| 52 |
-
uv sync --frozen --no-editable; \
|
| 53 |
-
else \
|
| 54 |
-
uv sync --no-editable; \
|
| 55 |
-
fi
|
| 56 |
-
|
| 57 |
-
# Final runtime stage
|
| 58 |
-
FROM ${BASE_IMAGE}
|
| 59 |
-
|
| 60 |
-
WORKDIR /app
|
| 61 |
-
|
| 62 |
-
# Copy the virtual environment from builder
|
| 63 |
-
COPY --from=builder /app/env/.venv /app/.venv
|
| 64 |
-
|
| 65 |
-
# Copy the environment code
|
| 66 |
-
COPY --from=builder /app/env /app/env
|
| 67 |
-
|
| 68 |
-
# Set PATH to use the virtual environment
|
| 69 |
-
ENV PATH="/app/.venv/bin:$PATH"
|
| 70 |
-
|
| 71 |
-
# Set PYTHONPATH so imports work correctly
|
| 72 |
-
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 73 |
-
|
| 74 |
-
# Health check
|
| 75 |
-
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 76 |
-
CMD curl -f http://localhost:8000/health || exit 1
|
| 77 |
-
|
| 78 |
-
# Run the FastAPI server
|
| 79 |
-
# The module path is constructed to work with the /app/env structure
|
| 80 |
-
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
server/Incident_Triage_environment.py
DELETED
|
@@ -1,104 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the BSD-style license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
Incident Triage Environment Implementation.
|
| 9 |
-
|
| 10 |
-
A simple test environment that echoes back messages sent to it.
|
| 11 |
-
Perfect for testing HTTP server infrastructure.
|
| 12 |
-
"""
|
| 13 |
-
|
| 14 |
-
from uuid import uuid4
|
| 15 |
-
|
| 16 |
-
from openenv.core.env_server.interfaces import Environment
|
| 17 |
-
from openenv.core.env_server.types import State
|
| 18 |
-
|
| 19 |
-
try:
|
| 20 |
-
from ..models import IncidentTriageAction, IncidentTriageObservation
|
| 21 |
-
except ImportError:
|
| 22 |
-
from models import IncidentTriageAction, IncidentTriageObservation
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
class IncidentTriageEnvironment(Environment):
|
| 26 |
-
"""
|
| 27 |
-
A simple echo environment that echoes back messages.
|
| 28 |
-
|
| 29 |
-
This environment is designed for testing the HTTP server infrastructure.
|
| 30 |
-
It maintains minimal state and simply echoes back whatever message it receives.
|
| 31 |
-
|
| 32 |
-
Example:
|
| 33 |
-
>>> env = IncidentTriageEnvironment()
|
| 34 |
-
>>> obs = env.reset()
|
| 35 |
-
>>> print(obs.echoed_message) # "Incident Triage environment ready!"
|
| 36 |
-
>>>
|
| 37 |
-
>>> obs = env.step(IncidentTriageAction(message="Hello"))
|
| 38 |
-
>>> print(obs.echoed_message) # "Hello"
|
| 39 |
-
>>> print(obs.message_length) # 5
|
| 40 |
-
"""
|
| 41 |
-
|
| 42 |
-
# Enable concurrent WebSocket sessions.
|
| 43 |
-
# Set to True if your environment isolates state between instances.
|
| 44 |
-
# When True, multiple WebSocket clients can connect simultaneously, each
|
| 45 |
-
# getting their own environment instance (when using factory mode in app.py).
|
| 46 |
-
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
| 47 |
-
|
| 48 |
-
def __init__(self):
|
| 49 |
-
"""Initialize the Incident_Triage environment."""
|
| 50 |
-
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 51 |
-
self._reset_count = 0
|
| 52 |
-
|
| 53 |
-
def reset(self) -> IncidentTriageObservation:
|
| 54 |
-
"""
|
| 55 |
-
Reset the environment.
|
| 56 |
-
|
| 57 |
-
Returns:
|
| 58 |
-
IncidentTriageObservation with a ready message
|
| 59 |
-
"""
|
| 60 |
-
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 61 |
-
self._reset_count += 1
|
| 62 |
-
|
| 63 |
-
return IncidentTriageObservation(
|
| 64 |
-
echoed_message="Incident Triage environment ready!",
|
| 65 |
-
message_length=0,
|
| 66 |
-
done=False,
|
| 67 |
-
reward=0.0,
|
| 68 |
-
)
|
| 69 |
-
|
| 70 |
-
def step(self, action: IncidentTriageAction) -> IncidentTriageObservation: # type: ignore[override]
|
| 71 |
-
"""
|
| 72 |
-
Execute a step in the environment by echoing the message.
|
| 73 |
-
|
| 74 |
-
Args:
|
| 75 |
-
action: IncidentTriageAction containing the message to echo
|
| 76 |
-
|
| 77 |
-
Returns:
|
| 78 |
-
IncidentTriageObservation with the echoed message and its length
|
| 79 |
-
"""
|
| 80 |
-
self._state.step_count += 1
|
| 81 |
-
|
| 82 |
-
message = action.message
|
| 83 |
-
length = len(message)
|
| 84 |
-
|
| 85 |
-
# Simple reward: longer messages get higher rewards
|
| 86 |
-
reward = length * 0.1
|
| 87 |
-
|
| 88 |
-
return IncidentTriageObservation(
|
| 89 |
-
echoed_message=message,
|
| 90 |
-
message_length=length,
|
| 91 |
-
done=False,
|
| 92 |
-
reward=reward,
|
| 93 |
-
metadata={"original_message": message, "step": self._state.step_count},
|
| 94 |
-
)
|
| 95 |
-
|
| 96 |
-
@property
|
| 97 |
-
def state(self) -> State:
|
| 98 |
-
"""
|
| 99 |
-
Get the current environment state.
|
| 100 |
-
|
| 101 |
-
Returns:
|
| 102 |
-
Current State with episode_id and step_count
|
| 103 |
-
"""
|
| 104 |
-
return self._state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
server/requirements.txt
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
openenv[core]>=0.2.0
|
| 2 |
-
fastapi>=0.115.0
|
| 3 |
-
uvicorn>=0.24.0
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|