openops / server /app.py
arya89's picture
Update server/app.py
49aae56 verified
"""
OpenOps FastAPI Server
Provides REST API endpoints for the incident management environment
"""
import sys
from pathlib import Path
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import JSONResponse, HTMLResponse
from pydantic import BaseModel
from typing import Dict, Any, Optional
import uvicorn
from server.my_env_environment import MyEnvEnvironment
from models import IncidentAction, IncidentObservation
# FastAPI application object; the routes and exception handlers defined below
# are registered on it through decorators.
app = FastAPI(
    title="OpenOps API",
    description="Production Incident Management Environment API",
    version="1.0.0"
)
# Global environment instance shared by every request handled in this process.
# It stays None until /reset is called; each /reset call replaces it with a
# freshly constructed MyEnvEnvironment.
env_instance: Optional[MyEnvEnvironment] = None
# Request Models
class StepRequest(BaseModel):
    # JSON body accepted by POST /step.
    # action_id: identifier of the action to execute; the /step handler
    # rejects values outside 0-20.
    action_id: int
    # task_id: optional task selector, defaulting to 1. When it is falsy
    # (None or 0) the /step handler substitutes the active environment's
    # task_id instead.
    task_id: Optional[int] = 1
# ============================================================
# LANDING PAGE WITH STATUS
# ============================================================
@app.get("/", response_class=HTMLResponse)
async def root():
    """Landing page with environment status and documentation.

    Returns a fixed HTML document (served as an HTMLResponse via the route's
    ``response_class``). The page is entirely static: the "Status: Running"
    banner, the task scores and the endpoint list are literal text in the
    markup below and are not read from ``env_instance`` or computed at
    request time.
    """
    # Plain (non-f) string literal: the braces in the CSS are literal text.
    return """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>OpenOps - AI Incident Commander</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 900px;
margin: 0 auto;
background: white;
border-radius: 12px;
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
overflow: hidden;
}
.header {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
color: white;
padding: 40px 30px;
text-align: center;
}
.header h1 {
font-size: 2.5em;
margin-bottom: 10px;
text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
}
.status {
display: inline-block;
background: rgba(255,255,255,0.2);
padding: 8px 20px;
border-radius: 20px;
font-size: 0.9em;
margin-top: 10px;
}
.content {
padding: 40px 30px;
}
.section {
margin-bottom: 30px;
}
.section h2 {
color: #333;
margin-bottom: 15px;
font-size: 1.5em;
border-left: 4px solid #667eea;
padding-left: 15px;
}
.endpoint {
background: #f8f9fa;
border-left: 4px solid #667eea;
padding: 15px 20px;
margin: 10px 0;
border-radius: 4px;
transition: all 0.3s;
}
.endpoint:hover {
background: #e9ecef;
transform: translateX(5px);
}
.endpoint strong {
color: #667eea;
display: block;
margin-bottom: 5px;
}
.endpoint code {
background: white;
padding: 2px 8px;
border-radius: 3px;
font-family: 'Courier New', monospace;
color: #e83e8c;
}
.stats {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-top: 20px;
}
.stat-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 8px;
text-align: center;
}
.stat-card h3 {
font-size: 2em;
margin-bottom: 5px;
}
.stat-card p {
opacity: 0.9;
font-size: 0.9em;
}
.features {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 15px;
margin-top: 20px;
}
.feature {
background: #f8f9fa;
padding: 20px;
border-radius: 8px;
border-left: 4px solid #28a745;
}
.feature h3 {
color: #28a745;
margin-bottom: 10px;
font-size: 1.1em;
}
.button {
display: inline-block;
background: #667eea;
color: white;
padding: 12px 30px;
text-decoration: none;
border-radius: 6px;
margin: 10px 10px 10px 0;
transition: all 0.3s;
font-weight: 600;
}
.button:hover {
background: #764ba2;
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
}
.footer {
background: #f8f9fa;
padding: 20px;
text-align: center;
color: #6c757d;
font-size: 0.9em;
}
.badge {
display: inline-block;
background: #28a745;
color: white;
padding: 4px 12px;
border-radius: 12px;
font-size: 0.85em;
margin-left: 10px;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🚨 OpenOps</h1>
<p style="font-size: 1.2em; margin-top: 10px;">AI Incident Commander Environment</p>
<div class="status">✅ Status: Running</div>
</div>
<div class="content">
<div class="section">
<h2>📊 Performance Metrics</h2>
<div class="stats">
<div class="stat-card">
<h3>0.85</h3>
<p>Task 1 Score<br>(Easy)</p>
</div>
<div class="stat-card">
<h3>0.85</h3>
<p>Task 2 Score<br>(Medium)</p>
</div>
<div class="stat-card">
<h3>1.00</h3>
<p>Task 3 Score<br>(Hard)</p>
</div>
<div class="stat-card">
<h3>0.90</h3>
<p>Average Score<br>⭐</p>
</div>
</div>
</div>
<div class="section">
<h2>🎯 Environment Overview</h2>
<p style="line-height: 1.8; color: #555;">
OpenOps simulates realistic production incidents where AI agents must investigate alerts,
identify root causes, execute mitigation actions, communicate with stakeholders,
and resolve incidents to minimize revenue loss.
</p>
</div>
<div class="section">
<h2>✨ Key Features</h2>
<div class="features">
<div class="feature">
<h3>🔍 Smart Detection</h3>
<p>Intelligent incident classification based on alerts and logs</p>
</div>
<div class="feature">
<h3>📈 Progressive Difficulty</h3>
<p>Three tasks from simple crashes to cascading failures</p>
</div>
<div class="feature">
<h3>⚡ Rule-Based Agent</h3>
<p>Reliable baseline with optimized playbooks</p>
</div>
<div class="feature">
<h3>🎮 21 Actions</h3>
<p>Investigate, mitigate, communicate, and resolve</p>
</div>
</div>
</div>
<div class="section">
<h2>🔌 API Endpoints</h2>
<div class="endpoint">
<strong>API Documentation</strong>
<code>GET /docs</code>
<p style="margin-top: 8px; color: #666;">Interactive Swagger UI with all endpoints</p>
</div>
<div class="endpoint">
<strong>Reset Environment</strong>
<code>POST /reset?task_id=1</code>
<p style="margin-top: 8px; color: #666;">Initialize a new incident scenario (task_id: 1, 2, or 3)</p>
</div>
<div class="endpoint">
<strong>Execute Action</strong>
<code>POST /step</code>
<p style="margin-top: 8px; color: #666;">Take an action and receive observation</p>
</div>
<div class="endpoint">
<strong>Get State</strong>
<code>GET /state</code>
<p style="margin-top: 8px; color: #666;">Retrieve current environment state</p>
</div>
<div class="endpoint">
<strong>List Actions</strong>
<code>GET /actions</code>
<p style="margin-top: 8px; color: #666;">Get all available actions</p>
</div>
</div>
<div class="section">
<h2>🚀 Quick Start</h2>
<a href="/docs" class="button">📚 View API Docs</a>
<a href="https://github.com/arya89/OpenOps" class="button">💻 GitHub</a>
<a href="https://huggingface.co/spaces/arya89/openops-incident-commander" class="button">🤗 HuggingFace</a>
</div>
<div class="section">
<h2>📖 Three Progressive Tasks</h2>
<div class="endpoint">
<strong>Task 1: Simple API Crash <span class="badge">Easy</span></strong>
<p style="margin-top: 8px; color: #666;">API service down due to OOM → Inspect logs → Restart → Resolve</p>
</div>
<div class="endpoint">
<strong>Task 2: Bad Deployment <span class="badge">Medium</span></strong>
<p style="margin-top: 8px; color: #666;">Database deployment broke queries → Rollback → Notify team → Resolve</p>
</div>
<div class="endpoint">
<strong>Task 3: Cascading Failure <span class="badge">Hard</span></strong>
<p style="margin-top: 8px; color: #666;">DB overload → API timeouts → Scale DB → Restart API → Communicate → Resolve</p>
</div>
</div>
</div>
<div class="footer">
<p><strong>OpenOps v1.0.0</strong></p>
<p>Built for OpenEnv AI Hackathon 2026 | Meta PyTorch x Scaler</p>
<p style="margin-top: 10px;">Created by arya89 | Production-ready incident management environment</p>
</div>
</div>
</body>
</html>
"""
@app.get("/health")
async def health():
    """Report service liveness plus which task, if any, is currently loaded.

    Returns:
        A dict with a constant "healthy" status, whether an environment has
        been created, that environment's task_id (None when there is no
        environment) and the API version string.
    """
    # Resolve the task first so the response literal stays flat.
    active_task = None
    if env_instance:
        active_task = env_instance.task_id

    report = {
        "status": "healthy",
        "environment_loaded": env_instance is not None,
        "current_task": active_task,
        "version": "1.0.0",
    }
    return report
# ============================================================
# ENVIRONMENT API ENDPOINTS
# ============================================================
@app.post("/reset")
async def reset(task_id: int = Query(default=1, ge=1, le=3)) -> Dict[str, Any]:
    """
    Start a fresh episode for the requested task (OpenEnv standard endpoint).

    The previous global environment, if any, is discarded and replaced by a
    newly constructed one before it is reset.

    Args:
        task_id: Task difficulty (1=easy, 2=medium, 3=hard); values outside
            1-3 are rejected by the query validator.

    Returns:
        The initial observation, serialised with ``model_dump()``.

    Raises:
        HTTPException: 500 if constructing or resetting the environment fails.
    """
    global env_instance
    try:
        # Publish the new instance before resetting it, exactly as the global
        # is expected to be visible to later /step and /state calls.
        fresh_env = MyEnvEnvironment()
        env_instance = fresh_env
        initial_observation = fresh_env.reset(task_id=task_id)
        # OpenEnv format: plain dict of the observation model.
        return initial_observation.model_dump()
    except Exception as exc:
        failure = f"Failed to reset environment: {exc}"
        raise HTTPException(status_code=500, detail=failure)
@app.post("/step")
async def step(request: StepRequest) -> Dict[str, Any]:
    """
    Execute an action in the environment.

    Args:
        request: StepRequest with action_id and task_id. A falsy task_id
            falls back to the active environment's task_id.

    Returns:
        Observation after taking the action, serialised with ``model_dump()``.

    Raises:
        HTTPException: 400 if /reset has not been called yet or action_id is
            outside 0-20; 500 if building the action or stepping the
            environment fails.
    """
    env = env_instance

    # Client errors are raised OUTSIDE the try block. Previously they were
    # raised inside it, caught by the blanket ``except Exception`` and turned
    # into 500 responses, so callers never saw the intended 400.
    if env is None:
        raise HTTPException(
            status_code=400,
            detail="Environment not initialized. Call /reset first."
        )
    if request.action_id < 0 or request.action_id > 20:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid action_id: {request.action_id}. Must be 0-20."
        )

    try:
        action = IncidentAction(
            action_id=request.action_id,
            task_id=request.task_id if request.task_id else env.task_id
        )
        obs = env.step(action)
        # Return observation in OpenEnv format
        return obs.model_dump()
    except HTTPException:
        # Preserve any deliberate HTTP error (and its status code) raised below.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to execute step: {str(e)}"
        ) from e
@app.get("/state")
async def get_state() -> Dict[str, Any]:
    """
    Get current environment state.

    Returns:
        A dict holding the environment's ``state`` (dumped via ``model_dump()``
        when the object provides it, otherwise passed through unchanged) plus
        its task_id, total_reward, incident_resolved and time_elapsed.

    Raises:
        HTTPException: 400 if /reset has not been called yet; 500 if reading
            the state fails.
    """
    env = env_instance

    # Raised outside the try block so the 400 is not caught by the generic
    # handler below and reported as a 500, which is what used to happen.
    if env is None:
        raise HTTPException(
            status_code=400,
            detail="Environment not initialized. Call /reset first."
        )

    try:
        state = env.state
        return {
            "state": state.model_dump() if hasattr(state, 'model_dump') else state,
            "task_id": env.task_id,
            "total_reward": env.total_reward,
            "incident_resolved": env.incident_resolved,
            "time_elapsed": env.time_elapsed
        }
    except HTTPException:
        # Keep deliberate HTTP errors intact instead of rewrapping them.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get state: {str(e)}"
        ) from e
@app.get("/actions")
async def get_actions() -> Dict[str, Any]:
    """
    List the actions the environment exposes.

    A throwaway environment is constructed for the lookup, so this endpoint
    works before /reset and does not touch the global instance.

    Returns:
        A dict with the environment's ACTION_NAMES under "actions" and their
        count under "total_actions".

    Raises:
        HTTPException: 500 if the environment cannot be built or queried.
    """
    try:
        scratch_env = MyEnvEnvironment()
        listing = {"actions": scratch_env.ACTION_NAMES}
        listing["total_actions"] = len(scratch_env.ACTION_NAMES)
        return listing
    except Exception as exc:
        failure = f"Failed to get actions: {exc}"
        raise HTTPException(status_code=500, detail=failure)
# ============================================================
# ERROR HANDLERS
# ============================================================
@app.exception_handler(HTTPException)
async def http_exception_handler(request, exc):
    """Render an HTTPException as JSON carrying its detail and status code."""
    # The status code is echoed in the body as well as on the response itself.
    payload = {
        "error": exc.detail,
        "status_code": exc.status_code,
    }
    return JSONResponse(status_code=exc.status_code, content=payload)
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
    """Turn any otherwise-unhandled exception into a JSON 500 response."""
    # The exception text is included verbatim under "detail".
    payload = dict()
    payload["error"] = "Internal server error"
    payload["detail"] = str(exc)
    return JSONResponse(status_code=500, content=payload)
# ============================================================
# MAIN ENTRY POINT (Required by OpenEnv)
# ============================================================
def main():
    """
    Launch the API under uvicorn (entry point required by OpenEnv validation).

    Binds to all interfaces on port 7860 with info-level logging and blocks
    for as long as the server is running.
    """
    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "log_level": "info",
    }
    uvicorn.run(app, **server_options)
# Start the server only when this file is executed as a script; importing the
# module (e.g. to obtain `app`) must not launch uvicorn.
if __name__ == "__main__":
    main()