| """ |
| OpenOps FastAPI Server |
| Provides REST API endpoints for the incident management environment |
| """ |
|
|
| import sys |
| from pathlib import Path |
|
|
| |
# Put the directory two levels above this file at the front of the import
# path (presumably so the `server.*` and `models` imports below resolve when
# this module is run directly). The path is resolved to an absolute location
# so the entry keeps its meaning if the working directory changes later.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
| from fastapi import FastAPI, HTTPException, Query |
| from fastapi.responses import JSONResponse, HTMLResponse |
| from pydantic import BaseModel |
| from typing import Dict, Any, Optional |
| import uvicorn |
|
|
| from server.my_env_environment import MyEnvEnvironment |
| from models import IncidentAction, IncidentObservation |
|
|
|
|
| |
# ASGI application object; the route and exception-handler decorators in this
# module register themselves on it.
app = FastAPI(
    version="1.0.0",
    title="OpenOps API",
    description="Production Incident Management Environment API",
)

# Single environment shared by every request handled by this process.
# It stays None until /reset creates one; /step and /state refuse to run
# while it is unset.
env_instance: Optional[MyEnvEnvironment] = None
|
|
|
|
| |
class StepRequest(BaseModel):
    # Request body for POST /step. Documented with comments rather than a
    # docstring so the generated schema for this model is left untouched.

    # Identifier of the action to execute; the /step handler accepts 0-20.
    action_id: int

    # Task the action is issued for. Defaults to 1; the /step handler falls
    # back to the active environment's task when this value is falsy.
    task_id: Optional[int] = 1
|
|
|
|
| |
| |
| |
|
|
# Static markup for the landing page served at "/". It is fixed text with no
# templating, so it lives at module level and the handler just returns it.
_LANDING_PAGE_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>OpenOps - AI Incident Commander</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 900px;
margin: 0 auto;
background: white;
border-radius: 12px;
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
overflow: hidden;
}
.header {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
color: white;
padding: 40px 30px;
text-align: center;
}
.header h1 {
font-size: 2.5em;
margin-bottom: 10px;
text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
}
.status {
display: inline-block;
background: rgba(255,255,255,0.2);
padding: 8px 20px;
border-radius: 20px;
font-size: 0.9em;
margin-top: 10px;
}
.content {
padding: 40px 30px;
}
.section {
margin-bottom: 30px;
}
.section h2 {
color: #333;
margin-bottom: 15px;
font-size: 1.5em;
border-left: 4px solid #667eea;
padding-left: 15px;
}
.endpoint {
background: #f8f9fa;
border-left: 4px solid #667eea;
padding: 15px 20px;
margin: 10px 0;
border-radius: 4px;
transition: all 0.3s;
}
.endpoint:hover {
background: #e9ecef;
transform: translateX(5px);
}
.endpoint strong {
color: #667eea;
display: block;
margin-bottom: 5px;
}
.endpoint code {
background: white;
padding: 2px 8px;
border-radius: 3px;
font-family: 'Courier New', monospace;
color: #e83e8c;
}
.stats {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-top: 20px;
}
.stat-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 8px;
text-align: center;
}
.stat-card h3 {
font-size: 2em;
margin-bottom: 5px;
}
.stat-card p {
opacity: 0.9;
font-size: 0.9em;
}
.features {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 15px;
margin-top: 20px;
}
.feature {
background: #f8f9fa;
padding: 20px;
border-radius: 8px;
border-left: 4px solid #28a745;
}
.feature h3 {
color: #28a745;
margin-bottom: 10px;
font-size: 1.1em;
}
.button {
display: inline-block;
background: #667eea;
color: white;
padding: 12px 30px;
text-decoration: none;
border-radius: 6px;
margin: 10px 10px 10px 0;
transition: all 0.3s;
font-weight: 600;
}
.button:hover {
background: #764ba2;
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
}
.footer {
background: #f8f9fa;
padding: 20px;
text-align: center;
color: #6c757d;
font-size: 0.9em;
}
.badge {
display: inline-block;
background: #28a745;
color: white;
padding: 4px 12px;
border-radius: 12px;
font-size: 0.85em;
margin-left: 10px;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🚨 OpenOps</h1>
<p style="font-size: 1.2em; margin-top: 10px;">AI Incident Commander Environment</p>
<div class="status">✅ Status: Running</div>
</div>

<div class="content">
<div class="section">
<h2>📊 Performance Metrics</h2>
<div class="stats">
<div class="stat-card">
<h3>0.85</h3>
<p>Task 1 Score<br>(Easy)</p>
</div>
<div class="stat-card">
<h3>0.85</h3>
<p>Task 2 Score<br>(Medium)</p>
</div>
<div class="stat-card">
<h3>1.00</h3>
<p>Task 3 Score<br>(Hard)</p>
</div>
<div class="stat-card">
<h3>0.90</h3>
<p>Average Score<br>⭐</p>
</div>
</div>
</div>

<div class="section">
<h2>🎯 Environment Overview</h2>
<p style="line-height: 1.8; color: #555;">
OpenOps simulates realistic production incidents where AI agents must investigate alerts,
identify root causes, execute mitigation actions, communicate with stakeholders,
and resolve incidents to minimize revenue loss.
</p>
</div>

<div class="section">
<h2>✨ Key Features</h2>
<div class="features">
<div class="feature">
<h3>🔍 Smart Detection</h3>
<p>Intelligent incident classification based on alerts and logs</p>
</div>
<div class="feature">
<h3>📈 Progressive Difficulty</h3>
<p>Three tasks from simple crashes to cascading failures</p>
</div>
<div class="feature">
<h3>⚡ Rule-Based Agent</h3>
<p>Reliable baseline with optimized playbooks</p>
</div>
<div class="feature">
<h3>🎮 21 Actions</h3>
<p>Investigate, mitigate, communicate, and resolve</p>
</div>
</div>
</div>

<div class="section">
<h2>🔌 API Endpoints</h2>

<div class="endpoint">
<strong>API Documentation</strong>
<code>GET /docs</code>
<p style="margin-top: 8px; color: #666;">Interactive Swagger UI with all endpoints</p>
</div>

<div class="endpoint">
<strong>Reset Environment</strong>
<code>POST /reset?task_id=1</code>
<p style="margin-top: 8px; color: #666;">Initialize a new incident scenario (task_id: 1, 2, or 3)</p>
</div>

<div class="endpoint">
<strong>Execute Action</strong>
<code>POST /step</code>
<p style="margin-top: 8px; color: #666;">Take an action and receive observation</p>
</div>

<div class="endpoint">
<strong>Get State</strong>
<code>GET /state</code>
<p style="margin-top: 8px; color: #666;">Retrieve current environment state</p>
</div>

<div class="endpoint">
<strong>List Actions</strong>
<code>GET /actions</code>
<p style="margin-top: 8px; color: #666;">Get all available actions</p>
</div>
</div>

<div class="section">
<h2>🚀 Quick Start</h2>
<a href="/docs" class="button">📚 View API Docs</a>
<a href="https://github.com/arya89/OpenOps" class="button">💻 GitHub</a>
<a href="https://huggingface.co/spaces/arya89/openops-incident-commander" class="button">🤗 HuggingFace</a>
</div>

<div class="section">
<h2>📖 Three Progressive Tasks</h2>
<div class="endpoint">
<strong>Task 1: Simple API Crash <span class="badge">Easy</span></strong>
<p style="margin-top: 8px; color: #666;">API service down due to OOM → Inspect logs → Restart → Resolve</p>
</div>
<div class="endpoint">
<strong>Task 2: Bad Deployment <span class="badge">Medium</span></strong>
<p style="margin-top: 8px; color: #666;">Database deployment broke queries → Rollback → Notify team → Resolve</p>
</div>
<div class="endpoint">
<strong>Task 3: Cascading Failure <span class="badge">Hard</span></strong>
<p style="margin-top: 8px; color: #666;">DB overload → API timeouts → Scale DB → Restart API → Communicate → Resolve</p>
</div>
</div>
</div>

<div class="footer">
<p><strong>OpenOps v1.0.0</strong></p>
<p>Built for OpenEnv AI Hackathon 2026 | Meta PyTorch x Scaler</p>
<p style="margin-top: 10px;">Created by arya89 | Production-ready incident management environment</p>
</div>
</div>
</body>
</html>
"""


@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the static landing page: status banner, endpoint list and links."""
    return _LANDING_PAGE_HTML
|
|
|
|
@app.get("/health")
async def health():
    """Report liveness and which task, if any, the shared environment holds.

    Returns:
        A dict with a fixed "healthy" status, a flag telling whether an
        environment has been created, that environment's ``task_id`` (or
        None), and the API version string.
    """
    is_loaded = env_instance is not None
    active_task = env_instance.task_id if env_instance else None

    report = {
        "status": "healthy",
        "environment_loaded": is_loaded,
        "current_task": active_task,
        "version": "1.0.0",
    }
    return report
|
|
|
|
| |
| |
| |
|
|
@app.post("/reset")
async def reset(task_id: int = Query(default=1, ge=1, le=3)) -> Dict[str, Any]:
    """
    Reset the environment for a specific task.
    OpenEnv standard endpoint.

    A brand-new environment is created on every call. It replaces the shared
    module-level instance only after its ``reset`` call has succeeded, so a
    failed reset leaves the previously active environment (or None) in place
    instead of installing an instance that was never reset.

    Args:
        task_id: Task difficulty (1=easy, 2=medium, 3=hard). Values outside
            1-3 are rejected by the query validation before this body runs.

    Returns:
        Initial observation after reset, serialized with ``model_dump()``.

    Raises:
        HTTPException: 500 if creating, resetting or serializing fails.
    """
    global env_instance

    try:
        fresh_env = MyEnvEnvironment()
        obs = fresh_env.reset(task_id=task_id)

        # Publish only once the reset went through.
        env_instance = fresh_env

        return obs.model_dump()

    except Exception as e:
        # Chain the cause so the original traceback is kept for debugging.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to reset environment: {str(e)}"
        ) from e
|
|
|
|
@app.post("/step")
async def step(request: StepRequest) -> Dict[str, Any]:
    """
    Execute an action in the environment.

    Precondition checks run outside the ``try`` block so their 400 responses
    reach the client unchanged. Previously they were raised inside it, caught
    by the generic ``except Exception`` and re-reported as 500 errors.

    Args:
        request: StepRequest with action_id and task_id. When ``task_id`` is
            falsy, the active environment's task is used instead.

    Returns:
        Observation after taking the action, serialized with ``model_dump()``.

    Raises:
        HTTPException: 400 if no environment has been created via /reset or
            ``action_id`` is outside 0-20; 500 if building or executing the
            action fails.
    """
    # Snapshot the shared instance so the check and the use see the same object.
    env = env_instance

    if env is None:
        raise HTTPException(
            status_code=400,
            detail="Environment not initialized. Call /reset first."
        )

    if request.action_id < 0 or request.action_id > 20:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid action_id: {request.action_id}. Must be 0-20."
        )

    try:
        action = IncidentAction(
            action_id=request.action_id,
            task_id=request.task_id if request.task_id else env.task_id
        )

        obs = env.step(action)

        return obs.model_dump()

    except HTTPException:
        # Already a well-formed HTTP error; do not wrap it in a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to execute step: {str(e)}"
        ) from e
|
|
|
|
@app.get("/state")
async def get_state() -> Dict[str, Any]:
    """
    Get current environment state.

    The "not initialized" check runs outside the ``try`` block so its 400
    response reaches the client unchanged. Previously it was raised inside
    it, caught by the generic ``except Exception`` and reported as a 500.

    Returns:
        A dict with the environment's ``state`` (serialized with
        ``model_dump()`` when the object offers it, otherwise passed through
        as-is) plus ``task_id``, ``total_reward``, ``incident_resolved`` and
        ``time_elapsed`` read from the environment.

    Raises:
        HTTPException: 400 if no environment has been created via /reset;
            500 if reading or serializing the state fails.
    """
    # Snapshot the shared instance so the check and the use see the same object.
    env = env_instance

    if env is None:
        raise HTTPException(
            status_code=400,
            detail="Environment not initialized. Call /reset first."
        )

    try:
        state = env.state

        return {
            "state": state.model_dump() if hasattr(state, 'model_dump') else state,
            "task_id": env.task_id,
            "total_reward": env.total_reward,
            "incident_resolved": env.incident_resolved,
            "time_elapsed": env.time_elapsed
        }

    except HTTPException:
        # Already a well-formed HTTP error; do not wrap it in a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get state: {str(e)}"
        ) from e
|
|
|
|
@app.get("/actions")
async def get_actions() -> Dict[str, Any]:
    """
    List the actions the environment exposes.

    A throwaway environment is created just to read its ``ACTION_NAMES``;
    the shared instance used by /reset and /step is not touched.

    Returns:
        Dict with the action names under "actions" and their count under
        "total_actions".

    Raises:
        HTTPException: 500 if the environment cannot be created or queried.
    """
    try:
        probe_env = MyEnvEnvironment()
        listing = {"actions": probe_env.ACTION_NAMES}
        listing["total_actions"] = len(probe_env.ACTION_NAMES)
        return listing
    except Exception as e:
        failure = f"Failed to get actions: {str(e)}"
        raise HTTPException(status_code=500, detail=failure)
|
|
|
|
| |
| |
| |
|
|
@app.exception_handler(HTTPException)
async def http_exception_handler(request, exc):
    """Render an HTTPException as this API's JSON error envelope.

    Args:
        request: The incoming request (unused).
        exc: The raised HTTPException.

    Returns:
        JSONResponse carrying the exception's status code, a body of
        ``{"error": <detail>, "status_code": <code>}``, and any headers
        attached to the exception.
    """
    return JSONResponse(
        status_code=exc.status_code,
        content={
            "error": exc.detail,
            "status_code": exc.status_code
        },
        # Forward headers set on the exception (e.g. WWW-Authenticate), as
        # FastAPI's default handler does; None when the exception has none.
        headers=getattr(exc, "headers", None)
    )
|
|
|
|
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
    """Turn any otherwise unhandled exception into a JSON 500 response.

    Args:
        request: The incoming request (unused).
        exc: The exception that escaped a handler.

    Returns:
        JSONResponse with status 500, a fixed "error" message and the
        exception text under "detail".
    """
    # NOTE(review): the raw exception text is sent to the client; confirm
    # that is acceptable for the deployment this runs in.
    error_body = {"error": "Internal server error", "detail": str(exc)}
    return JSONResponse(status_code=500, content=error_body)
|
|
|
|
| |
| |
| |
|
|
def main():
    """
    Start the API under uvicorn.

    Main entry point for the server; required by OpenEnv validation.
    Listens on all interfaces, port 7860, with "info" log level.
    """
    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "log_level": "info",
    }
    uvicorn.run(app, **server_options)


# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()