""" OpenOps FastAPI Server Provides REST API endpoints for the incident management environment """ import sys from pathlib import Path # Add parent directory to path sys.path.insert(0, str(Path(__file__).parent.parent)) from fastapi import FastAPI, HTTPException, Query from fastapi.responses import JSONResponse, HTMLResponse from pydantic import BaseModel from typing import Dict, Any, Optional import uvicorn from server.my_env_environment import MyEnvEnvironment from models import IncidentAction, IncidentObservation # FastAPI app app = FastAPI( title="OpenOps API", description="Production Incident Management Environment API", version="1.0.0" ) # Global environment instance env_instance: Optional[MyEnvEnvironment] = None # Request Models class StepRequest(BaseModel): action_id: int task_id: Optional[int] = 1 # ============================================================ # LANDING PAGE WITH STATUS # ============================================================ @app.get("/", response_class=HTMLResponse) async def root(): """Landing page with environment status and documentation.""" return """ OpenOps - AI Incident Commander

🚨 OpenOps

AI Incident Commander Environment

✅ Status: Running

📊 Performance Metrics

0.85

Task 1 Score
(Easy)

0.85

Task 2 Score
(Medium)

1.00

Task 3 Score
(Hard)

0.90

Average Score

🎯 Environment Overview

OpenOps simulates realistic production incidents where AI agents must investigate alerts, identify root causes, execute mitigation actions, communicate with stakeholders, and resolve incidents to minimize revenue loss.

✨ Key Features

🔍 Smart Detection

Intelligent incident classification based on alerts and logs

📈 Progressive Difficulty

Three tasks from simple crashes to cascading failures

⚡ Rule-Based Agent

Reliable baseline with optimized playbooks

🎮 21 Actions

Investigate, mitigate, communicate, and resolve

🔌 API Endpoints

API Documentation GET /docs

Interactive Swagger UI with all endpoints

Reset Environment POST /reset?task_id=1

Initialize a new incident scenario (task_id: 1, 2, or 3)

Execute Action POST /step

Take an action and receive observation

Get State GET /state

Retrieve current environment state

List Actions GET /actions

Get all available actions

📖 Three Progressive Tasks

Task 1: Simple API Crash Easy

API service down due to OOM → Inspect logs → Restart → Resolve

Task 2: Bad Deployment Medium

Database deployment broke queries → Rollback → Notify team → Resolve

Task 3: Cascading Failure Hard

DB overload → API timeouts → Scale DB → Restart API → Communicate → Resolve

""" @app.get("/health") async def health(): """Detailed health check.""" return { "status": "healthy", "environment_loaded": env_instance is not None, "current_task": env_instance.task_id if env_instance else None, "version": "1.0.0" } # ============================================================ # ENVIRONMENT API ENDPOINTS # ============================================================ @app.post("/reset") async def reset(task_id: int = Query(default=1, ge=1, le=3)) -> Dict[str, Any]: """ Reset the environment for a specific task. OpenEnv standard endpoint. Args: task_id: Task difficulty (1=easy, 2=medium, 3=hard) Returns: Initial observation after reset """ global env_instance try: # Create new environment instance env_instance = MyEnvEnvironment() obs = env_instance.reset(task_id=task_id) # Return observation in OpenEnv format return obs.model_dump() except Exception as e: raise HTTPException( status_code=500, detail=f"Failed to reset environment: {str(e)}" ) @app.post("/step") async def step(request: StepRequest) -> Dict[str, Any]: """ Execute an action in the environment. Args: request: StepRequest with action_id and task_id Returns: Observation after taking the action """ global env_instance try: # Check if environment is initialized if env_instance is None: raise HTTPException( status_code=400, detail="Environment not initialized. Call /reset first." ) # Validate action_id if request.action_id < 0 or request.action_id > 20: raise HTTPException( status_code=400, detail=f"Invalid action_id: {request.action_id}. Must be 0-20." ) # Create action action = IncidentAction( action_id=request.action_id, task_id=request.task_id if request.task_id else env_instance.task_id ) # Execute step obs = env_instance.step(action) # Return observation in OpenEnv format return obs.model_dump() except Exception as e: raise HTTPException( status_code=500, detail=f"Failed to execute step: {str(e)}" ) @app.get("/state") async def get_state() -> Dict[str, Any]: """ Get current environment state. Returns: Current state of the environment """ global env_instance try: if env_instance is None: raise HTTPException( status_code=400, detail="Environment not initialized. Call /reset first." ) state = env_instance.state return { "state": state.model_dump() if hasattr(state, 'model_dump') else state, "task_id": env_instance.task_id, "total_reward": env_instance.total_reward, "incident_resolved": env_instance.incident_resolved, "time_elapsed": env_instance.time_elapsed } except Exception as e: raise HTTPException( status_code=500, detail=f"Failed to get state: {str(e)}" ) @app.get("/actions") async def get_actions() -> Dict[str, Any]: """ Get list of available actions. Returns: Dictionary of action IDs and names """ try: temp_env = MyEnvEnvironment() return { "actions": temp_env.ACTION_NAMES, "total_actions": len(temp_env.ACTION_NAMES) } except Exception as e: raise HTTPException( status_code=500, detail=f"Failed to get actions: {str(e)}" ) # ============================================================ # ERROR HANDLERS # ============================================================ @app.exception_handler(HTTPException) async def http_exception_handler(request, exc): """Handle HTTP exceptions.""" return JSONResponse( status_code=exc.status_code, content={ "error": exc.detail, "status_code": exc.status_code } ) @app.exception_handler(Exception) async def general_exception_handler(request, exc): """Handle general exceptions.""" return JSONResponse( status_code=500, content={ "error": "Internal server error", "detail": str(exc) } ) # ============================================================ # MAIN ENTRY POINT (Required by OpenEnv) # ============================================================ def main(): """ Main entry point for the server. Required by OpenEnv validation. """ uvicorn.run( app, host="0.0.0.0", port=7860, log_level="info" ) if __name__ == "__main__": main()