"""HTTP front end for the CI/CD debugging RL environment.

Exposes a single shared ``CICDDebugEnv`` instance and a
``CurriculumController`` over a small FastAPI application.
"""

from typing import Any, Dict, Optional

from fastapi import FastAPI
from pydantic import BaseModel

from cicd_debug_env.env import CICDDebugEnv
from cicd_debug_env.models import Action
from cicd_debug_env.tasks import ALL_TASKS
from cicd_debug_env.curriculum import CurriculumController

app = FastAPI(title="CICD Debug RL Environment")

# One environment and one curriculum are created at import time and shared by
# every request handled by this process.
# NOTE(review): the handlers below are plain ``def`` functions, which FastAPI
# executes in a worker threadpool, so concurrent clients would operate on the
# same ``env``/``curriculum`` objects. Confirm this service is meant to be
# driven by a single client at a time.
env = CICDDebugEnv()
curriculum = CurriculumController()


class ActionRequest(BaseModel):
    """Request body accepted by ``POST /step``.

    The four fields are copied one-to-one into an ``Action`` before it is
    handed to the environment. All of them are required.
    """

    action_type: str
    parameters: Dict[str, Any]
    confidence: float
    reasoning: str


@app.post("/reset")
def reset(task_id: Optional[str] = None):
    """Reset the shared environment and return the resulting observation.

    Args:
        task_id: Optional task identifier forwarded unchanged to
            ``env.reset``; ``None`` when the caller does not supply one.

    Returns:
        A dict with a single ``"observation"`` key holding the attribute
        dictionary of the object returned by ``env.reset``.
    """
    observation = env.reset(task_id)
    return {"observation": vars(observation)}


@app.post("/step")
def step(action_req: ActionRequest):
    """Apply one action to the shared environment.

    Args:
        action_req: Validated request body describing the action.

    Returns:
        A dict with the keys ``"observation"`` (attribute dictionary of the
        observation object), ``"reward"``, ``"done"`` and ``"info"``, taken
        from the 4-tuple returned by ``env.step``.
    """
    requested = Action(
        action_type=action_req.action_type,
        parameters=action_req.parameters,
        confidence=action_req.confidence,
        reasoning=action_req.reasoning,
    )
    observation, reward, done, info = env.step(requested)

    # The curriculum is only told about the reward of the step on which the
    # environment reports ``done``.
    if done:
        curriculum.update(reward)

    payload = {
        "observation": vars(observation),
        "reward": reward,
        "done": done,
        "info": info,
    }
    return payload


@app.get("/state")
def state():
    """Return whatever ``env.state()`` reports for the shared environment."""
    return env.state()


@app.get("/tasks")
def list_tasks():
    """List the ``id`` and ``difficulty`` of every entry in ``ALL_TASKS``."""
    return [
        {"id": task["id"], "difficulty": task["difficulty"]}
        for task in ALL_TASKS
    ]


@app.get("/metrics")
def metrics():
    """Return the curriculum controller's difficulty statistics."""
    return curriculum.get_difficulty_stats()


@app.get("/health")
def health():
    """Liveness probe; always answers ``{"status": "ok"}``."""
    return {"status": "ok"}