Spaces:

bumie-e
/

MARL-Gym

Sleeping

App Files Files Community

bumie-e committed on Dec 3, 2025

Commit

a91c2bd

1 Parent(s): 779ee0c

Updated render implementation

Browse files

Files changed (1) hide show

app.py +192 -408

app.py CHANGED Viewed

@@ -1,10 +1,13 @@
-from fastapi import FastAPI, BackgroundTasks, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from typing import Dict, Any, List
 import uuid
 import threading
-import numpy as np
 import gymnasium as gym
 from stable_baselines3 import PPO
 from stable_baselines3.common.monitor import Monitor
@@ -12,6 +15,8 @@ from stable_baselines3.common.evaluation import evaluate_policy
 from stable_baselines3.common.callbacks import BaseCallback
 from datetime import datetime
 import asyncio
 app = FastAPI()
@@ -28,7 +33,6 @@ app.add_middleware(
 training_jobs: Dict[str, Dict[str, Any]] = {}
 class TrainingJob(BaseModel):
-    code: str
     env_name: str = "CartPole-v1"
     total_timesteps: int = 100000
     learning_rate: float = 0.001
@@ -36,24 +40,117 @@ class TrainingJob(BaseModel):
     batch_size: int = 64
     n_epochs: int = 10
 class MetricsCallback(BaseCallback):
     """Custom callback to track training metrics in real-time"""
-    def __init__(self, job_id: str):
         super().__init__()
         self.job_id = job_id
         self.episode_count = 0
     def _on_step(self) -> bool:
         job = training_jobs.get(self.job_id)
         if not job:
             return False
         # Update timestep count
         job["metrics"]["timesteps"] = self.num_timesteps
         job["metrics"]["progress"] = int(
             (self.num_timesteps / job["config"]["total_timesteps"]) * 100
         )
         # Check for episode completion
         if self.locals.get("dones", [False])[0]:
             if "infos" in self.locals and len(self.locals["infos"]) > 0:
@@ -62,12 +159,12 @@ class MetricsCallback(BaseCallback):
                     self.episode_count += 1
                     ep_reward = float(info["episode"]["r"])
                     ep_length = int(info["episode"]["l"])
                     job["metrics"]["episodes"] = self.episode_count
                     job["metrics"]["episode_rewards"].append(ep_reward)
                     job["metrics"]["episode_lengths"].append(ep_length)
                     job["metrics"]["current_episode_reward"] = ep_reward
                     # Calculate running average
                     if len(job["metrics"]["episode_rewards"]) > 0:
                         job["metrics"]["mean_reward"] = float(
@@ -76,23 +173,22 @@ class MetricsCallback(BaseCallback):
                         job["metrics"]["std_reward"] = float(
                             np.std(job["metrics"]["episode_rewards"][-100:])
                         )
                     # Add log entry
-                    log_entry = f"[{datetime.now().strftime('%H:%M:%S')}] Episode {self.episode_count}: reward = {ep_reward:.2f}"
                     job["metrics"]["logs"].append(log_entry)
-                    if len(job["metrics"]["logs"]) > 50:
                         job["metrics"]["logs"].pop(0)
         return True
 def run_training(job_id: str, config: Dict[str, Any]):
-    """
-    This function runs the training and updates the job status with real-time metrics.
-    """
-    print(f"--- Starting Training for job {job_id} ---")
     training_jobs[job_id]["status"] = "training"
     training_jobs[job_id]["start_time"] = datetime.now()
     try:
         env_name = config.get("env_name", "CartPole-v1")
         total_timesteps = config.get("total_timesteps", 100000)
@@ -100,11 +196,11 @@ def run_training(job_id: str, config: Dict[str, Any]):
         n_steps = config.get("n_steps", 2048)
         batch_size = config.get("batch_size", 64)
         n_epochs = config.get("n_epochs", 10)
-        # Initialize environment
-        env = gym.make(env_name)
         env = Monitor(env)
         # Initialize model
         model = PPO(
             "MlpPolicy",
@@ -115,58 +211,66 @@ def run_training(job_id: str, config: Dict[str, Any]):
             batch_size=batch_size,
             n_epochs=n_epochs,
         )
         # Add initial logs
         training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] Initializing environment: {env_name}"
         )
         training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] Creating PPO agent with MlpPolicy..."
         )
         training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] Starting training for {total_timesteps:,} timesteps"
         )
         # Train with callback
         model.learn(
             total_timesteps=total_timesteps,
-            callback=MetricsCallback(job_id),
         )
         # Evaluate
         training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] Training completed! Evaluating model..."
         )
-        mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=100)
         training_jobs[job_id]["metrics"]["eval_mean_reward"] = float(mean_reward)
         training_jobs[job_id]["metrics"]["eval_std_reward"] = float(std_reward)
         # Save model
-        model.save(f"{env_name}_ppo_{job_id}")
         training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] Model saved as {env_name}_ppo_{job_id}.zip"
         )
         # Store results
         training_jobs[job_id]["status"] = "completed"
         training_jobs[job_id]["results"] = {
             "mean_reward": mean_reward,
             "std_reward": std_reward,
-            "model_path": f"{env_name}_ppo_{job_id}.zip",
             "total_episodes": training_jobs[job_id]["metrics"]["episodes"],
             "total_timesteps": total_timesteps,
         }
         training_jobs[job_id]["metrics"]["progress"] = 100
-        print(f"--- Training for job {job_id} Finished ---")
     except Exception as e:
         training_jobs[job_id]["status"] = "failed"
         training_jobs[job_id]["error"] = str(e)
         training_jobs[job_id]["metrics"]["logs"].append(
             f"[{datetime.now().strftime('%H:%M:%S')}] ERROR: {str(e)}"
         )
-        print(f"--- Training for job {job_id} Failed: {e} ---")
 @app.get("/")
 def read_root():
@@ -176,8 +280,7 @@ def read_root():
 def start_training(job: TrainingJob, background_tasks: BackgroundTasks):
     """Start a new training job"""
     job_id = str(uuid.uuid4())
-    # Initialize the job in our in-memory storage
     training_jobs[job_id] = {
         "status": "queued",
         "config": {
@@ -205,10 +308,9 @@ def start_training(job: TrainingJob, background_tasks: BackgroundTasks):
         "error": None,
         "start_time": None,
     }
-    # Start the training in the background
     background_tasks.add_task(run_training, job_id, training_jobs[job_id]["config"])
     return {
         "message": "Training job started successfully!",
         "job_id": job_id,
@@ -216,18 +318,15 @@ def start_training(job: TrainingJob, background_tasks: BackgroundTasks):
 @app.get("/train/{job_id}/status")
 def get_training_status(job_id: str):
-    """
-    Returns the status and metrics of a training job.
-    """
     job = training_jobs.get(job_id)
     if not job:
         raise HTTPException(status_code=404, detail="Job not found")
-    # Calculate elapsed time
     elapsed_time = 0
     if job.get("start_time"):
         elapsed_time = (datetime.now() - job["start_time"]).total_seconds()
     return {
         "status": job["status"],
         "metrics": job["metrics"],
@@ -238,19 +337,15 @@ def get_training_status(job_id: str):
 @app.get("/train/{job_id}/metrics")
 def get_training_metrics(job_id: str):
-    """
-    Returns only the metrics of a training job (lightweight endpoint for polling).
-    """
     job = training_jobs.get(job_id)
     if not job:
-        print(f"DEBUG: Job {job_id} not found in training_jobs")
-        print(f"DEBUG: Available jobs: {list(training_jobs.keys())}")
-        raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
     elapsed_time = 0
     if job.get("start_time"):
         elapsed_time = (datetime.now() - job["start_time"]).total_seconds()
     return {
         "status": job["status"],
         "metrics": job["metrics"],
@@ -259,13 +354,11 @@ def get_training_metrics(job_id: str):
 @app.post("/train/{job_id}/stop")
 def stop_training(job_id: str):
-    """
-    Stop a training job.
-    """
     job = training_jobs.get(job_id)
     if not job:
         raise HTTPException(status_code=404, detail="Job not found")
     if job["status"] == "training":
         job["status"] = "stopped"
         job["metrics"]["logs"].append(
@@ -274,351 +367,42 @@ def stop_training(job_id: str):
         return {"message": "Training stopped successfully!"}
     else:
         raise HTTPException(status_code=400, detail="Job is not currently training")
-@app.get("/debug")
-def debug():
-    return {"jobs": list(training_jobs.keys())}
-# from fastapi import FastAPI, BackgroundTasks, HTTPException
-# from fastapi.middleware.cors import CORSMiddleware
-# from pydantic import BaseModel
-# from typing import Dict, Any, List
-# import uuid
-# import threading
-# import numpy as np
-# import gymnasium as gym
-# from stable_baselines3 import PPO
-# from stable_baselines3.common.monitor import Monitor
-# from stable_baselines3.common.evaluation import evaluate_policy
-# from stable_baselines3.common.callbacks import BaseCallback
-# from datetime import datetime
-# import asyncio
-# app = FastAPI()
-# # Add CORS middleware
-# app.add_middleware(
-#     CORSMiddleware,
-#     allow_origins=["*"],
-#     allow_credentials=True,
-#     allow_methods=["*"],
-#     allow_headers=["*"],
-# )
-# # In-memory storage for training jobs
-# training_jobs: Dict[str, Dict[str, Any]] = {}
-# class TrainingJob(BaseModel):
-#     code: str
-#     env_name: str = "CartPole-v1"
-#     total_timesteps: int = 100000
-#     learning_rate: float = 0.001
-#     n_steps: int = 2048
-#     batch_size: int = 64
-#     n_epochs: int = 10
-# class MetricsCallback(BaseCallback):
-#     """Custom callback to track training metrics in real-time"""
-#     def __init__(self, job_id: str):
-#         super().__init__()
-#         self.job_id = job_id
-#         self.episode_count = 0
-#     def _on_step(self) -> bool:
-#         job = training_jobs.get(self.job_id)
-#         if not job:
-#             return False
-#         # Update timestep count
-#         job["metrics"]["timesteps"] = self.num_timesteps
-#         job["metrics"]["progress"] = int(
-#             (self.num_timesteps / job["config"]["total_timesteps"]) * 100
-#         )
-#         # Check for episode completion
-#         if self.locals.get("dones", [False])[0]:
-#             if "infos" in self.locals and len(self.locals["infos"]) > 0:
-#                 info = self.locals["infos"][0]
-#                 if "episode" in info:
-#                     self.episode_count += 1
-#                     ep_reward = float(info["episode"]["r"])
-#                     ep_length = int(info["episode"]["l"])
-#                     job["metrics"]["episodes"] = self.episode_count
-#                     job["metrics"]["episode_rewards"].append(ep_reward)
-#                     job["metrics"]["episode_lengths"].append(ep_length)
-#                     job["metrics"]["current_episode_reward"] = ep_reward
-#                     # Calculate running average
-#                     if len(job["metrics"]["episode_rewards"]) > 0:
-#                         job["metrics"]["mean_reward"] = float(
-#                             np.mean(job["metrics"]["episode_rewards"][-100:])
-#                         )
-#                         job["metrics"]["std_reward"] = float(
-#                             np.std(job["metrics"]["episode_rewards"][-100:])
-#                         )
-#                     # Add log entry
-#                     log_entry = f"[{datetime.now().strftime('%H:%M:%S')}] Episode {self.episode_count}: reward = {ep_reward:.2f}"
-#                     job["metrics"]["logs"].append(log_entry)
-#                     if len(job["metrics"]["logs"]) > 50:
-#                         job["metrics"]["logs"].pop(0)
-#         return True
-# def run_training(job_id: str, config: Dict[str, Any]):
-#     """
-#     This function runs the training and updates the job status with real-time metrics.
-#     """
-#     print(f"--- Starting Training for job {job_id} ---")
-#     training_jobs[job_id]["status"] = "training"
-#     training_jobs[job_id]["start_time"] = datetime.now()
-#     try:
-#         env_name = config.get("env_name", "CartPole-v1")
-#         total_timesteps = config.get("total_timesteps", 100000)
-#         learning_rate = config.get("learning_rate", 0.001)
-#         n_steps = config.get("n_steps", 2048)
-#         batch_size = config.get("batch_size", 64)
-#         n_epochs = config.get("n_epochs", 10)
-#         # Initialize environment
-#         env = gym.make(env_name)
-#         env = Monitor(env)
-#         # Initialize model
-#         model = PPO(
-#             "MlpPolicy",
-#             env,
-#             verbose=0,
-#             learning_rate=learning_rate,
-#             n_steps=n_steps,
-#             batch_size=batch_size,
-#             n_epochs=n_epochs,
-#         )
-#         # Add initial logs
-#         training_jobs[job_id]["metrics"]["logs"].append(
-#             f"[{datetime.now().strftime('%H:%M:%S')}] Initializing environment: {env_name}"
-#         )
-#         training_jobs[job_id]["metrics"]["logs"].append(
-#             f"[{datetime.now().strftime('%H:%M:%S')}] Creating PPO agent with MlpPolicy..."
-#         )
-#         training_jobs[job_id]["metrics"]["logs"].append(
-#             f"[{datetime.now().strftime('%H:%M:%S')}] Starting training for {total_timesteps:,} timesteps"
-#         )
-#         # Train with callback
-#         model.learn(
-#             total_timesteps=total_timesteps,
-#             callback=MetricsCallback(job_id),
-#         )
-#         # Evaluate
-#         training_jobs[job_id]["metrics"]["logs"].append(
-#             f"[{datetime.now().strftime('%H:%M:%S')}] Training completed! Evaluating model..."
-#         )
-#         mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=100)
-#         training_jobs[job_id]["metrics"]["eval_mean_reward"] = float(mean_reward)
-#         training_jobs[job_id]["metrics"]["eval_std_reward"] = float(std_reward)
-#         # Save model
-#         model.save(f"{env_name}_ppo_{job_id}")
-#         training_jobs[job_id]["metrics"]["logs"].append(
-#             f"[{datetime.now().strftime('%H:%M:%S')}] Model saved as {env_name}_ppo_{job_id}.zip"
-#         )
-#         # Store results
-#         training_jobs[job_id]["status"] = "completed"
-#         training_jobs[job_id]["results"] = {
-#             "mean_reward": mean_reward,
-#             "std_reward": std_reward,
-#             "model_path": f"{env_name}_ppo_{job_id}.zip",
-#             "total_episodes": training_jobs[job_id]["metrics"]["episodes"],
-#             "total_timesteps": total_timesteps,
-#         }
-#         training_jobs[job_id]["metrics"]["progress"] = 100
-#         print(f"--- Training for job {job_id} Finished ---")
-#     except Exception as e:
-#         training_jobs[job_id]["status"] = "failed"
-#         training_jobs[job_id]["error"] = str(e)
-#         training_jobs[job_id]["metrics"]["logs"].append(
-#             f"[{datetime.now().strftime('%H:%M:%S')}] ERROR: {str(e)}"
-#         )
-#         print(f"--- Training for job {job_id} Failed: {e} ---")
-# @app.get("/")
-# def read_root():
-#     return {"message": "Welcome to the RL Training API!"}
-# @app.post("/train")
-# def start_training(job: TrainingJob, background_tasks: BackgroundTasks):
-#     """Start a new training job"""
-#     job_id = str(uuid.uuid4())
-#     # Initialize the job in our in-memory storage
-#     training_jobs[job_id] = {
-#         "status": "queued",
-#         "config": {
-#             "env_name": job.env_name,
-#             "total_timesteps": job.total_timesteps,
-#             "learning_rate": job.learning_rate,
-#             "n_steps": job.n_steps,
-#             "batch_size": job.batch_size,
-#             "n_epochs": job.n_epochs,
-#         },
-#         "metrics": {
-#             "timesteps": 0,
-#             "episodes": 0,
-#             "progress": 0,
-#             "episode_rewards": [],
-#             "episode_lengths": [],
-#             "current_episode_reward": 0,
-#             "mean_reward": 0,
-#             "std_reward": 0,
-#             "eval_mean_reward": None,
-#             "eval_std_reward": None,
-#             "logs": [],
-#         },
-#         "results": None,
-#         "error": None,
-#         "start_time": None,
-#     }
-#     # Start the training in the background
-#     background_tasks.add_task(run_training, job_id, training_jobs[job_id]["config"])
-#     return {
-#         "message": "Training job started successfully!",
-#         "job_id": job_id,
-#     }
-# @app.get("/train/{job_id}/status")
-# def get_training_status(job_id: str):
-#     """
-#     Returns the status and metrics of a training job.
-#     """
-#     job = training_jobs.get(job_id)
-#     if not job:
-#         raise HTTPException(status_code=404, detail="Job not found")
-#     # Calculate elapsed time
-#     elapsed_time = 0
-#     if job.get("start_time"):
-#         elapsed_time = (datetime.now() - job["start_time"]).total_seconds()
-#     return {
-#         "status": job["status"],
-#         "metrics": job["metrics"],
-#         "elapsed_time": elapsed_time,
-#         "results": job["results"],
-#         "error": job["error"],
-#     }
-# @app.get("/train/{job_id}/metrics")
-# def get_training_metrics(job_id: str):
-#     """
-#     Returns only the metrics of a training job (lightweight endpoint for polling).
-#     """
-#     job = training_jobs.get(job_id)
-#     if not job:
-#         raise HTTPException(status_code=404, detail="Job not found")
-#     elapsed_time = 0
-#     if job.get("start_time"):
-#         elapsed_time = (datetime.now() - job["start_time"]).total_seconds()
-#     return {
-#         "status": job["status"],
-#         "metrics": job["metrics"],
-#         "elapsed_time": elapsed_time,
-#     }
-# @app.post("/train/{job_id}/stop")
-# def stop_training(job_id: str):
-#     """
-#     Stop a training job.
-#     """
-#     job = training_jobs.get(job_id)
-#     if not job:
-#         raise HTTPException(status_code=404, detail="Job not found")
-#     if job["status"] == "training":
-#         job["status"] = "stopped"
-#         job["metrics"]["logs"].append(
-#             f"[{datetime.now().strftime('%H:%M:%S')}] Training stopped by user"
-#         )
-#         return {"message": "Training stopped successfully!"}
-#     else:
-#         raise HTTPException(status_code=400, detail="Job is not currently training")
-# # from fastapi import FastAPI, BackgroundTasks, HTTPException
-# # from pydantic import BaseModel
-# # import os
-# # import uuid
-# # from typing import Dict, Any
-# # app = FastAPI()
-# # # In-memory storage for training jobs
-# # training_jobs: Dict[str, Dict[str, Any]] = {}
-# # # Define the request body for the training job
-# # class TrainingJob(BaseModel):
-# #     code: str
-# # # This is where you'll put your training logic
-# # def run_training(job_id: str, user_code: str):
-# #     """
-# #     This function runs the user's code and updates the job status.
-# #     """
-# #     print(f"--- Starting Training for job {job_id} ---")
-# #     training_jobs[job_id]["status"] = "training"
-# #     try:
-# #         # Create a dictionary to serve as the local namespace for exec
-# #         local_namespace = {}
-# #         # Execute the user's code
-# #         exec(user_code, {}, local_namespace)
-# #         # Assume the user's code stores results in a 'results' dictionary
-# #         results = local_namespace.get('results', {})
-# #         # Store the results and mark the job as completed
-# #         training_jobs[job_id]["status"] = "completed"
-# #         training_jobs[job_id]["results"] = results
-# #         print(f"--- Training for job {job_id} Finished ---")
-# #     except Exception as e:
-# #        # Mark the job as failed and store the error message
-# #        training_jobs[job_id]["status"] = "failed"
-# #        training_jobs[job_id]["error"] = str(e)
-# #        print(f"--- Training for job {job_id} Failed: {e} ---")
-# # @app.get('/')
-# # def read_root():
-# #     return {"message": "Welcome to the Training API!"}
-# # @app.post("/train")
-# # def start_training(job: TrainingJob, background_tasks: BackgroundTasks):
-# #     # Generate a unique job ID
-# #     job_id = str(uuid.uuid4())
-# #     # Initialize the job in our in-memory storage
-# #     training_jobs[job_id] = {"status": "queued"}
-# #     # Start the training in the background
-# #     background_tasks.add_task(run_training, job_id, job.code)
-# #     return {"message": "Training job started successfully!", "job_id": job_id}
-# # @app.get("/train/{job_id}/status")
-# # def get_training_status(job_id: str):
-# #     """
-# #     Returns the status and results of a training job.
-# #     """
-# #     job = training_jobs.get(job_id)
-# #     if not job:
-# #         raise HTTPException(status_code=404, detail="Job not found")
-# #     return job

+from fastapi import FastAPI, BackgroundTasks, HTTPException, WebSocket, WebSocketDisconnect
 from fastapi.middleware.cors import CORSMiddleware
+import base64
+import cv2
+import numpy as np
+from collections import deque
 from pydantic import BaseModel
+from typing import Dict, Any, List, Optional
 import uuid
 import threading
 import gymnasium as gym
 from stable_baselines3 import PPO
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.callbacks import BaseCallback
 from datetime import datetime
 import asyncio
+import os
+from enum import Enum
 app = FastAPI()
 training_jobs: Dict[str, Dict[str, Any]] = {}
 class TrainingJob(BaseModel):
     env_name: str = "CartPole-v1"
     total_timesteps: int = 100000
     learning_rate: float = 0.001
     batch_size: int = 64
     n_epochs: int = 10
+class ConnectionManager:
+    """Track WebSocket viewers per training job and push rendered frames to them.
+    ``active_connections`` maps a job id to the sockets watching that job.
+    ``frames`` maps a job id to a one-slot deque that keeps only the newest
+    frame, so unsent frames never accumulate.
+    """
+    def __init__(self):
+        self.active_connections: Dict[str, List[WebSocket]] = {}
+        self.frames: Dict[str, deque] = {}
+    async def connect(self, job_id: str, websocket: WebSocket):
+        """Accept ``websocket`` and register it as a viewer of ``job_id``."""
+        await websocket.accept()
+        self.active_connections.setdefault(job_id, []).append(websocket)
+        # setdefault (not assignment) keeps a frame that add_frame stored
+        # before the first viewer arrived, so it can be served immediately.
+        self.frames.setdefault(job_id, deque(maxlen=1))
+        print(f"[WS] Client connected to job {job_id}")
+    def disconnect(self, job_id: str, websocket: WebSocket):
+        """Unregister ``websocket`` from ``job_id``; safe to call more than once.
+        When the last viewer of a job leaves, the job's buffered frame is
+        dropped as well.
+        """
+        connections = self.active_connections.get(job_id)
+        if connections is not None:
+            # broadcast_frame may already have removed this socket after a
+            # failed send, so a second removal must not raise ValueError.
+            if websocket in connections:
+                connections.remove(websocket)
+            if not connections:
+                del self.active_connections[job_id]
+                self.frames.pop(job_id, None)
+        print(f"[WS] Client disconnected from job {job_id}")
+    def add_frame(self, job_id: str, frame: np.ndarray):
+        """Store ``frame`` as the latest frame for ``job_id``, replacing any older one."""
+        self.frames.setdefault(job_id, deque(maxlen=1)).append(frame)
+    async def broadcast_frame(self, job_id: str):
+        """Send the latest frame of ``job_id`` to every connected viewer.
+        The frame is downscaled so its longest side is at most 512 px,
+        JPEG-encoded, base64-encoded and sent as a JSON message of type
+        ``"frame"``. Viewers whose send fails are unregistered. Does nothing
+        when no frame has been stored for the job.
+        """
+        latest = self.frames.get(job_id)
+        if not latest:
+            return
+        frame = latest[-1]
+        # Frames come from an env created with render_mode='rgb_array', i.e.
+        # RGB channel order, while cv2.imencode expects BGR. Swap the channels
+        # of 3-channel images; anything else is passed through unchanged.
+        if len(frame.shape) == 3 and frame.shape[2] == 3:
+            frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+        else:
+            frame_bgr = frame
+        # Downscale large frames to keep each message small
+        height, width = frame_bgr.shape[:2]
+        if height > 512 or width > 512:
+            scale = 512 / max(height, width)
+            new_size = (int(width * scale), int(height * scale))
+            frame_bgr = cv2.resize(frame_bgr, new_size, interpolation=cv2.INTER_LINEAR)
+        # Encode to JPEG
+        success, buffer = cv2.imencode('.jpg', frame_bgr, [cv2.IMWRITE_JPEG_QUALITY, 85])
+        if not success:
+            print(f"[ERROR] Failed to encode frame for job {job_id}")
+            return
+        frame_base64 = base64.b64encode(buffer).decode('utf-8')
+        # Iterate over a snapshot: connect()/disconnect() can run while this
+        # coroutine is suspended in send_json and would otherwise mutate the
+        # list mid-iteration.
+        disconnected = []
+        for connection in list(self.active_connections.get(job_id, ())):
+            try:
+                await connection.send_json({
+                    "type": "frame",
+                    "job_id": job_id,
+                    "data": frame_base64,
+                    "timestamp": datetime.now().isoformat()
+                })
+            except Exception as e:
+                print(f"[ERROR] Failed to send frame: {e}")
+                disconnected.append(connection)
+        # Remove clients whose send failed
+        for conn in disconnected:
+            self.disconnect(job_id, conn)
+manager = ConnectionManager()
 class MetricsCallback(BaseCallback):
     """Custom callback to track training metrics in real-time"""
+    def __init__(self, job_id: str, render_freq: int = 5):
         super().__init__()
         self.job_id = job_id
         self.episode_count = 0
+        self.step_count = 0
+        self.render_freq = render_freq  # Render every N steps
+        self.env = None
     def _on_step(self) -> bool:
         job = training_jobs.get(self.job_id)
         if not job:
             return False
+        self.step_count += 1
         # Update timestep count
         job["metrics"]["timesteps"] = self.num_timesteps
         job["metrics"]["progress"] = int(
             (self.num_timesteps / job["config"]["total_timesteps"]) * 100
         )
+        # Render frame periodically
+        if self.step_count % self.render_freq == 0:
+            try:
+                frame = self.model.get_env().render()
+                if frame is not None:
+                    manager.add_frame(self.job_id, frame)
+            except Exception as e:
+                print(f"[ERROR] Failed to render frame: {e}")
         # Check for episode completion
         if self.locals.get("dones", [False])[0]:
             if "infos" in self.locals and len(self.locals["infos"]) > 0:
                     self.episode_count += 1
                     ep_reward = float(info["episode"]["r"])
                     ep_length = int(info["episode"]["l"])
                     job["metrics"]["episodes"] = self.episode_count
                     job["metrics"]["episode_rewards"].append(ep_reward)
                     job["metrics"]["episode_lengths"].append(ep_length)
                     job["metrics"]["current_episode_reward"] = ep_reward
                     # Calculate running average
                     if len(job["metrics"]["episode_rewards"]) > 0:
                         job["metrics"]["mean_reward"] = float(
                         job["metrics"]["std_reward"] = float(
                             np.std(job["metrics"]["episode_rewards"][-100:])
                         )
                     # Add log entry
+                    log_entry = f"[{datetime.now().strftime('%H:%M:%S')}] Episode {self.episode_count}: reward = {ep_reward:.2f}, length = {ep_length}"
                     job["metrics"]["logs"].append(log_entry)
+                    if len(job["metrics"]["logs"]) > 100:
                         job["metrics"]["logs"].pop(0)
         return True
 def run_training(job_id: str, config: Dict[str, Any]):
+    """Run the RL training loop with rendering"""
+    print(f"[TRAIN] Starting training for job {job_id}")
     training_jobs[job_id]["status"] = "training"
     training_jobs[job_id]["start_time"] = datetime.now()
+    env = None
     try:
         env_name = config.get("env_name", "CartPole-v1")
         total_timesteps = config.get("total_timesteps", 100000)
         n_steps = config.get("n_steps", 2048)
         batch_size = config.get("batch_size", 64)
         n_epochs = config.get("n_epochs", 10)
+        # Initialize environment with rgb_array rendering
+        env = gym.make(env_name, render_mode='rgb_array')
         env = Monitor(env)
         # Initialize model
         model = PPO(
             "MlpPolicy",
             batch_size=batch_size,
             n_epochs=n_epochs,
         )
         # Add initial logs
         training_jobs[job_id]["metrics"]["logs"].append(
+            f"[{datetime.now().strftime('%H:%M:%S')}] Environment: {env_name}"
         )
         training_jobs[job_id]["metrics"]["logs"].append(
+            f"[{datetime.now().strftime('%H:%M:%S')}] Total timesteps: {total_timesteps:,}"
         )
         training_jobs[job_id]["metrics"]["logs"].append(
+            f"[{datetime.now().strftime('%H:%M:%S')}] Starting training..."
         )
         # Train with callback
         model.learn(
             total_timesteps=total_timesteps,
+            callback=MetricsCallback(job_id, render_freq=5),
         )
         # Evaluate
         training_jobs[job_id]["metrics"]["logs"].append(
+            f"[{datetime.now().strftime('%H:%M:%S')}] Training completed! Evaluating..."
         )
+        mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
         training_jobs[job_id]["metrics"]["eval_mean_reward"] = float(mean_reward)
         training_jobs[job_id]["metrics"]["eval_std_reward"] = float(std_reward)
         # Save model
+        model_path = f"models/{env_name}_ppo_{job_id}"
+        os.makedirs("models", exist_ok=True)
+        model.save(model_path)
         training_jobs[job_id]["metrics"]["logs"].append(
+            f"[{datetime.now().strftime('%H:%M:%S')}] Model saved!"
         )
         # Store results
         training_jobs[job_id]["status"] = "completed"
         training_jobs[job_id]["results"] = {
             "mean_reward": mean_reward,
             "std_reward": std_reward,
+            "model_path": f"{model_path}.zip",
             "total_episodes": training_jobs[job_id]["metrics"]["episodes"],
             "total_timesteps": total_timesteps,
         }
         training_jobs[job_id]["metrics"]["progress"] = 100
+        print(f"[TRAIN] Training completed for job {job_id}")
     except Exception as e:
         training_jobs[job_id]["status"] = "failed"
         training_jobs[job_id]["error"] = str(e)
         training_jobs[job_id]["metrics"]["logs"].append(
             f"[{datetime.now().strftime('%H:%M:%S')}] ERROR: {str(e)}"
         )
+        print(f"[ERROR] Training failed for job {job_id}: {e}")
+    finally:
+        if env:
+            env.close()
+# REST Endpoints
 @app.get("/")
 def read_root():
 def start_training(job: TrainingJob, background_tasks: BackgroundTasks):
     """Start a new training job"""
     job_id = str(uuid.uuid4())
     training_jobs[job_id] = {
         "status": "queued",
         "config": {
         "error": None,
         "start_time": None,
     }
     background_tasks.add_task(run_training, job_id, training_jobs[job_id]["config"])
     return {
         "message": "Training job started successfully!",
         "job_id": job_id,
 @app.get("/train/{job_id}/status")
 def get_training_status(job_id: str):
+    """Get full training status with metrics"""
     job = training_jobs.get(job_id)
     if not job:
         raise HTTPException(status_code=404, detail="Job not found")
     elapsed_time = 0
     if job.get("start_time"):
         elapsed_time = (datetime.now() - job["start_time"]).total_seconds()
     return {
         "status": job["status"],
         "metrics": job["metrics"],
 @app.get("/train/{job_id}/metrics")
 def get_training_metrics(job_id: str):
+    """Lightweight endpoint for polling metrics"""
     job = training_jobs.get(job_id)
     if not job:
+        raise HTTPException(status_code=404, detail="Job not found")
     elapsed_time = 0
     if job.get("start_time"):
         elapsed_time = (datetime.now() - job["start_time"]).total_seconds()
     return {
         "status": job["status"],
         "metrics": job["metrics"],
 @app.post("/train/{job_id}/stop")
 def stop_training(job_id: str):
+    """Stop a training job"""
     job = training_jobs.get(job_id)
     if not job:
         raise HTTPException(status_code=404, detail="Job not found")
     if job["status"] == "training":
         job["status"] = "stopped"
         job["metrics"]["logs"].append(
         return {"message": "Training stopped successfully!"}
     else:
         raise HTTPException(status_code=400, detail="Job is not currently training")
+# WebSocket Endpoint
+@app.websocket("/ws/render/{job_id}")
+async def websocket_render_endpoint(websocket: WebSocket, job_id: str):
+    """
+    WebSocket endpoint for real-time environment rendering.
+    Connect from frontend with: ws://localhost:8000/ws/render/{job_id}
+    Text messages understood from the client:
+    - "request_frame": broadcast the latest frame of the job to its viewers
+    - "ping": reply with {"type": "pong"}
+    Any other message is ignored.
+    """
+    await manager.connect(job_id, websocket)
+    try:
+        while True:
+            # Block until the client sends something; this also keeps the
+            # connection open between frame requests.
+            data = await websocket.receive_text()
+            if data == "request_frame":
+                await manager.broadcast_frame(job_id)
+            elif data == "ping":
+                await websocket.send_json({"type": "pong"})
+    except WebSocketDisconnect:
+        # Normal client close; cleanup happens in ``finally``.
+        pass
+    except Exception as e:
+        print(f"[ERROR] WebSocket error for job {job_id}: {e}")
+    finally:
+        # Unregister on every exit path, including task cancellation, which
+        # neither ``except`` clause above catches.
+        manager.disconnect(job_id, websocket)
+@app.get("/debug/jobs")
+def debug_jobs():
+    """Debug endpoint: summarise every known job (id, status, progress, episodes)."""
+    summaries = []
+    for job_id, job in training_jobs.items():
+        entry = {"job_id": job_id, "status": job["status"]}
+        metrics = job["metrics"]
+        entry["progress"] = metrics["progress"]
+        entry["episodes"] = metrics["episodes"]
+        summaries.append(entry)
+    return {"jobs": summaries}