Spaces:

bumie-e
/

MARL-Gym

Sleeping

App Files Files Community

bumie-e committed on Dec 4, 2025

Commit

d84d915

1 Parent(s): de3f783

Added support for dynamic code execution

Browse files

Files changed (1) hide show

app.py +105 -251

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from fastapi import FastAPI, BackgroundTasks, HTTPException, WebSocket, WebSocketDisconnect
 from fastapi.middleware.cors import CORSMiddleware
 import base64
 import numpy as np
@@ -7,20 +8,19 @@ from pydantic import BaseModel
 from typing import Dict, Any, List, Optional
 import uuid
 import gymnasium as gym
-from stable_baselines3 import PPO
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.evaluation import evaluate_policy
 from stable_baselines3.common.callbacks import BaseCallback
 from datetime import datetime
 import asyncio
 import os
 import logging
 from io import BytesIO
 from PIL import Image
-# Add to imports in app.py
-from fastapi.responses import FileResponse
 import imageio
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -28,7 +28,6 @@ logger = logging.getLogger(__name__)
 app = FastAPI()
-# Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -37,19 +36,17 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# In-memory storage for training jobs
 training_jobs: Dict[str, Dict[str, Any]] = {}
-class TrainingJob(BaseModel):
-    env_name: str = "CartPole-v1"
-    total_timesteps: int = 100000
-    learning_rate: float = 0.001
-    n_steps: int = 2048
-    batch_size: int = 64
-    n_epochs: int = 10
 class ConnectionManager:
-    """Manages WebSocket connections and frame broadcasting"""
     def __init__(self):
         self.active_connections: Dict[str, List[WebSocket]] = {}
         self.frames: Dict[str, deque] = {}
@@ -60,7 +57,6 @@ class ConnectionManager:
             self.active_connections[job_id] = []
             self.frames[job_id] = deque(maxlen=1)
         self.active_connections[job_id].append(websocket)
-        logger.info(f"[WS] Client connected to job {job_id}")
     def disconnect(self, job_id: str, websocket: WebSocket):
         if job_id in self.active_connections:
@@ -69,328 +65,186 @@ class ConnectionManager:
                 del self.active_connections[job_id]
                 if job_id in self.frames:
                     del self.frames[job_id]
-        logger.info(f"[WS] Client disconnected from job {job_id}")
     def add_frame(self, job_id: str, frame: np.ndarray):
-        """Store the latest frame for this job"""
         if job_id not in self.frames:
             self.frames[job_id] = deque(maxlen=1)
         self.frames[job_id].append(frame)
     async def broadcast_frame(self, job_id: str):
-        """Broadcast the latest frame to all connected clients"""
-        if job_id not in self.frames or not self.frames[job_id]:
-            return
         frame = self.frames[job_id][-1]
         try:
-            # Convert numpy array to PIL Image for encoding
             if isinstance(frame, np.ndarray):
-                # Handle different frame formats
-                if frame.dtype != np.uint8:
-                    frame = np.clip(frame * 255, 0, 255).astype(np.uint8)
-                # Convert BGR to RGB if needed
-                if len(frame.shape) == 3 and frame.shape[2] == 3:
-                    # Assuming BGR from gym, convert to RGB
-                    frame = frame[:, :, ::-1]  # BGR to RGB
                 img = Image.fromarray(frame)
-            else:
-                logger.error(f"[ENCODE] Unexpected frame type: {type(frame)}")
-                return
-            # Resize for efficient transmission
             max_size = 512
             if img.width > max_size or img.height > max_size:
                 ratio = max_size / max(img.width, img.height)
-                new_size = (int(img.width * ratio), int(img.height * ratio))
-                img = img.resize(new_size, Image.Resampling.LANCZOS)
-            # Encode to JPEG
             buffer = BytesIO()
             img.save(buffer, format='JPEG', quality=85)
-            frame_bytes = buffer.getvalue()
-            frame_base64 = base64.b64encode(frame_bytes).decode('utf-8')
-            # Broadcast to all connected clients
             if job_id in self.active_connections:
-                disconnected = []
                 for connection in self.active_connections[job_id]:
-                    try:
-                        await connection.send_json({
-                            "type": "frame",
-                            "job_id": job_id,
-                            "data": frame_base64,
-                            "timestamp": datetime.now().isoformat()
-                        })
-                    except Exception as e:
-                        logger.error(f"[WS] Failed to send frame: {e}")
-                        disconnected.append(connection)
-                # Remove disconnected clients
-                for conn in disconnected:
-                    self.disconnect(job_id, conn)
-        except Exception as e:
-            logger.error(f"[ENCODE] Failed to encode frame: {e}")
 manager = ConnectionManager()
 class MetricsCallback(BaseCallback):
-    """Custom callback to track training metrics in real-time"""
-    def __init__(self, job_id: str, render_freq: int = 5):
         super().__init__()
         self.job_id = job_id
         self.episode_count = 0
-        self.step_count = 0
         self.render_freq = render_freq
     def _on_step(self) -> bool:
         job = training_jobs.get(self.job_id)
-        # FIX: Check if job exists OR if status is marked as stopped
-        if not job or job["status"] == "stopped":
-            logger.info(f"[CALLBACK] Stopping job {self.job_id}")
-            return False  # Returning False in SB3 stops the training immediately
-        # if not job:
-        #     return False
-        self.step_count += 1
-        # Update timestep count
         job["metrics"]["timesteps"] = self.num_timesteps
-        job["metrics"]["progress"] = int(
-            (self.num_timesteps / job["config"]["total_timesteps"]) * 100
-        )
-        # Render frame periodically
-        # if self.step_count % self.render_freq == 0:
-        #     try:
-        #         frame = self.model.get_env().render()
-        #         if frame is not None and isinstance(frame, np.ndarray):
-        #             manager.add_frame(self.job_id, frame)
-        #     except Exception as e:
-        #         logger.debug(f"[RENDER] Render not available: {e}")
-        # RENDER & RECORD
-        # We process frames at the render frequency
-        if self.step_count % self.render_freq == 0:
             try:
-                # Capture frame
                 frame = self.model.get_env().render()
                 if frame is not None and isinstance(frame, np.ndarray):
-                    # 1. Send to WebSocket for live view
                     manager.add_frame(self.job_id, frame)
-                    # 2. Store in memory for video download
-                    # We skip every other captured frame to keep video file size manageable
-                    # (Capturing effectively at render_freq * 2)
-                    if len(job["video_buffer"]) < 2000: # Safety cap: max 2000 frames to prevent RAM overflow
-                         job["video_buffer"].append(frame)
-            except Exception as e:
-                logger.debug(f"[RENDER] Render error: {e}")
-        # Check for episode completion
         if self.locals.get("dones", [False])[0]:
             if "infos" in self.locals and len(self.locals["infos"]) > 0:
                 info = self.locals["infos"][0]
                 if "episode" in info:
                     self.episode_count += 1
                     ep_reward = float(info["episode"]["r"])
-                    ep_length = int(info["episode"]["l"])
                     job["metrics"]["episodes"] = self.episode_count
                     job["metrics"]["episode_rewards"].append(ep_reward)
-                    job["metrics"]["episode_lengths"].append(ep_length)
                     job["metrics"]["current_episode_reward"] = ep_reward
-                    # Calculate running average
                     if len(job["metrics"]["episode_rewards"]) > 0:
-                        job["metrics"]["mean_reward"] = float(
-                            np.mean(job["metrics"]["episode_rewards"][-100:])
-                        )
-                        job["metrics"]["std_reward"] = float(
-                            np.std(job["metrics"]["episode_rewards"][-100:])
-                        )
-                    # Add log entry
-                    log_entry = f"[{datetime.now().strftime('%H:%M:%S')}] Episode {self.episode_count}: reward = {ep_reward:.2f}, length = {ep_length}"
                     job["metrics"]["logs"].append(log_entry)
-                    if len(job["metrics"]["logs"]) > 100:
-                        job["metrics"]["logs"].pop(0)
         return True
-def save_video_from_buffer(job_id: str):
-    """Helper to compile stored frames into MP4"""
     job = training_jobs.get(job_id)
-    if not job or not job["video_buffer"]:
-        return None
     try:
-        env_name = job["config"]["env_name"]
         video_path = f"models/{env_name}_replay_{job_id}.mp4"
-        os.makedirs("models", exist_ok=True)
-        # Save video at 30 FPS
-        logger.info(f"[VIDEO] Saving {len(job['video_buffer'])} frames to {video_path}")
         imageio.mimsave(video_path, job['video_buffer'], fps=30)
-        # Clear buffer to free memory
         job["video_buffer"] = []
         return video_path
-    except Exception as e:
-        logger.error(f"[VIDEO] Failed to save video: {e}")
-        return None
-def run_training(job_id: str, config: Dict[str, Any]):
-    """Run the RL training loop with rendering"""
-    logger.info(f"[TRAIN] Starting training for job {job_id}")
     training_jobs[job_id]["status"] = "training"
     training_jobs[job_id]["start_time"] = datetime.now()
-    env = None
     try:
-        env_name = config.get("env_name", "CartPole-v1")
-        total_timesteps = config.get("total_timesteps", 100000)
-        learning_rate = config.get("learning_rate", 0.001)
-        n_steps = config.get("n_steps", 2048)
-        batch_size = config.get("batch_size", 64)
-        n_epochs = config.get("n_epochs", 10)
-        # Initialize environment with rgb_array rendering
-        logger.info(f"[TRAIN] Creating environment: {env_name}")
-        env = gym.make(env_name, render_mode='rgb_array')
-        env = Monitor(env)
-        # Initialize model
-        logger.info(f"[TRAIN] Creating PPO model")
-        model = PPO(
-            "MlpPolicy",
-            env,
-            verbose=0,
-            learning_rate=learning_rate,
-            n_steps=n_steps,
-            batch_size=batch_size,
-            n_epochs=n_epochs,
-        )
-        # Add initial logs
-        training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] Environment: {env_name}"
-        )
-        training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] Total timesteps: {total_timesteps:,}"
-        )
-        training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] Starting training..."
-        )
-        # Train with callback
-        logger.info(f"[TRAIN] Starting learning loop")
-        model.learn(
-            total_timesteps=total_timesteps,
-            callback=MetricsCallback(job_id, render_freq=5),
-        )
-        # Evaluate
-        logger.info(f"[TRAIN] Evaluating model")
-        training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] Training completed! Evaluating..."
-        )
-        mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
-        training_jobs[job_id]["metrics"]["eval_mean_reward"] = float(mean_reward)
-        training_jobs[job_id]["metrics"]["eval_std_reward"] = float(std_reward)
-        # Save model
-        model_path = f"models/{env_name}_ppo_{job_id}"
-        os.makedirs("models", exist_ok=True)
-        model.save(model_path)
-        # --- NEW: SAVE VIDEO FROM BUFFER ---
-        video_path = save_video_from_buffer(job_id)
-        # -----------------------------------
-        training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] Model saved!"
-        )
-        # Store results
         training_jobs[job_id]["status"] = "completed"
         training_jobs[job_id]["results"] = {
-            "mean_reward": mean_reward,
-            "std_reward": std_reward,
-            "model_path": f"{model_path}.zip",
-            "video_path": video_path, # Add this to results
             "total_episodes": training_jobs[job_id]["metrics"]["episodes"],
-            "total_timesteps": total_timesteps,
         }
         training_jobs[job_id]["metrics"]["progress"] = 100
-        logger.info(f"[TRAIN] Training completed for job {job_id}")
     except Exception as e:
         training_jobs[job_id]["status"] = "failed"
         training_jobs[job_id]["error"] = str(e)
-        training_jobs[job_id]["metrics"]["logs"].append(
-            f"[{datetime.now().strftime('%H:%M:%S')}] ERROR: {str(e)}"
-        )
-        logger.error(f"[TRAIN] Training failed for job {job_id}: {e}", exc_info=True)
-    finally:
-        if env:
-            try:
-                env.close()
-            except:
-                pass
-# REST Endpoints
-@app.get("/")
-def read_root():
-    return {"message": "Welcome to the RL Training API!"}
 @app.post("/train")
-def start_training(job: TrainingJob, background_tasks: BackgroundTasks):
-    """Start a new training job"""
     job_id = str(uuid.uuid4())
     training_jobs[job_id] = {
         "status": "queued",
-        "config": {
-            "env_name": job.env_name,
-            "total_timesteps": job.total_timesteps,
-            "learning_rate": job.learning_rate,
-            "n_steps": job.n_steps,
-            "batch_size": job.batch_size,
-            "n_epochs": job.n_epochs,
-        },
         "metrics": {
-            "timesteps": 0,
-            "episodes": 0,
-            "progress": 0,
-            "episode_rewards": [],
-            "episode_lengths": [],
-            "current_episode_reward": 0,
-            "mean_reward": 0,
-            "std_reward": 0,
-            "eval_mean_reward": None,
-            "eval_std_reward": None,
-            "logs": [],
         },
-        "video_buffer": [], # <--- NEW: Initialize empty buffer
-        "results": None,
-        "error": None,
-        "start_time": None,
     }
-    background_tasks.add_task(run_training, job_id, training_jobs[job_id]["config"])
-    return {
-        "message": "Training job started successfully!",
-        "job_id": job_id,
-    }
 @app.get("/train/{job_id}/status")
 def get_training_status(job_id: str):

 from fastapi import FastAPI, BackgroundTasks, HTTPException, WebSocket, WebSocketDisconnect
+from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 import base64
 import numpy as np
 from typing import Dict, Any, List, Optional
 import uuid
 import gymnasium as gym
+from stable_baselines3 import PPO, DQN, A2C # Added common algos
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.evaluation import evaluate_policy
 from stable_baselines3.common.callbacks import BaseCallback
 from datetime import datetime
 import asyncio
 import os
+import glob
 import logging
 from io import BytesIO
 from PIL import Image
 import imageio
+import traceback
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+os.makedirs("models", exist_ok=True)
+# In-memory storage
 training_jobs: Dict[str, Dict[str, Any]] = {}
class TrainingRequest(BaseModel):
    """Request body for POST /train: an environment name plus the training script."""

    # Stored in the job config and used in the replay video file name.
    env_name: str
    # Raw Python source supplied by the client; it is run server-side with exec().
    code: str
+# --- WEBSOCKET MANAGER (Unchanged) ---
 class ConnectionManager:
     def __init__(self):
         self.active_connections: Dict[str, List[WebSocket]] = {}
         self.frames: Dict[str, deque] = {}
             self.active_connections[job_id] = []
             self.frames[job_id] = deque(maxlen=1)
         self.active_connections[job_id].append(websocket)
     def disconnect(self, job_id: str, websocket: WebSocket):
         if job_id in self.active_connections:
                 del self.active_connections[job_id]
                 if job_id in self.frames:
                     del self.frames[job_id]
     def add_frame(self, job_id: str, frame: np.ndarray):
         """Record ``frame`` as the most recent frame of ``job_id``.

         Each job owns a one-slot deque, so appending replaces the previous frame.
         """
         try:
             slot = self.frames[job_id]
         except KeyError:
             slot = self.frames[job_id] = deque(maxlen=1)
         slot.append(frame)
     async def broadcast_frame(self, job_id: str):
         """Encode the latest frame of ``job_id`` as JPEG and send it to every client.

         Does nothing when no frame is stored for the job or the stored frame is
         not a NumPy array. Images larger than 512 px on either side are scaled
         down (aspect ratio kept) before encoding. The frame is sent as a JSON
         message ``{"type": "frame", "job_id": ..., "data": <base64 JPEG>}``.
         Clients whose send fails are removed from the job's connection list.
         """
         frames = self.frames.get(job_id)
         if not frames:
             return
         frame = frames[-1]
         if not isinstance(frame, np.ndarray):
             return

         try:
             # Non-uint8 frames are scaled by 255 and clipped
             # (assumes float frames are in [0, 1] — TODO confirm).
             if frame.dtype != np.uint8:
                 frame = np.clip(frame * 255, 0, 255).astype(np.uint8)
             img = Image.fromarray(frame)
             # JPEG cannot store an alpha channel; convert e.g. RGBA frames
             # instead of failing to encode them.
             if img.mode not in ("RGB", "L"):
                 img = img.convert("RGB")
             max_size = 512
             if img.width > max_size or img.height > max_size:
                 ratio = max_size / max(img.width, img.height)
                 # max(1, ...) keeps extreme aspect ratios from producing a 0-pixel side.
                 new_size = (
                     max(1, int(img.width * ratio)),
                     max(1, int(img.height * ratio)),
                 )
                 img = img.resize(new_size, Image.Resampling.LANCZOS)
             buffer = BytesIO()
             img.save(buffer, format='JPEG', quality=85)
             frame_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
         except Exception as exc:
             # Encoding stays best-effort, but the failure is logged instead of hidden.
             logger.error("[ENCODE] Failed to encode frame for job %s: %s", job_id, exc)
             return

         message = {"type": "frame", "job_id": job_id, "data": frame_base64}
         stale = []
         # Iterate over a snapshot: the list can change while we are suspended in `await`.
         for connection in list(self.active_connections.get(job_id, ())):
             try:
                 await connection.send_json(message)
             except Exception as exc:
                 # `except Exception` rather than a bare except, so task
                 # cancellation is not swallowed.
                 logger.warning("[WS] Failed to send frame for job %s: %s", job_id, exc)
                 stale.append(connection)
         # Drop clients that could not be reached so they are not retried on every frame.
         for connection in stale:
             if connection in self.active_connections.get(job_id, ()):
                 self.disconnect(job_id, connection)
 # Module-level ConnectionManager instance; MetricsCallback pushes rendered frames into it via manager.add_frame().
 manager = ConnectionManager()
+# --- CALLBACK (Modified for Generic Use) ---
 class MetricsCallback(BaseCallback):
     """Training callback that mirrors progress into ``training_jobs``.

     On every step it updates the job's timestep/progress metrics, periodically
     renders a frame (for live streaming and the replay buffer), and records
     per-episode statistics when the first environment reports a finished episode.
     """

     def __init__(self, job_id: str, render_freq: int = 4):
         """
         Args:
             job_id: Key of the job entry in ``training_jobs``.
             render_freq: Render a frame whenever ``num_timesteps`` is a
                 multiple of this value; ``0`` disables rendering.
         """
         super().__init__()
         self.job_id = job_id
         self.episode_count = 0
         self.render_freq = render_freq

     def _on_step(self) -> bool:
         """Update the job's metrics for the current step.

         Returns:
             ``False`` when the job is missing or marked "stopped" (which asks
             the training loop to stop), otherwise ``True``.
         """
         job = training_jobs.get(self.job_id)
         if not job or job["status"] == "stopped":
             return False

         metrics = job["metrics"]
         metrics["timesteps"] = self.num_timesteps

         # The total is only a guess parsed from the user script; fall back to the
         # default when it is missing or non-positive so we never divide by zero.
         total = job.get("total_timesteps_guess", 100000)
         if not isinstance(total, (int, float)) or total <= 0:
             total = 100000
         metrics["progress"] = min(100, int((self.num_timesteps / total) * 100))

         # Render (a render_freq of 0 would make the modulo raise, so it disables rendering).
         if self.render_freq and self.num_timesteps % self.render_freq == 0:
             try:
                 frame = self.model.get_env().render()
                 if isinstance(frame, np.ndarray):
                     manager.add_frame(self.job_id, frame)
                     # Cap the replay buffer at 2000 frames to bound memory use.
                     if len(job["video_buffer"]) < 2000:
                         job["video_buffer"].append(frame)
             except Exception as exc:
                 # Rendering is optional; training must not fail because of it.
                 logger.debug("[RENDER] Render error for job %s: %s", self.job_id, exc)

         # Episode handling: only the first environment (index 0) is tracked.
         infos = self.locals.get("infos", ())
         if self.locals.get("dones", [False])[0] and len(infos) > 0:
             info = infos[0]
             if "episode" in info:
                 episode = info["episode"]
                 self.episode_count += 1
                 ep_reward = float(episode["r"])
                 metrics["episodes"] = self.episode_count
                 metrics["episode_rewards"].append(ep_reward)
                 metrics["current_episode_reward"] = ep_reward

                 # "episode_lengths" is initialised with the job but was never
                 # filled; record the length when the info dict provides it.
                 ep_length = episode.get("l") if isinstance(episode, dict) else None
                 if ep_length is not None:
                     metrics.setdefault("episode_lengths", []).append(int(ep_length))

                 # Running statistics over the last 100 episodes.
                 recent = metrics["episode_rewards"][-100:]
                 metrics["mean_reward"] = float(np.mean(recent))
                 metrics["std_reward"] = float(np.std(recent))

                 log_entry = f"[{datetime.now().strftime('%H:%M:%S')}] Episode {self.episode_count}: reward = {ep_reward:.2f}"
                 logs = metrics["logs"]
                 logs.append(log_entry)
                 # Keep only the 100 most recent log lines.
                 if len(logs) > 100:
                     del logs[:-100]
         return True
def save_video_from_buffer(job_id: str, env_name="env"):
    """Write the job's buffered frames to an MP4 file under ``models/``.

    Args:
        job_id: Key of the job entry in ``training_jobs``.
        env_name: Label used in the file name. Characters other than letters,
            digits, ``.``, ``_`` and ``-`` are replaced with ``_``.

    Returns:
        The path of the written video, or ``None`` when the job is unknown,
        has no buffered frames, or the video could not be written.
    """
    job = training_jobs.get(job_id)
    if not job or not job["video_buffer"]:
        return None

    # env_name comes from the client: names such as "ALE/Pong-v5" would point
    # into a non-existent sub-directory and "../" could escape models/, so
    # keep only characters that are safe inside a single file name.
    safe_env_name = "".join(
        ch if ch.isalnum() or ch in "._-" else "_" for ch in str(env_name)
    )
    video_path = f"models/{safe_env_name}_replay_{job_id}.mp4"
    try:
        imageio.mimsave(video_path, job['video_buffer'], fps=30)
    except Exception:
        # Saving the replay is best-effort, but record why it failed.
        logger.exception("[VIDEO] Failed to save video for job %s", job_id)
        return None
    # Release the frames once they are on disk.
    job["video_buffer"] = []
    return video_path
+# --- DYNAMIC EXECUTION ENGINE ---
def run_custom_code(job_id: str, code: str, env_name: str):
    """Run a user-supplied training script for ``job_id`` and record the outcome.

    The script is executed with ``exec`` in a namespace that provides ``gym``,
    ``PPO``, ``DQN``, ``A2C``, ``evaluate_policy``, ``Monitor``, ``np``,
    ``StreamCallback`` (a ``MetricsCallback`` bound to this job) and
    ``model_save_path``. After the script returns, the buffered frames are
    saved as a video and the job is marked "completed"; if the script raises,
    the job is marked "failed" and the error is stored.

    Args:
        job_id: Key of an existing entry in ``training_jobs``.
        code: Python source to execute.
        env_name: Environment label used for the replay video file name.
    """
    logger.info(f"[EXEC] Starting job {job_id}")
    job = training_jobs[job_id]
    job["status"] = "training"
    job["start_time"] = datetime.now()

    # Callback class bound to THIS job; the user script simply calls `StreamCallback()`.
    class StreamCallback(MetricsCallback):
        def __init__(self, render_freq=4):
            super().__init__(job_id, render_freq)

    # Names available to the user script.
    local_scope = {
        # Without this, `__name__` resolves to "builtins" inside exec and a
        # script's `if __name__ == "__main__":` block would be skipped silently.
        "__name__": "__main__",
        "gym": gym,
        "PPO": PPO,
        "DQN": DQN,
        "A2C": A2C,
        "evaluate_policy": evaluate_policy,
        "Monitor": Monitor,
        "np": np,
        "StreamCallback": StreamCallback,
        "model_save_path": f"models/model_{job_id}",  # User should save the model here
    }

    try:
        # SECURITY WARNING: this executes arbitrary client-supplied code in the
        # server process (remote code execution). It must only be exposed to
        # trusted users or moved into a sandbox.
        exec(code, local_scope)

        video_path = save_video_from_buffer(job_id, env_name)

        # The script is expected to save its model to `model_save_path`.
        expected_model_path = f"models/model_{job_id}.zip"
        if not os.path.exists(expected_model_path):
            logger.warning(
                "[EXEC] Job %s finished but %s does not exist", job_id, expected_model_path
            )

        # Optional `results` dict set by the script; ignore anything that is not a dict.
        user_results = local_scope.get("results")
        if not isinstance(user_results, dict):
            user_results = {}

        # NOTE(review): a job that was "stopped" through the callback also ends
        # up here and is reported as "completed" — confirm that is intended.
        job["status"] = "completed"
        job["results"] = {
            "mean_reward": user_results.get("mean_reward", 0),
            "std_reward": user_results.get("std_reward", 0),
            "model_path": expected_model_path,
            "video_path": video_path,
            "total_episodes": job["metrics"]["episodes"],
        }
        job["metrics"]["progress"] = 100
    except (Exception, SystemExit) as e:
        # SystemExit is included so a script calling exit()/sys.exit() cannot
        # leave the job stuck in "training".
        error_msg = traceback.format_exc()
        logger.error(f"[EXEC] Error in job {job_id}: {error_msg}")
        job["status"] = "failed"
        job["error"] = str(e)
        job["metrics"]["logs"].append(f"ERROR: {str(e)}")
 @app.post("/train")
def start_training(request: TrainingRequest, background_tasks: BackgroundTasks):
    """Register a new training job and schedule the submitted script to run.

    Args:
        request: Environment name and the Python source to execute.
        background_tasks: FastAPI task queue the job is added to.

    Returns:
        A dict with a status message and the generated ``job_id``.
    """
    import re  # local import: only needed for the timestep guess below

    job_id = str(uuid.uuid4())

    # Best-effort guess of the total timesteps, used only for the progress bar.
    # Only a plain positive integer literal counts (`total_timesteps=50000`,
    # `total_timesteps = 50_000`); expressions, floats, zero and negatives keep
    # the default so progress never divides by zero or goes negative.
    total_timesteps_guess = 100000
    found = re.search(
        r"total_timesteps\s*=\s*(\d[\d_]*)\s*(?:[,)\r\n#;]|$)", request.code
    )
    if found:
        try:
            parsed = int(found.group(1))
        except ValueError:
            parsed = 0
        if parsed > 0:
            total_timesteps_guess = parsed

    training_jobs[job_id] = {
        "status": "queued",
        "config": {"env_name": request.env_name},  # Kept for compatibility
        "total_timesteps_guess": total_timesteps_guess,
        "metrics": {
            "timesteps": 0, "episodes": 0, "progress": 0,
            "episode_rewards": [], "episode_lengths": [],
            "current_episode_reward": 0, "mean_reward": 0, "std_reward": 0,
            "eval_mean_reward": None, "eval_std_reward": None, "logs": [],
        },
        "video_buffer": [],
        "results": None, "error": None, "start_time": None,
    }
    background_tasks.add_task(run_custom_code, job_id, request.code, request.env_name)
    return {"message": "Started", "job_id": job_id}
 @app.get("/train/{job_id}/status")
 def get_training_status(job_id: str):