Spaces:

openenv-community
/

optigami

Sleeping

ianalin123 committed on Mar 8

Commit

0bcd0b1

1 Parent(s): ca61c8d

fix: rename server.py to server_legacy.py, add server/ package

server/ package (new engine-based environment) was shadowing server.py,
causing 'Attribute app not found in module server'. Renamed the old
monolithic server.py to server_legacy.py to resolve the conflict.

For local dev use: uvicorn openenv_server.app:app --reload

Files changed (6) hide show

server/__init__.py +0 -0
server/models.py +59 -0
server/origami_environment.py +211 -0
server/tasks.py +123 -0
server/training_broadcast.py +207 -0
server.py → server_legacy.py +0 -0

server/__init__.py ADDED Viewed

File without changes

server/models.py ADDED Viewed

	@@ -0,0 +1,59 @@

+"""
+OpenEnv Pydantic models for the origami RL environment.
+OrigamiAction  — one fold per step
+OrigamiObservation — everything the LLM and Three.js viewer need
+OrigamiState   — server-side episode tracking
+"""
+from __future__ import annotations
+from typing import Any, Optional
+from pydantic import Field
+from openenv.core.env_server.types import Action, Observation, State
+class OrigamiAction(Action):
+    """A single fold command issued by the client for one environment step.
+    Every field has a default, so an empty payload describes a 180-degree
+    valley fold along the horizontal midline, applied to all layers.
+    """
+    # Kind of fold to perform; "stop" asks the environment to end the episode.
+    fold_type: str = Field(
+        "valley",
+        description="'valley' | 'mountain' | 'pleat' | 'crimp' | 'stop'",
+    )
+    # Crease endpoints; built by a factory so instances never share one dict.
+    fold_line: dict[str, list[float]] = Field(
+        description="{'start': [x, y], 'end': [x, y]} normalized 0-1",
+        default_factory=lambda: {"start": [0.0, 0.5], "end": [1.0, 0.5]},
+    )
+    fold_angle: float = Field(180.0, description="Fold angle in degrees, 0-180")
+    layer_select: str = Field("all", description="'all' | 'top' | 'bottom'")
+class OrigamiObservation(Observation):
+    """Observation payload consumed by both the LLM and the Three.js viewer.
+    paper_state carries FOLD-compatible geometry plus physics data and metrics
+    carries the computed quality metrics. There are no render_urls: the
+    browser draws the sheet straight from paper_state.
+    """
+    # Container fields use factories so each observation owns fresh objects.
+    task: dict[str, Any] = Field(default_factory=dict)
+    paper_state: dict[str, Any] = Field(default_factory=dict)
+    metrics: dict[str, Any] = Field(default_factory=dict)
+    fold_history: list[dict[str, Any]] = Field(default_factory=list)
+    # Human-readable failure description; None when nothing went wrong.
+    error: Optional[str] = None
+class OrigamiState(State):
+    """Episode bookkeeping that stays on the server side."""
+    # Immutable scalar defaults, so plain assignments are sufficient here.
+    task_name: str = ""
+    num_folds_applied: int = 0
+    is_valid: bool = True
+    total_reward: float = 0.0

server/origami_environment.py ADDED Viewed

	@@ -0,0 +1,211 @@

+"""
+OrigamiEnvironment — OpenEnv environment wrapping the origami physics engine.
+Implements reset() / step() / state following the OpenEnv interface.
+Engine (physics, fold, validation, metrics) lives in engine/.
+No server-side image rendering — paper_state contains all geometry data.
+"""
+from __future__ import annotations
+import json
+import os
+import uuid
+from typing import Any, Optional
+from openenv.core.env_server.interfaces import Environment
+from engine.paper import Paper
+from engine.fold_engine import apply_fold
+from engine.physics import simulate
+from engine.validation import validate_state
+from engine.metrics import compute_all_metrics
+from server.models import OrigamiAction, OrigamiObservation, OrigamiState
+from server.tasks import get_task_by_name, sample_task
+def _get_material(name: str):
+    """Look up a material by name, substituting "paper" when the lookup fails.
+    Any exception raised by the lookup triggers the fallback; a failure to
+    import the materials module or to resolve "paper" itself propagates.
+    """
+    # Imported lazily, exactly once; both the primary lookup and the fallback
+    # go through the same function.
+    from engine.materials import get_material as lookup
+    try:
+        return lookup(name)
+    except Exception:
+        return lookup("paper")
+class OrigamiEnvironment(Environment[OrigamiAction, OrigamiObservation, OrigamiState]):
+    """Origami folding RL environment.
+    Each episode: the agent receives paper_state + task and applies folds one
+    at a time via step(). The episode ends on a 'stop' action, when the task's
+    max_folds is reached, when validation reports a self-intersection, or
+    when the engine rejects a fold. Only terminal observations carry a
+    reward; intermediate observations have reward=None.
+    """
+    SUPPORTS_CONCURRENT_SESSIONS = False
+    # Reward for a step that cannot be executed (no active episode, or the
+    # engine rejected the fold).
+    _REJECTED_STEP_REWARD = -5.0
+    # Strain limit used by the reward when no paper is loaded.
+    _FALLBACK_STRAIN_LIMIT = 0.05
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._paper: Optional[Paper] = None
+        self._task: Optional[dict] = None
+        self._fold_history: list[dict] = []
+        self._metrics: dict = {}
+        self._validation: dict = {}
+        self._error: Optional[str] = None
+        self._episode_id: Optional[str] = None
+        self._step_count: int = 0
+        self._total_reward: float = 0.0
+    # ── reset ─────────────────────────────────────────────────────────
+    def reset(
+        self,
+        seed: Optional[int] = None,
+        episode_id: Optional[str] = None,
+        **kwargs: Any,
+    ) -> OrigamiObservation:
+        """Start a new episode on a flat sheet and return the first observation.
+        Args:
+            seed: Seeds task sampling when no (known) task_name is supplied.
+            episode_id: Identifier for the episode; a UUID4 string is generated
+                when omitted.
+            **kwargs: ``task_name`` selects a specific task. A missing or
+                unknown name falls back to a sampled task.
+        """
+        self._episode_id = episode_id or str(uuid.uuid4())
+        self._step_count = 0
+        self._fold_history = []
+        self._error = None
+        self._total_reward = 0.0
+        # Resolve the task from scratch on every reset. Keeping the previous
+        # episode's task around would make every later reset without a valid
+        # task_name silently replay it and ignore the seed.
+        task_name = kwargs.get("task_name")
+        task = get_task_by_name(task_name) if task_name else None
+        if not task:
+            task = sample_task(seed=seed)
+        self._task = task
+        # Create flat sheet
+        mat = _get_material(self._task["material"])
+        self._paper = Paper.create_flat_sheet(
+            width=self._task["width"],
+            height=self._task["height"],
+            material=mat,
+        )
+        # Initial validation + metrics (no physics needed for flat sheet)
+        self._validation = validate_state(self._paper)
+        self._metrics = compute_all_metrics(self._paper, self._task, self._validation)
+        return self._make_observation(done=False, reward=None)
+    # ── step ──────────────────────────────────────────────────────────
+    def step(
+        self,
+        action: OrigamiAction,
+        timeout_s: Optional[float] = None,
+        **kwargs: Any,
+    ) -> OrigamiObservation:
+        """Apply one fold (or stop) and return the resulting observation.
+        ``timeout_s`` and extra keyword arguments are accepted but not used.
+        A rejected fold, or a call before reset(), ends the episode with the
+        fixed rejection reward.
+        """
+        if self._paper is None or self._task is None:
+            self._error = "step() called before reset()"
+            return self._reject_step()
+        self._step_count += 1
+        self._error = None
+        # ── Stop action ───────────────────────────────────────────────
+        if action.fold_type == "stop":
+            return self._finalize_episode()
+        # ── Build fold dict ───────────────────────────────────────────
+        # NOTE(review): action.layer_select is not forwarded to the engine.
+        fold_dict = {
+            "type": action.fold_type,
+            "line": action.fold_line,
+            "angle": action.fold_angle,
+        }
+        # ── Apply fold ────────────────────────────────────────────────
+        new_paper, err = apply_fold(self._paper, fold_dict)
+        if err:
+            self._error = err
+            return self._reject_step()
+        self._paper = new_paper
+        self._fold_history.append({**fold_dict, "step": self._step_count})
+        # ── Physics relaxation ────────────────────────────────────────
+        try:
+            self._paper = simulate(self._paper, fold_percent=1.0)
+        except Exception as exc:
+            self._error = f"Physics failed: {exc}"
+            # Continue — don't abort episode on physics failure
+        # ── Validate ──────────────────────────────────────────────────
+        self._validation = validate_state(self._paper)
+        # ── Metrics ───────────────────────────────────────────────────
+        self._metrics = compute_all_metrics(self._paper, self._task, self._validation)
+        # ── Check termination ─────────────────────────────────────────
+        # Self-intersection is checked first so the error is still reported
+        # when it happens on the last allowed fold; the reward is identical
+        # either way.
+        if self._validation.get("self_intersections", 0) > 0:
+            self._error = "Self-intersection detected"
+            return self._finalize_episode()
+        if self._step_count >= self._task.get("max_folds", 50):
+            return self._finalize_episode()
+        return self._make_observation(done=False, reward=None)
+    # ── state ─────────────────────────────────────────────────────────
+    @property
+    def state(self) -> OrigamiState:
+        """Snapshot of the server-side bookkeeping for the current episode."""
+        return OrigamiState(
+            episode_id=self._episode_id,
+            step_count=self._step_count,
+            task_name=self._task.get("name", "") if self._task else "",
+            num_folds_applied=len(self._fold_history),
+            is_valid=self._metrics.get("is_valid", True),
+            total_reward=self._total_reward,
+        )
+    # ── internals ─────────────────────────────────────────────────────
+    def _finalize_episode(self) -> OrigamiObservation:
+        """End the episode with the shaped reward computed from the metrics."""
+        reward = self._compute_reward()
+        self._total_reward = reward
+        return self._make_observation(done=True, reward=reward)
+    def _reject_step(self) -> OrigamiObservation:
+        """End the episode with the fixed rejection reward, recorded in state."""
+        self._total_reward = self._REJECTED_STEP_REWARD
+        return self._make_observation(done=True, reward=self._REJECTED_STEP_REWARD)
+    def _make_observation(self, done: bool, reward: Optional[float]) -> OrigamiObservation:
+        """Bundle the current task, paper, metrics, history and error."""
+        return OrigamiObservation(
+            done=done,
+            reward=reward,
+            task=self._task or {},
+            paper_state=self._paper.to_observation_dict() if self._paper else {},
+            metrics=self._metrics,
+            fold_history=self._fold_history,
+            error=self._error,
+        )
+    def _compute_reward(self) -> float:
+        """Turn the latest metrics into a scalar terminal reward."""
+        m = self._metrics
+        reward = 0.0
+        # Compactness is the main signal
+        reward += m.get("compactness", 0.0) * 20.0
+        # Bonus for fitting in target box
+        if m.get("fits_target_box", False):
+            reward += 10.0
+        # Bonus for deployability (if task requires it)
+        if m.get("is_deployable", False):
+            reward += 5.0
+        # Penalties for violations
+        reward -= m.get("kawasaki_violations", 0) * 2.0
+        reward -= m.get("maekawa_violations", 0) * 2.0
+        reward -= m.get("self_intersections", 0) * 5.0
+        # Penalty for too many folds (encourage efficiency)
+        reward -= m.get("fold_count", 0) * 0.5
+        # Penalty for exceeding material strain limit
+        max_strain = m.get("max_strain", 0.0)
+        strain_limit = (
+            self._paper.material.max_strain if self._paper else self._FALLBACK_STRAIN_LIMIT
+        )
+        if max_strain > strain_limit:
+            # A non-positive limit cannot scale the penalty (division by
+            # zero, or a sign flip that would turn it into a bonus), so the
+            # base penalty is applied instead.
+            overshoot = max_strain / strain_limit if strain_limit > 0 else 1.0
+            reward -= 3.0 * overshoot
+        return float(reward)

server/tasks.py ADDED Viewed

	@@ -0,0 +1,123 @@

+"""
+Task pool and curriculum for the origami RL environment.
+7 tasks across 4 difficulty levels.
+"""
+from __future__ import annotations
+import random
+from typing import Optional
+# Registry of task definitions, keyed by task name (each entry repeats its key
+# under "name"). In this module only "difficulty" is read, for filtering.
+# OrigamiEnvironment reads "material", "width", "height", "max_folds" and
+# "name", and hands the whole dict to the metrics code.
+# NOTE(review): "target_ratio", "target_box" and "must_deploy" are presumably
+# consumed by engine.metrics — their units/semantics are not visible here.
+TASKS: dict[str, dict] = {
+    "half_fold": {
+        "name": "half_fold",
+        "description": "Fold a 1x1 paper sheet in half along the horizontal midline.",
+        "width": 1.0,
+        "height": 1.0,
+        "material": "paper",
+        "target_ratio": 0.50,
+        "max_folds": 3,
+        "target_box": [1.0, 0.5, 0.02],
+        "must_deploy": False,
+        "difficulty": 1,
+    },
+    "quarter_fold": {
+        "name": "quarter_fold",
+        "description": "Fold a 1x1 paper sheet into quarters using two perpendicular folds.",
+        "width": 1.0,
+        "height": 1.0,
+        "material": "paper",
+        "target_ratio": 0.25,
+        "max_folds": 5,
+        "target_box": [0.5, 0.5, 0.04],
+        "must_deploy": False,
+        "difficulty": 1,
+    },
+    "letter_fold": {
+        "name": "letter_fold",
+        "description": "Fold a 1x1 paper into thirds (letter fold) using two parallel folds.",
+        "width": 1.0,
+        "height": 1.0,
+        "material": "paper",
+        "target_ratio": 0.33,
+        "max_folds": 5,
+        "target_box": [1.0, 0.34, 0.03],
+        "must_deploy": False,
+        "difficulty": 2,
+    },
+    "map_fold": {
+        "name": "map_fold",
+        "description": "Fold a 1x1 paper into eighths using a grid fold pattern. Must be re-deployable.",
+        "width": 1.0,
+        "height": 1.0,
+        "material": "paper",
+        "target_ratio": 0.125,
+        "max_folds": 8,
+        "target_box": [0.5, 0.25, 0.08],
+        "must_deploy": True,
+        "difficulty": 2,
+    },
+    "solar_panel": {
+        "name": "solar_panel",
+        "description": "Pack a 1x1 Mylar solar panel into a compact configuration using a Miura-ori style fold. Must deploy.",
+        "width": 1.0,
+        "height": 1.0,
+        "material": "mylar",
+        "target_ratio": 0.05,
+        "max_folds": 20,
+        "target_box": [0.25, 0.25, 0.05],
+        "must_deploy": True,
+        "difficulty": 3,
+    },
+    "shelter_wall": {
+        "name": "shelter_wall",
+        "description": "Fold a 1x1 aluminum sheet into a compact structural panel within strain limits.",
+        "width": 1.0,
+        "height": 1.0,
+        "material": "aluminum",
+        "target_ratio": 0.10,
+        "max_folds": 15,
+        "target_box": [0.5, 0.25, 0.1],
+        "must_deploy": False,
+        "difficulty": 3,
+    },
+    "stent": {
+        "name": "stent",
+        "description": "Fold a 0.5x1.5 nitinol sheet into a compact tube configuration for a medical stent. Superelastic material.",
+        "width": 0.5,
+        "height": 1.5,
+        "material": "nitinol",
+        "target_ratio": 0.09,
+        "max_folds": 25,
+        "target_box": [0.1, 0.1, 0.15],
+        "must_deploy": True,
+        "difficulty": 4,
+    },
+}
+def get_task_by_name(name: str) -> Optional[dict]:
+    """Return a copy of the named task, or None if no such task exists.
+    The copy is shallow, like the ones returned by sample_task and
+    get_tasks_by_difficulty: callers may add or replace top-level keys without
+    altering the shared TASKS registry, but nested values such as
+    ``target_box`` are still shared and must not be mutated in place.
+    """
+    task = TASKS.get(name)
+    return None if task is None else dict(task)
+def sample_task(seed: Optional[int] = None, difficulty: Optional[int] = None) -> dict:
+    """Pick one task at random and return a shallow copy of it.
+    The choice is drawn from a private ``random.Random(seed)``, so a given
+    seed always yields the same task and the global RNG is left untouched.
+    When ``difficulty`` is given, only tasks at that level are candidates.
+    """
+    candidates = [
+        task
+        for task in TASKS.values()
+        if difficulty is None or task["difficulty"] == difficulty
+    ]
+    # A difficulty that matches nothing falls back to the full pool.
+    if not candidates:
+        candidates = list(TASKS.values())
+    chosen = random.Random(seed).choice(candidates)
+    return dict(chosen)
+def get_tasks_by_difficulty(level: int) -> list[dict]:
+    """Collect shallow copies of every task whose difficulty equals ``level``.
+    Tasks come back in registry order; an unused level yields an empty list.
+    """
+    matches: list[dict] = []
+    for task in TASKS.values():
+        if task["difficulty"] == level:
+            matches.append(dict(task))
+    return matches
+def available_task_names() -> list[str]:
+    """List every registered task name in ascending alphabetical order."""
+    # Iterating a dict yields its keys, so no explicit .keys() is needed.
+    names = list(TASKS)
+    names.sort()
+    return names

server/training_broadcast.py ADDED Viewed

	@@ -0,0 +1,207 @@

+"""
+TrainingBroadcastServer — fire-and-forget broadcast hub for live training viewer.
+The RL training process calls publish() after each env.step().
+Spectator browsers connect via /ws/training WebSocket.
+Broadcast is async and non-blocking: if no viewers are connected, observations are dropped.
+"""
+from __future__ import annotations
+import asyncio
+import json
+import logging
+from dataclasses import dataclass, field
+from typing import Any, Optional
+from fastapi import WebSocket, WebSocketDisconnect
+logger = logging.getLogger(__name__)
+@dataclass
+class EpisodeInfo:
+    # Latest known state of one training episode, as stored in the broadcast
+    # hub's registry and sent to viewers when they connect.
+    episode_id: str
+    task_name: str
+    status: str = "running"       # "running" | "done" | "timeout" | "error"
+    # Step number taken from the most recent update message.
+    step: int = 0
+    # Most recent "observation" payload, plus its "metrics" and
+    # "fold_history" entries pulled out for direct access.
+    observation: dict = field(default_factory=dict)
+    metrics: dict = field(default_factory=dict)
+    fold_history: list = field(default_factory=list)
+    # Filled in from the "episode_done" message; None until then.
+    score: Optional[float] = None
+    final_metrics: Optional[dict] = None
+class TrainingBroadcastServer:
+    """Central hub for broadcasting RL training observations to spectator WebSockets.
+    All shared state (spectator list, episode registry) is only touched from
+    the asyncio event loop that serves the WebSockets. publish() is the one
+    entry point meant for other threads: it hands the update over to that
+    loop instead of touching the state itself.
+    """
+    def __init__(self) -> None:
+        self._spectators: list[WebSocket] = []
+        self._registry: dict[str, EpisodeInfo] = {}
+        self._batch_id: int = 0
+        # Loop that owns the hub. Learned from the first coroutine that runs
+        # on it, or set explicitly through bind_loop().
+        self._loop: Optional[asyncio.AbstractEventLoop] = None
+        self._lock = asyncio.Lock()
+        # Strong references to in-flight publish jobs; the event loop only
+        # keeps weak references to tasks, so an unreferenced one may be
+        # garbage-collected before it completes.
+        self._pending: set = set()
+    def bind_loop(self, loop: Optional[asyncio.AbstractEventLoop] = None) -> None:
+        """Record the event loop that publish() should deliver to.
+        With no argument the currently running loop is used, so this must then
+        be called from a coroutine or callback. Calling it at server startup
+        lets worker threads publish before the first viewer connects.
+        """
+        self._loop = loop if loop is not None else asyncio.get_running_loop()
+    # ── Episode publishing (called from training thread / async context) ──
+    def publish(self, episode_id: str, data: dict) -> None:
+        """Fire-and-forget: push an update from the training process.
+        Safe to call from any thread. From the hub's own loop the update is
+        scheduled as a task; from any other thread it is handed to the hub's
+        loop with run_coroutine_threadsafe. If the hub's loop is unknown or
+        not running, the update is dropped after a debug log entry.
+        """
+        try:
+            running = asyncio.get_running_loop()
+        except RuntimeError:
+            # Plain worker thread: no loop is running here.
+            running = None
+        if running is not None and self._loop is None:
+            self._loop = running
+        if running is not None and running is self._loop:
+            job = running.create_task(self._async_publish(episode_id, data))
+        else:
+            target = self._loop
+            if target is None or not target.is_running():
+                logger.debug("No running event loop; dropping update for %s", episode_id)
+                return
+            job = asyncio.run_coroutine_threadsafe(
+                self._async_publish(episode_id, data), target
+            )
+        self._pending.add(job)
+        job.add_done_callback(self._on_publish_done)
+    def _on_publish_done(self, job: Any) -> None:
+        """Release a finished publish job and report its failure, if any."""
+        self._pending.discard(job)
+        if not job.cancelled() and job.exception() is not None:
+            logger.warning("Training broadcast failed: %r", job.exception())
+    async def _async_publish(self, episode_id: str, data: dict) -> None:
+        """Update the registry for one message, then broadcast it.
+        Batch-level messages ("batch_start", "batch_done", "training_done")
+        are forwarded unchanged; episode messages are forwarded with
+        "episode_id" added. The lock only guards the registry update, so a
+        slow viewer cannot block connects, disconnects or other updates.
+        """
+        if self._loop is None:
+            self._loop = asyncio.get_running_loop()
+        msg_type = data.get("type", "episode_update")
+        async with self._lock:
+            if msg_type == "batch_start":
+                self._batch_id = data.get("batch_id", self._batch_id + 1)
+                self._registry.clear()
+                outgoing = data
+            elif msg_type in ("batch_done", "training_done"):
+                outgoing = data
+            else:
+                # episode_update or episode_done
+                self._record_episode(episode_id, msg_type, data)
+                outgoing = {"episode_id": episode_id, **data}
+        await self._broadcast(outgoing)
+    def _record_episode(self, episode_id: str, msg_type: str, data: dict) -> None:
+        """Fold one episode message into the registry (caller holds the lock)."""
+        ep = self._registry.get(episode_id)
+        if ep is None:
+            ep = EpisodeInfo(episode_id=episode_id, task_name=data.get("task_name", ""))
+            self._registry[episode_id] = ep
+        if msg_type == "episode_done":
+            ep.status = data.get("status", "done")
+            ep.score = data.get("score")
+            ep.final_metrics = data.get("final_metrics")
+            return
+        ep.step = data.get("step", ep.step)
+        ep.status = "running"
+        # Tolerate an explicit "observation": None in the payload.
+        obs = data.get("observation") or {}
+        ep.observation = obs
+        ep.metrics = obs.get("metrics", {})
+        ep.fold_history = obs.get("fold_history", ep.fold_history)
+    # ── Spectator management ──
+    async def connect_spectator(self, websocket: WebSocket) -> None:
+        """Accept a new viewer WebSocket and serve it until disconnect.
+        Viewers are read-only and may stay silent indefinitely, so there is no
+        idle timeout: the loop ends when the peer disconnects or the socket
+        errors. Sockets that die without a close are pruned by _broadcast().
+        """
+        await websocket.accept()
+        if self._loop is None:
+            self._loop = asyncio.get_running_loop()
+        async with self._lock:
+            self._spectators.append(websocket)
+        # Send current registry snapshot immediately
+        await self._send_registry(websocket)
+        try:
+            while True:
+                # Drain and ignore anything the viewer sends (pings etc).
+                await websocket.receive_text()
+        except WebSocketDisconnect:
+            pass
+        except Exception:
+            logger.debug("Spectator connection ended unexpectedly", exc_info=True)
+        finally:
+            await self.disconnect_spectator(websocket)
+    async def disconnect_spectator(self, websocket: WebSocket) -> None:
+        """Forget a viewer; a no-op if it was already removed."""
+        async with self._lock:
+            self._spectators = [s for s in self._spectators if s is not websocket]
+    # ── Batch control ──
+    async def start_batch(self, batch_id: int, num_episodes: int, prompt_index: int = 0) -> None:
+        """Call before starting a new training batch."""
+        data = {
+            "type": "batch_start",
+            "batch_id": batch_id,
+            "num_episodes": num_episodes,
+            "prompt_index": prompt_index,
+        }
+        await self._async_publish("__batch__", data)
+    async def finish_batch(
+        self,
+        batch_id: int,
+        scores: list[float],
+        best_episode_id: str = "",
+    ) -> None:
+        """Call after all episodes in a batch complete."""
+        data = {
+            "type": "batch_done",
+            "batch_id": batch_id,
+            "scores": scores,
+            "best_episode_id": best_episode_id,
+            # An empty batch reports 0.0 rather than dividing by zero.
+            "avg_score": sum(scores) / len(scores) if scores else 0.0,
+        }
+        await self._async_publish("__batch__", data)
+    async def clear_batch(self) -> None:
+        """Reset episode registry for next batch."""
+        async with self._lock:
+            self._registry.clear()
+    # ── Internals ──
+    async def _broadcast(self, message: dict) -> None:
+        """Send message to all spectators, removing dead connections."""
+        if not self._spectators:
+            return
+        # default=str keeps non-JSON values from aborting the broadcast.
+        payload = json.dumps(message, default=str)
+        dead: list[WebSocket] = []
+        # Iterate over a snapshot: viewers may connect or leave during sends.
+        for ws in list(self._spectators):
+            try:
+                await ws.send_text(payload)
+            except Exception:
+                dead.append(ws)
+        if dead:
+            # Compare by identity and prune every dead socket in one pass.
+            self._spectators = [
+                s for s in self._spectators if not any(s is d for d in dead)
+            ]
+    async def _send_registry(self, websocket: WebSocket) -> None:
+        """Send the full episode registry to a newly connected viewer."""
+        async with self._lock:
+            episodes = {
+                ep_id: {
+                    "status": ep.status,
+                    "task": ep.task_name,
+                    "step": ep.step,
+                    "observation": ep.observation,
+                    "metrics": ep.metrics,
+                    "score": ep.score,
+                }
+                for ep_id, ep in self._registry.items()
+            }
+            payload = {
+                "type": "registry",
+                "batch_id": self._batch_id,
+                "episodes": episodes,
+            }
+        try:
+            await websocket.send_text(json.dumps(payload, default=str))
+        except Exception:
+            # Best effort: a viewer that cannot receive the snapshot is
+            # cleaned up by its receive loop or by the next broadcast.
+            pass
+    @property
+    def spectator_count(self) -> int:
+        """Number of currently registered viewer sockets."""
+        return len(self._spectators)
+    @property
+    def active_episodes(self) -> int:
+        """Number of registry entries whose status is still "running"."""
+        return sum(1 for ep in self._registry.values() if ep.status == "running")

server.py → server_legacy.py RENAMED Viewed

File without changes