ianalin123 commited on
Commit
4c6824f
·
1 Parent(s): 0153179

Merge origin/main into pr/6 — keep pr/6 refactor (openenv, env, no legacy engine)

Browse files
openenv_server/app.py CHANGED
@@ -160,7 +160,6 @@ def _graph_state_to_fold(paper_dict: dict) -> dict:
160
  edges_assignment.append(asgn)
161
 
162
  faces_vertices = _triangulate_vertices(vertices_coords)
163
-
164
  return {
165
  "vertices_coords": vertices_coords,
166
  "edges_vertices": edges_vertices,
 
160
  edges_assignment.append(asgn)
161
 
162
  faces_vertices = _triangulate_vertices(vertices_coords)
 
163
  return {
164
  "vertices_coords": vertices_coords,
165
  "edges_vertices": edges_vertices,
server/app.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ server/app.py — Training WebSocket server for Colab environment.
3
+
4
+ Provides /ws/training for live streaming of RL training episodes to browsers.
5
+ Mount at a publicly accessible URL in Colab (e.g., via ngrok or Colab's proxy).
6
+
7
+ Usage in training:
8
+ from server.app import broadcast
9
+ broadcast.publish(episode_id, {"type": "episode_update", ...})
10
+ """
11
+ from __future__ import annotations
12
+
13
+ from pathlib import Path
14
+
15
+ import uvicorn
16
+ from fastapi import FastAPI, HTTPException, WebSocket
17
+ from fastapi.middleware.cors import CORSMiddleware
18
+ from fastapi.responses import HTMLResponse
19
+ from fastapi.staticfiles import StaticFiles
20
+
21
+ from server.training_broadcast import TrainingBroadcastServer
22
+
23
app = FastAPI(title="Optigami Training Server", version="1.0")

# Allow cross-origin connections (Colab public URL → browser).
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is very
# permissive — fine for a demo/training server, confirm before public exposure.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global broadcast server — import and use from training code
broadcast = TrainingBroadcastServer()
36
+
37
+
38
@app.on_event("startup")
async def _store_loop() -> None:
    """Record the running event loop on the broadcaster.

    Training code runs in worker threads; it needs this loop object to
    schedule broadcast coroutines back onto the server's event loop.
    """
    import asyncio

    loop = asyncio.get_running_loop()
    broadcast._loop = loop
43
+
44
+
45
@app.websocket("/ws/training")
async def training_ws(websocket: WebSocket) -> None:
    """Accept a spectator connection and hand it to the broadcast hub.

    Browsers watching training connect here; episode events are pushed to
    them by the TrainingBroadcastServer.
    """
    await broadcast.connect_spectator(websocket)
49
+
50
+
51
@app.get("/health")
def health() -> dict:
    """Liveness probe reporting basic broadcast statistics."""
    payload: dict = {"status": "ok"}
    payload["spectators"] = broadcast.spectator_count
    payload["active_episodes"] = broadcast.active_episodes
    return payload
58
+
59
+
60
+ # ── Demo endpoints (same as openenv_server/app.py so the React UI works) ──
61
+
62
@app.get("/targets")
def get_targets() -> dict:
    """Return UI metadata for every known task, keyed by task name.

    Tasks that cannot be resolved by name are silently skipped.
    """
    from server.tasks import available_task_names, get_task_by_name

    catalog: dict = {}
    for name in available_task_names():
        t = get_task_by_name(name)
        if not t:
            continue
        catalog[name] = {
            "name": name,
            "level": t["difficulty"],
            "description": t.get("description", ""),
            "n_creases": t.get("max_folds", 3),
            "difficulty": t["difficulty"],
            "material": t.get("material", "paper"),
        }
    return catalog
77
+
78
+
79
# Hand-authored fold sequences used by /episode/demo. Each entry is a list of
# fold dicts ({'type', 'line', 'angle'}) mirroring OrigamiAction's fields, so
# they can be replayed through the environment one step at a time.
_DEMO_SEQUENCES: dict[str, list[dict]] = {
    "half_fold": [{"type": "valley", "line": {"start": [0.0, 0.5], "end": [1.0, 0.5]}, "angle": 180.0}],
    "quarter_fold": [{"type": "valley", "line": {"start": [0.0, 0.5], "end": [1.0, 0.5]}, "angle": 180.0},
                     {"type": "valley", "line": {"start": [0.5, 0.0], "end": [0.5, 1.0]}, "angle": 180.0}],
    "letter_fold": [{"type": "valley", "line": {"start": [0.0, 0.333], "end": [1.0, 0.333]}, "angle": 180.0},
                    {"type": "mountain", "line": {"start": [0.0, 0.667], "end": [1.0, 0.667]}, "angle": 180.0}],
    "map_fold": [{"type": "valley", "line": {"start": [0.0, 0.5], "end": [1.0, 0.5]}, "angle": 180.0},
                 {"type": "mountain", "line": {"start": [0.5, 0.0], "end": [0.5, 1.0]}, "angle": 180.0}],
    "solar_panel": [{"type": "valley", "line": {"start": [0.0, 0.25], "end": [1.0, 0.25]}, "angle": 180.0},
                    {"type": "mountain", "line": {"start": [0.0, 0.5], "end": [1.0, 0.5]}, "angle": 180.0},
                    {"type": "valley", "line": {"start": [0.0, 0.75], "end": [1.0, 0.75]}, "angle": 180.0}],
}
91
+
92
+
93
@app.get("/episode/demo")
def demo_episode(target: str = "half_fold") -> dict:
    """Run a canned fold sequence through the environment and return every step.

    Unknown targets fall back to the 'half_fold' sequence so the endpoint
    always yields a playable episode for the React UI.
    """
    from server.origami_environment import OrigamiEnvironment
    from server.models import OrigamiAction as NewAction
    from server.tasks import get_task_by_name

    sequence = _DEMO_SEQUENCES.get(target, _DEMO_SEQUENCES["half_fold"])
    environment = OrigamiEnvironment()
    obs = environment.reset(task_name=target)

    steps: list[dict] = []
    for step_no, fold in enumerate(sequence, start=1):
        obs = environment.step(NewAction(
            fold_type=fold["type"],
            fold_line=fold["line"],
            fold_angle=float(fold.get("angle", 180.0)),
        ))
        steps.append({
            "step": step_no,
            "fold": fold,
            "paper_state": obs.paper_state,
            "metrics": obs.metrics,
            "done": obs.done,
        })
        if obs.done:
            break

    return {
        "task_name": target,
        "task": get_task_by_name(target) or {},
        "steps": steps,
        "final_metrics": obs.metrics if steps else {},
    }
119
+
120
+
121
@app.get("/episode/replay/{ep_id}")
def replay_episode(ep_id: str) -> dict:
    """Return a stored training episode in the same format as /episode/demo."""
    from server.tasks import get_task_by_name

    ep = broadcast._registry.get(ep_id)
    if not ep:
        raise HTTPException(status_code=404, detail=f"Episode '{ep_id}' not found in registry")

    # Prefer explicitly recorded final metrics; otherwise fall back to the
    # last step's metrics, or an empty dict when there were no steps.
    if ep.final_metrics:
        final_metrics = ep.final_metrics
    elif ep.steps:
        final_metrics = ep.steps[-1]["metrics"]
    else:
        final_metrics = {}

    return {
        "task_name": ep.task_name,
        "task": get_task_by_name(ep.task_name) or {},
        "steps": ep.steps,
        "final_metrics": final_metrics,
    }
134
+
135
+
136
# ── Static files — viewer first, then React app (LAST, catch-all) ──

# Both directories are siblings of this package: <repo>/viewer and <repo>/build.
_VIEWER_DIR = Path(__file__).resolve().parent.parent / "viewer"
_BUILD_DIR = Path(__file__).resolve().parent.parent / "build"

if _VIEWER_DIR.exists():
    app.mount("/viewer", StaticFiles(directory=str(_VIEWER_DIR), html=True), name="viewer")


if _BUILD_DIR.exists():
    # Mounted at "/", so it must be registered after every API route above.
    app.mount("/", StaticFiles(directory=str(_BUILD_DIR), html=True), name="react")
else:
    @app.get("/", include_in_schema=False)
    def _no_build() -> HTMLResponse:
        # Friendly hint instead of a bare 404 when the frontend isn't built.
        return HTMLResponse(
            "<p>React build not found. Run <code>npm run build</code> in the frontend directory.</p>"
            "<p>Training viewer: <a href='/viewer/training.html'>/viewer/training.html</a></p>"
        )
154
+
155
+
156
def run(host: str = "0.0.0.0", port: int = 9001) -> None:
    """Launch uvicorn serving this app (blocking). Intended for Colab notebooks."""
    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    run()
server/models.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OpenEnv Pydantic models for the origami RL environment.
3
+
4
+ OrigamiAction — one fold per step
5
+ OrigamiObservation — everything the LLM and Three.js viewer need
6
+ OrigamiState — server-side episode tracking
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from typing import Any, Optional
11
+
12
+ from pydantic import BaseModel, Field
13
+
14
# openenv base classes — use them if available, fall back to plain Pydantic
try:
    from openenv.core.env_server.types import Action, Observation, State
except ImportError:
    # Stand-ins keep this module importable without openenv installed; they
    # expose only the fields the models below rely on.
    Action = BaseModel
    class State(BaseModel):
        """Minimal stand-in for openenv State base class."""
        episode_id: Optional[str] = None
        step_count: int = 0

    class Observation(BaseModel):
        """Minimal stand-in for openenv Observation base class."""
        done: bool = False
        reward: Optional[float] = None
28
+
29
+
30
class OrigamiAction(Action):
    """One fold operation sent by the client each step."""

    # Fold kind; the environment treats 'stop' as an end-of-episode signal.
    fold_type: str = Field(default="valley", description="'valley' | 'mountain' | 'pleat' | 'crimp' | 'stop'")
    # Crease endpoints in normalized paper coordinates.
    fold_line: dict[str, list[float]] = Field(
        default_factory=lambda: {"start": [0.0, 0.5], "end": [1.0, 0.5]},
        description="{'start': [x, y], 'end': [x, y]} normalized 0-1",
    )
    # Dihedral fold angle.
    fold_angle: float = Field(default=180.0, description="Fold angle in degrees, 0-180")
    # Which layers the fold should affect.
    layer_select: str = Field(default="all", description="'all' | 'top' | 'bottom'")
49
+
50
+
51
class OrigamiObservation(Observation):
    """Everything the LLM and Three.js viewer need.

    paper_state contains FOLD-compatible geometry + physics data.
    metrics contains all computed quality metrics.
    No render_urls — the browser renders from paper_state directly.
    """

    task: dict[str, Any] = Field(default_factory=dict)                # active task spec
    paper_state: dict[str, Any] = Field(default_factory=dict)         # geometry for the viewer
    metrics: dict[str, Any] = Field(default_factory=dict)             # computed quality metrics
    fold_history: list[dict[str, Any]] = Field(default_factory=list)  # folds applied so far
    error: Optional[str] = Field(default=None)                        # last fold/physics error, if any
64
+
65
+
66
class OrigamiState(State):
    """Server-side episode tracking."""

    task_name: str = Field(default="")         # name of the active task
    num_folds_applied: int = Field(default=0)  # folds successfully applied this episode
    is_valid: bool = Field(default=True)       # latest validation verdict
    total_reward: float = Field(default=0.0)   # reward assigned at episode end
server/origami_environment.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OrigamiEnvironment — OpenEnv environment wrapping the origami physics engine.
3
+
4
+ Implements reset() / step() / state following the OpenEnv interface.
5
+ Engine (physics, fold, validation, metrics) lives in engine/.
6
+ No server-side image rendering — paper_state contains all geometry data.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import os
12
+ import uuid
13
+ from typing import Any, Optional
14
+
15
# openenv base class — fall back to plain object if not installed
try:
    from openenv.core.env_server.interfaces import Environment
except ImportError:
    # Generic stand-in so OrigamiEnvironment's subscript syntax still works.
    from typing import Generic, TypeVar
    A = TypeVar("A")
    O = TypeVar("O")
    S = TypeVar("S")
    class Environment(Generic[A, O, S]):
        """Minimal stand-in for openenv.core.env_server.interfaces.Environment."""
        def __init__(self, **kwargs): pass
26
+
27
+ from engine.paper import Paper
28
+ from engine.fold_engine import apply_fold
29
+ from engine.physics import simulate
30
+ from engine.validation import validate_state
31
+ from engine.metrics import compute_all_metrics
32
+ from server.models import OrigamiAction, OrigamiObservation, OrigamiState
33
+ from server.tasks import get_task_by_name, sample_task
34
+
35
+
36
def _get_material(name: str):
    """Return the engine material for *name*, falling back to 'paper'.

    Fix: the original wrapped the import itself in the try and repeated the
    same import inside the except — an ImportError would simply re-raise from
    the fallback, and the broad except hid which step failed. Import once;
    guard only the lookup, so the fallback covers unknown material names.
    """
    from engine.materials import get_material

    try:
        return get_material(name)
    except Exception:
        # Unknown/unsupported material name — default to plain paper.
        return get_material("paper")
44
+
45
+
46
class OrigamiEnvironment(Environment[OrigamiAction, OrigamiObservation, OrigamiState]):
    """Origami folding RL environment.

    Each episode: agent receives paper_state + task, applies folds one at a
    time via step(), receives metrics + reward, ends with 'stop' action or
    when max_folds is reached.
    """

    SUPPORTS_CONCURRENT_SESSIONS = False

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._paper: Optional[Paper] = None        # current sheet geometry
        self._task: Optional[dict] = None          # active task spec
        self._fold_history: list[dict] = []        # folds applied this episode
        self._metrics: dict = {}                   # latest computed metrics
        self._validation: dict = {}                # latest validation result
        self._error: Optional[str] = None          # last fold/physics error
        self._episode_id: Optional[str] = None
        self._step_count: int = 0
        self._total_reward: float = 0.0            # assigned once, at episode end

    # ── reset ─────────────────────────────────────────────────────────

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        **kwargs: Any,
    ) -> OrigamiObservation:
        """Start a new episode; kwargs may carry 'task_name' to pick a task."""
        self._episode_id = episode_id or str(uuid.uuid4())
        self._step_count = 0
        self._fold_history = []
        self._error = None
        self._total_reward = 0.0

        # Select task
        task_name = kwargs.get("task_name")
        if task_name:
            self._task = get_task_by_name(task_name)
        # Unknown/missing name → random task (seeded for reproducibility).
        if not self._task:
            self._task = sample_task(seed=seed)

        # Create flat sheet
        mat = _get_material(self._task["material"])
        self._paper = Paper.create_flat_sheet(
            width=self._task["width"],
            height=self._task["height"],
            material=mat,
        )

        # Initial validation + metrics (no physics needed for flat sheet)
        self._validation = validate_state(self._paper)
        self._metrics = compute_all_metrics(self._paper, self._task, self._validation)

        return self._make_observation(done=False, reward=None)

    # ── step ──────────────────────────────────────────────────────────

    def step(
        self,
        action: OrigamiAction,
        timeout_s: Optional[float] = None,
        **kwargs: Any,
    ) -> OrigamiObservation:
        """Apply one fold; returns the new observation (done=True on terminal)."""
        # step() before reset() — fail the episode immediately.
        if self._paper is None or self._task is None:
            return self._make_observation(done=True, reward=-5.0)

        self._step_count += 1
        self._error = None

        # ── Stop action ───────────────────────────────────────────────
        if action.fold_type == "stop":
            return self._finalize_episode()

        # ── Build fold dict ───────────────────────────────────────────
        fold_dict = {
            "type": action.fold_type,
            "line": action.fold_line,
            "angle": action.fold_angle,
        }

        # ── Apply fold ────────────────────────────────────────────────
        new_paper, err = apply_fold(self._paper, fold_dict)
        if err:
            # Invalid fold terminates the episode with a fixed penalty.
            self._error = err
            return self._make_observation(done=True, reward=-5.0)

        self._paper = new_paper
        self._fold_history.append({**fold_dict, "step": self._step_count})

        # ── Physics relaxation ────────────────────────────────────────
        try:
            self._paper = simulate(self._paper, fold_percent=1.0)
        except Exception as exc:
            self._error = f"Physics failed: {exc}"
            # Continue — don't abort episode on physics failure

        # ── Validate ──────────────────────────────────────────────────
        self._validation = validate_state(self._paper)

        # ── Metrics ───────────────────────────────────────────────────
        self._metrics = compute_all_metrics(self._paper, self._task, self._validation)

        # ── Check termination ─────────────────────────────────────────
        max_folds = self._task.get("max_folds", 50)
        if self._step_count >= max_folds:
            return self._finalize_episode()

        if self._validation.get("self_intersections", 0) > 0:
            self._error = "Self-intersection detected"
            return self._finalize_episode()

        return self._make_observation(done=False, reward=None)

    # ── state ─────────────────────────────────────────────────────────

    @property
    def state(self) -> OrigamiState:
        """Snapshot of server-side episode bookkeeping."""
        return OrigamiState(
            episode_id=self._episode_id,
            step_count=self._step_count,
            task_name=self._task.get("name", "") if self._task else "",
            num_folds_applied=len(self._fold_history),
            is_valid=self._metrics.get("is_valid", True),
            total_reward=self._total_reward,
        )

    # ── internals ─────────────────────────────────────────────────────

    def _finalize_episode(self) -> OrigamiObservation:
        # Compute the terminal reward exactly once and emit a done observation.
        reward = self._compute_reward()
        self._total_reward = reward
        return self._make_observation(done=True, reward=reward)

    def _make_observation(self, done: bool, reward: Optional[float]) -> OrigamiObservation:
        return OrigamiObservation(
            done=done,
            reward=reward,
            task=self._task or {},
            paper_state=self._paper.to_observation_dict() if self._paper else {},
            metrics=self._metrics,
            fold_history=self._fold_history,
            error=self._error,
        )

    def _compute_reward(self) -> float:
        """Scalar terminal reward derived from the latest metrics dict."""
        m = self._metrics
        reward = 0.0

        # Compactness is the main signal
        reward += m.get("compactness", 0.0) * 20.0

        # Bonus for fitting in target box
        if m.get("fits_target_box", False):
            reward += 10.0

        # Bonus for deployability (if task requires it)
        if m.get("is_deployable", False):
            reward += 5.0

        # Penalties for violations
        reward -= m.get("kawasaki_violations", 0) * 2.0
        reward -= m.get("maekawa_violations", 0) * 2.0
        reward -= m.get("self_intersections", 0) * 5.0

        # Penalty for too many folds (encourage efficiency)
        reward -= m.get("fold_count", 0) * 0.5

        # Penalty for exceeding material strain limit
        max_strain = m.get("max_strain", 0.0)
        strain_limit = self._paper.material.max_strain if self._paper else 0.05
        if max_strain > strain_limit:
            reward -= 3.0 * (max_strain / strain_limit)

        return float(reward)
server/tasks.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Task pool and curriculum for the origami RL environment.
3
+
4
+ 7 tasks across 4 difficulty levels.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import random
9
+ from typing import Optional
10
+
11
+
12
# Task schema (per entry): width/height — sheet size; material — engine
# material name; target_ratio — presumably the desired packed-size ratio
# (TODO confirm against engine.metrics); max_folds — step budget;
# target_box — [w, h, thickness] the folded sheet should fit inside;
# must_deploy — whether the fold must be reversible; difficulty — 1-4
# curriculum level.
TASKS: dict[str, dict] = {
    "half_fold": {
        "name": "half_fold",
        "description": "Fold a 1x1 paper sheet in half along the horizontal midline.",
        "width": 1.0,
        "height": 1.0,
        "material": "paper",
        "target_ratio": 0.50,
        "max_folds": 3,
        "target_box": [1.0, 0.5, 0.02],
        "must_deploy": False,
        "difficulty": 1,
    },
    "quarter_fold": {
        "name": "quarter_fold",
        "description": "Fold a 1x1 paper sheet into quarters using two perpendicular folds.",
        "width": 1.0,
        "height": 1.0,
        "material": "paper",
        "target_ratio": 0.25,
        "max_folds": 5,
        "target_box": [0.5, 0.5, 0.04],
        "must_deploy": False,
        "difficulty": 1,
    },
    "letter_fold": {
        "name": "letter_fold",
        "description": "Fold a 1x1 paper into thirds (letter fold) using two parallel folds.",
        "width": 1.0,
        "height": 1.0,
        "material": "paper",
        "target_ratio": 0.33,
        "max_folds": 5,
        "target_box": [1.0, 0.34, 0.03],
        "must_deploy": False,
        "difficulty": 2,
    },
    "map_fold": {
        "name": "map_fold",
        "description": "Fold a 1x1 paper into eighths using a grid fold pattern. Must be re-deployable.",
        "width": 1.0,
        "height": 1.0,
        "material": "paper",
        "target_ratio": 0.125,
        "max_folds": 8,
        "target_box": [0.5, 0.25, 0.08],
        "must_deploy": True,
        "difficulty": 2,
    },
    "solar_panel": {
        "name": "solar_panel",
        "description": "Pack a 1x1 Mylar solar panel into a compact configuration using a Miura-ori style fold. Must deploy.",
        "width": 1.0,
        "height": 1.0,
        "material": "mylar",
        "target_ratio": 0.05,
        "max_folds": 20,
        "target_box": [0.25, 0.25, 0.05],
        "must_deploy": True,
        "difficulty": 3,
    },
    "shelter_wall": {
        "name": "shelter_wall",
        "description": "Fold a 1x1 aluminum sheet into a compact structural panel within strain limits.",
        "width": 1.0,
        "height": 1.0,
        "material": "aluminum",
        "target_ratio": 0.10,
        "max_folds": 15,
        "target_box": [0.5, 0.25, 0.1],
        "must_deploy": False,
        "difficulty": 3,
    },
    "stent": {
        "name": "stent",
        "description": "Fold a 0.5x1.5 nitinol sheet into a compact tube configuration for a medical stent. Superelastic material.",
        "width": 0.5,
        "height": 1.5,
        "material": "nitinol",
        "target_ratio": 0.09,
        "max_folds": 25,
        "target_box": [0.1, 0.1, 0.15],
        "must_deploy": True,
        "difficulty": 4,
    },
}
98
+
99
+
100
def get_task_by_name(name: str) -> Optional[dict]:
    """Look up a task definition by name; None for unknown names."""
    try:
        return TASKS[name]
    except KeyError:
        return None
103
+
104
+
105
def sample_task(seed: Optional[int] = None, difficulty: Optional[int] = None) -> dict:
    """Sample a random task, optionally filtered by difficulty level.

    Returns a shallow copy so callers may mutate it freely. If the
    difficulty filter matches nothing, the full pool is used instead.
    """
    rng = random.Random(seed)
    candidates = [t for t in TASKS.values() if difficulty is None or t["difficulty"] == difficulty]
    if not candidates:
        candidates = list(TASKS.values())
    return dict(rng.choice(candidates))
114
+
115
+
116
def get_tasks_by_difficulty(level: int) -> list[dict]:
    """Return copies of all tasks at the given difficulty level."""
    matches: list[dict] = []
    for task in TASKS.values():
        if task["difficulty"] == level:
            matches.append(dict(task))
    return matches
119
+
120
+
121
def available_task_names() -> list[str]:
    """Return every task name in sorted order."""
    return sorted(TASKS)
src/App.js CHANGED
@@ -16,7 +16,7 @@ const REPLAY_EP_ID = _urlParams.get('ep') || null;
16
 
17
  function App() {
18
  const [targets, setTargets] = useState({});
19
- const [selectedTarget, setSelectedTarget] = useState('half_horizontal');
20
  const [episode, setEpisode] = useState(null);
21
  const [currentStep, setCurrentStep] = useState(0);
22
  const [playing, setPlaying] = useState(false);
 
16
 
17
  function App() {
18
  const [targets, setTargets] = useState({});
19
+ const [selectedTarget, setSelectedTarget] = useState('half_fold');
20
  const [episode, setEpisode] = useState(null);
21
  const [currentStep, setCurrentStep] = useState(0);
22
  const [playing, setPlaying] = useState(false);
src/components/Fold3DCanvas.js CHANGED
@@ -7,10 +7,8 @@ const PITCH_MAX = Math.PI / 2 - 0.1;
7
  const ZOOM_MIN = 0.3;
8
  const ZOOM_MAX = 5.0;
9
  const LIGHT_DIR = normalize3([0.4, -0.45, 1.0]);
10
- const MAX_FOLD_RAD = Math.PI * 0.92;
11
- const SIDE_EPS = 1e-7;
12
- const MOUNTAIN_COLOR = 'rgba(245, 158, 11, 0.95)';
13
- const VALLEY_COLOR = 'rgba(56, 189, 248, 0.95)';
14
 
15
  function clamp(value, min, max) {
16
  return Math.min(Math.max(value, min), max);
@@ -46,6 +44,9 @@ function shadePaper(intensity) {
46
  return `rgb(${r}, ${g}, ${b})`;
47
  }
48
 
 
 
 
49
  function buildGridMesh(resolution = 18) {
50
  const vertices = [];
51
  for (let y = 0; y <= resolution; y += 1) {
@@ -170,7 +171,7 @@ function applyAllFolds(vertices, foldMasks, progresses) {
170
  function projectVertex(vertex, dim, pitch, yaw, zoom) {
171
  let x = vertex[0] - 0.5;
172
  let y = vertex[1] - 0.5;
173
- let z = vertex[2];
174
 
175
  const cp = Math.cos(pitch);
176
  const sp = Math.sin(pitch);
 
7
  const ZOOM_MIN = 0.3;
8
  const ZOOM_MAX = 5.0;
9
  const LIGHT_DIR = normalize3([0.4, -0.45, 1.0]);
10
+ const MOUNTAIN_COLOR = 'rgba(245, 158, 11, 0.9)';
11
+ const VALLEY_COLOR = 'rgba(56, 189, 248, 0.9)';
 
 
12
 
13
  function clamp(value, min, max) {
14
  return Math.min(Math.max(value, min), max);
 
44
  return `rgb(${r}, ${g}, ${b})`;
45
  }
46
 
47
+ const SIDE_EPS = 1e-10;
48
+ const MAX_FOLD_RAD = Math.PI;
49
+
50
  function buildGridMesh(resolution = 18) {
51
  const vertices = [];
52
  for (let y = 0; y <= resolution; y += 1) {
 
171
  function projectVertex(vertex, dim, pitch, yaw, zoom) {
172
  let x = vertex[0] - 0.5;
173
  let y = vertex[1] - 0.5;
174
+ let z = vertex[2] || 0;
175
 
176
  const cp = Math.cos(pitch);
177
  const sp = Math.sin(pitch);
training/__init__.py ADDED
File without changes
training/demo.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ training/demo.py — Run 8 zero-shot rollouts and stream them to the grid viewer.
3
+
4
+ Usage:
5
+ cd /path/to/optigami
6
+ python -m training.demo
7
+
8
+ Then open: http://localhost:9001/viewer/training.html
9
+
10
+ Each of the 8 "strategies" is a heuristic that mimics what a pretrained LLM might
11
+ produce for different tasks — varying from near-optimal to poor. This exercises
12
+ the full broadcast → grid viewer pipeline without requiring an LLM API key.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import time
18
+ import uuid
19
+ from typing import Callable
20
+
21
+ import uvicorn
22
+
23
+ from server.app import app, broadcast
24
+ from training.runner import run_batch
25
+
26
+
27
+ # ── 8 zero-shot heuristic strategies ──────────────────────────────────────────
28
+ # Each is a callable: paper_state (dict) → fold_dict
29
+ # These represent the range of strategies a pretrained LLM might generate.
30
+
31
def strategy_perfect_half(paper_state: dict) -> dict:
    """Valley fold exactly at horizontal midline — optimal for half_fold."""
    midline = {"start": [0.0, 0.5], "end": [1.0, 0.5]}
    return {"type": "valley", "line": midline, "angle": 180.0}
34
+
35
+
36
def strategy_slight_offset(paper_state: dict) -> dict:
    """Valley fold slightly off-center — almost optimal."""
    crease = {"start": [0.0, 0.48], "end": [1.0, 0.48]}
    return {"type": "valley", "line": crease, "angle": 180.0}
39
+
40
+
41
def strategy_thirds(paper_state: dict) -> dict:
    """Letter fold at one-third — wrong for half_fold, generates interesting geometry."""
    positions = [0.333, 0.667]
    n = paper_state.get("fold_count", 0)
    if n >= len(positions):
        # Both creases placed — signal the environment to end the episode.
        return {"type": "stop", "line": {"start": [0.0, 0.5], "end": [1.0, 0.5]}, "angle": 0.0}
    y = positions[n]
    kind = "valley" if n == 0 else "mountain"
    return {"type": kind, "line": {"start": [0.0, y], "end": [1.0, y]}, "angle": 180.0}
52
+
53
+
54
def strategy_vertical(paper_state: dict) -> dict:
    """Vertical fold — gets compactness but in wrong dimension for target_box."""
    crease = {"start": [0.5, 0.0], "end": [0.5, 1.0]}
    return {"type": "valley", "line": crease, "angle": 180.0}
57
+
58
+
59
def strategy_mountain(paper_state: dict) -> dict:
    """Mountain fold at midline — same geometry, different assignment."""
    midline = {"start": [0.0, 0.5], "end": [1.0, 0.5]}
    return {"type": "mountain", "line": midline, "angle": 180.0}
62
+
63
+
64
def strategy_accordion(paper_state: dict) -> dict:
    """Accordion 3-fold — overfolds, achieves high compactness but more folds."""
    plan = [(0.25, "valley"), (0.5, "mountain"), (0.75, "valley")]
    n = paper_state.get("fold_count", 0)
    if n >= len(plan):
        # All three creases placed — stop folding.
        return {"type": "stop", "line": {"start": [0.0, 0.5], "end": [1.0, 0.5]}, "angle": 0.0}
    y, kind = plan[n]
    return {"type": kind, "line": {"start": [0.0, y], "end": [1.0, y]}, "angle": 180.0}
76
+
77
+
78
def strategy_diagonal(paper_state: dict) -> dict:
    """Diagonal fold — achieves compactness but irregular bounding box."""
    diagonal = {"start": [0.0, 0.0], "end": [1.0, 1.0]}
    return {"type": "valley", "line": diagonal, "angle": 180.0}
81
+
82
+
83
def strategy_quarter(paper_state: dict) -> dict:
    """Two perpendicular folds — 4x compactness for quarter_fold task."""
    n = paper_state.get("fold_count", 0)
    if n == 0:
        # First crease: horizontal midline.
        return {"type": "valley", "line": {"start": [0.0, 0.5], "end": [1.0, 0.5]}, "angle": 180.0}
    if n == 1:
        # Second crease: vertical midline.
        return {"type": "valley", "line": {"start": [0.5, 0.0], "end": [0.5, 1.0]}, "angle": 180.0}
    return {"type": "stop", "line": {"start": [0.0, 0.5], "end": [1.0, 0.5]}, "angle": 0.0}
91
+
92
+
93
# (display_name, strategy_fn) pairs — one grid cell per entry in the viewer.
STRATEGIES: list[tuple[str, Callable]] = [
    ("perfect_half", strategy_perfect_half),
    ("slight_offset", strategy_slight_offset),
    ("thirds_fold", strategy_thirds),
    ("vertical_fold", strategy_vertical),
    ("mountain_fold", strategy_mountain),
    ("accordion_3", strategy_accordion),
    ("diagonal", strategy_diagonal),
    ("quarter_fold", strategy_quarter),
]
103
+
104
+
105
+ # ── Demo runner ────────────────────────────────────────────────────────────────
106
+
107
async def run_demo(task_name: str = "half_fold", delay_s: float = 0.5) -> None:
    """Wait for server to be ready, then fire 8 episodes.

    Runs every strategy in STRATEGIES concurrently (one worker thread each)
    and reports scores through the broadcast hub when the batch completes.
    """
    # Give uvicorn time to bind and call startup hook (sets broadcast._loop)
    await asyncio.sleep(1.5)

    batch_id = 1
    names, fns = zip(*STRATEGIES)
    ep_ids = [f"ep_{name}" for name in names]

    print(f"\n[demo] Starting batch {batch_id} — task: {task_name}")
    print(f"[demo] Open http://localhost:9001/viewer/training.html\n")

    # Signal grid to clear and show G=8
    await broadcast.start_batch(batch_id, len(fns))

    await asyncio.sleep(delay_s)

    # Run all 8 episodes in the thread pool; broadcast_fn fires into this loop
    results = await asyncio.gather(*[
        asyncio.to_thread(
            _run_one,
            fn,
            task_name,
            ep_id,
            broadcast.publish,
        )
        for fn, ep_id in zip(fns, ep_ids)
    ])

    # Pick the winner by final score; ties resolve to the earliest strategy.
    scores = [r["score"] for r in results]
    best_idx = max(range(len(scores)), key=lambda i: scores[i])

    await broadcast.finish_batch(batch_id, scores, best_episode_id=ep_ids[best_idx])

    print("\n[demo] Results:")
    for name, result in zip(names, results):
        print(f"  {name:20s} score={result['score']:+.2f} status={result['status']}")
    print(f"\n[demo] Best: {names[best_idx]} (score={scores[best_idx]:+.2f})")
    print("\n[demo] Grid viewer running. Press Ctrl+C to stop.\n")
146
+
147
+
148
def _run_one(
    strategy_fn: Callable,
    task_name: str,
    ep_id: str,
    broadcast_fn: Callable,
) -> dict:
    """Thin wrapper: adds a small sleep between steps so the viewer can animate.

    Runs a full episode of *strategy_fn* against a fresh environment,
    publishing an 'episode_update' after reset and after every step, then
    a final 'episode_done'. Returns a result dict with score and status.
    Runs in a worker thread (see run_demo); broadcast_fn must be thread-safe.
    """
    from server.models import OrigamiAction
    from server.origami_environment import OrigamiEnvironment

    env = OrigamiEnvironment()
    obs = env.reset(task_name=task_name)

    # Step 0: initial (flat-sheet) observation so the viewer shows the start.
    broadcast_fn(ep_id, {
        "type": "episode_update",
        "episode_id": ep_id,
        "task_name": task_name,
        "step": 0,
        "observation": _obs_dict(obs),
    })

    max_steps = env._task.get("max_folds", 10) if env._task else 10
    status = "done"

    for step_idx in range(max_steps):
        if obs.done:
            break

        time.sleep(0.3)  # pace so the viewer can animate each step

        fold_dict = strategy_fn(obs.paper_state)

        # Strategy chose to stop — end cleanly without applying a fold.
        if fold_dict.get("type") == "stop":
            break

        action = OrigamiAction(
            fold_type=fold_dict["type"],
            fold_line=fold_dict["line"],
            fold_angle=float(fold_dict.get("angle", 180.0)),
        )
        obs = env.step(action)

        broadcast_fn(ep_id, {
            "type": "episode_update",
            "episode_id": ep_id,
            "task_name": task_name,
            "step": step_idx + 1,
            "observation": _obs_dict(obs),
        })

        if obs.done:
            break
    else:
        # for-else: loop exhausted max_steps without a break → ran out of budget.
        status = "timeout"

    # Terminal reward if the env produced one, else the accumulated total.
    score = obs.reward if obs.reward is not None else env._total_reward or 0.0

    broadcast_fn(ep_id, {
        "type": "episode_done",
        "episode_id": ep_id,
        "status": status,
        "score": float(score),
        "final_metrics": obs.metrics,
    })

    return {
        "episode_id": ep_id,
        "score": float(score),
        "final_metrics": obs.metrics,
        "status": status,
    }
219
+
220
+
221
+ def _obs_dict(obs) -> dict:
222
+ try:
223
+ return obs.model_dump()
224
+ except AttributeError:
225
+ return {
226
+ "paper_state": getattr(obs, "paper_state", {}),
227
+ "metrics": getattr(obs, "metrics", {}),
228
+ "fold_history": getattr(obs, "fold_history", []),
229
+ "done": getattr(obs, "done", False),
230
+ "reward": getattr(obs, "reward", None),
231
+ }
232
+
233
+
234
+ # ── Entry point ────────────────────────────────────────────────────────────────
235
+
236
async def _main() -> None:
    """Serve the FastAPI app and drive the demo batch on the same event loop."""
    config = uvicorn.Config(app, host="0.0.0.0", port=9001, log_level="warning")
    server = uvicorn.Server(config)

    # Run demo concurrently with the uvicorn server
    await asyncio.gather(
        server.serve(),
        run_demo(task_name="half_fold"),
    )
245
+
246
+
247
if __name__ == "__main__":
    try:
        asyncio.run(_main())
    except KeyboardInterrupt:
        # Ctrl+C from the console — exit quietly with a short notice.
        print("\n[demo] Stopped.")
training/demo_llm.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ training/demo_llm.py — 8 rollouts using Claude as the zero-shot fold strategist.
3
+
4
+ Usage:
5
+ cd /path/to/optigami
6
+ ANTHROPIC_API_KEY=sk-... python -m training.demo_llm
7
+
8
+ Each of the 8 episodes calls Claude (claude-haiku-4-5) once per fold step.
9
+ Claude sees the current paper_state metrics and decides the next fold.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import asyncio
14
+ import json
15
+ import os
16
+ import re
17
+ import time
18
+ from typing import Any
19
+
20
+ import anthropic
21
+ import uvicorn
22
+
23
+ from server.app import app, broadcast
24
+ from server.models import OrigamiAction
25
+ from server.origami_environment import OrigamiEnvironment
26
+ from server.tasks import get_task_by_name
27
+
28
+
29
# Demo configuration.
TASK_NAME = "half_fold"  # task key resolved via get_task_by_name
NUM_EPISODES = 8  # parallel rollouts per batch
MODEL = "claude-haiku-4-5-20251001"  # Claude model queried once per fold step
32
+
33
+
34
+ # ── LLM strategy factory ───────────────────────────────────────────────────────
35
+
36
def make_llm_strategy(client: anthropic.Anthropic, task: dict, episode_num: int):
    """Return a strategy_fn for one episode. Each episode gets its own call history.

    The returned callable receives the flattened paper_state dict and returns
    a fold dict: {"type": ..., "line": {...}, "angle": ...}. If the model's
    reply cannot be parsed as JSON, a safe "stop" fold is returned so the
    episode ends cleanly instead of crashing.
    """
    history: list[dict[str, Any]] = []

    def _stop_fold() -> dict:
        # Fallback fold used whenever the model reply is unparseable.
        return {"type": "stop", "line": {"start": [0, 0.5], "end": [1, 0.5]}, "angle": 0.0}

    def strategy(paper_state: dict) -> dict:
        fold_count = paper_state.get("fold_count", 0)
        compactness = paper_state.get("compactness", 0)
        bb = paper_state.get("bounding_box", [1, 1, 0])
        target_box = task.get("target_box", [1, 0.5, 0.02])
        max_folds = task.get("max_folds", 3)

        user_msg = f"""You are folding a {task['width']}x{task['height']} sheet of {task['material']}.
Task: {task['description']}
Target box to fit inside: {target_box}
Max folds allowed: {max_folds}

Current state (fold {fold_count}/{max_folds}):
  compactness: {compactness:.3f} (1.0 = fully packed, 0.0 = flat)
  bounding_box: [{bb[0]:.3f}, {bb[1]:.3f}, {bb[2]:.4f}]
  fits_target_box: {paper_state.get('fits_target_box', False)}

Choose the next fold. Respond with ONLY valid JSON, no other text:
{{
  "type": "valley" or "mountain" or "stop",
  "line": {{"start": [x, y], "end": [x, y]}},
  "angle": 180
}}

Coordinates are normalized 0-1. Use "stop" if done."""

        history.append({"role": "user", "content": user_msg})

        response = client.messages.create(
            model=MODEL,
            max_tokens=120,
            messages=history,
        )
        reply = response.content[0].text.strip()
        history.append({"role": "assistant", "content": reply})

        # BUG FIX: the previous extraction used re.search(r'\{[^{}]+\}', ...),
        # which cannot match nested braces. The prompt above *requests* a nested
        # "line" object, so the regex matched only {"start": ..., "end": ...}
        # instead of the full fold dict. Take the outermost {...} span instead
        # (this also strips markdown code fences around the JSON).
        open_idx = reply.find("{")
        close_idx = reply.rfind("}")
        if open_idx == -1 or close_idx <= open_idx:
            return _stop_fold()

        try:
            fold_dict = json.loads(reply[open_idx:close_idx + 1])
        except json.JSONDecodeError:
            # Malformed JSON from the model — end the episode rather than crash.
            return _stop_fold()
        if not isinstance(fold_dict, dict):
            return _stop_fold()

        # Normalize: ensure required keys
        fold_dict.setdefault("type", "valley")
        fold_dict.setdefault("line", {"start": [0.0, 0.5], "end": [1.0, 0.5]})
        fold_dict.setdefault("angle", 180.0)
        return fold_dict

    return strategy
89
+
90
+
91
+ # ── Episode runner ─────────────────────────────────────────────────────────────
92
+
93
def run_episode_llm(
    strategy_fn,
    task_name: str,
    ep_id: str,
    broadcast_fn,
) -> dict:
    """Run one LLM-driven fold episode, streaming progress via broadcast_fn.

    Args:
        strategy_fn: Callable(paper_state: dict) -> fold dict. Any exception it
            raises ends the episode with status "error".
        task_name: Task key passed to env.reset().
        ep_id: Stable episode identifier echoed in every broadcast payload.
        broadcast_fn: Callable(ep_id, payload) for live streaming to viewers.

    Returns:
        dict with keys: episode_id, score, status.
    """
    env = OrigamiEnvironment()
    obs = env.reset(task_name=task_name)
    # NOTE(review): reads the env's private _task for fold limits — consider a
    # public accessor on OrigamiEnvironment.
    task = env._task or {}

    # Step 0 broadcast: initial (unfolded) observation.
    broadcast_fn(ep_id, {
        "type": "episode_update",
        "episode_id": ep_id,
        "task_name": task_name,
        "step": 0,
        "observation": _obs_dict(obs),
    })

    max_steps = task.get("max_folds", 5)
    status = "done"

    for step_idx in range(max_steps):
        if obs.done:
            break

        # Build a flat paper_state dict for the LLM (add metrics inline)
        ps = dict(obs.paper_state)
        ps.update(obs.metrics)  # compactness, fits_target_box, etc.
        ps["fold_count"] = step_idx

        try:
            fold_dict = strategy_fn(ps)
        except Exception as exc:
            # Strategy (LLM call / parsing) failed: report the error and bail
            # out early with a zero score.
            broadcast_fn(ep_id, {
                "type": "episode_done", "episode_id": ep_id,
                "status": "error", "score": 0.0,
                "final_metrics": obs.metrics, "error": str(exc),
            })
            return {"episode_id": ep_id, "score": 0.0, "status": "error"}

        # The strategy may voluntarily end the episode.
        if fold_dict.get("type") == "stop":
            break

        time.sleep(0.4)  # pace for viewer animation

        action = OrigamiAction(
            fold_type=fold_dict["type"],
            fold_line=fold_dict["line"],
            fold_angle=float(fold_dict.get("angle", 180.0)),
        )
        obs = env.step(action)

        broadcast_fn(ep_id, {
            "type": "episode_update",
            "episode_id": ep_id,
            "task_name": task_name,
            "step": step_idx + 1,
            "observation": _obs_dict(obs),
        })

        if obs.done:
            break
    else:
        # Loop exhausted max_steps without done/stop/break → timeout.
        status = "timeout"

    # Prefer the final observation's reward; fall back to the env's running
    # total (private attribute) when the observation carries none.
    score = obs.reward if obs.reward is not None else (env._total_reward or 0.0)
    broadcast_fn(ep_id, {
        "type": "episode_done",
        "episode_id": ep_id,
        "status": status,
        "score": float(score),
        "final_metrics": obs.metrics,
    })

    return {"episode_id": ep_id, "score": float(score), "status": status}
168
+
169
+
170
+ def _obs_dict(obs) -> dict:
171
+ try:
172
+ return obs.model_dump()
173
+ except AttributeError:
174
+ return {
175
+ "paper_state": getattr(obs, "paper_state", {}),
176
+ "metrics": getattr(obs, "metrics", {}),
177
+ "fold_history": getattr(obs, "fold_history", []),
178
+ "done": getattr(obs, "done", False),
179
+ "reward": getattr(obs, "reward", None),
180
+ }
181
+
182
+
183
+ # ── Main ──────────────────────────────────────────────────────────────────────
184
+
185
async def run_demo() -> None:
    """Run NUM_EPISODES concurrent LLM-driven episodes and report scores.

    Requires ANTHROPIC_API_KEY in the environment. Episodes stream live to the
    broadcast server; each one drives its own Claude conversation.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("Set ANTHROPIC_API_KEY environment variable")

    client = anthropic.Anthropic(api_key=api_key)
    task = get_task_by_name(TASK_NAME)

    await asyncio.sleep(1.5)  # wait for server startup

    print(f"\n[llm-demo] Model: {MODEL}")
    print(f"[llm-demo] Task: {TASK_NAME} — {task['description']}")
    print(f"[llm-demo] Open: http://localhost:9001/viewer/training.html\n")

    # Announce a single batch (batch id 1) of NUM_EPISODES to viewers.
    await broadcast.start_batch(1, NUM_EPISODES)

    ep_ids = [f"ep_{i:02d}" for i in range(NUM_EPISODES)]
    # One strategy per episode so each keeps an independent chat history.
    strategies = [make_llm_strategy(client, task, i) for i in range(NUM_EPISODES)]

    # Run all episodes concurrently (each makes its own Claude API calls)
    results = await asyncio.gather(*[
        asyncio.to_thread(run_episode_llm, fn, TASK_NAME, ep_id, broadcast.publish)
        for fn, ep_id in zip(strategies, ep_ids)
    ])

    scores = [r["score"] for r in results]
    best_idx = max(range(len(scores)), key=lambda i: scores[i])

    await broadcast.finish_batch(1, scores, best_episode_id=ep_ids[best_idx])

    print("\n[llm-demo] Results:")
    for i, result in enumerate(results):
        print(f"  ep_{i:02d}  score={result['score']:+.2f}  status={result['status']}")
    print(f"\n[llm-demo] Best: ep_{best_idx:02d} (score={scores[best_idx]:+.2f})")
    print("\n[llm-demo] Press Ctrl+C to stop.\n")
220
+
221
+
222
async def _main() -> None:
    """Serve the training app and drive the LLM demo in the same event loop."""
    server = uvicorn.Server(
        uvicorn.Config(app, host="0.0.0.0", port=9001, log_level="warning")
    )
    await asyncio.gather(server.serve(), run_demo())
226
+
227
+
228
if __name__ == "__main__":
    # Run server + demo until interrupted; Ctrl+C exits with a clean message
    # instead of a KeyboardInterrupt traceback.
    try:
        asyncio.run(_main())
    except KeyboardInterrupt:
        print("\n[llm-demo] Stopped.")
training/runner.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TrainingRunner — parallel episode executor for GRPO training.
3
+
4
+ Each episode runs in a ThreadPoolExecutor thread.
5
+ After every env.step(), observations are pushed to the broadcast server (fire-and-forget).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import uuid
10
+ from concurrent.futures import ThreadPoolExecutor, as_completed
11
+ from typing import Any, Callable, Optional
12
+
13
+ from server.models import OrigamiAction
14
+ from server.origami_environment import OrigamiEnvironment
15
+
16
+
17
# Live-stream callback signature: (episode_id, json_payload) -> None.
BroadcastFn = Callable[[str, dict], None]
18
+
19
+
20
def run_episode(
    strategy_fn: Callable[[dict], dict],
    task_name: str,
    ep_id: Optional[str] = None,
    broadcast_fn: Optional[BroadcastFn] = None,
    max_steps: Optional[int] = None,
) -> dict:
    """Run a single origami episode with a given strategy function.

    Args:
        strategy_fn: Callable that receives paper_state dict and returns a fold dict:
            {"type": "valley"|"mountain"|"pleat"|"crimp"|"stop",
             "line": {"start": [x, y], "end": [x, y]},
             "angle": 180.0}
        task_name: Name of the task (from server/tasks.py)
        ep_id: Episode identifier for broadcast; auto-generated if None
        broadcast_fn: Optional callback(ep_id, data) for live streaming
        max_steps: Override task's max_folds if provided

    Returns:
        dict with keys: episode_id, score, final_metrics, fold_history, status
    """
    ep_id = ep_id or str(uuid.uuid4())[:8]
    env = OrigamiEnvironment()

    obs = env.reset(task_name=task_name)

    if broadcast_fn:
        broadcast_fn(ep_id, {
            "type": "episode_update",
            "episode_id": ep_id,
            "task_name": task_name,
            "step": 0,
            "observation": _obs_to_dict(obs),
        })

    # BUG FIX: the original expression
    #     max_steps or env._task.get("max_folds", 20) if env._task else 20
    # parsed as `(max_steps or ...) if env._task else 20` (conditional binds
    # last), silently discarding a caller-supplied max_steps whenever
    # env._task was unset — contradicting the docstring's "override" contract.
    if max_steps:
        step_limit = max_steps
    elif env._task:
        step_limit = env._task.get("max_folds", 20)
    else:
        step_limit = 20
    status = "done"

    for step_idx in range(step_limit):
        if obs.done:
            break

        # Strategy generates a fold dict
        try:
            fold_dict = strategy_fn(obs.paper_state)
        except Exception as exc:
            # A crashing strategy ends the episode as "error".
            status = "error"
            if broadcast_fn:
                broadcast_fn(ep_id, {
                    "type": "episode_done",
                    "episode_id": ep_id,
                    "status": "error",
                    "score": obs.reward or 0.0,
                    "final_metrics": obs.metrics,
                    "error": str(exc),
                })
            # NOTE(review): falling through also emits the generic
            # episode_done below, so viewers see two "done" events on the
            # error path — confirm the frontend dedupes by episode_id.
            break

        # Missing keys fall back to a flat valley fold across the middle.
        fold_type = fold_dict.get("type", "valley")
        fold_line = fold_dict.get("line", {"start": [0, 0.5], "end": [1, 0.5]})
        fold_angle = float(fold_dict.get("angle", 180.0))

        # NOTE(review): unlike the demo runners, a "stop" fold type is
        # forwarded to the env here — confirm OrigamiEnvironment treats it
        # as terminal rather than an invalid fold.
        action = OrigamiAction(
            fold_type=fold_type,
            fold_line=fold_line,
            fold_angle=fold_angle,
        )
        obs = env.step(action)

        if broadcast_fn:
            broadcast_fn(ep_id, {
                "type": "episode_update",
                "episode_id": ep_id,
                "task_name": task_name,
                "step": step_idx + 1,
                "observation": _obs_to_dict(obs),
            })

        if obs.done:
            break
    else:
        # Loop exhausted step_limit without finishing.
        status = "timeout"

    # Prefer the last observation's reward; fall back to the env's running total.
    score = obs.reward if obs.reward is not None else (env._total_reward or 0.0)

    if broadcast_fn:
        broadcast_fn(ep_id, {
            "type": "episode_done",
            "episode_id": ep_id,
            "status": status,
            "score": float(score),
            "final_metrics": obs.metrics,
        })

    return {
        "episode_id": ep_id,
        "score": float(score),
        "final_metrics": obs.metrics,
        "fold_history": obs.fold_history,
        "status": status,
    }
122
+
123
+
124
def run_batch(
    strategy_fns: list[Callable[[dict], dict]],
    task_name: str,
    broadcast_fn: Optional[BroadcastFn] = None,
    batch_id: Optional[int] = None,
    max_workers: int = 8,
) -> list[dict]:
    """Run G episodes in parallel with a ThreadPoolExecutor.

    Args:
        strategy_fns: List of G strategy callables (one per completion)
        task_name: Task to use for all episodes
        broadcast_fn: Optional broadcast callback, called after each step
        batch_id: Batch identifier for broadcast
        max_workers: Max parallel threads (bounded by G)

    Returns:
        List of episode result dicts, in same order as strategy_fns
    """
    # BUG FIX: an empty batch previously reached
    # ThreadPoolExecutor(max_workers=0), which raises ValueError.
    if not strategy_fns:
        return []

    n = len(strategy_fns)
    ep_ids = [f"ep_{(batch_id or 0):04d}_{i:02d}" for i in range(n)]
    workers = min(max_workers, n)

    # One independent placeholder per slot. (The original used `[{}] * n`,
    # which aliases a single shared dict across every slot — harmless only
    # because slots are reassigned below, but a classic mutation hazard.)
    results: list[dict] = [{} for _ in range(n)]

    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = {
            pool.submit(
                run_episode,
                fn,
                task_name,
                ep_ids[i],
                broadcast_fn,
            ): i
            for i, fn in enumerate(strategy_fns)
        }

        for future in as_completed(futures):
            idx = futures[future]
            try:
                results[idx] = future.result()
            except Exception as exc:
                # Record the failure in-slot so ordering is preserved.
                results[idx] = {
                    "episode_id": ep_ids[idx],
                    "score": 0.0,
                    "final_metrics": {},
                    "fold_history": [],
                    "status": "error",
                    "error": str(exc),
                }

    return results
176
+
177
+
178
+ def _obs_to_dict(obs) -> dict:
179
+ """Convert OrigamiObservation to a JSON-serializable dict."""
180
+ try:
181
+ return obs.model_dump()
182
+ except AttributeError:
183
+ return {
184
+ "task": obs.task if hasattr(obs, "task") else {},
185
+ "paper_state": obs.paper_state if hasattr(obs, "paper_state") else {},
186
+ "metrics": obs.metrics if hasattr(obs, "metrics") else {},
187
+ "fold_history": obs.fold_history if hasattr(obs, "fold_history") else [],
188
+ "done": obs.done if hasattr(obs, "done") else False,
189
+ "reward": obs.reward if hasattr(obs, "reward") else None,
190
+ "error": obs.error if hasattr(obs, "error") else None,
191
+ }