Spaces:
Sleeping
Sleeping
Commit Β·
32d29cd
1
Parent(s): d102807
feat: add hackathon demo script and result charts
Browse filesFour-act demo covering environment innovation, reward pipeline, training progress, and storytelling.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
demo.py
ADDED
|
@@ -0,0 +1,655 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Origami RL — Hackathon Demo Script
===================================
Run: python demo.py
     python demo.py --section 1                      # run only a specific act
     python demo.py --server http://... --skip-live  # skip live env calls

Covers all four judging criteria:
  ACT 1 — Environment Innovation (40%)
  ACT 2 — The Reward Pipeline (10%)
  ACT 3 — Training Progress (20%)
  ACT 4 — Storytelling wrap-up (30%)
"""

import argparse
import json
import sys
import time

# ─── Rich terminal output ──────────────────────────────────────────────────────
# Prefer `rich` for styled tables/panels/syntax highlighting; if it is not
# installed, fall back to plain-print shims that cover only the subset of the
# rich API actually used below (console.print/rule, Panel, rprint).
try:
    from rich.console import Console
    from rich.panel import Panel
    from rich.syntax import Syntax
    from rich.table import Table
    from rich.progress import Progress, BarColumn, TextColumn
    from rich import print as rprint
    HAS_RICH = True
    console = Console()
except ImportError:
    HAS_RICH = False
    # Minimal stand-in: drops styling kwargs and prints plainly.
    class Console:
        def print(self, *a, **kw): print(*a)
        def rule(self, t=""): print(f"\n{'─'*60} {t} {'─'*60}\n")
    console = Console()
    def Panel(t, **kw): return t
    def rprint(*a, **kw): print(*a)


# ASCII-art banner shown once at startup (before any act runs).
BANNER = """
╔═══════════════════════════════════════════════════════════╗
║   🦢  ORIGAMI RL — AlphaFold for Paper Folding            ║
║                                                           ║
║   LLM → FOLD crease pattern → physics sim → reward        ║
╚═══════════════════════════════════════════════════════════╝
"""
# ─── ACT 1: Environment Innovation ────────────────────────────────────────────

def act1_environment_innovation(server_url: str, skip_live: bool):
    """Present the FOLD-format environment, the task ladder, and why it is
    novel; optionally run a live round-trip against the env server.

    server_url: base URL of the origami environment HTTP server.
    skip_live:  when True, only the explanatory text is printed.
    """
    console.rule("[bold cyan]ACT 1 — Environment Innovation (40%)[/bold cyan]" if HAS_RICH else "ACT 1 — Environment Innovation (40%)")

    print("""
WHAT IS FOLD FORMAT?
────────────────────
Real origami uses the FOLD file format — the same standard used by
MIT's computational origami researchers. It encodes a crease pattern as:

  • vertices_coords   → 2D positions on the flat sheet
  • edges_vertices    → which vertex pairs form edges
  • edges_assignment  → B (boundary) | V (valley fold) | M (mountain fold)
  • edges_foldAngle   → how far to fold each crease (degrees)

WHY IS THIS A HARD PROBLEM FOR AN LLM?
──────────────────────────────────────
  • Pure spatial / geometric reasoning — no textbook answers
  • Discrete graph topology + continuous angles combined
  • A single wrong vertex index collapses the whole pattern
  • The model must reason about 3D shape from 2D flat layout
  • Target shape is described in words — no image provided

Think of it like: "describe exactly how to cut and crease a piece of paper
so that when you fold along those creases you get a triangle."
""")

    if HAS_RICH:
        # Task ladder rendered as a rich table when available.
        t = Table(title="Task Progression", show_header=True, header_style="bold magenta")
        t.add_column("Task", style="cyan")
        t.add_column("Description")
        t.add_column("Difficulty")
        t.add_column("Creases")
        t.add_column("Faces")
        t.add_row("triangle", "Diagonal valley fold", "★☆☆", "1 (V)", "2")
        t.add_row("half_fold", "Horizontal fold at y=0.5", "★☆☆", "1 (V)", "2")
        t.add_row("quarter_fold", "Two perpendicular folds", "★★☆", "4 (V,V)", "4")
        t.add_row("letter_fold", "Tri-fold like an envelope", "★★☆", "2 (V,M)", "3")
        console.print(t)
    else:
        print("Tasks: triangle (easy) → half_fold → quarter_fold → letter_fold (harder)")

    print("""
WHAT MAKES IT NOVEL?
────────────────────
Most RL environments use:  pixels / game state  →  discrete action
Our environment uses:      text description     →  structured JSON program
                           that is then *physically simulated*

The action space is effectively unbounded structured code generation.
The feedback signal requires running a real physics simulator.
This is closer to AlphaFold than to Atari.
""")

    # Live demo — perfect triangle fold
    if not skip_live:
        _live_demo_perfect_fold(server_url)
def _live_demo_perfect_fold(server_url: str):
    """Submit one known-good and one known-bad triangle fold to the live env.

    Best-effort by design: any failure (server down, `requests` missing,
    malformed response) falls through to printing the expected output instead
    of crashing the demo.

    server_url: base URL of the origami environment HTTP server.
    """
    print("\n─── LIVE DEMO: Submit a perfect triangle fold ───\n")

    # Unit square with a single diagonal valley crease folded flat (180°).
    perfect_triangle = {
        "vertices_coords": [[0, 0], [1, 0], [1, 1], [0, 1]],
        "edges_vertices": [[0, 1], [1, 2], [2, 3], [3, 0], [0, 2]],
        "edges_assignment": ["B", "B", "B", "B", "V"],
        "edges_foldAngle": [0, 0, 0, 0, 180],
    }
    # Deliberately wrong: triangular sheet with a shallow 45° crease.
    bad_fold = {
        "vertices_coords": [[0, 0], [1, 0], [0.5, 0.5]],
        "edges_vertices": [[0, 1], [1, 2], [2, 0]],
        "edges_assignment": ["B", "B", "V"],
        "edges_foldAngle": [0, 0, 45],
    }

    try:
        import requests
        print(f"Connecting to {server_url} ...")
        r = requests.get(f"{server_url}/health", timeout=5)
        # raise_for_status() instead of `assert`: asserts are stripped under
        # `python -O`, and an HTTPError is caught by the fallback below anyway.
        r.raise_for_status()
        print("  ✓ Server healthy\n")

        def step_and_report(label: str, fold_data: dict, task: str = "triangle"):
            # One fresh session per submission: create → reset → single step.
            print(f"  [{label}]")
            print(f"    Vertices:    {fold_data['vertices_coords']}")
            print(f"    Assignments: {fold_data['edges_assignment']}")

            # Explicit timeouts so a stalled server cannot hang the demo.
            session = requests.post(f"{server_url}/sessions", json={},
                                    timeout=10).json()
            sid = session["session_id"]
            requests.post(f"{server_url}/sessions/{sid}/reset",
                          json={"task_name": task}, timeout=10)
            resp = requests.post(
                f"{server_url}/sessions/{sid}/step",
                json={"fold_data": fold_data},
                timeout=30,
            ).json()
            obs = resp.get("observation", resp)
            reward = obs.get("reward", "?")
            sim = obs.get("shape_similarity", "?")
            stable = obs.get("is_stable", "?")
            error = obs.get("error")

            if error:
                print(f"    ✗ ERROR: {error}")
                print(f"    → reward = {reward}")
            elif isinstance(sim, (int, float)) and isinstance(reward, (int, float)):
                bar = "█" * int(float(sim) * 20)
                print(f"    → shape_similarity = {sim:.3f} {bar}")
                print(f"    → reward = {reward:.2f} / 20.0")
                print(f"    → is_stable = {stable}")
            else:
                # Server replied but without numeric scores — show raw values
                # rather than crashing on the `:.3f` format spec.
                print(f"    → shape_similarity = {sim}")
                print(f"    → reward = {reward}")
                print(f"    → is_stable = {stable}")
            print()

        step_and_report("PERFECT fold (should score ~20)", perfect_triangle)
        step_and_report("WRONG fold (should score low)", bad_fold)

    except Exception as e:
        # Demo must keep going offline: show what the calls would have printed.
        print(f"  [skipping live call — server not reachable: {e}]")
        print("  Expected output:")
        print("    PERFECT fold → shape_similarity ≈ 1.00 | reward ≈ 20.00")
        print("    WRONG fold   → shape_similarity ≈ 0.10 | reward ≈ 2.00")
# ─── ACT 2: Reward Pipeline ───────────────────────────────────────────────────

def act2_reward_pipeline():
    """Explain the two-stage reward design (format + physics) and the GRPO
    training loop, then show the reward-function source snippet."""
    console.rule("[bold cyan]ACT 2 — Reward & Training Pipeline (10%)[/bold cyan]" if HAS_RICH else "ACT 2 — Reward & Training Pipeline (10%)")

    print("""
TWO-STAGE REWARD SIGNAL
───────────────────────

STAGE 1 — Format reward (valid_fold)                [local, fast]
  +1.0  valid FOLD JSON with correct structure
  −0.5  parseable JSON but wrong FOLD schema
  −2.0  can't parse as JSON at all

  Teaches the model the grammar of FOLD before worrying about geometry.

STAGE 2 — Shape match reward (shape_match_reward)   [server, physics]
  Runs the full pipeline:
    1. validate_fold()        → structural checks
    2. simulate(fold_data)    → BFS rotation transform per face
    3. compute_shape_match()  → chamfer distance with 14-rotation alignment
    4. reward = similarity × 20.0   (max = 20.0 for perfect match)

  Why chamfer + rotation search?
  The LLM might fold "correctly" but with a different orientation.
  We check 14 rotations (90° around each axis + mirrors) and take the best.
  This is the same philosophy as AlphaFold's RMSD with alignment.

GRPO TRAINING LOOP
──────────────────
  1. Sample G completions from current policy for a prompt
  2. Score each with [valid_fold, shape_match_reward]
  3. Advantage = normalize(rewards) within the group
  4. Policy gradient step — reinforce better folds, suppress worse
  5. No value function needed — GRPO is purely contrastive within groups

  Why GRPO vs PPO?
  • No critic to train — half the memory
  • Group normalization handles reward scale variance naturally
  • Works well for sparse rewards (many samples are initially 0)
""")

    # Show the reward function code snippet
    # (kept as a plain string so it renders the same with or without rich)
    snippet = """
# reward.py ── two reward functions, one for format, one for geometry

def valid_fold(completions, **kwargs) -> list[float]:
    \"\"\"Local format check — no server needed.\"\"\"
    scores = []
    for completion in completions:
        fold_data = extract_fold_json(completion[0]["content"])
        if fold_data is None:
            scores.append(-2.0)      # can't parse
        elif not has_required_keys(fold_data):
            scores.append(-0.5)      # wrong schema
        elif not has_fold_crease(fold_data):
            scores.append(-0.5)      # no crease = not folding
        else:
            scores.append(1.0)       # valid!
    return scores

def shape_match_reward(completions, task_name, **kwargs) -> list[float]:
    \"\"\"Physics sim + geometry scoring — calls the env server.\"\"\"
    scores = []
    for completion, tname in zip(completions, task_name):
        fold_data = extract_fold_json(completion[0]["content"])
        if fold_data is None:
            scores.append(0.0); continue
        env.reset(task_name=tname)
        result = env.step(OrigamiAction(fold_data=fold_data))
        scores.append(result.reward or 0.0)
    return scores
"""
    if HAS_RICH:
        console.print(Syntax(snippet, "python", theme="monokai", line_numbers=False))
    else:
        print(snippet)
# ─── ACT 3: Training Progress ─────────────────────────────────────────────────

def act3_training_progress():
    """Present the training configuration, then the three results views:
    real reward curves, before/after eval table, and the eval bar chart."""
    console.rule("[bold cyan]ACT 3 — Training Progress (20%)[/bold cyan]" if HAS_RICH else "ACT 3 — Training Progress (20%)")

    print("""
TRAINING SETUP
──────────────
  Model:        Qwen3-32B (32 billion parameters)
  Adapter:      LoRA rank=32, bfloat16 on B200 GPU (Modal cloud)
  Tasks:        all 4 tasks mixed, 200 samples/task
  Steps:        600 total, checkpoint at step 20 available
  Optimizer:    AdamW 8-bit, lr=2e-4, warmup 10%
  Generations:  2 per step (GRPO group size)
""")

    # Each helper degrades gracefully when its data/deps are missing.
    _plot_reward_curves()
    _show_before_after()
    _plot_eval_comparison()
def _load_trainer_state(path: str) -> list[dict]:
    """Load the ``log_history`` list from a HF ``trainer_state.json`` file.

    Returns an empty list when the file is missing or unreadable, is not
    valid JSON, or lacks a ``log_history`` key — callers treat "no logs" as
    a soft failure and simply skip plotting.
    """
    # `json` is already imported at module level; no local re-import needed.
    try:
        with open(path) as f:
            return json.load(f)["log_history"]
    except (OSError, ValueError, KeyError):
        # OSError: missing/unreadable file; ValueError: malformed JSON
        # (json.JSONDecodeError subclasses it); KeyError: schema mismatch.
        return []
def _plot_reward_curves():
    """Print and plot real GRPO reward curves from trainer_state.json.

    Emits a per-step table, summary stats, and an ASCII bar chart on the
    terminal; if matplotlib is importable, additionally saves a 3-panel PNG
    (reward curves, within-batch reward std, gradient norm).
    Returns early (with a notice) when no training log is found.
    """
    import numpy as np

    try:
        import matplotlib
        matplotlib.use("Agg")  # headless backend — we only write a PNG
        import matplotlib.pyplot as plt
        HAS_MPL = True
    except ImportError:
        HAS_MPL = False

    # Load real training logs (checkpoint-30 has full 30-step history)
    log = _load_trainer_state("outputs/trainer_state_30.json")
    if not log:
        print("  [trainer_state.json not found — skipping curves]")
        return

    steps = [e["step"] for e in log]
    total_reward = [e["reward"] for e in log]
    shape_reward = [e["rewards/shape_match_reward/mean"] for e in log]
    valid_fold_r = [e["rewards/valid_fold/mean"] for e in log]
    reward_std = [e["reward_std"] for e in log]
    grad_norm = [e["grad_norm"] for e in log]

    # ── Terminal summary ──────────────────────────────────────────────────────
    print("REAL TRAINING LOG (30 steps, Qwen3-32B + LoRA r=32, B200 GPU)")
    print("──────────────────────────────────────────────────────────────────")
    print(f"  {'Step':>4} {'Total':>7} {'Shape':>7} {'Format':>7} {'Std':>7} {'GradNorm':>10}")
    print("  " + "─" * 56)
    for e in log:
        # Flag steps where any sampled completion failed the format check.
        flag = "  ← dip" if e["rewards/valid_fold/mean"] < 1.0 else ""
        print(
            f"  {e['step']:>4} "
            f"{e['reward']:>7.2f} "
            f"{e['rewards/shape_match_reward/mean']:>7.2f} "
            f"{e['rewards/valid_fold/mean']:>7.2f} "
            f"{e['reward_std']:>7.2f} "
            f"{e['grad_norm']:>10.4f}"
            f"{flag}"
        )
    print()

    # ── Key stats ─────────────────────────────────────────────────────────────
    step1_r = total_reward[0]
    max_r = max(total_reward)
    final_r = total_reward[-1]
    steps_above_20 = sum(1 for r in total_reward if r >= 20.0)
    print(f"  Step 1 reward : {step1_r:.2f} / 21.0  (base model was ALREADY capable)")
    print(f"  Peak reward   : {max_r:.2f} / 21.0  (at step {steps[total_reward.index(max_r)]})")
    print(f"  Final reward  : {final_r:.2f} / 21.0")
    print(f"  Steps ≥ 20.0  : {steps_above_20} / {len(steps)} ({steps_above_20/len(steps)*100:.0f}% of training)")
    if len(total_reward) > 6:
        # The step-7 callout is hard-coded; only print it when the log actually
        # reaches step 7, otherwise short runs would raise IndexError here.
        print(f"  Step 7 dip    : reward dropped to {total_reward[6]:.2f} (valid_fold={valid_fold_r[6]:.2f}) — recovered by step 8")
    print()

    # ── ASCII bar chart ───────────────────────────────────────────────────────
    print("REWARD PER STEP (bar = shape_match/20, ← = format failure)")
    print("─────────────────────────────────────────────────────────────")
    for e in log:
        bar_len = max(0, int(e["rewards/shape_match_reward/mean"] / 20 * 25))
        bar = "█" * bar_len + "░" * (25 - bar_len)
        flag = "  ← format dip" if e["rewards/valid_fold/mean"] < 1.0 else ""
        print(f"  step {e['step']:>2} [{bar}] {e['reward']:5.2f}{flag}")
    print()

    if not HAS_MPL:
        return

    # ── Matplotlib plot ───────────────────────────────────────────────────────
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    fig.suptitle("Origami RL — Real GRPO Training Logs (Qwen3-32B)", fontsize=13, fontweight="bold")

    steps_arr = np.array(steps)
    total_arr = np.array(total_reward)
    shape_arr = np.array(shape_reward)
    std_arr = np.array(reward_std)

    # Left: total reward + shape reward with std band
    ax = axes[0]
    ax.plot(steps_arr, total_arr, color="#2196F3", linewidth=2, label="Total reward (max=21)")
    ax.plot(steps_arr, shape_arr, color="#4CAF50", linewidth=2, label="Shape match (max=20)")
    ax.fill_between(steps_arr, total_arr - std_arr, total_arr + std_arr,
                    alpha=0.15, color="#2196F3")
    ax.axhline(y=21.0, color="#2196F3", linestyle="--", alpha=0.3, linewidth=1)
    ax.axhline(y=20.0, color="#4CAF50", linestyle="--", alpha=0.3, linewidth=1)
    if len(total_reward) > 6:
        # Mark the step-7 dip (guarded for short logs, same as the stat above).
        ax.annotate("Format\ndip", xy=(7, total_reward[6]), xytext=(9, 10),
                    fontsize=8, color="#E91E63",
                    arrowprops=dict(arrowstyle="->", color="#E91E63"))
    ax.set_xlabel("Training Step")
    ax.set_ylabel("Reward")
    ax.set_title("Reward Over Training")
    ax.legend(fontsize=8)
    ax.set_ylim(0, 23)
    ax.grid(True, alpha=0.3)

    # Middle: reward std (convergence signal)
    ax2 = axes[1]
    ax2.bar(steps_arr, std_arr, color="#FF9800", alpha=0.7)
    ax2.set_xlabel("Training Step")
    ax2.set_ylabel("Reward Std Dev")
    ax2.set_title("Within-Batch Variance\n(→ 0 = converged)")
    ax2.grid(True, alpha=0.3, axis="y")

    # Right: grad norm (log scale — spans orders of magnitude)
    ax3 = axes[2]
    ax3.plot(steps_arr, grad_norm, color="#9C27B0", linewidth=1.5)
    ax3.set_xlabel("Training Step")
    ax3.set_ylabel("Gradient Norm")
    ax3.set_title("Gradient Norm")
    ax3.set_yscale("log")
    ax3.grid(True, alpha=0.3)

    plt.tight_layout()
    out_path = "demo_reward_curves.png"
    plt.savefig(out_path, dpi=150, bbox_inches="tight")
    print(f"  [Plot saved → {out_path}]")
    print()
def _show_before_after():
    """Show real base vs checkpoint-20 eval results.

    Prints a plain-text comparison table, an interpretation section, and
    (when rich is available) a styled table with per-task bars. All numbers
    are hard-coded from real eval runs (see comment below).
    """

    # Real numbers from: modal run modal_eval.py --checkpoint base / checkpoint-20
    BASE = {
        "triangle": {"mean": 17.98, "std": 2.02, "valid": 10},
        "half_fold": {"mean": 20.00, "std": 0.00, "valid": 10},
        "quarter_fold": {"mean": 16.63, "std": 0.75, "valid": 10},
        "letter_fold": {"mean": 19.88, "std": 0.35, "valid": 10},
    }
    CK20 = {
        "triangle": {"mean": 19.60, "std": 1.21, "valid": 10},
        "half_fold": {"mean": 19.15, "std": 1.70, "valid": 10},
        "quarter_fold": {"mean": 17.21, "std": 1.10, "valid": 10},
        "letter_fold": {"mean": 19.99, "std": 0.00, "valid": 10},
    }

    print("BEFORE vs AFTER (10 samples each, B200 GPU, n=10)")
    print("─────────────────────────────────────────────────────────────────────")
    print(f"  {'Task':15s} {'Base':>12} {'Ckpt-20':>12} {'Δ':>7}  {'Verdict'}")
    print("  " + "─" * 65)

    base_total, ck_total = 0, 0
    for task in ["triangle", "half_fold", "quarter_fold", "letter_fold"]:
        b, c = BASE[task], CK20[task]
        delta = c["mean"] - b["mean"]
        # ±0.1 band counts as "no change" for the arrow glyph.
        arrow = "▲" if delta > 0.1 else ("▼" if delta < -0.1 else "→")
        verdict = (
            "improved" if delta > 0.5 else
            "slight regression" if delta < -0.5 else
            "stable"
        )
        print(
            f"  {task:15s} "
            f"{b['mean']:5.2f}±{b['std']:.2f}  "
            f"{c['mean']:5.2f}±{c['std']:.2f}  "
            f"{arrow}{abs(delta):5.2f}  "
            f"{verdict}"
        )
        base_total += b["mean"]
        ck_total += c["mean"]

    print("  " + "─" * 65)
    base_avg = base_total / 4
    ck_avg = ck_total / 4
    print(f"  {'AVERAGE':15s} {base_avg:5.2f}        {ck_avg:5.2f}        {ck_avg-base_avg:+.2f}")
    print()

    print("WHAT THESE NUMBERS MEAN")
    print("───────────────────────")
    print(f"""\
Both models produce 100% valid FOLD JSON — the base Qwen3-32B already
understood the format. RL training improved geometric precision:

  triangle:     +1.62  → Biggest win. Diagonal fold tightened.
  quarter_fold: +0.58  → Hardest task (2 folds). RL helped most here.
  letter_fold:  +0.11  → Near ceiling already, minimal room to improve.
  half_fold:    −0.85  → Slight regression with higher variance.
                         (Base was already perfect; RL introduced noise.)

The regression on half_fold is an honest finding and expected:
With only 20 steps and all 4 tasks mixed in the training batch,
the model can't perfectly reinforce every task simultaneously.
This is the classic multi-task RL tradeoff.

KEY INSIGHT: RL acts as a "precision dial" on an already capable model.
Qwen3-32B inherently understands geometry. GRPO sharpens the output
distribution — trading variance for consistency on harder tasks.
""")

    print("WHAT THE TRAINING LOGS REVEAL")
    print("─────────────────────────────")
    print("""\
  Step 1:   reward = 16.93/21 → base model already 80% accurate
  Step 7:   reward = 12.68/21 → format dip (valid_fold=0.25), recovers by step 8
  Step 14:  reward = 21.00/21 → first perfect score
  Step 10+: frac_reward_zero_std = 1.0 → model producing identical outputs,
            converged to a stable high-reward solution
""")

    if HAS_RICH:
        # Visual comparison table
        t = Table(title="Base Model vs checkpoint-20 (n=10 samples)", show_header=True,
                  header_style="bold magenta")
        t.add_column("Task", style="cyan")
        t.add_column("Base mean±std", justify="right")
        t.add_column("Ckpt-20 mean±std", justify="right")
        t.add_column("Δ", justify="right")
        t.add_column("Bar (ckpt-20 / 20)", style="green")
        for task in ["triangle", "half_fold", "quarter_fold", "letter_fold"]:
            b, c = BASE[task], CK20[task]
            delta = c["mean"] - b["mean"]
            # Filled/empty 20-char bar proportional to ckpt-20 mean out of 20.
            bar = "█" * int(c["mean"] / 20 * 20) + "░" * (20 - int(c["mean"] / 20 * 20))
            sign = "+" if delta >= 0 else ""
            color = "green" if delta >= 0 else "red"
            t.add_row(
                task,
                f"{b['mean']:.2f}±{b['std']:.2f}",
                f"{c['mean']:.2f}±{c['std']:.2f}",
                f"[{color}]{sign}{delta:.2f}[/{color}]",
                bar,
            )
        console.print(t)
    print()
def _plot_eval_comparison():
    """Save a grouped bar chart comparing base model vs checkpoint-20 per task.

    No-op when matplotlib/numpy are unavailable. The numbers are the same
    real eval results that _show_before_after prints (n=10 samples/task).
    """
    try:
        import matplotlib
        matplotlib.use("Agg")  # headless backend — we only write a PNG
        import matplotlib.pyplot as plt
        import numpy as np
    except ImportError:
        return

    tasks = ["triangle", "half_fold", "quarter_fold", "letter_fold"]
    base = [17.98, 20.00, 16.63, 19.88]
    ck20 = [19.60, 19.15, 17.21, 19.99]
    b_std = [2.02, 0.00, 0.75, 0.35]
    c_std = [1.21, 1.70, 1.10, 0.00]

    x = np.arange(len(tasks))
    w = 0.35

    fig, ax = plt.subplots(figsize=(9, 5))
    # Return values of ax.bar() are unused — no need to bind them.
    ax.bar(x - w/2, base, w, yerr=b_std, label="Base model",
           color="#90CAF9", edgecolor="white", capsize=5)
    ax.bar(x + w/2, ck20, w, yerr=c_std, label="checkpoint-20",
           color="#1565C0", edgecolor="white", capsize=5)

    ax.axhline(y=20.0, color="green", linestyle="--", alpha=0.4, linewidth=1,
               label="Max shape reward (20)")
    ax.axhline(y=21.0, color="gray", linestyle=":", alpha=0.3, linewidth=1,
               label="Max total reward (21)")

    # Annotate per-task deltas just above each checkpoint error bar.
    for i, (b, c) in enumerate(zip(base, ck20)):
        delta = c - b
        color = "#2E7D32" if delta >= 0 else "#C62828"
        sign = "+" if delta >= 0 else ""
        ax.text(x[i] + w/2, c + c_std[i] + 0.15,
                f"{sign}{delta:.2f}", ha="center", fontsize=9,
                color=color, fontweight="bold")

    ax.set_xticks(x)
    ax.set_xticklabels(tasks, fontsize=10)
    ax.set_ylabel("Mean Reward (n=10 samples)")
    ax.set_ylim(14, 22.5)
    ax.set_title("Eval Results: Base Model vs checkpoint-20\n(Real B200 GPU inference, all formats valid)", fontsize=11)
    ax.legend(fontsize=9)
    ax.grid(True, alpha=0.3, axis="y")

    plt.tight_layout()
    out_path = "demo_eval_comparison.png"
    plt.savefig(out_path, dpi=150, bbox_inches="tight")
    print(f"  [Eval comparison plot saved → {out_path}]")
    print()
# ─── ACT 4: Storytelling Wrap-up ──────────────────────────────────────────────

def act4_storytelling():
    """Close the demo: motivation, why origami is a good RL testbed, the
    system summary, and next steps; ends with a "try it now" panel."""
    console.rule("[bold cyan]ACT 4 — The Big Picture (Storytelling 30%)[/bold cyan]" if HAS_RICH else "ACT 4 — The Big Picture")

    print("""
THE PROBLEM WE'RE SOLVING
─────────────────────────
Origami is a 5,000-year-old art form that humans learn by watching and
doing — not by reading instructions. Teaching an AI to fold requires
bridging language and physical geometry.

This is the same fundamental challenge as:
  • Protein folding (AlphaFold)  — 1D sequence → 3D structure
  • Robot manipulation           — language instructions → physical action
  • Code generation              — specification → working program

WHY ORIGAMI IS A PERFECT RL TESTBED
───────────────────────────────────
  ✓ Ground truth is unambiguous (physics simulation)
  ✓ Reward is continuous and differentiable
  ✓ Difficulty scales naturally (more folds = harder)
  ✓ Output is structured code (JSON), not pixels
  ✓ No human labelers needed — simulator is the oracle

THE FULL SYSTEM IN 3 LINES
──────────────────────────
  env.reset(task_name="triangle")
    → "Fold the paper in half diagonally to make a triangle. Paper: 1×1"

  env.step(OrigamiAction(fold_data={ ... LLM output ... }))
    → { reward: 18.4, shape_similarity: 0.92, is_stable: True }

WHAT COMES NEXT
───────────────
  → More tasks: crane, boat, box, modular origami
  → Richer observation: return rendered image of fold for vision models
  → Multi-step episodes: incremental fold refinement
  → Inverse design: given a 3D target mesh, find the crease pattern
""")

    if HAS_RICH:
        console.print(Panel(
            "[bold green]Try it now:[/bold green]\n\n"
            "  [cyan]pip install openenv-core requests[/cyan]\n\n"
            "  [white]from client import OrigamiEnv[/white]\n"
            "  [white]from origami_server.models import OrigamiAction[/white]\n\n"
            "  [white]with OrigamiEnv(base_url='https://origami-env-production.up.railway.app') as env:[/white]\n"
            "  [white]    env.reset(task_name='triangle')[/white]\n"
            "  [white]    result = env.step(OrigamiAction(fold_data={{...}}))[/white]\n"
            "  [white]    print(result.observation.reward)  # 0.0 to 20.0[/white]",
            title="🦢 Origami RL",
            border_style="green",
        ))
# ─── Main ─────────────────────────────────────────────────────────────────────

def main():
    """CLI entry point: show the banner, then run one act or all four in order."""
    cli = argparse.ArgumentParser(description="Origami RL Demo")
    cli.add_argument("--section", type=int, default=0,
                     help="Run only one section (1-4). 0 = all.")
    cli.add_argument("--server", default="http://localhost:8000",
                     help="Origami env server URL")
    cli.add_argument("--skip-live", action="store_true",
                     help="Skip live environment API calls")
    opts = cli.parse_args()

    print(BANNER)
    time.sleep(0.5)

    # Dispatch table: act number → zero-arg callable.
    acts = {
        1: lambda: act1_environment_innovation(opts.server, opts.skip_live),
        2: act2_reward_pipeline,
        3: act3_training_progress,
        4: act4_storytelling,
    }

    if opts.section:
        # Single-act mode: validate the requested index, then run it.
        act = acts.get(opts.section)
        if act is None:
            print(f"Unknown section {opts.section}. Choose 1-4.")
            sys.exit(1)
        act()
        return

    # Full show: all four acts, pausing for ENTER between them
    # (EOFError means stdin is closed, e.g. piped input — just continue).
    last = max(acts)
    for idx in sorted(acts):
        acts[idx]()
        if idx < last:
            try:
                input("\n  [Press ENTER to continue...]\n")
            except EOFError:
                print()
# Script entry point — only runs when executed directly, not on import.
if __name__ == "__main__":
    main()