origami_env / demo.py
ianalin123's picture
feat: add hackathon demo script and result charts
32d29cd
"""
Origami RL β€” Hackathon Demo Script
===================================
Run: python demo.py
python demo.py --section 1 # run only a specific act
python demo.py --server http://... --skip-live # skip live env calls
Covers all four judging criteria:
ACT 1 β€” Environment Innovation (40%)
ACT 2 β€” The Reward Pipeline (10%)
ACT 3 β€” Training Progress (20%)
ACT 4 β€” Storytelling wrap-up (30%)
"""
import argparse
import json
import sys
import time
# ─── Rich terminal output ──────────────────────────────────────────────────────
# Optional dependency: use `rich` for pretty output when it is installed,
# otherwise fall back to minimal plain-print stand-ins for the names the
# script calls unconditionally (Console / console / Panel / rprint).
try:
    from rich.console import Console
    from rich.panel import Panel
    from rich.syntax import Syntax
    from rich.table import Table
    from rich.progress import Progress, BarColumn, TextColumn
    from rich import print as rprint
except ImportError:
    HAS_RICH = False

    class Console:
        """Plain-text stand-in exposing the two Console methods this script uses."""

        def print(self, *args, **kwargs):
            # Styling keyword arguments are accepted and ignored.
            print(*args)

        def rule(self, title=""):
            # Horizontal divider with the title in the middle.
            print(f"\n{'─'*60} {title} {'─'*60}\n")

    console = Console()

    def Panel(text, **kwargs):
        """Return the panel body unchanged (no border without rich)."""
        return text

    def rprint(*args, **kwargs):
        """Plain print that ignores rich-specific keyword arguments."""
        print(*args)
else:
    HAS_RICH = True
    console = Console()
BANNER = """
╔═══════════════════════════════════════════════════════════╗
β•‘ 🦒 ORIGAMI RL β€” AlphaFold for Paper Folding β•‘
β•‘ β•‘
β•‘ LLM β†’ FOLD crease pattern β†’ physics sim β†’ reward β•‘
β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
"""
# ─── ACT 1: Environment Innovation ────────────────────────────────────────────
def act1_environment_innovation(server_url: str, skip_live: bool):
    """Present ACT 1: explain the FOLD format, the task ladder and the novelty.

    Args:
        server_url: Base URL of the origami environment server, forwarded to
            the live demo.
        skip_live: When true, the live server round-trip at the end is skipped.
    """
    heading = "ACT 1 β€” Environment Innovation (40%)"
    if HAS_RICH:
        heading = "[bold cyan]ACT 1 β€” Environment Innovation (40%)[/bold cyan]"
    console.rule(heading)

    print("""
WHAT IS FOLD FORMAT?
────────────────────
Real origami uses the FOLD file format β€” the same standard used by
MIT's computational origami researchers. It encodes a crease pattern as:
β€’ vertices_coords β€” 2D positions on the flat sheet
β€’ edges_vertices β€” which vertex pairs form edges
β€’ edges_assignment β€” B (boundary) | V (valley fold) | M (mountain fold)
β€’ edges_foldAngle β€” how far to fold each crease (degrees)
WHY IS THIS A HARD PROBLEM FOR AN LLM?
───────────────────────────────────────
βœ— Pure spatial / geometric reasoning β€” no textbook answers
βœ— Discrete graph topology + continuous angles combined
βœ— A single wrong vertex index collapses the whole pattern
βœ— The model must reason about 3D shape from 2D flat layout
βœ— Target shape is described in words β€” no image provided
Think of it like: "describe exactly how to cut and crease a piece of paper
so that when you fold along those creases you get a triangle."
""")

    if not HAS_RICH:
        print("Tasks: triangle (easy) β†’ half_fold β†’ quarter_fold β†’ letter_fold (harder)")
    else:
        # One table row per task, ordered from easiest to hardest.
        task_rows = (
            ("triangle", "Diagonal valley fold", "β˜…β˜†β˜†", "1 (V)", "2"),
            ("half_fold", "Horizontal fold at y=0.5", "β˜…β˜†β˜†", "1 (V)", "2"),
            ("quarter_fold", "Two perpendicular folds", "β˜…β˜…β˜†", "4 (V,V)", "4"),
            ("letter_fold", "Tri-fold like an envelope", "β˜…β˜…β˜†", "2 (V,M)", "3"),
        )
        table = Table(title="Task Progression", show_header=True, header_style="bold magenta")
        table.add_column("Task", style="cyan")
        for column in ("Description", "Difficulty", "Creases", "Faces"):
            table.add_column(column)
        for row in task_rows:
            table.add_row(*row)
        console.print(table)

    print("""
WHAT MAKES IT NOVEL?
────────────────────
Most RL environments use: pixels / game state β†’ discrete action
Our environment uses: text description β†’ structured JSON program
that is then *physically simulated*
The action space is effectively unbounded structured code generation.
The feedback signal requires running a real physics simulator.
This is closer to AlphaFold than to Atari.
""")

    # Live demo β€” perfect triangle fold
    if skip_live:
        return
    _live_demo_perfect_fold(server_url)
def _format_metric(value, spec: str) -> str:
    """Format a numeric metric with ``spec``; pass placeholders through as text.

    The server response may omit a field (the caller substitutes "?") or send
    null, and applying a float format spec to those would raise.
    """
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return format(value, spec)
    return str(value)


def _live_demo_perfect_fold(server_url: str, timeout: float = 5.0):
    """Submit one correct and one wrong triangle fold to the live server.

    Prints the reward, shape similarity and stability reported for each fold.
    This is a best-effort demo step: any failure (missing ``requests``,
    unreachable server, unexpected response) is reported and followed by the
    expected output instead of aborting the demo.

    Args:
        server_url: Base URL of the origami environment server.
        timeout: Per-request timeout in seconds, so a stalled server cannot
            hang the demo.
    """
    print("\n─── LIVE DEMO: Submit a perfect triangle fold ───\n")
    perfect_triangle = {
        "vertices_coords": [[0, 0], [1, 0], [1, 1], [0, 1]],
        "edges_vertices": [[0, 1], [1, 2], [2, 3], [3, 0], [0, 2]],
        "edges_assignment": ["B", "B", "B", "B", "V"],
        "edges_foldAngle": [0, 0, 0, 0, 180],
    }
    bad_fold = {
        "vertices_coords": [[0, 0], [1, 0], [0.5, 0.5]],
        "edges_vertices": [[0, 1], [1, 2], [2, 0]],
        "edges_assignment": ["B", "B", "V"],
        "edges_foldAngle": [0, 0, 45],
    }
    # Avoid "//health" style URLs when the caller passes a trailing slash.
    base_url = server_url.rstrip("/")
    try:
        import requests

        print(f"Connecting to {server_url} ...")
        r = requests.get(f"{base_url}/health", timeout=timeout)
        # Explicit check instead of `assert`, which is stripped under -O.
        if r.status_code != 200:
            raise RuntimeError(f"health check returned HTTP {r.status_code}")
        print(" βœ“ Server healthy\n")

        def step_and_report(label: str, fold_data: dict, task: str = "triangle"):
            """Run one fold through a fresh session and print the outcome."""
            print(f" [{label}]")
            print(f" Vertices: {fold_data['vertices_coords']}")
            print(f" Assignments: {fold_data['edges_assignment']}")
            session = requests.post(
                f"{base_url}/sessions", json={}, timeout=timeout
            ).json()
            sid = session["session_id"]
            requests.post(
                f"{base_url}/sessions/{sid}/reset",
                json={"task_name": task},
                timeout=timeout,
            )
            resp = requests.post(
                f"{base_url}/sessions/{sid}/step",
                json={"fold_data": fold_data},
                timeout=timeout,
            ).json()
            # The metrics are read from "observation" when present, otherwise
            # from the top level of the response.
            obs = resp.get("observation", resp)
            reward = obs.get("reward", "?")
            sim = obs.get("shape_similarity", "?")
            stable = obs.get("is_stable", "?")
            error = obs.get("error")
            if error:
                print(f" β†’ ERROR: {error}")
                print(f" β†’ reward = {reward}")
            else:
                sim_is_number = isinstance(sim, (int, float)) and not isinstance(sim, bool)
                bar = "β–ˆ" * int(float(sim) * 20) if sim_is_number else ""
                print(f" β†’ shape_similarity = {_format_metric(sim, '.3f')} {bar}")
                print(f" β†’ reward = {_format_metric(reward, '.2f')} / 20.0")
                print(f" β†’ is_stable = {stable}")
            print()

        step_and_report("PERFECT fold (should score ~20)", perfect_triangle)
        step_and_report("WRONG fold (should score low)", bad_fold)
    except Exception as e:
        # Deliberately broad: the live call is optional and must never crash
        # the presentation.
        print(f" [skipping live call β€” server not reachable: {e}]")
        print(" Expected output:")
        print(" PERFECT fold β†’ shape_similarity β‰ˆ 1.00 | reward β‰ˆ 20.00")
        print(" WRONG fold β†’ shape_similarity β‰ˆ 0.10 | reward β‰ˆ 2.00")
# ─── ACT 2: Reward Pipeline ───────────────────────────────────────────────────
def act2_reward_pipeline():
    """Present ACT 2: describe the two-stage reward and the GRPO loop.

    Prints the explanatory text, then shows the reward-function source snippet
    (syntax-highlighted when rich is available).
    """
    heading = "ACT 2 β€” Reward & Training Pipeline (10%)"
    if HAS_RICH:
        heading = "[bold cyan]ACT 2 β€” Reward & Training Pipeline (10%)[/bold cyan]"
    console.rule(heading)

    print("""
TWO-STAGE REWARD SIGNAL
───────────────────────
STAGE 1 β€” Format reward (valid_fold) [local, fast]
+1.0 valid FOLD JSON with correct structure
βˆ’0.5 parseable JSON but wrong FOLD schema
βˆ’2.0 can't parse as JSON at all
Teaches the model the grammar of FOLD before worrying about geometry.
STAGE 2 β€” Shape match reward (shape_match_reward) [server, physics]
Runs the full pipeline:
1. validate_fold() β€” structural checks
2. simulate(fold_data) β€” BFS rotation transform per face
3. compute_shape_match() β€” chamfer distance with 14-rotation alignment
4. reward = similarity Γ— 20.0 (max = 20.0 for perfect match)
Why chamfer + rotation search?
The LLM might fold "correctly" but with a different orientation.
We check 14 rotations (90Β° around each axis + mirrors) and take the best.
This is the same philosophy as AlphaFold's RMSD with alignment.
GRPO TRAINING LOOP
──────────────────
1. Sample G completions from current policy for a prompt
2. Score each with [valid_fold, shape_match_reward]
3. Advantage = normalize(rewards) within the group
4. Policy gradient step β€” reinforce better folds, suppress worse
5. No value function needed β€” GRPO is purely contrastive within groups
Why GRPO vs PPO?
β€’ No critic to train β†’ half the memory
β€’ Group normalization handles reward scale variance naturally
β€’ Works well for sparse rewards (many samples are initially 0)
""")

    # Source excerpt displayed to the audience; it is text only and is never
    # executed by this script.
    reward_source = """
# reward.py ── two reward functions, one for format, one for geometry
def valid_fold(completions, **kwargs) -> list[float]:
\"\"\"Local format check β€” no server needed.\"\"\"
scores = []
for completion in completions:
fold_data = extract_fold_json(completion[0]["content"])
if fold_data is None:
scores.append(-2.0) # can't parse
elif not has_required_keys(fold_data):
scores.append(-0.5) # wrong schema
elif not has_fold_crease(fold_data):
scores.append(-0.5) # no crease = not folding
else:
scores.append(1.0) # valid!
return scores
def shape_match_reward(completions, task_name, **kwargs) -> list[float]:
\"\"\"Physics sim + geometry scoring β€” calls the env server.\"\"\"
scores = []
for completion, tname in zip(completions, task_name):
fold_data = extract_fold_json(completion[0]["content"])
if fold_data is None:
scores.append(0.0); continue
env.reset(task_name=tname)
result = env.step(OrigamiAction(fold_data=fold_data))
scores.append(result.reward or 0.0)
return scores
"""
    if not HAS_RICH:
        print(reward_source)
        return
    highlighted = Syntax(reward_source, "python", theme="monokai", line_numbers=False)
    console.print(highlighted)
# ─── ACT 3: Training Progress ─────────────────────────────────────────────────
def act3_training_progress():
    """Present ACT 3: training setup, reward curves and before/after results."""
    heading = "ACT 3 β€” Training Progress (20%)"
    if HAS_RICH:
        heading = "[bold cyan]ACT 3 β€” Training Progress (20%)[/bold cyan]"
    console.rule(heading)

    print("""
TRAINING SETUP
──────────────
Model: Qwen3-32B (32 billion parameters)
Adapter: LoRA rank=32, bfloat16 on B200 GPU (Modal cloud)
Tasks: all 4 tasks mixed, 200 samples/task
Steps: 600 total, checkpoint at step 20 available
Optimizer: AdamW 8-bit, lr=2e-4, warmup 10%
Generations: 2 per step (GRPO group size)
""")

    # Each stage prints its own section; they run in presentation order.
    for stage in (_plot_reward_curves, _show_before_after, _plot_eval_comparison):
        stage()
def _load_trainer_state(path: str) -> list[dict]:
    """Load ``log_history`` from a trainer_state.json file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The list stored under the top-level ``"log_history"`` key, or an empty
        list when the file is missing or unreadable, is not valid JSON, lacks
        that key, or holds a non-list value there. Callers treat an empty list
        as "no data available".
    """
    try:
        with open(path, encoding="utf-8") as f:
            history = json.load(f)["log_history"]
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: missing/unreadable file. ValueError: invalid JSON or bad
        # encoding. KeyError/TypeError: top-level JSON without "log_history".
        # Anything else is a real bug and should surface.
        return []
    return history if isinstance(history, list) else []
def _plot_reward_curves(log=None, state_path="outputs/trainer_state_30.json",
                        out_path="demo_reward_curves.png"):
    """Summarise and plot the reward curves from a training log.

    Prints a per-step table, key statistics and an ASCII bar chart. When
    matplotlib and numpy are importable it also saves a three-panel PNG.

    Args:
        log: Optional list of log entries (dicts). When omitted, the entries
            are loaded from ``state_path``.
        state_path: trainer_state.json file to read when ``log`` is None.
        out_path: Destination of the saved PNG.

    Entries missing any of the metrics used below are skipped rather than
    raising KeyError. The "step 7" narrative lines and plot annotation are
    emitted only when the log actually contains a step-7 entry.
    """
    metric_keys = (
        "step",
        "reward",
        "rewards/shape_match_reward/mean",
        "rewards/valid_fold/mean",
        "reward_std",
        "grad_norm",
    )
    if log is None:
        log = _load_trainer_state(state_path)
    # Keep only entries that carry every metric this report reads.
    log = [e for e in log if all(k in e for k in metric_keys)]
    if not log:
        print(" [trainer_state.json not found β€” skipping curves]")
        return

    steps = [e["step"] for e in log]
    total_reward = [e["reward"] for e in log]
    shape_reward = [e["rewards/shape_match_reward/mean"] for e in log]
    reward_std = [e["reward_std"] for e in log]
    grad_norm = [e["grad_norm"] for e in log]
    # The demo narrative refers to step 7 specifically; look it up by step
    # number instead of assuming it sits at list index 6.
    step7 = next((e for e in log if e["step"] == 7), None)

    # ── Terminal summary ──────────────────────────────────────────────────────
    print("REAL TRAINING LOG (30 steps, Qwen3-32B + LoRA r=32, B200 GPU)")
    print("──────────────────────────────────────────────────────────────────")
    print(f" {'Step':>4} {'Total':>7} {'Shape':>7} {'Format':>7} {'Std':>7} {'GradNorm':>10}")
    print(" " + "─" * 56)
    for e in log:
        flag = " β—€ dip" if e["rewards/valid_fold/mean"] < 1.0 else ""
        print(
            f" {e['step']:>4} "
            f"{e['reward']:>7.2f} "
            f"{e['rewards/shape_match_reward/mean']:>7.2f} "
            f"{e['rewards/valid_fold/mean']:>7.2f} "
            f"{e['reward_std']:>7.2f} "
            f"{e['grad_norm']:>10.4f}"
            f"{flag}"
        )
    print()

    # ── Key stats ─────────────────────────────────────────────────────────────
    step1_r = total_reward[0]
    max_r = max(total_reward)
    final_r = total_reward[-1]
    steps_above_20 = sum(1 for r in total_reward if r >= 20.0)
    print(f" Step 1 reward : {step1_r:.2f} / 21.0 (base model was ALREADY capable)")
    print(f" Peak reward : {max_r:.2f} / 21.0 (at step {steps[total_reward.index(max_r)]})")
    print(f" Final reward : {final_r:.2f} / 21.0")
    print(f" Steps β‰₯ 20.0 : {steps_above_20} / {len(steps)} ({steps_above_20/len(steps)*100:.0f}% of training)")
    if step7 is not None:
        print(f" Step 7 dip : reward dropped to {step7['reward']:.2f} (valid_fold={step7['rewards/valid_fold/mean']:.2f}) β€” recovered by step 8")
    print()

    # ── ASCII bar chart ───────────────────────────────────────────────────────
    print("REWARD PER STEP (bar = shape_match/20, βœ— = format failure)")
    print("─────────────────────────────────────────────────────────────")
    for e in log:
        # Clamp to the 25-cell bar so out-of-range rewards cannot overflow it.
        bar_len = min(25, max(0, int(e["rewards/shape_match_reward/mean"] / 20 * 25)))
        bar = "β–ˆ" * bar_len + "β–‘" * (25 - bar_len)
        flag = " βœ— format dip" if e["rewards/valid_fold/mean"] < 1.0 else ""
        print(f" step {e['step']:>2} [{bar}] {e['reward']:5.2f}{flag}")
    print()

    # Plotting is optional: the text report above must work without numpy or
    # matplotlib installed.
    try:
        import matplotlib
        matplotlib.use("Agg")  # headless backend, no display required
        import matplotlib.pyplot as plt
        import numpy as np
    except ImportError:
        return

    # ── Matplotlib plot ───────────────────────────────────────────────────────
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    fig.suptitle("Origami RL β€” Real GRPO Training Logs (Qwen3-32B)", fontsize=13, fontweight="bold")
    steps_arr = np.array(steps)
    total_arr = np.array(total_reward)
    shape_arr = np.array(shape_reward)
    std_arr = np.array(reward_std)

    # Left: total reward + shape reward with std band
    ax = axes[0]
    ax.plot(steps_arr, total_arr, color="#2196F3", linewidth=2, label="Total reward (max=21)")
    ax.plot(steps_arr, shape_arr, color="#4CAF50", linewidth=2, label="Shape match (max=20)")
    ax.fill_between(steps_arr, total_arr - std_arr, total_arr + std_arr,
                    alpha=0.15, color="#2196F3")
    ax.axhline(y=21.0, color="#2196F3", linestyle="--", alpha=0.3, linewidth=1)
    ax.axhline(y=20.0, color="#4CAF50", linestyle="--", alpha=0.3, linewidth=1)
    # Mark the step-7 dip
    if step7 is not None:
        ax.annotate("Format\ndip", xy=(7, step7["reward"]), xytext=(9, 10),
                    fontsize=8, color="#E91E63",
                    arrowprops=dict(arrowstyle="->", color="#E91E63"))
    ax.set_xlabel("Training Step")
    ax.set_ylabel("Reward")
    ax.set_title("Reward Over Training")
    ax.legend(fontsize=8)
    ax.set_ylim(0, 23)
    ax.grid(True, alpha=0.3)

    # Middle: reward std (convergence signal)
    ax2 = axes[1]
    ax2.bar(steps_arr, std_arr, color="#FF9800", alpha=0.7)
    ax2.set_xlabel("Training Step")
    ax2.set_ylabel("Reward Std Dev")
    ax2.set_title("Within-Batch Variance\n(β†’ 0 = converged)")
    ax2.grid(True, alpha=0.3, axis="y")

    # Right: grad norm
    ax3 = axes[2]
    ax3.plot(steps_arr, grad_norm, color="#9C27B0", linewidth=1.5)
    ax3.set_xlabel("Training Step")
    ax3.set_ylabel("Gradient Norm")
    ax3.set_title("Gradient Norm")
    ax3.set_yscale("log")
    ax3.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(out_path, dpi=150, bbox_inches="tight")
    # Release the figure so pyplot does not keep it alive after saving.
    plt.close(fig)
    print(f" [Plot saved β†’ {out_path}]")
    print()
def _show_before_after():
    """Print the base-model vs checkpoint-20 evaluation comparison.

    Emits a plain-text table with per-task deltas and an average row, two
    narrative sections, and (when rich is available) a styled table with a
    bar per task.
    """
    # Real numbers from: modal run modal_eval.py --checkpoint base / checkpoint-20
    base_stats = {
        "triangle": {"mean": 17.98, "std": 2.02, "valid": 10},
        "half_fold": {"mean": 20.00, "std": 0.00, "valid": 10},
        "quarter_fold": {"mean": 16.63, "std": 0.75, "valid": 10},
        "letter_fold": {"mean": 19.88, "std": 0.35, "valid": 10},
    }
    ckpt_stats = {
        "triangle": {"mean": 19.60, "std": 1.21, "valid": 10},
        "half_fold": {"mean": 19.15, "std": 1.70, "valid": 10},
        "quarter_fold": {"mean": 17.21, "std": 1.10, "valid": 10},
        "letter_fold": {"mean": 19.99, "std": 0.00, "valid": 10},
    }
    task_order = ("triangle", "half_fold", "quarter_fold", "letter_fold")
    divider = " " + "─" * 65

    print("BEFORE vs AFTER (10 samples each, B200 GPU, n=10)")
    print("─────────────────────────────────────────────────────────────────────")
    print(f" {'Task':15s} {'Base':>12} {'Ckpt-20':>12} {'Ξ”':>7} {'Verdict'}")
    print(divider)

    for name in task_order:
        before = base_stats[name]
        after = ckpt_stats[name]
        change = after["mean"] - before["mean"]

        # Arrow uses a tight ±0.1 band; the verdict uses a wider ±0.5 band.
        if change > 0.1:
            arrow = "β–²"
        elif change < -0.1:
            arrow = "β–Ό"
        else:
            arrow = "β‰ˆ"

        if change > 0.5:
            verdict = "improved"
        elif change < -0.5:
            verdict = "slight regression"
        else:
            verdict = "stable"

        row = (
            f" {name:15s} "
            f"{before['mean']:5.2f}Β±{before['std']:.2f} "
            f"{after['mean']:5.2f}Β±{after['std']:.2f} "
            f"{arrow}{abs(change):5.2f} "
            f"{verdict}"
        )
        print(row)

    print(divider)
    base_avg = sum(base_stats[name]["mean"] for name in task_order) / 4
    ck_avg = sum(ckpt_stats[name]["mean"] for name in task_order) / 4
    print(f" {'AVERAGE':15s} {base_avg:5.2f} {ck_avg:5.2f} {ck_avg-base_avg:+.2f}")
    print()

    print("WHAT THESE NUMBERS MEAN")
    print("────────────────────────")
    print("""\
Both models produce 100% valid FOLD JSON β€” the base Qwen3-32B already
understood the format. RL training improved geometric precision:
triangle: +1.62 ← Biggest win. Diagonal fold tightened.
quarter_fold: +0.58 ← Hardest task (2 folds). RL helped most here.
letter_fold: +0.11 ← Near ceiling already, minimal room to improve.
half_fold: βˆ’0.85 ← Slight regression with higher variance.
(Base was already perfect; RL introduced noise.)
The regression on half_fold is an honest finding and expected:
With only 20 steps and all 4 tasks mixed in the training batch,
the model can't perfectly reinforce every task simultaneously.
This is the classic multi-task RL tradeoff.
KEY INSIGHT: RL acts as a "precision dial" on an already capable model.
Qwen3-32B inherently understands geometry. GRPO sharpens the output
distribution β€” trading variance for consistency on harder tasks.
""")

    print("WHAT THE TRAINING LOGS REVEAL")
    print("──────────────────────────────")
    print("""\
Step 1: reward = 16.93/21 β†’ base model already 80% accurate
Step 7: reward = 12.68/21 β†’ format dip (valid_fold=0.25), recovers by step 8
Step 14: reward = 21.00/21 β†’ first perfect score
Step 10+: frac_reward_zero_std = 1.0 β†’ model producing identical outputs,
converged to a stable high-reward solution
""")

    if HAS_RICH:
        # Visual comparison table
        table = Table(title="Base Model vs checkpoint-20 (n=10 samples)", show_header=True,
                      header_style="bold magenta")
        table.add_column("Task", style="cyan")
        table.add_column("Base meanΒ±std", justify="right")
        table.add_column("Ckpt-20 meanΒ±std", justify="right")
        table.add_column("Ξ”", justify="right")
        table.add_column("Bar (ckpt-20 / 20)", style="green")
        for name in task_order:
            before = base_stats[name]
            after = ckpt_stats[name]
            change = after["mean"] - before["mean"]
            # 20-cell bar: one cell per reward point out of 20.
            filled = int(after["mean"] / 20 * 20)
            bar = "β–ˆ" * filled + "β–‘" * (20 - filled)
            if change >= 0:
                sign, color = "+", "green"
            else:
                sign, color = "", "red"
            table.add_row(
                name,
                f"{before['mean']:.2f}Β±{before['std']:.2f}",
                f"{after['mean']:.2f}Β±{after['std']:.2f}",
                f"[{color}]{sign}{change:.2f}[/{color}]",
                bar,
            )
        console.print(table)
        print()
def _plot_eval_comparison(out_path="demo_eval_comparison.png"):
    """Save a grouped bar chart of base model vs checkpoint-20 per task.

    Args:
        out_path: Destination of the saved PNG.

    Does nothing (silently) when matplotlib or numpy is not installed, since
    the chart is an optional extra on top of the text report.
    """
    try:
        import matplotlib
        matplotlib.use("Agg")  # headless backend, no display required
        import matplotlib.pyplot as plt
        import numpy as np
    except ImportError:
        return

    tasks = ["triangle", "half_fold", "quarter_fold", "letter_fold"]
    base = [17.98, 20.00, 16.63, 19.88]
    ck20 = [19.60, 19.15, 17.21, 19.99]
    b_std = [2.02, 0.00, 0.75, 0.35]
    c_std = [1.21, 1.70, 1.10, 0.00]
    x = np.arange(len(tasks))
    w = 0.35  # bar width; the two series sit half a width either side of x

    fig, ax = plt.subplots(figsize=(9, 5))
    ax.bar(x - w/2, base, w, yerr=b_std, label="Base model",
           color="#90CAF9", edgecolor="white", capsize=5)
    ax.bar(x + w/2, ck20, w, yerr=c_std, label="checkpoint-20",
           color="#1565C0", edgecolor="white", capsize=5)
    ax.axhline(y=20.0, color="green", linestyle="--", alpha=0.4, linewidth=1,
               label="Max shape reward (20)")
    ax.axhline(y=21.0, color="gray", linestyle=":", alpha=0.3, linewidth=1,
               label="Max total reward (21)")

    # Annotate deltas just above each checkpoint bar's error cap.
    for i, (b, c) in enumerate(zip(base, ck20)):
        delta = c - b
        color = "#2E7D32" if delta >= 0 else "#C62828"
        sign = "+" if delta >= 0 else ""
        ax.text(x[i] + w/2, c + c_std[i] + 0.15,
                f"{sign}{delta:.2f}", ha="center", fontsize=9,
                color=color, fontweight="bold")

    ax.set_xticks(x)
    ax.set_xticklabels(tasks, fontsize=10)
    ax.set_ylabel("Mean Reward (n=10 samples)")
    ax.set_ylim(14, 22.5)
    ax.set_title("Eval Results: Base Model vs checkpoint-20\n(Real B200 GPU inference, all formats valid)", fontsize=11)
    ax.legend(fontsize=9)
    ax.grid(True, alpha=0.3, axis="y")
    plt.tight_layout()
    plt.savefig(out_path, dpi=150, bbox_inches="tight")
    # Release the figure so pyplot does not keep it alive after saving.
    plt.close(fig)
    print(f" [Eval comparison plot saved β†’ {out_path}]")
    print()
# ─── ACT 4: Storytelling Wrap-up ──────────────────────────────────────────────
def act4_storytelling():
    """Present ACT 4: the motivation, why origami suits RL, and next steps.

    Prints the narrative text and, when rich is available, a "Try it now"
    panel with a client usage example.
    """
    console.rule("[bold cyan]ACT 4 β€” The Big Picture (Storytelling 30%)[/bold cyan]" if HAS_RICH else "ACT 4 β€” The Big Picture")
    print("""
THE PROBLEM WE'RE SOLVING
──────────────────────────
Origami is a 5,000-year-old art form that humans learn by watching and
doing β€” not by reading instructions. Teaching an AI to fold requires
bridging language and physical geometry.
This is the same fundamental challenge as:
β€’ Protein folding (AlphaFold) β€” 1D sequence β†’ 3D structure
β€’ Robot manipulation β€” language instructions β†’ physical action
β€’ Code generation β€” specification β†’ working program
WHY ORIGAMI IS A PERFECT RL TESTBED
─────────────────────────────────────
βœ“ Ground truth is unambiguous (physics simulation)
βœ“ Reward is continuous and differentiable
βœ“ Difficulty scales naturally (more folds = harder)
βœ“ Output is structured code (JSON), not pixels
βœ“ No human labelers needed β€” simulator is the oracle
THE FULL SYSTEM IN 3 LINES
───────────────────────────
env.reset(task_name="triangle")
β†’ "Fold the paper in half diagonally to make a triangle. Paper: 1Γ—1"
env.step(OrigamiAction(fold_data={ ... LLM output ... }))
β†’ { reward: 18.4, shape_similarity: 0.92, is_stable: True }
WHAT COMES NEXT
───────────────
β†’ More tasks: crane, boat, box, modular origami
β†’ Richer observation: return rendered image of fold for vision models
β†’ Multi-step episodes: incremental fold refinement
β†’ Inverse design: given a 3D target mesh, find the crease pattern
""")
    if HAS_RICH:
        # These are plain strings, not f-strings, so braces must NOT be
        # doubled: "{{...}}" would be shown to the audience literally.
        console.print(Panel(
            "[bold green]Try it now:[/bold green]\n\n"
            " [cyan]pip install openenv-core requests[/cyan]\n\n"
            " [white]from client import OrigamiEnv[/white]\n"
            " [white]from origami_server.models import OrigamiAction[/white]\n\n"
            " [white]with OrigamiEnv(base_url='https://origami-env-production.up.railway.app') as env:[/white]\n"
            " [white] env.reset(task_name='triangle')[/white]\n"
            " [white] result = env.step(OrigamiAction(fold_data={...}))[/white]\n"
            " [white] print(result.observation.reward) # 0.0 to 20.0[/white]",
            title="🦒 Origami RL",
            border_style="green",
        ))
# ─── Main ─────────────────────────────────────────────────────────────────────
def main():
    """Parse the command line and run one act, or all four with pauses.

    ``--section 0`` (the default) runs every act in order and waits for ENTER
    between them; a value outside 1-4 prints an error and exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Origami RL Demo")
    parser.add_argument("--section", type=int, default=0,
                        help="Run only one section (1-4). 0 = all.")
    parser.add_argument("--server", default="http://localhost:8000",
                        help="Origami env server URL")
    parser.add_argument("--skip-live", action="store_true",
                        help="Skip live environment API calls")
    args = parser.parse_args()

    print(BANNER)
    time.sleep(0.5)

    # Act number -> zero-argument callable. Act 1 is wrapped because it needs
    # the server options from the command line.
    acts = {
        1: lambda: act1_environment_innovation(args.server, args.skip_live),
        2: act2_reward_pipeline,
        3: act3_training_progress,
        4: act4_storytelling,
    }

    requested = args.section
    if requested == 0:
        # Full run: pause between acts, but not after the last one.
        for number in range(1, 5):
            acts[number]()
            if number >= 4:
                continue
            try:
                input("\n [Press ENTER to continue...]\n")
            except EOFError:
                # stdin is closed (e.g. piped run): keep going without pausing.
                print()
        return

    selected = acts.get(requested)
    if selected is None:
        print(f"Unknown section {requested}. Choose 1-4.")
        sys.exit(1)
    selected()


if __name__ == "__main__":
    main()