ianalin123 Claude Sonnet 4.6 committed on
Commit
c44bdad
·
1 Parent(s): 39c6d23

feat: implement origami RL environment (Phase 1)

Browse files

Core environment built with 4-wave agent team:
- CreaseGraph + PaperState geometry engine (Shapely-based)
- Kawasaki, Maekawa, BLB verifiers with correct cyclic ordering
- 8 FOLD target files (levels 1-3, parallel folds only)
- Dense reward: progress (45%) + economy + validity theorems
- OrigamiEnvironment: code-as-policy + step modes, clone() for GRPO
- Prompt formatter: code-as-policy and step-level templates
- GRPO training script with dry-run mode

28 tests passing. Run: python train.py --dry_run

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

.gitignore CHANGED
@@ -28,3 +28,5 @@ __pycache__/
28
 
29
  # Reference repos (not pushed to HF)
30
  .reference/
 
 
 
28
 
29
  # Reference repos (not pushed to HF)
30
  .reference/
31
+ *.pyc
32
+ __pycache__/
env/__init__.py ADDED
File without changes
env/environment.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import copy
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ from .paper_state import PaperState
8
+ from .rewards import compute_reward, compute_terminal_reward, load_target, target_crease_edges
9
+ from .prompts import (
10
+ code_as_policy_prompt,
11
+ step_level_prompt,
12
+ parse_fold_list,
13
+ parse_single_fold,
14
+ )
15
+ from .verifier import check_all_vertices
16
+
17
+
18
+ TARGETS_DIR = Path(__file__).parent / 'targets'
19
+
20
+
21
class OrigamiEnvironment:
    """
    OpenEnv-compatible origami crease pattern environment.

    Supports two modes:
    - code_as_policy: model outputs complete fold sequence, gets terminal reward
    - step: model outputs one fold at a time, gets per-step reward
    """

    def __init__(
        self,
        mode: str = 'code_as_policy',  # 'code_as_policy' or 'step'
        max_steps: int = 8,
        targets_dir: Optional[str] = None,
    ):
        """
        Args:
            mode: 'code_as_policy' or 'step'.
            max_steps: fold budget per episode (also the prompt's max_folds
                in code_as_policy mode).
            targets_dir: directory of .fold files; defaults to env/targets.
        """
        assert mode in ('code_as_policy', 'step'), f"Unknown mode: {mode}"
        self.mode = mode
        self.max_steps = max_steps
        self.targets_dir = Path(targets_dir) if targets_dir else TARGETS_DIR

        # Per-episode state; populated by reset().
        self.paper: Optional[PaperState] = None
        self.target: Optional[dict] = None
        self.target_name: Optional[str] = None
        self.step_count: int = 0
        self.last_reward: Optional[dict] = None

        # Cache all available targets up front so reset() is cheap.
        self._targets = self._load_all_targets()

    def _load_all_targets(self) -> dict[str, dict]:
        """Load every .fold file in targets_dir, keyed by file stem."""
        targets = {}
        for fold_file in self.targets_dir.glob('*.fold'):
            with open(fold_file) as f:
                targets[fold_file.stem] = json.load(f)
        return targets

    def available_targets(self) -> list[str]:
        """Sorted names of all cached targets."""
        return sorted(self._targets.keys())

    def reset(self, target_name: Optional[str] = None) -> dict:
        """
        Reset environment to start of a new episode.

        Args:
            target_name: name of target (stem of .fold file). If None, picks level-1 randomly.

        Returns:
            observation dict with 'prompt' key containing the LLM prompt string.
        """
        # Local import: random is only needed for the unspecified-target path.
        import random

        if target_name:
            assert target_name in self._targets, f"Unknown target: {target_name}"
            self.target_name = target_name
        else:
            # Default to level-1 targets; fall back to any target if none
            # declare level 1 (targets without a 'level' key count as level 1).
            level1 = [k for k, v in self._targets.items() if v.get('level', 1) == 1]
            self.target_name = random.choice(level1 if level1 else list(self._targets.keys()))

        self.target = self._targets[self.target_name]
        self.paper = PaperState()
        self.step_count = 0
        self.last_reward = None

        return self._get_observation()

    def step(self, action) -> tuple[dict, dict, bool, dict]:
        """
        Execute an action.

        In code_as_policy mode: action is a string (model completion with <folds> tags)
        OR a list of fold dicts already parsed.
        In step mode: action is a string (single fold JSON) or dict.

        Returns:
            (observation, reward, done, info)
        """
        if self.mode == 'code_as_policy':
            return self._step_sequence(action)
        else:
            return self._step_single(action)

    def _step_sequence(self, action) -> tuple[dict, dict, bool, dict]:
        """Execute a complete fold sequence (code-as-policy mode)."""
        # Parse action if it's a string
        if isinstance(action, str):
            try:
                folds = parse_fold_list(action)
            except ValueError as e:
                bad_reward = {'format': 0.0, 'total': -0.1, 'error': str(e)}
                # BUG FIX: record the failure reward (previously only
                # _step_single did this, so logging/observations disagreed
                # between modes).
                self.last_reward = bad_reward
                return self._get_observation(), bad_reward, True, self._info()
        else:
            folds = action  # already a list of dicts

        # Execute each fold sequentially
        last_result = {'valid': True, 'anchored': True, 'new_vertices': [], 'errors': []}
        for fold in folds:
            try:
                p1 = fold['from']
                p2 = fold['to']
                assignment = fold['assignment']
            except (KeyError, TypeError) as e:
                last_result = {'valid': False, 'anchored': False, 'new_vertices': [], 'errors': [str(e)]}
                break

            last_result = self.paper.add_crease(p1, p2, assignment)
            self.step_count += 1
            if not last_result['valid']:
                break  # stop at first invalid fold, partial credit

        # Terminal reward scores whatever portion of the sequence applied.
        reward = compute_terminal_reward(self.paper, self.target)
        self.last_reward = reward
        return self._get_observation(), reward, True, self._info()

    def _step_single(self, action) -> tuple[dict, dict, bool, dict]:
        """Execute a single fold (step mode)."""
        if isinstance(action, str):
            try:
                fold = parse_single_fold(action)
            except ValueError as e:
                # Unparseable completion: small penalty, episode continues
                # unless the step budget is exhausted.
                bad_reward = {'format': 0.0, 'total': -0.1, 'error': str(e)}
                self.last_reward = bad_reward
                done = self.step_count >= self.max_steps
                return self._get_observation(), bad_reward, done, self._info()
        else:
            fold = action

        try:
            p1 = fold['from']
            p2 = fold['to']
            assignment = fold['assignment']
        except (KeyError, TypeError) as e:
            bad_reward = {'format': 0.0, 'total': -0.1, 'error': str(e)}
            self.last_reward = bad_reward
            done = self.step_count >= self.max_steps
            return self._get_observation(), bad_reward, done, self._info()

        result = self.paper.add_crease(p1, p2, assignment)
        self.step_count += 1

        reward = compute_reward(self.paper, result, self.target)
        self.last_reward = reward

        # Episode ends on budget exhaustion or on earning the completion bonus.
        done = (
            self.step_count >= self.max_steps or
            reward.get('completion', 0) > 0
        )
        return self._get_observation(), reward, done, self._info()

    def _get_observation(self) -> dict:
        """Returns observation dict with the LLM prompt and raw state."""
        if self.mode == 'code_as_policy':
            prompt = code_as_policy_prompt(self.target, max_folds=self.max_steps)
        else:
            prompt = step_level_prompt(
                target=self.target,
                paper_state=self.paper,
                step=self.step_count,
                max_steps=self.max_steps,
                last_reward=self.last_reward,
            )

        return {
            'prompt': prompt,
            'target_name': self.target_name,
            'step': self.step_count,
            # Raw edge dict from the crease graph (not serialized FOLD JSON,
            # despite the key name).
            'paper_fold_json': self.paper.graph.edges if self.paper else {},
        }

    def _info(self) -> dict:
        """Returns diagnostic info dict for logging."""
        if self.paper is None:
            return {}

        interior = self.paper.graph.interior_vertices()
        vertex_scores = check_all_vertices(self.paper.graph)

        return {
            'local_foldability': (
                vertex_scores['kawasaki'] == 1.0 and
                vertex_scores['maekawa'] == 1.0
            ),
            'blb_satisfied': vertex_scores['blb'] == 1.0,
            'global_foldability': 'not_checked',  # NP-complete (Bern-Hayes 1996)
            'n_interior_vertices': len(interior),
            'n_creases': len(self.paper.graph.crease_edges()),
            'target_name': self.target_name,
        }

    def state(self) -> dict:
        """Returns current environment state for logging/inspection.

        Assumes reset() has been called (self.paper is not None).
        """
        return {
            'paper': {
                'vertices': dict(self.graph_vertices()),
                'edges': {
                    k: v for k, v in self.paper.graph.edges.items()
                    if v[2] in ('M', 'V')  # creases only; boundary edges omitted
                },
                'fold_history': self.paper.fold_history,
            },
            'target': self.target_name,
            'step': self.step_count,
            'mode': self.mode,
        }

    def graph_vertices(self) -> dict:
        """Helper: the current crease graph's vertex dict (id -> (x, y))."""
        return self.paper.graph.vertices

    def close(self):
        """Cleanup (no resources held; present for OpenEnv compatibility)."""
        pass

    def clone(self) -> 'OrigamiEnvironment':
        """Return a deep copy for parallel evaluation (used in GRPO).

        The paper state is deep-copied; the target dict is shared (treated
        as read-only).
        """
        new_env = OrigamiEnvironment(
            mode=self.mode,
            max_steps=self.max_steps,
            targets_dir=str(self.targets_dir),
        )
        if self.paper is not None:
            new_env.paper = copy.deepcopy(self.paper)
        new_env.target = self.target
        new_env.target_name = self.target_name
        new_env.step_count = self.step_count
        new_env.last_reward = self.last_reward
        return new_env
env/graph.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from typing import Optional
3
+
4
BOUNDARY_TOL = 1e-9
VERTEX_TOL = 1e-9


class CreaseGraph:
    """
    Planar graph representing an origami crease pattern on a unit square.

    Vertices: points in [0,1]x[0,1], deduplicated by proximity.
    Edges: segments between vertices, labeled M (mountain), V (valley), or B (boundary).
    """

    def __init__(self):
        self.vertices: dict[int, tuple[float, float]] = {}
        self.edges: dict[int, tuple[int, int, str]] = {}
        self.vertex_edges: dict[int, list[int]] = {}
        self._next_vertex_id: int = 0
        self._next_edge_id: int = 0

        # Seed the graph with the four unit-square corners...
        for corner in [(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0)]:
            corner_id = self._next_vertex_id
            self._next_vertex_id += 1
            self.vertices[corner_id] = corner
            self.vertex_edges[corner_id] = []

        # ...and the four boundary ('B') edges connecting them.
        for end_a, end_b in ((0, 1), (1, 2), (2, 3), (3, 0)):
            boundary_id = self._next_edge_id
            self._next_edge_id += 1
            self.edges[boundary_id] = (end_a, end_b, 'B')
            self.vertex_edges[end_a].append(boundary_id)
            self.vertex_edges[end_b].append(boundary_id)

    def add_vertex(self, x: float, y: float) -> int:
        """Return the id of the vertex at (x, y), creating one if absent.

        An existing vertex within VERTEX_TOL on both axes is reused.
        """
        for known_id, (kx, ky) in self.vertices.items():
            if abs(kx - x) < VERTEX_TOL and abs(ky - y) < VERTEX_TOL:
                return known_id
        new_id = self._next_vertex_id
        self._next_vertex_id += 1
        self.vertices[new_id] = (float(x), float(y))
        self.vertex_edges[new_id] = []
        return new_id

    def add_edge(self, v1_id: int, v2_id: int, assignment: str) -> int:
        """Add an edge between two vertices and return its id.

        If an edge already connects the pair (in either direction), its id
        is returned and its assignment is left unchanged.
        """
        wanted = frozenset((v1_id, v2_id))
        for known_id, (end_a, end_b, _) in self.edges.items():
            if frozenset((end_a, end_b)) == wanted:
                return known_id
        new_id = self._next_edge_id
        self._next_edge_id += 1
        self.edges[new_id] = (v1_id, v2_id, assignment)
        self.vertex_edges[v1_id].append(new_id)
        self.vertex_edges[v2_id].append(new_id)
        return new_id

    def get_cyclic_edges(self, vertex_id: int) -> list[int]:
        """Edges incident to vertex_id, sorted by angle of the far endpoint."""
        cx, cy = self.vertices[vertex_id]

        def _angle(eid: int) -> float:
            end_a, end_b, _ = self.edges[eid]
            far = end_b if end_a == vertex_id else end_a
            fx, fy = self.vertices[far]
            return float(np.arctan2(fy - cy, fx - cx))

        return sorted(self.vertex_edges[vertex_id], key=_angle)

    def interior_vertices(self) -> list[int]:
        """Ids of vertices strictly inside the square (tolerance-padded)."""
        return [
            vid
            for vid, (x, y) in self.vertices.items()
            if BOUNDARY_TOL < x < 1.0 - BOUNDARY_TOL
            and BOUNDARY_TOL < y < 1.0 - BOUNDARY_TOL
        ]

    def split_edge(self, edge_id: int, new_vertex_id: int) -> tuple[int, int]:
        """Replace edge_id with two edges meeting at new_vertex_id.

        Both halves inherit the original assignment; returns their ids
        in (near-first-endpoint, near-second-endpoint) order.
        """
        end_a, end_b, assignment = self.edges.pop(edge_id)
        for endpoint in (end_a, end_b):
            if edge_id in self.vertex_edges[endpoint]:
                self.vertex_edges[endpoint].remove(edge_id)

        first_half = self._next_edge_id
        self._next_edge_id += 1
        self.edges[first_half] = (end_a, new_vertex_id, assignment)
        self.vertex_edges[end_a].append(first_half)
        self.vertex_edges[new_vertex_id].append(first_half)

        second_half = self._next_edge_id
        self._next_edge_id += 1
        self.edges[second_half] = (new_vertex_id, end_b, assignment)
        self.vertex_edges[new_vertex_id].append(second_half)
        self.vertex_edges[end_b].append(second_half)

        return (first_half, second_half)

    def crease_edges(self) -> list[int]:
        """Ids of all mountain/valley (non-boundary) edges."""
        return [eid for eid, (_, _, kind) in self.edges.items() if kind in ('M', 'V')]

    def boundary_midpoints(self) -> list[tuple[float, float]]:
        """Midpoint of every boundary ('B') edge, in edge-id order."""
        return [
            (
                (self.vertices[end_a][0] + self.vertices[end_b][0]) / 2.0,
                (self.vertices[end_a][1] + self.vertices[end_b][1]) / 2.0,
            )
            for end_a, end_b, kind in self.edges.values()
            if kind == 'B'
        ]
env/paper_state.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from shapely.geometry import LineString, Point, Polygon
3
+ from shapely.ops import unary_union
4
+ from typing import Optional
5
+ from .graph import CreaseGraph, VERTEX_TOL
6
+
7
+ UNIT_SQUARE_CORNERS = [(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0)]
8
+
9
+ _UNIT_SQUARE = Polygon(UNIT_SQUARE_CORNERS)
10
+
11
+
12
class PaperState:
    """
    Represents the evolving crease pattern on a unit square [0,1]x[0,1].
    Uses CreaseGraph for the underlying data structure.
    """

    def __init__(self):
        self.graph = CreaseGraph()
        # Chronological record of every successfully applied fold.
        self.fold_history: list[dict] = []

    def anchor_points(self) -> list[tuple[float, float]]:
        """All valid fold endpoints: the square corners plus every graph vertex.

        A dict is used to deduplicate while keeping insertion order
        (corners first).
        """
        points: dict[tuple[float, float], None] = {}
        for corner in UNIT_SQUARE_CORNERS:
            points[corner] = None
        for vid, (x, y) in self.graph.vertices.items():
            points[(float(x), float(y))] = None
        return list(points.keys())

    def _is_anchor(self, pt: tuple[float, float]) -> bool:
        """True when pt coincides (within VERTEX_TOL) with an anchor point."""
        px, py = pt
        for ax, ay in self.anchor_points():
            if abs(ax - px) < VERTEX_TOL and abs(ay - py) < VERTEX_TOL:
                return True
        return False

    def add_crease(self, p1: list, p2: list, assignment: str) -> dict:
        """
        Insert a straight crease from p1 to p2 with the given assignment.

        Args:
            p1, p2: (x, y) endpoint pairs (any indexable of two numbers).
            assignment: 'M' (mountain) or 'V' (valley).

        Returns:
            {'valid': bool, 'anchored': bool,
             'new_vertices': list of (x, y) created at crossings,
             'errors': list of error codes}

        On success the graph gains: a vertex at every crossing with an
        existing edge, existing edges split at those crossings, and the new
        crease as a chain of edges; a record is appended to fold_history.
        """
        errors: list[str] = []

        if assignment not in ('M', 'V'):
            return {
                'valid': False,
                'anchored': False,
                'new_vertices': [],
                'errors': ['invalid_assignment'],
            }

        p1 = (float(p1[0]), float(p1[1]))
        p2 = (float(p2[0]), float(p2[1]))

        # Anchoring is reported, not enforced — the reward shaping penalizes
        # unanchored folds instead of rejecting them.
        anchored = self._is_anchor(p1) and self._is_anchor(p2)

        seg_len = np.hypot(p2[0] - p1[0], p2[1] - p1[1])
        if seg_len < VERTEX_TOL:
            errors.append('zero_length')
            return {'valid': False, 'anchored': anchored, 'new_vertices': [], 'errors': errors}

        new_line = LineString([p1, p2])

        if not _UNIT_SQUARE.contains(new_line) and not _UNIT_SQUARE.boundary.contains(new_line):
            clipped = new_line.intersection(_UNIT_SQUARE)
            if clipped.is_empty:
                errors.append('outside_bounds')
                return {'valid': False, 'anchored': anchored, 'new_vertices': [], 'errors': errors}
            # NOTE(review): a line only PARTIALLY inside is not clipped —
            # the original endpoints are used below. Confirm this is intended.

        # Find proper crossings with existing edges (skip shared endpoints;
        # collinear/MultiPoint overlaps are ignored).
        intersection_points: list[tuple[float, float]] = []
        for eid, (ev1, ev2, _) in list(self.graph.edges.items()):
            ex1, ey1 = self.graph.vertices[ev1]
            ex2, ey2 = self.graph.vertices[ev2]
            existing_line = LineString([(ex1, ey1), (ex2, ey2)])
            inter = new_line.intersection(existing_line)

            if inter.is_empty:
                continue

            if inter.geom_type == 'Point':
                ix, iy = inter.x, inter.y
                touches_endpoint = (
                    (abs(ix - ex1) < VERTEX_TOL and abs(iy - ey1) < VERTEX_TOL)
                    or (abs(ix - ex2) < VERTEX_TOL and abs(iy - ey2) < VERTEX_TOL)
                )
                if touches_endpoint:
                    continue
                intersection_points.append((ix, iy))
            # MultiPoint or LineString intersections (collinear) are skipped

        # For each crossing: create (or reuse) the vertex, then split every
        # existing edge passing through it so the graph stays planar.
        new_vertex_coords: list[tuple[float, float]] = []
        for ix, iy in intersection_points:
            before = set(self.graph.vertices.keys())
            vid = self.graph.add_vertex(ix, iy)
            if vid not in before:
                new_vertex_coords.append((ix, iy))

            for eid in list(self.graph.edges.keys()):
                if eid not in self.graph.edges:
                    # Removed by a split earlier in this pass.
                    continue
                ev1, ev2, _ = self.graph.edges[eid]
                ex1, ey1 = self.graph.vertices[ev1]
                ex2, ey2 = self.graph.vertices[ev2]
                seg = LineString([(ex1, ey1), (ex2, ey2)])
                pt = Point(ix, iy)
                if seg.distance(pt) < VERTEX_TOL:
                    # Only split edges that do not already end at the vertex.
                    if ev1 != vid and ev2 != vid:
                        self.graph.split_edge(eid, vid)

        v1_id = self.graph.add_vertex(p1[0], p1[1])
        v2_id = self.graph.add_vertex(p2[0], p2[1])

        # Chain the crease through its crossings, ordered by distance from p1.
        waypoints = [p1] + sorted(
            intersection_points,
            key=lambda pt: np.hypot(pt[0] - p1[0], pt[1] - p1[1]),
        ) + [p2]

        waypoint_ids = []
        for wp in waypoints:
            wid = self.graph.add_vertex(wp[0], wp[1])
            waypoint_ids.append(wid)

        for i in range(len(waypoint_ids) - 1):
            wa = waypoint_ids[i]
            wb = waypoint_ids[i + 1]
            if wa != wb:
                self.graph.add_edge(wa, wb, assignment)

        record = {
            'p1': p1,
            'p2': p2,
            'assignment': assignment,
            'anchored': anchored,
            'new_vertices': new_vertex_coords,
        }
        self.fold_history.append(record)

        return {
            'valid': True,
            'anchored': anchored,
            'new_vertices': new_vertex_coords,
            'errors': errors,
        }

    def crease_edges(self) -> list[dict]:
        """M/V edges as {'v1': (x, y), 'v2': (x, y), 'assignment': str} dicts."""
        result = []
        for eid in self.graph.crease_edges():
            v1, v2, assignment = self.graph.edges[eid]
            x1, y1 = self.graph.vertices[v1]
            x2, y2 = self.graph.vertices[v2]
            result.append({'v1': (x1, y1), 'v2': (x2, y2), 'assignment': assignment})
        return result
env/prompts.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from typing import Optional
4
+
5
+ _CORNERS = {(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0)}
6
+ _BOUNDARY_X = {0.0, 1.0}
7
+ _BOUNDARY_Y = {0.0, 1.0}
8
+
9
+
10
+ def _is_corner(x: float, y: float) -> bool:
11
+ return (round(x, 4), round(y, 4)) in _CORNERS
12
+
13
+
14
+ def _is_boundary(x: float, y: float) -> bool:
15
+ return x in _BOUNDARY_X or y in _BOUNDARY_Y
16
+
17
+
18
def format_target_for_prompt(target: dict) -> str:
    """Render the target's M/V creases as one human-readable line each."""
    coords = target["vertices_coords"]
    descriptions = []
    for (a, b), kind in zip(target["edges_vertices"], target["edges_assignment"]):
        if kind not in ("M", "V"):
            continue  # boundary edges are implicit; only creases are shown
        ax, ay = coords[a]
        bx, by = coords[b]
        name = "Mountain" if kind == "M" else "Valley"
        descriptions.append(
            f"{name} fold: ({round(ax, 4)}, {round(ay, 4)}) -> ({round(bx, 4)}, {round(by, 4)})"
        )
    return "\n".join(descriptions)
34
+
35
+
36
def format_anchor_points(paper_state) -> str:
    """Group the paper's anchor points into corners / boundary / intersections."""
    buckets: dict = {"Corners": [], "Boundary pts": [], "Intersections": []}

    for ax, ay in paper_state.anchor_points():
        rx, ry = round(ax, 4), round(ay, 4)
        if _is_corner(rx, ry):
            buckets["Corners"].append((rx, ry))
        elif _is_boundary(rx, ry):
            buckets["Boundary pts"].append((rx, ry))
        else:
            buckets["Intersections"].append((rx, ry))

    rendered = []
    for label, pts in buckets.items():
        if pts:
            joined = " ".join(f"({x},{y})" for x, y in pts)
            rendered.append(f" {label}: {joined}")

    return "\n".join(rendered)
62
+
63
+
64
def format_crease_history(paper_state) -> str:
    """Numbered list of folds placed so far, or 'none' for a fresh sheet."""
    if not paper_state.fold_history:
        return "none"

    rendered = []
    for idx, fold in enumerate(paper_state.fold_history, 1):
        name = "Mountain" if fold["assignment"] == "M" else "Valley"
        (ax, ay), (bx, by) = fold["p1"], fold["p2"]
        rax, ray = round(ax, 4), round(ay, 4)
        rbx, rby = round(bx, 4), round(by, 4)
        rendered.append(f" {idx}. {name} fold: ({rax}, {ray}) -> ({rbx}, {rby})")

    return "\n".join(rendered)
79
+
80
+
81
def format_reward_feedback(reward: Optional[dict]) -> str:
    """
    Render a reward dict as ' k=v k=v ...' feedback for the step prompt.

    Known reward components come first in a fixed order; any extra keys
    follow in dict order. Non-numeric extras (e.g. the 'error' string in a
    parse-failure reward) are rendered verbatim.
    """
    if not reward:
        return "(no feedback yet)"

    keys = ["kawasaki", "maekawa", "blb", "progress", "economy", "total"]
    parts = []
    for k in keys:
        if k in reward:
            parts.append(f"{k}={reward[k]:.2f}")

    for k, v in reward.items():
        if k not in keys:
            # BUG FIX: parse-failure rewards carry a string 'error' value,
            # which previously raised on the ':.2f' float format spec.
            if isinstance(v, (int, float)):
                parts.append(f"{k}={v:.2f}")
            else:
                parts.append(f"{k}={v}")

    return " " + " ".join(parts)
96
+
97
+
98
def code_as_policy_prompt(target: dict, max_folds: int = 8) -> str:
    """Build the single-shot prompt asking for a complete fold sequence."""
    target_block = format_target_for_prompt(target)
    prompt = f"""You are an origami designer. Generate a fold sequence for a unit square [0,1]x[0,1].

TARGET CREASE PATTERN:
{target_block}

RULES (must hold at every interior vertex):
- Kawasaki: alternating sector angles sum equally (each half = 180 degrees)
- Maekawa: |mountain_count - valley_count| = 2
- Big-Little-Big: folds bounding the smallest sector must have opposite types (one M, one V)

INITIAL ANCHOR POINTS (valid fold endpoints — new ones appear when creases intersect):
Corners: (0.0,0.0) (1.0,0.0) (1.0,1.0) (0.0,1.0)
Midpoints: (0.0,0.5) (0.5,0.0) (1.0,0.5) (0.5,1.0)
Note: new anchor points are created at crease intersections.

Output at most {max_folds} folds. Both endpoints must be valid anchor points.
Output ONLY the JSON list, wrapped in <folds> tags:

<folds>
[
{{"instruction": "Describe the fold in plain English", "from": [x1, y1], "to": [x2, y2], "assignment": "V"}},
{{"instruction": "...", "from": [x1, y1], "to": [x2, y2], "assignment": "M"}}
]
</folds>"""
    return prompt
124
+
125
+
126
def step_level_prompt(
    target: dict,
    paper_state,
    step: int,
    max_steps: int,
    last_reward: Optional[dict] = None,
) -> str:
    """Build the per-step prompt showing target, history, anchors and feedback."""
    target_block = format_target_for_prompt(target)
    history_block = format_crease_history(paper_state)
    anchors_block = format_anchor_points(paper_state)
    reward_block = format_reward_feedback(last_reward)

    return f"""You are an origami designer building a crease pattern step by step.

TARGET:
{target_block}

CURRENT STATE (step {step} of {max_steps}):
Creases placed:
{history_block}

AVAILABLE ANCHOR POINTS:
{anchors_block}

LAST REWARD:
{reward_block}

Add the NEXT crease. Both endpoints must be listed anchor points above.
Output ONLY valid JSON (no extra text):
{{"instruction": "...", "from": [x1, y1], "to": [x2, y2], "assignment": "M" or "V"}}"""
156
+
157
+
158
def parse_fold_list(completion: str) -> list[dict]:
    """
    Extract and validate the JSON fold list from a <folds>...</folds> block.

    Returns folds normalized to float coordinates, each with an
    'instruction' field (defaulting to ''). Raises ValueError on any
    structural problem.
    """
    tag_match = re.search(r"<folds>(.*?)</folds>", completion, re.IGNORECASE | re.DOTALL)
    if tag_match is None:
        raise ValueError("No <folds>...</folds> tags found in completion")

    try:
        parsed = json.loads(tag_match.group(1).strip())
    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to parse JSON inside <folds> tags: {e}") from e

    if not isinstance(parsed, list):
        raise ValueError(f"Expected a JSON list inside <folds> tags, got {type(parsed).__name__}")

    def _coerce_point(value, index: int, field: str) -> list[float]:
        # A point must be a JSON list of exactly two numbers.
        ok = (
            isinstance(value, list)
            and len(value) == 2
            and all(isinstance(c, (int, float)) for c in value)
        )
        if not ok:
            raise ValueError(f"Fold {index} '{field}' must be a list of 2 numbers, got {value!r}")
        return [float(value[0]), float(value[1])]

    normalized = []
    for index, fold in enumerate(parsed):
        if not isinstance(fold, dict):
            raise ValueError(f"Fold {index} is not a dict: {fold!r}")

        for field in ("from", "to", "assignment"):
            if field not in fold:
                raise ValueError(f"Fold {index} missing required field '{field}'")

        src = _coerce_point(fold["from"], index, "from")
        dst = _coerce_point(fold["to"], index, "to")

        if not isinstance(fold["assignment"], str):
            raise ValueError(f"Fold {index} 'assignment' must be a string")

        normalized.append(
            {
                "from": src,
                "to": dst,
                "assignment": fold["assignment"],
                "instruction": fold.get("instruction", ""),
            }
        )

    return normalized
212
+
213
+
214
def parse_single_fold(completion: str) -> dict:
    """
    Extract the outermost {...} span from a completion and validate that it
    parses to a dict carrying 'from', 'to' and 'assignment'.

    Unlike parse_fold_list, values are returned as-parsed (no float
    normalization). Raises ValueError on any problem.
    """
    brace_open = completion.find("{")
    brace_close = completion.rfind("}")
    if brace_open == -1 or brace_close == -1 or brace_close <= brace_open:
        raise ValueError("No JSON object found in completion")

    snippet = completion[brace_open : brace_close + 1]
    try:
        fold = json.loads(snippet)
    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to parse JSON from completion: {e}") from e

    if not isinstance(fold, dict):
        raise ValueError(f"Expected a JSON object, got {type(fold).__name__}")

    for field in ("from", "to", "assignment"):
        if field not in fold:
            raise ValueError(f"Missing required field '{field}' in fold JSON")

    return fold
env/rewards.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from .verifier import check_all_vertices, geometric_crease_coverage
3
+ from .paper_state import PaperState
4
+
5
+
6
def load_target(target_path: str) -> dict:
    """Read a .fold target file (JSON on disk) and return its contents."""
    with open(target_path) as handle:
        return json.load(handle)
10
+
11
+
12
def target_crease_edges(target: dict) -> list[dict]:
    """
    Extract crease edges from a FOLD target dict as list of
    {'v1': (x1,y1), 'v2': (x2,y2), 'assignment': 'M'|'V'} dicts.

    Boundary ('B') edges are excluded.
    """
    coords = target['vertices_coords']
    pairs = zip(target['edges_vertices'], (
        target['edges_assignment'][i] for i in range(len(target['edges_vertices']))
    ))
    return [
        {
            'v1': tuple(coords[a]),
            'v2': tuple(coords[b]),
            'assignment': kind,
        }
        for (a, b), kind in pairs
        if kind in ('M', 'V')
    ]
28
+
29
+
30
def compute_reward(
    state: PaperState,
    action_result: dict,
    target: dict,
) -> dict:
    """
    Compute the full reward dict for a fold action.

    Args:
        state: current PaperState AFTER the action was applied
        action_result: {'valid': bool, 'anchored': bool, 'new_vertices': list, 'errors': list}
        target: FOLD target dict

    Returns dict with keys:
        format, anchored, kawasaki, maekawa, blb, progress, economy, completion, efficiency, total
    """
    # Gate 1: format — an action that failed to parse/apply short-circuits
    # everything else with a flat penalty.
    if not action_result.get('valid', False):
        return {'format': 0.0, 'total': -0.1}

    reward = {'format': 1.0}

    # Gate 2: anchoring — unanchored endpoints earn only partial credit.
    reward['anchored'] = 1.0 if action_result.get('anchored', False) else 0.3

    # Vertex-level flat-foldability theorem checks over interior vertices.
    vertex_scores = check_all_vertices(state.graph)
    reward['kawasaki'] = vertex_scores['kawasaki']
    reward['maekawa'] = vertex_scores['maekawa']
    reward['blb'] = vertex_scores['blb']

    # Geometric progress toward the target crease pattern.
    coverage, economy = geometric_crease_coverage(state, target_crease_edges(target))
    reward['progress'] = coverage
    reward['economy'] = economy

    # Completion bonus: high coverage AND all theorem checks fully satisfied.
    theorems_ok = (
        reward['kawasaki'] == 1.0
        and reward['maekawa'] == 1.0
        and reward['blb'] == 1.0
    )
    reward['completion'] = 10.0 if (coverage > 0.9 and theorems_ok) else 0.0

    # Small per-step cost to discourage wasted folds.
    reward['efficiency'] = -0.01

    # Weighted total: progress dominates the shaped terms.
    reward['total'] = (
        0.05 * reward['anchored'] +
        0.08 * reward['kawasaki'] +
        0.07 * reward['maekawa'] +
        0.05 * reward['blb'] +
        0.45 * reward['progress'] +
        0.10 * reward['economy'] +
        reward['completion'] +
        reward['efficiency']
    )
    return reward
88
+
89
+
90
def compute_terminal_reward(state: PaperState, target: dict) -> dict:
    """Score the final state of a complete fold sequence.

    Reuses compute_reward with a synthetic 'applied cleanly' action result,
    so the terminal reward reflects only the resulting crease pattern.
    """
    applied_ok = {'valid': True, 'anchored': True, 'new_vertices': [], 'errors': []}
    return compute_reward(state, applied_ok, target)
env/targets/__init__.py ADDED
File without changes
env/targets/accordion_3h.fold ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vertices_coords": [
3
+ [0.0, 0.0],
4
+ [1.0, 0.0],
5
+ [1.0, 1.0],
6
+ [0.0, 1.0],
7
+ [0.0, 0.25],
8
+ [1.0, 0.25],
9
+ [0.0, 0.5],
10
+ [1.0, 0.5],
11
+ [0.0, 0.75],
12
+ [1.0, 0.75]
13
+ ],
14
+ "edges_vertices": [
15
+ [0, 1],
16
+ [1, 5],
17
+ [5, 7],
18
+ [7, 9],
19
+ [9, 2],
20
+ [2, 3],
21
+ [3, 8],
22
+ [8, 6],
23
+ [6, 4],
24
+ [4, 0],
25
+ [4, 5],
26
+ [6, 7],
27
+ [8, 9]
28
+ ],
29
+ "edges_assignment": [
30
+ "B",
31
+ "B",
32
+ "B",
33
+ "B",
34
+ "B",
35
+ "B",
36
+ "B",
37
+ "B",
38
+ "B",
39
+ "B",
40
+ "V",
41
+ "M",
42
+ "V"
43
+ ],
44
+ "edges_foldAngle": [
45
+ 0,
46
+ 0,
47
+ 0,
48
+ 0,
49
+ 0,
50
+ 0,
51
+ 0,
52
+ 0,
53
+ 0,
54
+ 0,
55
+ -180,
56
+ -180,
57
+ -180
58
+ ],
59
+ "faces_vertices": [
60
+ [0, 1, 5, 4],
61
+ [4, 5, 7, 6],
62
+ [6, 7, 9, 8],
63
+ [8, 9, 2, 3]
64
+ ],
65
+ "level": 3,
66
+ "description": "Three alternating horizontal folds at y=0.25 (valley), y=0.5 (mountain), y=0.75 (valley) forming an accordion"
67
+ }
env/targets/accordion_4h.fold ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vertices_coords": [
3
+ [0.0, 0.0],
4
+ [1.0, 0.0],
5
+ [1.0, 1.0],
6
+ [0.0, 1.0],
7
+ [0.0, 0.2],
8
+ [1.0, 0.2],
9
+ [0.0, 0.4],
10
+ [1.0, 0.4],
11
+ [0.0, 0.6],
12
+ [1.0, 0.6],
13
+ [0.0, 0.8],
14
+ [1.0, 0.8]
15
+ ],
16
+ "edges_vertices": [
17
+ [0, 1],
18
+ [1, 5],
19
+ [5, 7],
20
+ [7, 9],
21
+ [9, 11],
22
+ [11, 2],
23
+ [2, 3],
24
+ [3, 10],
25
+ [10, 8],
26
+ [8, 6],
27
+ [6, 4],
28
+ [4, 0],
29
+ [4, 5],
30
+ [6, 7],
31
+ [8, 9],
32
+ [10, 11]
33
+ ],
34
+ "edges_assignment": [
35
+ "B",
36
+ "B",
37
+ "B",
38
+ "B",
39
+ "B",
40
+ "B",
41
+ "B",
42
+ "B",
43
+ "B",
44
+ "B",
45
+ "B",
46
+ "B",
47
+ "V",
48
+ "M",
49
+ "V",
50
+ "M"
51
+ ],
52
+ "edges_foldAngle": [
53
+ 0,
54
+ 0,
55
+ 0,
56
+ 0,
57
+ 0,
58
+ 0,
59
+ 0,
60
+ 0,
61
+ 0,
62
+ 0,
63
+ 0,
64
+ 0,
65
+ -180,
66
+ -180,
67
+ -180,
68
+ -180
69
+ ],
70
+ "faces_vertices": [
71
+ [0, 1, 5, 4],
72
+ [4, 5, 7, 6],
73
+ [6, 7, 9, 8],
74
+ [8, 9, 11, 10],
75
+ [10, 11, 2, 3]
76
+ ],
77
+ "level": 3,
78
+ "description": "Four alternating horizontal folds at y=0.2 (valley), y=0.4 (mountain), y=0.6 (valley), y=0.8 (mountain) forming an accordion"
79
+ }
env/targets/diagonal_anti.fold ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vertices_coords": [
3
+ [0.0, 0.0],
4
+ [1.0, 0.0],
5
+ [1.0, 1.0],
6
+ [0.0, 1.0]
7
+ ],
8
+ "edges_vertices": [
9
+ [0, 1],
10
+ [1, 2],
11
+ [2, 3],
12
+ [3, 0],
13
+ [1, 3]
14
+ ],
15
+ "edges_assignment": [
16
+ "B",
17
+ "B",
18
+ "B",
19
+ "B",
20
+ "M"
21
+ ],
22
+ "edges_foldAngle": [
23
+ 0,
24
+ 0,
25
+ 0,
26
+ 0,
27
+ -180
28
+ ],
29
+ "faces_vertices": [
30
+ [0, 1, 3],
31
+ [1, 2, 3]
32
+ ],
33
+ "level": 1,
34
+ "description": "One mountain fold along the anti-diagonal from (1,0) to (0,1)"
35
+ }
env/targets/diagonal_main.fold ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vertices_coords": [
3
+ [0.0, 0.0],
4
+ [1.0, 0.0],
5
+ [1.0, 1.0],
6
+ [0.0, 1.0]
7
+ ],
8
+ "edges_vertices": [
9
+ [0, 1],
10
+ [1, 2],
11
+ [2, 3],
12
+ [3, 0],
13
+ [0, 2]
14
+ ],
15
+ "edges_assignment": [
16
+ "B",
17
+ "B",
18
+ "B",
19
+ "B",
20
+ "V"
21
+ ],
22
+ "edges_foldAngle": [
23
+ 0,
24
+ 0,
25
+ 0,
26
+ 0,
27
+ -180
28
+ ],
29
+ "faces_vertices": [
30
+ [0, 1, 2],
31
+ [0, 2, 3]
32
+ ],
33
+ "level": 1,
34
+ "description": "One valley fold along the main diagonal from (0,0) to (1,1)"
35
+ }
env/targets/half_horizontal.fold ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vertices_coords": [
3
+ [0.0, 0.0],
4
+ [1.0, 0.0],
5
+ [1.0, 1.0],
6
+ [0.0, 1.0],
7
+ [0.0, 0.5],
8
+ [1.0, 0.5]
9
+ ],
10
+ "edges_vertices": [
11
+ [0, 1],
12
+ [1, 5],
13
+ [5, 2],
14
+ [2, 3],
15
+ [3, 4],
16
+ [4, 0],
17
+ [4, 5]
18
+ ],
19
+ "edges_assignment": [
20
+ "B",
21
+ "B",
22
+ "B",
23
+ "B",
24
+ "B",
25
+ "B",
26
+ "V"
27
+ ],
28
+ "edges_foldAngle": [
29
+ 0,
30
+ 0,
31
+ 0,
32
+ 0,
33
+ 0,
34
+ 0,
35
+ -180
36
+ ],
37
+ "faces_vertices": [
38
+ [0, 1, 5, 4],
39
+ [4, 5, 2, 3]
40
+ ],
41
+ "level": 1,
42
+ "description": "One valley fold along y=0.5, folding the paper in half horizontally"
43
+ }
env/targets/half_vertical.fold ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vertices_coords": [
3
+ [0.0, 0.0],
4
+ [1.0, 0.0],
5
+ [1.0, 1.0],
6
+ [0.0, 1.0],
7
+ [0.5, 0.0],
8
+ [0.5, 1.0]
9
+ ],
10
+ "edges_vertices": [
11
+ [0, 4],
12
+ [4, 1],
13
+ [1, 2],
14
+ [2, 5],
15
+ [5, 3],
16
+ [3, 0],
17
+ [4, 5]
18
+ ],
19
+ "edges_assignment": [
20
+ "B",
21
+ "B",
22
+ "B",
23
+ "B",
24
+ "B",
25
+ "B",
26
+ "M"
27
+ ],
28
+ "edges_foldAngle": [
29
+ 0,
30
+ 0,
31
+ 0,
32
+ 0,
33
+ 0,
34
+ 0,
35
+ -180
36
+ ],
37
+ "faces_vertices": [
38
+ [0, 4, 5, 3],
39
+ [4, 1, 2, 5]
40
+ ],
41
+ "level": 1,
42
+ "description": "One mountain fold along x=0.5, folding the paper in half vertically"
43
+ }
env/targets/thirds_h.fold ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vertices_coords": [
3
+ [0.0, 0.0],
4
+ [1.0, 0.0],
5
+ [1.0, 1.0],
6
+ [0.0, 1.0],
7
+ [0.0, 0.3333333333333333],
8
+ [1.0, 0.3333333333333333],
9
+ [0.0, 0.6666666666666666],
10
+ [1.0, 0.6666666666666666]
11
+ ],
12
+ "edges_vertices": [
13
+ [0, 1],
14
+ [1, 5],
15
+ [5, 7],
16
+ [7, 2],
17
+ [2, 3],
18
+ [3, 6],
19
+ [6, 4],
20
+ [4, 0],
21
+ [4, 5],
22
+ [6, 7]
23
+ ],
24
+ "edges_assignment": [
25
+ "B",
26
+ "B",
27
+ "B",
28
+ "B",
29
+ "B",
30
+ "B",
31
+ "B",
32
+ "B",
33
+ "V",
34
+ "V"
35
+ ],
36
+ "edges_foldAngle": [
37
+ 0,
38
+ 0,
39
+ 0,
40
+ 0,
41
+ 0,
42
+ 0,
43
+ 0,
44
+ 0,
45
+ -180,
46
+ -180
47
+ ],
48
+ "faces_vertices": [
49
+ [0, 1, 5, 4],
50
+ [4, 5, 7, 6],
51
+ [6, 7, 2, 3]
52
+ ],
53
+ "level": 2,
54
+ "description": "Two parallel valley folds at y=1/3 and y=2/3, dividing the paper into horizontal thirds"
55
+ }
env/targets/thirds_v.fold ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vertices_coords": [
3
+ [0.0, 0.0],
4
+ [1.0, 0.0],
5
+ [1.0, 1.0],
6
+ [0.0, 1.0],
7
+ [0.3333333333333333, 0.0],
8
+ [0.6666666666666666, 0.0],
9
+ [0.3333333333333333, 1.0],
10
+ [0.6666666666666666, 1.0]
11
+ ],
12
+ "edges_vertices": [
13
+ [0, 4],
14
+ [4, 5],
15
+ [5, 1],
16
+ [1, 2],
17
+ [2, 7],
18
+ [7, 6],
19
+ [6, 3],
20
+ [3, 0],
21
+ [4, 6],
22
+ [5, 7]
23
+ ],
24
+ "edges_assignment": [
25
+ "B",
26
+ "B",
27
+ "B",
28
+ "B",
29
+ "B",
30
+ "B",
31
+ "B",
32
+ "B",
33
+ "M",
34
+ "M"
35
+ ],
36
+ "edges_foldAngle": [
37
+ 0,
38
+ 0,
39
+ 0,
40
+ 0,
41
+ 0,
42
+ 0,
43
+ 0,
44
+ 0,
45
+ -180,
46
+ -180
47
+ ],
48
+ "faces_vertices": [
49
+ [0, 4, 6, 3],
50
+ [4, 5, 7, 6],
51
+ [5, 1, 2, 7]
52
+ ],
53
+ "level": 2,
54
+ "description": "Two parallel mountain folds at x=1/3 and x=2/3, dividing the paper into vertical thirds"
55
+ }
env/targets/validator.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Validates all .fold target files against origami theorems.
3
+ Run directly: python -m env.targets.validator
4
+ """
5
+ import json
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from ..graph import CreaseGraph
11
+ from ..verifier import check_kawasaki_at_vertex, check_maekawa_at_vertex, check_blb_at_vertex
12
+
13
+
14
def build_graph_from_fold(fold_data: dict) -> CreaseGraph:
    """
    Rebuild a CreaseGraph from a parsed FOLD-format dictionary.

    Used by the target validator to re-check theorem compliance of the
    stored crease patterns.
    """
    graph = CreaseGraph()

    # File vertex index -> graph-assigned vertex id (add_vertex dedups
    # coincident points, so ids may not equal file indices).
    id_of = {
        idx: graph.add_vertex(float(x), float(y))
        for idx, (x, y) in enumerate(fold_data['vertices_coords'])
    }

    # Boundary edges created by the CreaseGraph constructor may coincide
    # with entries here; add_edge is expected to deduplicate them.
    for (a, b), assignment in zip(fold_data['edges_vertices'],
                                  fold_data['edges_assignment']):
        graph.add_edge(id_of[a], id_of[b], assignment)

    return graph
39
+
40
+
41
def validate_target(fold_path: str) -> dict:
    """
    Validate one .fold target file against structural and theorem checks.

    Returns {'file': str, 'valid': bool, 'issues': list[str],
    'interior_vertices': int}; interior_vertices is -1 when required
    fields are missing and the graph could not be built.
    """
    with open(fold_path) as fh:
        fold_data = json.load(fh)

    # Structural sanity first: required fields must exist before any
    # deeper geometric checks can run.
    issues = [
        f"Missing field: {field}"
        for field in ('vertices_coords', 'edges_vertices',
                      'edges_assignment', 'edges_foldAngle')
        if field not in fold_data
    ]
    if issues:
        return {
            'file': os.path.basename(fold_path),
            'valid': False,
            'issues': issues,
            'interior_vertices': -1,
        }

    n_edges = len(fold_data['edges_vertices'])
    for field in ('edges_assignment', 'edges_foldAngle'):
        if len(fold_data[field]) != n_edges:
            issues.append(f"{field} length mismatch")

    # Theorem checks at every interior vertex of the reconstructed graph.
    graph = build_graph_from_fold(fold_data)
    interior = graph.interior_vertices()

    for v_id in interior:
        kaw_ok, alt_sum = check_kawasaki_at_vertex(v_id, graph)
        if not kaw_ok:
            issues.append(f"Kawasaki violated at vertex {v_id} (alt_sum={alt_sum:.6f})")

        if not check_maekawa_at_vertex(v_id, graph):
            issues.append(f"Maekawa violated at vertex {v_id}")

        blb_violations = check_blb_at_vertex(v_id, graph)
        if blb_violations:
            issues.append(f"BLB violated at vertex {v_id}: {blb_violations}")

    return {
        'file': os.path.basename(fold_path),
        'valid': not issues,
        'issues': issues,
        'interior_vertices': len(interior),
    }
88
+
89
+
90
def validate_all(targets_dir: str = None) -> bool:
    """Validate every .fold file in targets_dir (default: this package's
    own directory). Prints a per-file report; returns True iff all pass."""
    root = Path(targets_dir) if targets_dir is not None else Path(__file__).parent

    fold_files = sorted(root.glob('*.fold'))
    if not fold_files:
        print("No .fold files found")
        return False

    all_pass = True
    for fold_path in fold_files:
        result = validate_target(str(fold_path))
        status = "OK" if result['valid'] else "FAIL"
        print(f" [{status}] {result['file']} — {result['interior_vertices']} interior vertices")
        for issue in result['issues']:
            print(f" ! {issue}")
        all_pass = all_pass and result['valid']

    return all_pass
114
+
115
+
116
if __name__ == '__main__':
    # CLI entry point: exit code 0 only when every target file passes
    # validation, so CI/shell callers can gate on it.
    print("Validating targets...")
    ok = validate_all()
    sys.exit(0 if ok else 1)
env/targets/validator_check.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Quick structural sanity check of all .fold target files.

Asserts that the per-edge arrays line up and that edges/faces only
reference existing vertices, then prints a one-line summary per file.
"""
import json
import os

# Resolve the targets directory relative to this file instead of a
# hard-coded absolute path, so the check runs on any machine/checkout.
targets_dir = os.path.dirname(os.path.abspath(__file__))

# Sorted for deterministic report order across filesystems.
for fname in sorted(os.listdir(targets_dir)):
    if not fname.endswith(".fold"):
        continue
    with open(os.path.join(targets_dir, fname)) as f:
        d = json.load(f)
    n_v = len(d["vertices_coords"])
    n_e = len(d["edges_vertices"])
    # Per-edge arrays must be parallel to edges_vertices.
    assert len(d["edges_assignment"]) == n_e, f"{fname}: assignment length mismatch"
    assert len(d["edges_foldAngle"]) == n_e, f"{fname}: foldAngle length mismatch"
    for e in d["edges_vertices"]:
        assert e[0] < n_v and e[1] < n_v, f"{fname}: edge references invalid vertex"
    for face in d["faces_vertices"]:
        for vi in face:
            assert vi < n_v, f"{fname}: face references invalid vertex"
    creases = [i for i, a in enumerate(d["edges_assignment"]) if a in ('M', 'V')]
    print(f"{fname}: {n_v} vertices, {n_e} edges, {len(creases)} creases, level={d.get('level','?')} OK")
env/verifier.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from .graph import CreaseGraph
3
+ from .paper_state import PaperState
4
+
5
+
6
def _compute_sector_angles(vertex_id: int, graph: CreaseGraph) -> list[float]:
    """Return the CCW sector angles between consecutive cyclic edges at a vertex."""
    ordered = graph.get_cyclic_edges(vertex_id)
    vx, vy = graph.vertices[vertex_id]

    def ray_angle(eid):
        # Direction from the vertex toward the edge's far endpoint.
        a, b, _ = graph.edges[eid]
        far = b if a == vertex_id else a
        fx, fy = graph.vertices[far]
        return np.arctan2(fy - vy, fx - vx)

    rays = [ray_angle(eid) for eid in ordered]
    count = len(rays)

    sectors = []
    for i in range(count):
        span = rays[(i + 1) % count] - rays[i]
        # Normalize the wrap-around difference into [0, 2π).
        if span < 0:
            span += 2 * np.pi
        if span > 2 * np.pi:
            span -= 2 * np.pi
        sectors.append(span)

    return sectors
29
+
30
+
31
def check_kawasaki_at_vertex(vertex_id: int, graph: CreaseGraph) -> tuple[bool, float]:
    """
    Test the Kawasaki-Justin condition at one vertex.

    For a flat-foldable interior vertex with 2n creases, the alternating
    sum of consecutive sector angles vanishes (equivalently, the odd- and
    even-indexed sectors each sum to π).

    Returns (satisfied, |alternating_sum|):
      * (True, 0.0) when the vertex has degree < 4 (not an interior fold
        vertex yet),
      * (False, inf) for odd degree (flat folding is impossible there).
    """
    degree = len(graph.get_cyclic_edges(vertex_id))

    if degree % 2:
        return (False, float('inf'))
    if degree < 4:
        return (True, 0.0)

    # Alternating +/- accumulation over the CCW sector sequence.
    signed = 0.0
    sign = 1.0
    for sector in _compute_sector_angles(vertex_id, graph):
        signed += sign * sector
        sign = -sign

    residual = abs(signed)
    return (residual < 1e-9, residual)
55
+
56
+
57
def check_maekawa_at_vertex(vertex_id: int, graph: CreaseGraph) -> bool:
    """
    Test the Maekawa-Justin condition at one vertex.

    Counts mountain ('M') and valley ('V') creases incident to the vertex;
    boundary ('B') edges are excluded. |M - V| == 2 is only enforced once
    the vertex carries at least 4 fold edges — below that the condition is
    vacuously satisfied (the vertex is not yet active).
    """
    m_count = 0
    v_count = 0
    # Single pass over incident edges, tallying M and V separately.
    for eid in graph.vertex_edges[vertex_id]:
        kind = graph.edges[eid][2]
        if kind == 'M':
            m_count += 1
        elif kind == 'V':
            v_count += 1

    if m_count + v_count < 4:
        return True
    return abs(m_count - v_count) == 2
78
+
79
+
80
def check_blb_at_vertex(vertex_id: int, graph: CreaseGraph) -> list[tuple[int, int]]:
    """
    Test the Big-Little-Big lemma at one vertex.

    Whenever a sector angle is strictly smaller than both of its
    neighbours, the two fold edges bounding that sector must carry
    OPPOSITE M/V assignments.

    Returns the (edge_a_id, edge_b_id) pairs violating this; [] = clean.
    """
    ordered = graph.get_cyclic_edges(vertex_id)
    count = len(ordered)
    if count < 4:
        return []

    sectors = _compute_sector_angles(vertex_id, graph)
    bad_pairs = []

    for i, sector in enumerate(sectors):
        # Strict local minimum relative to both cyclic neighbours.
        if not (sector < sectors[i - 1] and sector < sectors[(i + 1) % count]):
            continue

        left = ordered[i]
        right = ordered[(i + 1) % count]
        kind_left = graph.edges[left][2]
        kind_right = graph.edges[right][2]

        # Only fold edges participate; equal M/V assignments break BLB.
        if kind_left in ('M', 'V') and kind_right in ('M', 'V'):
            if kind_left == kind_right:
                bad_pairs.append((left, right))

    return bad_pairs
115
+
116
+
117
+ def _angle_diff(a1: float, a2: float) -> float:
118
+ """Minimum angle difference between two directed lines (considering 180° symmetry)."""
119
+ diff = abs(a1 - a2) % np.pi
120
+ return min(diff, np.pi - diff)
121
+
122
+
123
def geometric_crease_coverage(
    state: PaperState,
    target_edges: list[dict],
    tol_pos: float = 0.05,
    tol_angle_deg: float = 5.0,
) -> tuple[float, float]:
    """
    Score how closely the current crease pattern matches the target.

    A target segment counts as matched when some current crease has a
    midpoint within tol_pos of its midpoint AND a direction within
    tol_angle_deg of its direction (mod 180°).

    Args:
        target_edges: list of {'v1': (x1,y1), 'v2': (x2,y2), 'assignment': 'M'|'V'}

    Returns:
        (coverage, economy):
          coverage — fraction of target creases matched, in [0, 1]
          economy  — 1 minus a surplus-crease penalty, clamped to [0, 1]
                     (1.0 means no excess creases)
    """
    creases = state.crease_edges()
    angle_tol = np.deg2rad(tol_angle_deg)

    def seg_features(p, q):
        # Midpoint and direction angle of a segment p→q.
        (x1, y1), (x2, y2) = p, q
        return ((x1 + x2) / 2.0, (y1 + y2) / 2.0), np.arctan2(y2 - y1, x2 - x1)

    hits = 0
    for tgt in target_edges:
        t_mid, t_dir = seg_features(tgt['v1'], tgt['v2'])
        for cur in creases:
            c_mid, c_dir = seg_features(cur['v1'], cur['v2'])
            near = np.hypot(c_mid[0] - t_mid[0], c_mid[1] - t_mid[1]) <= tol_pos
            aligned = _angle_diff(c_dir, t_dir) <= angle_tol
            if near and aligned:
                hits += 1
                break

    denom = max(len(target_edges), 1)
    surplus = max(0, len(creases) - len(target_edges))
    return (hits / denom, max(0.0, 1.0 - surplus / denom))
167
+
168
+
169
def check_all_vertices(graph: CreaseGraph) -> dict:
    """
    Run Kawasaki, Maekawa, and BLB checks on every interior vertex.

    Returns a dict with:
        'kawasaki':   fraction of interior vertices passing Kawasaki [0,1]
        'maekawa':    fraction passing Maekawa [0,1]
        'blb':        fraction with no BLB violations [0,1]
        'n_interior': number of interior vertices checked
        'per_vertex': per-vertex detail dicts
    With no interior vertices, every fraction is vacuously 1.0.
    """
    interior = graph.interior_vertices()

    if not interior:
        return {
            'kawasaki': 1.0,
            'maekawa': 1.0,
            'blb': 1.0,
            'n_interior': 0,
            'per_vertex': [],
        }

    details = []
    for vid in interior:
        kaw_ok, kaw_err = check_kawasaki_at_vertex(vid, graph)
        mae_ok = check_maekawa_at_vertex(vid, graph)
        blb_bad = check_blb_at_vertex(vid, graph)
        details.append({
            'vertex_id': vid,
            'kawasaki_ok': kaw_ok,
            'kawasaki_error': kaw_err,
            'maekawa_ok': mae_ok,
            'blb_violations': blb_bad,
        })

    total = len(interior)
    return {
        'kawasaki': sum(d['kawasaki_ok'] for d in details) / total,
        'maekawa': sum(d['maekawa_ok'] for d in details) / total,
        'blb': sum(1 for d in details if not d['blb_violations']) / total,
        'n_interior': total,
        'per_vertex': details,
    }
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ shapely>=2.0.0
2
+ numpy>=1.24.0
3
+ pytest>=7.0.0
tests/__init__.py ADDED
File without changes
tests/test_graph.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+ from env.graph import CreaseGraph, VERTEX_TOL
4
+
5
+
6
+ def test_init_boundary():
7
+ g = CreaseGraph()
8
+ assert len(g.vertices) == 4
9
+ assert len(g.edges) == 4
10
+ for eid, (v1, v2, assignment) in g.edges.items():
11
+ assert assignment == 'B'
12
+ assert g.interior_vertices() == []
13
+
14
+
15
+ def test_add_vertex_dedup():
16
+ g = CreaseGraph()
17
+ id1 = g.add_vertex(0.5, 0.5)
18
+ id2 = g.add_vertex(0.5, 0.5)
19
+ assert id1 == id2
20
+
21
+
22
+ def test_add_vertex_dedup_near():
23
+ g = CreaseGraph()
24
+ id1 = g.add_vertex(0.5, 0.5)
25
+ id2 = g.add_vertex(0.5 + VERTEX_TOL * 0.5, 0.5)
26
+ assert id1 == id2
27
+
28
+
29
+ def test_cyclic_order():
30
+ g = CreaseGraph()
31
+ center_id = g.add_vertex(0.5, 0.5)
32
+
33
+ right_id = g.add_vertex(0.8, 0.5) # 0 degrees
34
+ top_id = g.add_vertex(0.5, 0.8) # 90 degrees
35
+ left_id = g.add_vertex(0.2, 0.5) # 180 degrees
36
+ bottom_id = g.add_vertex(0.5, 0.2) # 270 degrees / -90 degrees
37
+
38
+ e_right = g.add_edge(center_id, right_id, 'M')
39
+ e_top = g.add_edge(center_id, top_id, 'M')
40
+ e_left = g.add_edge(center_id, left_id, 'M')
41
+ e_bottom = g.add_edge(center_id, bottom_id, 'M')
42
+
43
+ cyclic = g.get_cyclic_edges(center_id)
44
+ # Sorted by angle ascending: right(0), top(90), left(180), bottom(-90 → 270)
45
+ # arctan2 for bottom gives -pi/2 which sorts before 0 in ascending order
46
+ # So actual ascending order: bottom(-pi/2), right(0), top(pi/2), left(pi)
47
+ assert len(cyclic) == 4
48
+
49
+ def edge_angle(eid):
50
+ ev1, ev2, _ = g.edges[eid]
51
+ other_id = ev2 if ev1 == center_id else ev1
52
+ ox, oy = g.vertices[other_id]
53
+ cx, cy = g.vertices[center_id]
54
+ return float(np.arctan2(oy - cy, ox - cx))
55
+
56
+ angles = [edge_angle(eid) for eid in cyclic]
57
+ assert angles == sorted(angles), "Edges should be sorted by ascending angle"
58
+
59
+ assert e_right in cyclic
60
+ assert e_top in cyclic
61
+ assert e_left in cyclic
62
+ assert e_bottom in cyclic
63
+
64
+ # Verify specific order: bottom < right < top < left in angle space
65
+ pos = {eid: i for i, eid in enumerate(cyclic)}
66
+ assert pos[e_bottom] < pos[e_right] < pos[e_top] < pos[e_left]
67
+
68
+
69
+ def test_interior_vertices_empty():
70
+ g = CreaseGraph()
71
+ assert g.interior_vertices() == []
72
+
73
+
74
+ def test_interior_vertices_with_crease_intersection():
75
+ g = CreaseGraph()
76
+ center_id = g.add_vertex(0.5, 0.5)
77
+ assert center_id in g.interior_vertices()
78
+
79
+
80
+ def test_split_edge():
81
+ g = CreaseGraph()
82
+ # Find the bottom boundary edge (0,0)-(1,0) which is edge 0: v0-v1
83
+ original_edge_id = None
84
+ for eid, (v1, v2, assignment) in g.edges.items():
85
+ x1, y1 = g.vertices[v1]
86
+ x2, y2 = g.vertices[v2]
87
+ if {(x1, y1), (x2, y2)} == {(0.0, 0.0), (1.0, 0.0)}:
88
+ original_edge_id = eid
89
+ original_v1 = v1
90
+ original_v2 = v2
91
+ break
92
+
93
+ assert original_edge_id is not None
94
+
95
+ mid_id = g.add_vertex(0.5, 0.0)
96
+ eid1, eid2 = g.split_edge(original_edge_id, mid_id)
97
+
98
+ assert original_edge_id not in g.edges
99
+
100
+ assert eid1 in g.edges
101
+ assert eid2 in g.edges
102
+
103
+ _, _, a1 = g.edges[eid1]
104
+ _, _, a2 = g.edges[eid2]
105
+ assert a1 == 'B'
106
+ assert a2 == 'B'
107
+
108
+ def edge_vertex_set(eid):
109
+ v1, v2, _ = g.edges[eid]
110
+ return {v1, v2}
111
+
112
+ assert mid_id in edge_vertex_set(eid1)
113
+ assert mid_id in edge_vertex_set(eid2)
114
+ assert original_v1 in edge_vertex_set(eid1) or original_v1 in edge_vertex_set(eid2)
115
+ assert original_v2 in edge_vertex_set(eid1) or original_v2 in edge_vertex_set(eid2)
tests/test_paper_state.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from env.paper_state import PaperState, UNIT_SQUARE_CORNERS
3
+ from env.graph import VERTEX_TOL
4
+
5
+
6
+ def test_single_crease_no_interior_vertices():
7
+ paper = PaperState()
8
+ result = paper.add_crease([0.0, 0.5], [1.0, 0.5], 'V')
9
+ assert result['valid'] is True
10
+ interior = paper.graph.interior_vertices()
11
+ assert interior == [], f"Expected no interior vertices, got {interior}"
12
+
13
+
14
+ def test_anchor_points_initial():
15
+ paper = PaperState()
16
+ anchors = paper.anchor_points()
17
+ for corner in UNIT_SQUARE_CORNERS:
18
+ assert any(
19
+ abs(ax - corner[0]) < VERTEX_TOL and abs(ay - corner[1]) < VERTEX_TOL
20
+ for ax, ay in anchors
21
+ ), f"Corner {corner} not found in anchor_points"
22
+
23
+
24
+ def test_anchor_points_grow():
25
+ paper = PaperState()
26
+ result = paper.add_crease([0.0, 0.5], [1.0, 0.5], 'V')
27
+ assert result['valid'] is True
28
+
29
+ anchors = paper.anchor_points()
30
+
31
+ def has_point(px, py):
32
+ return any(abs(ax - px) < VERTEX_TOL and abs(ay - py) < VERTEX_TOL for ax, ay in anchors)
33
+
34
+ assert has_point(0.0, 0.5), "(0, 0.5) should be in anchor_points after crease"
35
+ assert has_point(1.0, 0.5), "(1, 0.5) should be in anchor_points after crease"
36
+
37
+
38
+ def test_invalid_assignment():
39
+ paper = PaperState()
40
+ result = paper.add_crease([0.0, 0.5], [1.0, 0.5], 'X')
41
+ assert result['valid'] is False
42
+ assert 'invalid_assignment' in result['errors']
43
+
44
+
45
+ def test_fold_history():
46
+ paper = PaperState()
47
+ paper.add_crease([0.0, 0.5], [1.0, 0.5], 'M')
48
+ assert len(paper.fold_history) == 1
49
+
50
+
51
+ def test_unanchored_returns_false_anchored():
52
+ paper = PaperState()
53
+ result = paper.add_crease([0.3, 0.3], [0.7, 0.7], 'M')
54
+ assert result['anchored'] is False
55
+
56
+
57
+ def test_crease_edges_returned():
58
+ paper = PaperState()
59
+ paper.add_crease([0.0, 0.5], [1.0, 0.5], 'M')
60
+ edges = paper.crease_edges()
61
+ assert len(edges) >= 1
62
+ for e in edges:
63
+ assert e['assignment'] in ('M', 'V')
64
+ assert 'v1' in e
65
+ assert 'v2' in e
66
+
67
+
68
+ def test_two_intersecting_creases():
69
+ paper = PaperState()
70
+ r1 = paper.add_crease([0.0, 0.5], [1.0, 0.5], 'M')
71
+ r2 = paper.add_crease([0.5, 0.0], [0.5, 1.0], 'V')
72
+ assert r1['valid'] is True
73
+ assert r2['valid'] is True
74
+ interior = paper.graph.interior_vertices()
75
+ assert len(interior) >= 1
76
+ coords = [paper.graph.vertices[vid] for vid in interior]
77
+ assert any(abs(x - 0.5) < VERTEX_TOL and abs(y - 0.5) < VERTEX_TOL for x, y in coords)
tests/test_verifier.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import numpy as np
3
+ from env.graph import CreaseGraph
4
+ from env.paper_state import PaperState
5
+ from env.verifier import (
6
+ check_kawasaki_at_vertex,
7
+ check_maekawa_at_vertex,
8
+ check_blb_at_vertex,
9
+ geometric_crease_coverage,
10
+ check_all_vertices,
11
+ )
12
+
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Helpers
16
+ # ---------------------------------------------------------------------------
17
+
18
+ def make_cross_graph(center_coords=(0.5, 0.5), assignment='M') -> tuple[CreaseGraph, int]:
19
+ """
20
+ Degree-4 vertex at center with 4 spokes pointing N/S/E/W.
21
+ All spokes have the given assignment.
22
+ """
23
+ g = CreaseGraph()
24
+ cx, cy = center_coords
25
+ vid = g.add_vertex(cx, cy)
26
+
27
+ neighbors = [
28
+ (0.0, cy), # left (180°)
29
+ (1.0, cy), # right (0°)
30
+ (cx, 0.0), # down (-90°)
31
+ (cx, 1.0), # up (90°)
32
+ ]
33
+ for nx, ny in neighbors:
34
+ nid = g.add_vertex(nx, ny)
35
+ g.add_edge(vid, nid, assignment)
36
+
37
+ return g, vid
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Kawasaki tests
42
+ # ---------------------------------------------------------------------------
43
+
44
+ def test_kawasaki_no_interior_vertices():
45
+ paper = PaperState()
46
+ paper.add_crease([0, 0.5], [1, 0.5], 'V')
47
+ assert paper.graph.interior_vertices() == []
48
+ result = check_all_vertices(paper.graph)
49
+ assert result['kawasaki'] == 1.0
50
+ assert result['n_interior'] == 0
51
+
52
+
53
+ def test_kawasaki_valid_degree4_vertex():
54
+ """Equal 90° sectors → alternating sum = 0 → Kawasaki satisfied."""
55
+ g, vid = make_cross_graph()
56
+ ok, err = check_kawasaki_at_vertex(vid, g)
57
+ assert ok == True
58
+ assert err == pytest.approx(0.0, abs=1e-9)
59
+
60
+
61
+ def test_kawasaki_invalid_vertex():
62
+ """
63
+ Manually construct a degree-4 vertex whose sectors are 60°,120°,80°,100°.
64
+ Alternating sum = 60 - 120 + 80 - 100 = -80° ≠ 0 → should fail.
65
+ """
66
+ g = CreaseGraph()
67
+ cx, cy = 0.5, 0.5
68
+ vid = g.add_vertex(cx, cy)
69
+
70
+ # Place neighbours at specific angles so sectors are exactly as desired.
71
+ # Sectors are measured CCW between consecutive rays.
72
+ # We choose ray angles (from center) in ascending arctan2 order:
73
+ # a0 = 0°
74
+ # a1 = 60° (sector0 = 60°)
75
+ # a2 = 180° (sector1 = 120°)
76
+ # a3 = 260° = -100° (sector2 = 80°)
77
+ # sector3 (wraparound to a0) = 360° - 260° = 100°
78
+ # alt_sum = 60 - 120 + 80 - 100 = -80° → |alt_sum| ≈ 1.396 rad
79
+ r = 0.3
80
+ angles_deg = [0.0, 60.0, 180.0, 260.0]
81
+ for deg in angles_deg:
82
+ rad = np.deg2rad(deg)
83
+ nx = cx + r * np.cos(rad)
84
+ ny = cy + r * np.sin(rad)
85
+ nid = g.add_vertex(nx, ny)
86
+ g.add_edge(vid, nid, 'M')
87
+
88
+ ok, err = check_kawasaki_at_vertex(vid, g)
89
+ assert ok == False
90
+ expected_err = abs(np.deg2rad(60 - 120 + 80 - 100))
91
+ assert err == pytest.approx(expected_err, abs=1e-6)
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # Maekawa tests
96
+ # ---------------------------------------------------------------------------
97
+
98
+ def test_maekawa_excludes_boundary():
99
+ """
100
+ Boundary edges at a vertex should NOT count toward M/V tally.
101
+ A corner vertex has only boundary edges; Maekawa should return True
102
+ (fewer than 4 fold edges → vacuously satisfied).
103
+ """
104
+ g = CreaseGraph()
105
+ corner_id = 0 # vertex (0,0)
106
+ assert check_maekawa_at_vertex(corner_id, g) is True
107
+
108
+
109
+ def test_maekawa_valid():
110
+ """3 M + 1 V → |3-1| = 2 → True."""
111
+ g = CreaseGraph()
112
+ cx, cy = 0.5, 0.5
113
+ vid = g.add_vertex(cx, cy)
114
+
115
+ r = 0.3
116
+ angles_deg = [0.0, 90.0, 180.0, 270.0]
117
+ assignments = ['M', 'M', 'M', 'V']
118
+ for deg, asgn in zip(angles_deg, assignments):
119
+ rad = np.deg2rad(deg)
120
+ nid = g.add_vertex(cx + r * np.cos(rad), cy + r * np.sin(rad))
121
+ g.add_edge(vid, nid, asgn)
122
+
123
+ assert check_maekawa_at_vertex(vid, g) is True
124
+
125
+
126
+ def test_maekawa_invalid():
127
+ """2 M + 2 V → |2-2| = 0 → False."""
128
+ g = CreaseGraph()
129
+ cx, cy = 0.5, 0.5
130
+ vid = g.add_vertex(cx, cy)
131
+
132
+ r = 0.3
133
+ angles_deg = [0.0, 90.0, 180.0, 270.0]
134
+ assignments = ['M', 'M', 'V', 'V']
135
+ for deg, asgn in zip(angles_deg, assignments):
136
+ rad = np.deg2rad(deg)
137
+ nid = g.add_vertex(cx + r * np.cos(rad), cy + r * np.sin(rad))
138
+ g.add_edge(vid, nid, asgn)
139
+
140
+ assert check_maekawa_at_vertex(vid, g) is False
141
+
142
+
143
+ # ---------------------------------------------------------------------------
144
+ # BLB tests
145
+ # ---------------------------------------------------------------------------
146
+
147
+ def test_blb_no_violations_equal_sectors():
148
+ """Equal 90° sectors → no strict local minimum → BLB returns []."""
149
+ g, vid = make_cross_graph()
150
+ violations = check_blb_at_vertex(vid, g)
151
+ assert violations == []
152
+
153
+
154
+ def test_blb_violation_detected():
155
+ """
156
+ Create a vertex with a strict local-minimum sector whose bounding edges
157
+ share the same MV assignment → BLB violation.
158
+
159
+ Use angles 0°, 10°, 180°, 270° so sector[0]=10° is the strict local min
160
+ relative to sector[3] (90°) and sector[1] (170°). The two bounding edges
161
+ are at 0° and 10°; assign both 'M' → violation.
162
+ """
163
+ g = CreaseGraph()
164
+ cx, cy = 0.5, 0.5
165
+ vid = g.add_vertex(cx, cy)
166
+
167
+ r = 0.3
168
+ # angles ascending (arctan2 order): 0°, 10°, 180°, 270° (= -90°)
169
+ # sorted arctan2: -90°, 0°, 10°, 180°
170
+ # sectors: 90°, 10°, 170°, 90° (sum=360°)
171
+ # sector at index 1 (between 0° and 10°) = 10° is strict local min (90 > 10 < 170)
172
+ angles_deg = [0.0, 10.0, 180.0, 270.0]
173
+ edge_ids = []
174
+ for deg in angles_deg:
175
+ rad = np.deg2rad(deg)
176
+ nid = g.add_vertex(cx + r * np.cos(rad), cy + r * np.sin(rad))
177
+ eid = g.add_edge(vid, nid, 'M')
178
+ edge_ids.append(eid)
179
+
180
+ violations = check_blb_at_vertex(vid, g)
181
+ assert len(violations) > 0
182
+
183
+
184
+ def test_blb_no_violation_when_opposite_assignments():
185
+ """
186
+ Same geometry as above but with opposite assignments on the two edges
187
+ bounding the small sector → no BLB violation.
188
+ """
189
+ g = CreaseGraph()
190
+ cx, cy = 0.5, 0.5
191
+ vid = g.add_vertex(cx, cy)
192
+
193
+ r = 0.3
194
+ angles_deg = [0.0, 10.0, 180.0, 270.0]
195
+ # sorted arctan2: -90°(270°), 0°, 10°, 180°
196
+ # small sector is between 0° and 10° (index 1 and 2 in sorted order)
197
+ # assign them opposite assignments
198
+ assignments_by_angle = {
199
+ 0.0: 'M',
200
+ 10.0: 'V',
201
+ 180.0: 'M',
202
+ 270.0: 'V',
203
+ }
204
+ for deg in angles_deg:
205
+ rad = np.deg2rad(deg)
206
+ nid = g.add_vertex(cx + r * np.cos(rad), cy + r * np.sin(rad))
207
+ g.add_edge(vid, nid, assignments_by_angle[deg])
208
+
209
+ violations = check_blb_at_vertex(vid, g)
210
+ assert violations == []
211
+
212
+
213
+ # ---------------------------------------------------------------------------
214
+ # Coverage tests
215
+ # ---------------------------------------------------------------------------
216
+
217
+ def test_coverage_exact_match():
218
+ """Add exact crease matching target → coverage = 1.0, economy = 1.0."""
219
+ paper = PaperState()
220
+ paper.add_crease([0.0, 0.5], [1.0, 0.5], 'M')
221
+
222
+ target = [{'v1': (0.0, 0.5), 'v2': (1.0, 0.5), 'assignment': 'M'}]
223
+ coverage, economy = geometric_crease_coverage(paper, target)
224
+ assert coverage == pytest.approx(1.0)
225
+ assert economy == pytest.approx(1.0)
226
+
227
+
228
+ def test_coverage_no_match():
229
+ """No creases added → coverage = 0.0."""
230
+ paper = PaperState()
231
+ target = [{'v1': (0.0, 0.5), 'v2': (1.0, 0.5), 'assignment': 'M'}]
232
+ coverage, economy = geometric_crease_coverage(paper, target)
233
+ assert coverage == pytest.approx(0.0)
234
+
235
+
236
+ def test_coverage_excess_penalty():
237
+ """
238
+ Target has 1 crease. Add 3 non-intersecting creases, one matching target.
239
+ coverage = 1.0, economy = 1 - 2/1 → clamped to 0.0 (economy < 1.0).
240
+ Uses non-intersecting extras to avoid PaperState edge splitting the target crease.
241
+ """
242
+ paper = PaperState()
243
+ paper.add_crease([0.0, 0.5], [1.0, 0.5], 'M') # matches target (midpoint 0.5,0.5)
244
+ paper.add_crease([0.0, 0.3], [0.5, 0.3], 'V') # extra, no intersection
245
+ paper.add_crease([0.0, 0.7], [0.5, 0.7], 'V') # extra, no intersection
246
+
247
+ target = [{'v1': (0.0, 0.5), 'v2': (1.0, 0.5), 'assignment': 'M'}]
248
+ coverage, economy = geometric_crease_coverage(paper, target)
249
+ assert coverage == pytest.approx(1.0)
250
+ assert economy < 1.0
251
+
252
+
253
+ # ---------------------------------------------------------------------------
254
+ # check_all_vertices vacuous test
255
+ # ---------------------------------------------------------------------------
256
+
257
+ def test_check_all_vertices_vacuous():
258
+ """Single horizontal crease → no interior vertices → all scores = 1.0."""
259
+ paper = PaperState()
260
+ paper.add_crease([0.0, 0.5], [1.0, 0.5], 'V')
261
+ result = check_all_vertices(paper.graph)
262
+ assert result['kawasaki'] == 1.0
263
+ assert result['maekawa'] == 1.0
264
+ assert result['blb'] == 1.0
265
+ assert result['n_interior'] == 0
266
+ assert result['per_vertex'] == []
train.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OrigamiRL — GRPO Training Script
3
+ Code-as-policy: model generates complete fold sequence, gets terminal reward.
4
+
5
+ Usage:
6
+ python train.py
7
+ python train.py --model unsloth/Qwen2.5-7B-Instruct --epochs 3 --output origami-grpo
8
+ """
9
+ import argparse
10
+ import json
11
+ import copy
12
+ import random
13
+ from pathlib import Path
14
+ from typing import Optional
15
+
16
+
17
def parse_args(argv=None):
    """Parse command-line options for GRPO training.

    Args:
        argv: Optional explicit argument list. Defaults to None, which makes
            argparse read ``sys.argv[1:]`` exactly as before; passing a list
            allows programmatic/test use without patching sys.argv.

    Returns:
        argparse.Namespace with the model, optimization, and environment
        hyperparameters documented in each ``add_argument`` call below.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default='unsloth/Qwen2.5-7B-Instruct')
    parser.add_argument('--max_seq_length', type=int, default=2048)
    parser.add_argument('--epochs', type=int, default=3)
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--grad_accum', type=int, default=4)
    parser.add_argument('--lr', type=float, default=5e-6)
    parser.add_argument('--n_generations', type=int, default=8)
    parser.add_argument('--max_folds', type=int, default=8)
    parser.add_argument('--output', default='origami-grpo')
    parser.add_argument('--level', type=int, default=1, help='Target difficulty level (1-3)')
    parser.add_argument('--dry_run', action='store_true', help='Test reward function without training')
    return parser.parse_args(argv)
31
+
32
+
33
def build_dataset(env, level: int = 1, max_folds: int = 8) -> list[dict]:
    """
    Build a training dataset of prompts from available targets.

    Each item: {'prompt': str, 'target_name': str}.
    Repeats each target multiple times to give enough training steps.

    Args:
        env: Environment exposing ``available_targets()``, ``reset()``, and a
            ``_targets`` mapping with per-target metadata.
        level: Difficulty level to select; falls back to all targets when no
            target matches.
        max_folds: Unused here; kept for call-site compatibility.

    Returns:
        Shuffled list of at least 50 {'prompt', 'target_name'} dicts.

    Raises:
        ValueError: If the environment exposes no targets at all.
    """
    all_names = env.available_targets()
    if not all_names:
        # Without this guard the repeat computation below divides by zero.
        raise ValueError('environment exposes no targets to build a dataset from')

    # Filter by level; fall back to all targets if none match
    level_names = [
        name for name in all_names
        if env._targets[name].get('level', 1) == level
    ] or all_names

    items = []
    for name in level_names:
        obs = env.reset(target_name=name)
        items.append({'prompt': obs['prompt'], 'target_name': name})

    # Repeat each target 10x; ensure at least 50 examples
    repeat = max(10, (50 + len(items) - 1) // len(items))
    items = items * repeat

    random.shuffle(items)
    return items
61
+
62
+
63
def make_reward_fn(env_template, max_folds: int):
    """
    Build a reward function compatible with trl's GRPOTrainer.

    Returned callable signature:
        reward_fn(completions, prompts=None, **kwargs) -> list[float]

    Each completion is scored by cloning the template environment (fresh
    paper state), resetting to its target (``target_names`` from kwargs when
    available), and executing the completion as a fold sequence. Any failure
    along the way contributes a flat -0.1 penalty instead of raising.
    """
    def reward_fn(completions, prompts=None, **kwargs):
        names = kwargs.get('target_names', [None] * len(completions))
        scores = []

        for text, name in zip(completions, names):
            try:
                episode = env_template.clone()
                episode.reset(target_name=name)
                _, breakdown, _, _ = episode.step(text)
                score = float(breakdown['total'])
            except Exception:
                score = -0.1
            scores.append(score)

        return scores

    return reward_fn
91
+
92
+
93
def make_detailed_reward_fns(env_template, max_folds: int) -> list:
    """
    Build one reward function per reward component for detailed W&B logging.

    Components: kawasaki, maekawa, blb, progress, economy, completion.
    Each returned callable (named ``reward_<component>``) scores completions
    on just its component; any execution failure contributes 0.0.
    """
    def _fn_for(component: str):
        def component_fn(completions, prompts=None, **kwargs):
            names = kwargs.get('target_names', [None] * len(completions))
            values = []

            for text, name in zip(completions, names):
                try:
                    episode = env_template.clone()
                    episode.reset(target_name=name)
                    _, breakdown, _, _ = episode.step(text)
                    values.append(float(breakdown.get(component, 0.0)))
                except Exception:
                    values.append(0.0)

            return values

        component_fn.__name__ = f'reward_{component}'
        return component_fn

    return [
        _fn_for(c)
        for c in ('kawasaki', 'maekawa', 'blb', 'progress', 'economy', 'completion')
    ]
121
+
122
+
123
def main():
    """Entry point: build the dataset, optionally dry-run the reward function,
    otherwise load the model via unsloth and train with TRL's GRPOTrainer."""
    args = parse_args()

    # Import here to allow dry_run without GPU
    from env.environment import OrigamiEnvironment

    env = OrigamiEnvironment(mode='code_as_policy', max_steps=args.max_folds)

    # Build dataset of prompts, one row per (repeated) target.
    dataset_items = build_dataset(env, level=args.level, max_folds=args.max_folds)
    print(f"Dataset: {len(dataset_items)} examples from level-{args.level} targets")
    print(f"Targets: {env.available_targets()}")

    # Dry run: test reward function without loading model
    if args.dry_run:
        reward_fn = make_reward_fn(env, args.max_folds)
        test_completions = [
            '<folds>[{"instruction": "Valley fold along horizontal center", "from": [0, 0.5], "to": [1, 0.5], "assignment": "V"}]</folds>',
            '<folds>[{"instruction": "Invalid fold", "from": [0.3, 0.3], "to": [0.7, 0.7], "assignment": "V"}]</folds>',
            'this is not valid JSON at all',
        ]
        target_names = [dataset_items[0]['target_name']] * 3
        rewards = reward_fn(test_completions, target_names=target_names)
        print(f"\nDry run rewards: {rewards}")
        print("Dry run complete — reward function works.")
        return

    # Load model via unsloth (optional heavy dependency; only needed to train).
    try:
        from unsloth import FastLanguageModel
    except ImportError:
        print("ERROR: unsloth not installed. Run: pip install unsloth")
        print("Or run with --dry_run to test the reward function without a model.")
        return

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=args.model,
        max_seq_length=args.max_seq_length,
        load_in_4bit=True,
    )

    # Attach LoRA adapters so only a small parameter subset is trained.
    model = FastLanguageModel.get_peft_model(
        model,
        r=32,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=32,
        lora_dropout=0,
        use_gradient_checkpointing="unsloth",
    )

    # Convert dataset to HuggingFace Dataset format
    from datasets import Dataset

    # GRPOTrainer expects 'prompt' column and optionally others.
    # We embed target_name in the dataset so the reward fn can use it.
    hf_dataset = Dataset.from_list(dataset_items)

    # Build main reward function
    reward_fn = make_reward_fn(env, args.max_folds)

    from trl import GRPOConfig, GRPOTrainer

    config = GRPOConfig(
        output_dir=args.output,
        num_train_epochs=args.epochs,
        per_device_train_batch_size=args.batch_size,
        gradient_accumulation_steps=args.grad_accum,
        learning_rate=args.lr,
        max_completion_length=512,
        num_generations=args.n_generations,
        temperature=1.0,
        logging_steps=1,
        report_to="wandb",
        run_name="origami-grpo",
    )

    # GRPOTrainer passes all dataset columns as kwargs to reward_funcs.
    # The 'target_name' column arrives as a list (one per completion in the batch).
    def wrapped_reward_fn(completions, target_name=None, **kwargs):
        """Wrapper that extracts target_name from batch columns."""
        target_names = target_name if isinstance(target_name, list) else [target_name] * len(completions)
        return reward_fn(completions, target_names=target_names)

    # Fix: GRPOTrainer follows the HF Trainer API — the config is passed as
    # `args=` (not `config=`) and the tokenizer as `processing_class=`
    # (not `tokenizer=`); the original keywords raise TypeError.
    trainer = GRPOTrainer(
        model=model,
        args=config,
        train_dataset=hf_dataset,
        reward_funcs=[wrapped_reward_fn],
        processing_class=tokenizer,
    )

    print(f"\nStarting GRPO training...")
    print(f"  Model: {args.model}")
    print(f"  Level: {args.level} targets")
    print(f"  Epochs: {args.epochs}")
    print(f"  Generations per prompt: {args.n_generations}")
    print(f"  Output: {args.output}/")

    trainer.train()

    # Save LoRA adapter weights and tokenizer for later inference.
    model.save_pretrained(args.output)
    tokenizer.save_pretrained(args.output)
    print(f"\nModel saved to {args.output}/")
228
+
229
+
230
# Script entry point: run training (or --dry_run reward check) when executed directly.
if __name__ == '__main__':
    main()