Spaces:
Running
Running
Commit ·
aaba0a8
1
Parent(s): 8652f7e
refactor(env): update environment, paper state, rewards, prompts, and verifier
Browse files
- env/environment.py +24 -7
- env/paper_state.py +17 -3
- env/prompts.py +59 -10
- env/rewards.py +72 -30
- env/targets/validator_check.py +5 -3
- env/verifier.py +47 -7
env/environment.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import os
|
| 3 |
import copy
|
|
|
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import Optional
|
| 6 |
|
| 7 |
from .paper_state import PaperState
|
| 8 |
-
from .rewards import compute_reward, compute_terminal_reward
|
| 9 |
from .prompts import (
|
| 10 |
code_as_policy_prompt,
|
|
|
|
| 11 |
step_level_prompt,
|
| 12 |
parse_fold_list,
|
| 13 |
parse_single_fold,
|
|
@@ -32,11 +32,13 @@ class OrigamiEnvironment:
|
|
| 32 |
mode: str = 'code_as_policy', # 'code_as_policy' or 'step'
|
| 33 |
max_steps: int = 8,
|
| 34 |
targets_dir: Optional[str] = None,
|
|
|
|
| 35 |
):
|
| 36 |
assert mode in ('code_as_policy', 'step'), f"Unknown mode: {mode}"
|
| 37 |
self.mode = mode
|
| 38 |
self.max_steps = max_steps
|
| 39 |
self.targets_dir = Path(targets_dir) if targets_dir else TARGETS_DIR
|
|
|
|
| 40 |
|
| 41 |
self.paper: Optional[PaperState] = None
|
| 42 |
self.target: Optional[dict] = None
|
|
@@ -44,7 +46,6 @@ class OrigamiEnvironment:
|
|
| 44 |
self.step_count: int = 0
|
| 45 |
self.last_reward: Optional[dict] = None
|
| 46 |
|
| 47 |
-
# Cache all available targets
|
| 48 |
self._targets = self._load_all_targets()
|
| 49 |
|
| 50 |
def _load_all_targets(self) -> dict[str, dict]:
|
|
@@ -128,7 +129,7 @@ class OrigamiEnvironment:
|
|
| 128 |
if not last_result['valid']:
|
| 129 |
break # stop at first invalid fold, partial credit
|
| 130 |
|
| 131 |
-
reward = compute_terminal_reward(self.paper, self.target)
|
| 132 |
self.last_reward = reward
|
| 133 |
return self._get_observation(), reward, True, self._info()
|
| 134 |
|
|
@@ -155,10 +156,18 @@ class OrigamiEnvironment:
|
|
| 155 |
done = self.step_count >= self.max_steps
|
| 156 |
return self._get_observation(), bad_reward, done, self._info()
|
| 157 |
|
|
|
|
| 158 |
result = self.paper.add_crease(p1, p2, assignment)
|
| 159 |
self.step_count += 1
|
| 160 |
|
| 161 |
-
reward = compute_reward(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
self.last_reward = reward
|
| 163 |
|
| 164 |
done = (
|
|
@@ -169,8 +178,14 @@ class OrigamiEnvironment:
|
|
| 169 |
|
| 170 |
def _get_observation(self) -> dict:
|
| 171 |
"""Returns observation dict with the LLM prompt and raw state."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
if self.mode == 'code_as_policy':
|
| 173 |
-
prompt = code_as_policy_prompt(
|
|
|
|
|
|
|
| 174 |
else:
|
| 175 |
prompt = step_level_prompt(
|
| 176 |
target=self.target,
|
|
@@ -178,6 +193,7 @@ class OrigamiEnvironment:
|
|
| 178 |
step=self.step_count,
|
| 179 |
max_steps=self.max_steps,
|
| 180 |
last_reward=self.last_reward,
|
|
|
|
| 181 |
)
|
| 182 |
|
| 183 |
return {
|
|
@@ -233,6 +249,7 @@ class OrigamiEnvironment:
|
|
| 233 |
mode=self.mode,
|
| 234 |
max_steps=self.max_steps,
|
| 235 |
targets_dir=str(self.targets_dir),
|
|
|
|
| 236 |
)
|
| 237 |
if self.paper is not None:
|
| 238 |
new_env.paper = copy.deepcopy(self.paper)
|
|
|
|
|
|
|
|
|
|
| 1 |
import copy
|
| 2 |
+
import json
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import Optional
|
| 5 |
|
| 6 |
from .paper_state import PaperState
|
| 7 |
+
from .rewards import compute_reward, compute_terminal_reward
|
| 8 |
from .prompts import (
|
| 9 |
code_as_policy_prompt,
|
| 10 |
+
get_semantic_description,
|
| 11 |
step_level_prompt,
|
| 12 |
parse_fold_list,
|
| 13 |
parse_single_fold,
|
|
|
|
| 32 |
mode: str = 'code_as_policy', # 'code_as_policy' or 'step'
|
| 33 |
max_steps: int = 8,
|
| 34 |
targets_dir: Optional[str] = None,
|
| 35 |
+
use_semantic: bool = True,
|
| 36 |
):
|
| 37 |
assert mode in ('code_as_policy', 'step'), f"Unknown mode: {mode}"
|
| 38 |
self.mode = mode
|
| 39 |
self.max_steps = max_steps
|
| 40 |
self.targets_dir = Path(targets_dir) if targets_dir else TARGETS_DIR
|
| 41 |
+
self.use_semantic = use_semantic
|
| 42 |
|
| 43 |
self.paper: Optional[PaperState] = None
|
| 44 |
self.target: Optional[dict] = None
|
|
|
|
| 46 |
self.step_count: int = 0
|
| 47 |
self.last_reward: Optional[dict] = None
|
| 48 |
|
|
|
|
| 49 |
self._targets = self._load_all_targets()
|
| 50 |
|
| 51 |
def _load_all_targets(self) -> dict[str, dict]:
|
|
|
|
| 129 |
if not last_result['valid']:
|
| 130 |
break # stop at first invalid fold, partial credit
|
| 131 |
|
| 132 |
+
reward = compute_terminal_reward(self.paper, self.target, self.max_steps)
|
| 133 |
self.last_reward = reward
|
| 134 |
return self._get_observation(), reward, True, self._info()
|
| 135 |
|
|
|
|
| 156 |
done = self.step_count >= self.max_steps
|
| 157 |
return self._get_observation(), bad_reward, done, self._info()
|
| 158 |
|
| 159 |
+
prev_state = copy.deepcopy(self.paper)
|
| 160 |
result = self.paper.add_crease(p1, p2, assignment)
|
| 161 |
self.step_count += 1
|
| 162 |
|
| 163 |
+
reward = compute_reward(
|
| 164 |
+
prev_state=prev_state,
|
| 165 |
+
action_result=result,
|
| 166 |
+
new_state=self.paper,
|
| 167 |
+
target=self.target,
|
| 168 |
+
step=self.step_count,
|
| 169 |
+
max_steps=self.max_steps,
|
| 170 |
+
)
|
| 171 |
self.last_reward = reward
|
| 172 |
|
| 173 |
done = (
|
|
|
|
| 178 |
|
| 179 |
def _get_observation(self) -> dict:
|
| 180 |
"""Returns observation dict with the LLM prompt and raw state."""
|
| 181 |
+
desc = None
|
| 182 |
+
if self.use_semantic and self.target_name and self.target:
|
| 183 |
+
desc = get_semantic_description(self.target_name, self.target)
|
| 184 |
+
|
| 185 |
if self.mode == 'code_as_policy':
|
| 186 |
+
prompt = code_as_policy_prompt(
|
| 187 |
+
self.target, max_folds=self.max_steps, semantic_description=desc,
|
| 188 |
+
)
|
| 189 |
else:
|
| 190 |
prompt = step_level_prompt(
|
| 191 |
target=self.target,
|
|
|
|
| 193 |
step=self.step_count,
|
| 194 |
max_steps=self.max_steps,
|
| 195 |
last_reward=self.last_reward,
|
| 196 |
+
semantic_description=desc,
|
| 197 |
)
|
| 198 |
|
| 199 |
return {
|
|
|
|
| 249 |
mode=self.mode,
|
| 250 |
max_steps=self.max_steps,
|
| 251 |
targets_dir=str(self.targets_dir),
|
| 252 |
+
use_semantic=self.use_semantic,
|
| 253 |
)
|
| 254 |
if self.paper is not None:
|
| 255 |
new_env.paper = copy.deepcopy(self.paper)
|
env/paper_state.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import numpy as np
|
| 2 |
from shapely.geometry import LineString, Point, Polygon
|
| 3 |
-
from shapely.ops import unary_union
|
| 4 |
from typing import Optional
|
| 5 |
from .graph import CreaseGraph, VERTEX_TOL
|
| 6 |
|
|
@@ -34,6 +33,14 @@ class PaperState:
|
|
| 34 |
return True
|
| 35 |
return False
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
def add_crease(self, p1: list, p2: list, assignment: str) -> dict:
|
| 38 |
errors: list[str] = []
|
| 39 |
|
|
@@ -43,6 +50,7 @@ class PaperState:
|
|
| 43 |
'anchored': False,
|
| 44 |
'new_vertices': [],
|
| 45 |
'errors': ['invalid_assignment'],
|
|
|
|
| 46 |
}
|
| 47 |
|
| 48 |
p1 = (float(p1[0]), float(p1[1]))
|
|
@@ -53,7 +61,7 @@ class PaperState:
|
|
| 53 |
seg_len = np.hypot(p2[0] - p1[0], p2[1] - p1[1])
|
| 54 |
if seg_len < VERTEX_TOL:
|
| 55 |
errors.append('zero_length')
|
| 56 |
-
return {'valid': False, 'anchored': anchored, 'new_vertices': [], 'errors': errors}
|
| 57 |
|
| 58 |
new_line = LineString([p1, p2])
|
| 59 |
|
|
@@ -61,7 +69,7 @@ class PaperState:
|
|
| 61 |
clipped = new_line.intersection(_UNIT_SQUARE)
|
| 62 |
if clipped.is_empty:
|
| 63 |
errors.append('outside_bounds')
|
| 64 |
-
return {'valid': False, 'anchored': anchored, 'new_vertices': [], 'errors': errors}
|
| 65 |
|
| 66 |
intersection_points: list[tuple[float, float]] = []
|
| 67 |
|
|
@@ -118,6 +126,11 @@ class PaperState:
|
|
| 118 |
wid = self.graph.add_vertex(wp[0], wp[1])
|
| 119 |
waypoint_ids.append(wid)
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
for i in range(len(waypoint_ids) - 1):
|
| 122 |
wa = waypoint_ids[i]
|
| 123 |
wb = waypoint_ids[i + 1]
|
|
@@ -138,6 +151,7 @@ class PaperState:
|
|
| 138 |
'anchored': anchored,
|
| 139 |
'new_vertices': new_vertex_coords,
|
| 140 |
'errors': errors,
|
|
|
|
| 141 |
}
|
| 142 |
|
| 143 |
def crease_edges(self) -> list[dict]:
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
from shapely.geometry import LineString, Point, Polygon
|
|
|
|
| 3 |
from typing import Optional
|
| 4 |
from .graph import CreaseGraph, VERTEX_TOL
|
| 5 |
|
|
|
|
| 33 |
return True
|
| 34 |
return False
|
| 35 |
|
| 36 |
+
def _edge_exists(self, v1_id: int, v2_id: int) -> bool:
    """Report whether the graph already holds an edge joining the two vertex IDs.

    The comparison is order-insensitive: (a, b) and (b, a) are the same edge.
    Every entry of ``self.graph.edges`` is scanned; each value is unpacked as a
    3-tuple whose first two items are the endpoint vertex IDs.
    """
    wanted = frozenset((v1_id, v2_id))
    return any(
        frozenset((end_a, end_b)) == wanted
        for end_a, end_b, _unused in self.graph.edges.values()
    )
|
| 43 |
+
|
| 44 |
def add_crease(self, p1: list, p2: list, assignment: str) -> dict:
|
| 45 |
errors: list[str] = []
|
| 46 |
|
|
|
|
| 50 |
'anchored': False,
|
| 51 |
'new_vertices': [],
|
| 52 |
'errors': ['invalid_assignment'],
|
| 53 |
+
'duplicate': False,
|
| 54 |
}
|
| 55 |
|
| 56 |
p1 = (float(p1[0]), float(p1[1]))
|
|
|
|
| 61 |
seg_len = np.hypot(p2[0] - p1[0], p2[1] - p1[1])
|
| 62 |
if seg_len < VERTEX_TOL:
|
| 63 |
errors.append('zero_length')
|
| 64 |
+
return {'valid': False, 'anchored': anchored, 'new_vertices': [], 'errors': errors, 'duplicate': False}
|
| 65 |
|
| 66 |
new_line = LineString([p1, p2])
|
| 67 |
|
|
|
|
| 69 |
clipped = new_line.intersection(_UNIT_SQUARE)
|
| 70 |
if clipped.is_empty:
|
| 71 |
errors.append('outside_bounds')
|
| 72 |
+
return {'valid': False, 'anchored': anchored, 'new_vertices': [], 'errors': errors, 'duplicate': False}
|
| 73 |
|
| 74 |
intersection_points: list[tuple[float, float]] = []
|
| 75 |
|
|
|
|
| 126 |
wid = self.graph.add_vertex(wp[0], wp[1])
|
| 127 |
waypoint_ids.append(wid)
|
| 128 |
|
| 129 |
+
duplicate = any(
|
| 130 |
+
self._edge_exists(waypoint_ids[i], waypoint_ids[i + 1])
|
| 131 |
+
for i in range(len(waypoint_ids) - 1)
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
for i in range(len(waypoint_ids) - 1):
|
| 135 |
wa = waypoint_ids[i]
|
| 136 |
wb = waypoint_ids[i + 1]
|
|
|
|
| 151 |
'anchored': anchored,
|
| 152 |
'new_vertices': new_vertex_coords,
|
| 153 |
'errors': errors,
|
| 154 |
+
'duplicate': duplicate,
|
| 155 |
}
|
| 156 |
|
| 157 |
def crease_edges(self) -> list[dict]:
|
env/prompts.py
CHANGED
|
@@ -6,6 +6,15 @@ _CORNERS = {(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0)}
|
|
| 6 |
_BOUNDARY_X = {0.0, 1.0}
|
| 7 |
_BOUNDARY_Y = {0.0, 1.0}
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
def _is_corner(x: float, y: float) -> bool:
|
| 11 |
return (round(x, 4), round(y, 4)) in _CORNERS
|
|
@@ -33,6 +42,25 @@ def format_target_for_prompt(target: dict) -> str:
|
|
| 33 |
return "\n".join(lines)
|
| 34 |
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
def format_anchor_points(paper_state) -> str:
|
| 37 |
corners = []
|
| 38 |
boundary_pts = []
|
|
@@ -95,13 +123,25 @@ def format_reward_feedback(reward: Optional[dict]) -> str:
|
|
| 95 |
return " " + " ".join(parts)
|
| 96 |
|
| 97 |
|
| 98 |
-
def code_as_policy_prompt(
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
{formatted_target}
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
RULES (must hold at every interior vertex):
|
| 106 |
- Kawasaki: alternating sector angles sum equally (each half = 180 degrees)
|
| 107 |
- Maekawa: |mountain_count - valley_count| = 2
|
|
@@ -129,16 +169,25 @@ def step_level_prompt(
|
|
| 129 |
step: int,
|
| 130 |
max_steps: int,
|
| 131 |
last_reward: Optional[dict] = None,
|
|
|
|
| 132 |
) -> str:
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
formatted_history = format_crease_history(paper_state)
|
| 135 |
formatted_anchors = format_anchor_points(paper_state)
|
| 136 |
formatted_reward = format_reward_feedback(last_reward)
|
| 137 |
|
| 138 |
-
return f"""
|
| 139 |
-
|
| 140 |
-
TARGET:
|
| 141 |
-
{formatted_target}
|
| 142 |
|
| 143 |
CURRENT STATE (step {step} of {max_steps}):
|
| 144 |
Creases placed:
|
|
|
|
| 6 |
_BOUNDARY_X = {0.0, 1.0}
|
| 7 |
_BOUNDARY_Y = {0.0, 1.0}
|
| 8 |
|
| 9 |
+
# Semantic descriptions for known target patterns (used when target coords are hidden)
|
| 10 |
+
DESCRIPTIONS: dict[str, str] = {
|
| 11 |
+
"valley_fold_half": "Fold the paper in half with a single valley fold along the horizontal center line.",
|
| 12 |
+
"mountain_fold_half": "Fold the paper in half with a single mountain fold along the vertical center line.",
|
| 13 |
+
"letter_fold": "Create a letter fold (two parallel valley folds dividing the paper into thirds).",
|
| 14 |
+
"diagonal_fold": "Fold the paper diagonally from one corner to the opposite corner.",
|
| 15 |
+
"waterbomb_base": "Create a waterbomb base with two valley folds along both diagonals.",
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
|
| 19 |
def _is_corner(x: float, y: float) -> bool:
|
| 20 |
return (round(x, 4), round(y, 4)) in _CORNERS
|
|
|
|
| 42 |
return "\n".join(lines)
|
| 43 |
|
| 44 |
|
| 45 |
+
def get_semantic_description(target_name: str, target: dict) -> str:
    """Produce a plain-English task description for a target crease pattern.

    A hand-written entry in DESCRIPTIONS wins when ``target_name`` has one.
    Otherwise the text is synthesised from the number of "V" and "M" entries
    in ``target["edges_assignment"]``; if neither occurs (or the key is
    missing) a generic sentence is returned.
    """
    if target_name in DESCRIPTIONS:
        return DESCRIPTIONS[target_name]

    # Fallback: tally valley / mountain assignments from the target dict.
    n_valley = 0
    n_mountain = 0
    for label in target.get("edges_assignment", []):
        if label == "V":
            n_valley += 1
        elif label == "M":
            n_mountain += 1

    if not (n_valley or n_mountain):
        return "Create an origami crease pattern with the given folds."

    phrases = []
    if n_valley:
        phrases.append(f"{n_valley} valley fold{'s' if n_valley != 1 else ''}")
    if n_mountain:
        phrases.append(f"{n_mountain} mountain fold{'s' if n_mountain != 1 else ''}")
    return f"Create an origami crease pattern with {', '.join(phrases)}."
|
| 62 |
+
|
| 63 |
+
|
| 64 |
def format_anchor_points(paper_state) -> str:
|
| 65 |
corners = []
|
| 66 |
boundary_pts = []
|
|
|
|
| 123 |
return " " + " ".join(parts)
|
| 124 |
|
| 125 |
|
| 126 |
+
def code_as_policy_prompt(
|
| 127 |
+
target: dict,
|
| 128 |
+
max_folds: int = 8,
|
| 129 |
+
semantic_description: Optional[str] = None,
|
| 130 |
+
) -> str:
|
| 131 |
+
if semantic_description is not None:
|
| 132 |
+
target_section = f"""TASK:
|
| 133 |
+
{semantic_description}
|
| 134 |
+
|
| 135 |
+
You are an origami designer. Given a description of what to fold, output a sequence of fold operations that build a crease pattern on a unit square [0,1]x[0,1]."""
|
| 136 |
+
else:
|
| 137 |
+
formatted_target = format_target_for_prompt(target)
|
| 138 |
+
target_section = f"""TARGET CREASE PATTERN:
|
| 139 |
{formatted_target}
|
| 140 |
|
| 141 |
+
You are an origami designer. Generate a fold sequence for a unit square [0,1]x[0,1]."""
|
| 142 |
+
|
| 143 |
+
return f"""{target_section}
|
| 144 |
+
|
| 145 |
RULES (must hold at every interior vertex):
|
| 146 |
- Kawasaki: alternating sector angles sum equally (each half = 180 degrees)
|
| 147 |
- Maekawa: |mountain_count - valley_count| = 2
|
|
|
|
| 169 |
step: int,
|
| 170 |
max_steps: int,
|
| 171 |
last_reward: Optional[dict] = None,
|
| 172 |
+
semantic_description: Optional[str] = None,
|
| 173 |
) -> str:
|
| 174 |
+
if semantic_description is not None:
|
| 175 |
+
target_section = f"""TASK:
|
| 176 |
+
{semantic_description}
|
| 177 |
+
|
| 178 |
+
You are an origami designer. Given a description of what to fold, add the next crease to build the pattern."""
|
| 179 |
+
else:
|
| 180 |
+
formatted_target = format_target_for_prompt(target)
|
| 181 |
+
target_section = f"""TARGET:
|
| 182 |
+
{formatted_target}
|
| 183 |
+
|
| 184 |
+
You are an origami designer building a crease pattern step by step."""
|
| 185 |
+
|
| 186 |
formatted_history = format_crease_history(paper_state)
|
| 187 |
formatted_anchors = format_anchor_points(paper_state)
|
| 188 |
formatted_reward = format_reward_feedback(last_reward)
|
| 189 |
|
| 190 |
+
return f"""{target_section}
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
CURRENT STATE (step {step} of {max_steps}):
|
| 193 |
Creases placed:
|
env/rewards.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import json
|
| 2 |
-
from .verifier import check_all_vertices, geometric_crease_coverage
|
| 3 |
from .paper_state import PaperState
|
| 4 |
|
| 5 |
|
|
@@ -28,66 +28,108 @@ def target_crease_edges(target: dict) -> list[dict]:
|
|
| 28 |
|
| 29 |
|
| 30 |
def compute_reward(
|
| 31 |
-
|
| 32 |
action_result: dict,
|
|
|
|
| 33 |
target: dict,
|
|
|
|
|
|
|
| 34 |
) -> dict:
|
| 35 |
"""
|
| 36 |
-
Compute the full reward dict for a fold action.
|
| 37 |
|
| 38 |
Args:
|
| 39 |
-
|
| 40 |
-
action_result: {'valid': bool, 'anchored': bool, '
|
|
|
|
| 41 |
target: FOLD target dict
|
|
|
|
|
|
|
| 42 |
|
| 43 |
Returns dict with keys:
|
| 44 |
-
format, anchored, kawasaki, maekawa, blb,
|
|
|
|
|
|
|
| 45 |
"""
|
| 46 |
r = {}
|
| 47 |
|
| 48 |
-
#
|
| 49 |
r['format'] = 1.0 if action_result.get('valid', False) else 0.0
|
| 50 |
if not r['format']:
|
| 51 |
r['total'] = -0.1
|
| 52 |
return r
|
| 53 |
|
| 54 |
-
#
|
| 55 |
r['anchored'] = 1.0 if action_result.get('anchored', False) else 0.3
|
|
|
|
| 56 |
|
| 57 |
-
#
|
| 58 |
-
vertex_scores = check_all_vertices(
|
| 59 |
r['kawasaki'] = vertex_scores['kawasaki']
|
| 60 |
r['maekawa'] = vertex_scores['maekawa']
|
| 61 |
r['blb'] = vertex_scores['blb']
|
|
|
|
| 62 |
|
| 63 |
-
#
|
| 64 |
t_edges = target_crease_edges(target)
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
r['economy'] = economy
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
r['completion'] = 10.0 if (r['progress'] > 0.9 and all_valid) else 0.0
|
| 72 |
|
| 73 |
-
#
|
| 74 |
-
r['efficiency'] = -0.01
|
| 75 |
|
| 76 |
# Weighted total
|
| 77 |
r['total'] = (
|
| 78 |
-
0.05 * r['anchored']
|
| 79 |
-
0.
|
| 80 |
-
0.
|
| 81 |
-
0.
|
| 82 |
-
0.
|
| 83 |
-
0.
|
| 84 |
-
r['
|
| 85 |
-
r['
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
)
|
| 87 |
return r
|
| 88 |
|
| 89 |
|
| 90 |
-
def compute_terminal_reward(
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
+
from .verifier import check_all_vertices, check_degree_sanity, geometric_crease_coverage
|
| 3 |
from .paper_state import PaperState
|
| 4 |
|
| 5 |
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
def compute_reward(
    prev_state: PaperState,
    action_result: dict,
    new_state: PaperState,
    target: dict,
    step: int,
    max_steps: int,
) -> dict:
    """
    Compute the full reward dict for a fold action.

    Only the format check is a hard gate: an invalid action returns early with
    just ``format`` and ``total`` (-0.1). Every later level is always computed
    and folded into a weighted sum, so the levels are not strictly
    lexicographic.

    Args:
        prev_state: PaperState BEFORE the action was applied
        action_result: {'valid': bool, 'anchored': bool, 'duplicate': bool, ...}
        new_state: PaperState AFTER the action was applied
        target: FOLD target dict
        step: current step index
        max_steps: maximum steps in episode; values below 1 are treated as 1
            when scaling the step cost so a zero-length episode cannot divide
            by zero

    Returns dict with keys (valid action):
        format, anchored, novelty, kawasaki, maekawa, blb, degree_sanity,
        progress, economy, assignment_accuracy, delta, regression,
        completion, efficiency, total
    For an invalid action only ``format`` and ``total`` are present.
    """
    r = {}

    # GATE 1: Format — did the action parse and apply?
    r['format'] = 1.0 if action_result.get('valid', False) else 0.0
    if not r['format']:
        r['total'] = -0.1
        return r

    # LEVEL 2: Structural sanity (scored, not gated)
    r['anchored'] = 1.0 if action_result.get('anchored', False) else 0.3
    r['novelty'] = 0.0 if action_result.get('duplicate', False) is True else 0.2

    # LEVEL 3: Local flat-foldability
    vertex_scores = check_all_vertices(new_state.graph)
    r['kawasaki'] = vertex_scores['kawasaki']
    r['maekawa'] = vertex_scores['maekawa']
    r['blb'] = vertex_scores['blb']
    r['degree_sanity'] = check_degree_sanity(new_state.graph)

    # LEVEL 4: Progress (absolute + delta)
    t_edges = target_crease_edges(target)
    old_coverage, _, _ = geometric_crease_coverage(prev_state, t_edges)
    new_coverage, economy, assignment_accuracy = geometric_crease_coverage(new_state, t_edges)
    coverage_change = new_coverage - old_coverage

    r['progress'] = new_coverage
    r['economy'] = economy
    r['assignment_accuracy'] = assignment_accuracy
    # Exactly one of delta / regression is non-zero: gains go to delta,
    # losses (a negative number) go to regression.
    r['delta'] = max(0.0, coverage_change)
    r['regression'] = min(0.0, coverage_change)

    # LEVEL 5: Completion bonus (degree_sanity is not part of this check)
    all_valid = (
        r['kawasaki'] == 1.0
        and r['maekawa'] == 1.0
        and r['blb'] == 1.0
    )
    r['completion'] = 10.0 if (r['progress'] > 0.9 and all_valid) else 0.0

    # LEVEL 6: Efficiency — escalating step cost.
    # Clamp the denominator so max_steps == 0 does not raise ZeroDivisionError.
    r['efficiency'] = -0.01 * (1 + step / max(max_steps, 1))

    # Weighted total; completion and efficiency enter unweighted, and
    # regression is <= 0 so its positive weight acts as a penalty.
    r['total'] = (
        0.05 * r['anchored']
        + 0.05 * r['novelty']
        + 0.06 * r['kawasaki']
        + 0.06 * r['maekawa']
        + 0.04 * r['blb']
        + 0.04 * r['degree_sanity']
        + 0.25 * r['progress']
        + 0.05 * r['economy']
        + 0.05 * r['assignment_accuracy']
        + 0.20 * r['delta']
        + 0.10 * r['regression']
        + r['completion']
        + r['efficiency']
    )
    return r
|
| 112 |
|
| 113 |
|
| 114 |
+
def compute_terminal_reward(
    state: PaperState,
    target: dict,
    max_steps: int,
) -> dict:
    """
    Score the final paper state reached after a whole fold sequence.

    Delegates to compute_reward using a newly constructed PaperState as the
    "before" state, a synthetic action result flagged valid, anchored and
    non-duplicate, and the step index set equal to max_steps.
    """
    synthetic_action = dict(valid=True, anchored=True, duplicate=False)
    blank_paper = PaperState()
    return compute_reward(
        prev_state=blank_paper,
        action_result=synthetic_action,
        new_state=state,
        target=target,
        step=max_steps,
        max_steps=max_steps,
    )
|
env/targets/validator_check.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
-
import json
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
targets_dir =
|
| 4 |
for fname in os.listdir(targets_dir):
|
| 5 |
if not fname.endswith(".fold"):
|
| 6 |
continue
|
| 7 |
-
with open(
|
| 8 |
d = json.load(f)
|
| 9 |
n_v = len(d["vertices_coords"])
|
| 10 |
n_e = len(d["edges_vertices"])
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
|
| 5 |
+
targets_dir = Path(__file__).parent
|
| 6 |
for fname in os.listdir(targets_dir):
|
| 7 |
if not fname.endswith(".fold"):
|
| 8 |
continue
|
| 9 |
+
with open(targets_dir / fname) as f:
|
| 10 |
d = json.load(f)
|
| 11 |
n_v = len(d["vertices_coords"])
|
| 12 |
n_e = len(d["edges_vertices"])
|
env/verifier.py
CHANGED
|
@@ -125,45 +125,85 @@ def geometric_crease_coverage(
|
|
| 125 |
target_edges: list[dict],
|
| 126 |
tol_pos: float = 0.05,
|
| 127 |
tol_angle_deg: float = 5.0,
|
| 128 |
-
) -> tuple[float, float]:
|
| 129 |
"""
|
| 130 |
Computes how well the current crease pattern matches the target.
|
| 131 |
|
| 132 |
Args:
|
|
|
|
| 133 |
target_edges: list of {'v1': (x1,y1), 'v2': (x2,y2), 'assignment': 'M'|'V'}
|
|
|
|
|
|
|
| 134 |
|
| 135 |
Returns:
|
| 136 |
-
(coverage, economy)
|
| 137 |
-
coverage: fraction of target creases matched [0, 1]
|
|
|
|
| 138 |
economy: penalty for excess creases [0, 1], 1.0 = no excess
|
|
|
|
|
|
|
| 139 |
"""
|
| 140 |
current_edges = state.crease_edges()
|
| 141 |
tol_angle_rad = np.deg2rad(tol_angle_deg)
|
| 142 |
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
| 144 |
for target in target_edges:
|
| 145 |
tx1, ty1 = target['v1']
|
| 146 |
tx2, ty2 = target['v2']
|
| 147 |
t_mid = ((tx1 + tx2) / 2.0, (ty1 + ty2) / 2.0)
|
| 148 |
t_angle = np.arctan2(ty2 - ty1, tx2 - tx1)
|
|
|
|
| 149 |
|
| 150 |
for current in current_edges:
|
| 151 |
cx1, cy1 = current['v1']
|
| 152 |
cx2, cy2 = current['v2']
|
| 153 |
c_mid = ((cx1 + cx2) / 2.0, (cy1 + cy2) / 2.0)
|
| 154 |
c_angle = np.arctan2(cy2 - cy1, cx2 - cx1)
|
|
|
|
| 155 |
|
| 156 |
mid_dist = np.hypot(c_mid[0] - t_mid[0], c_mid[1] - t_mid[1])
|
| 157 |
angle_distance = _angle_diff(c_angle, t_angle)
|
| 158 |
|
| 159 |
if mid_dist <= tol_pos and angle_distance <= tol_angle_rad:
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
break
|
| 162 |
|
| 163 |
-
coverage =
|
| 164 |
n_excess = max(0, len(current_edges) - len(target_edges))
|
| 165 |
economy = max(0.0, 1.0 - n_excess / max(len(target_edges), 1))
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
|
| 169 |
def check_all_vertices(graph: CreaseGraph) -> dict:
|
|
|
|
| 125 |
target_edges: list[dict],
|
| 126 |
tol_pos: float = 0.05,
|
| 127 |
tol_angle_deg: float = 5.0,
|
| 128 |
+
) -> tuple[float, float, float]:
|
| 129 |
"""
|
| 130 |
Computes how well the current crease pattern matches the target.
|
| 131 |
|
| 132 |
Args:
|
| 133 |
+
state: current paper state with crease graph
|
| 134 |
target_edges: list of {'v1': (x1,y1), 'v2': (x2,y2), 'assignment': 'M'|'V'}
|
| 135 |
+
tol_pos: position tolerance for midpoint matching
|
| 136 |
+
tol_angle_deg: angle tolerance in degrees for direction matching
|
| 137 |
|
| 138 |
Returns:
|
| 139 |
+
(coverage, economy, assignment_accuracy)
|
| 140 |
+
coverage: weighted fraction of target creases matched [0, 1];
|
| 141 |
+
1.0 if position+assignment match, 0.5 if position matches but assignment doesn't
|
| 142 |
economy: penalty for excess creases [0, 1], 1.0 = no excess
|
| 143 |
+
assignment_accuracy: fraction of positionally matched edges that also have correct M/V assignment [0, 1];
|
| 144 |
+
returns 1.0 if no positional matches (vacuous case)
|
| 145 |
"""
|
| 146 |
current_edges = state.crease_edges()
|
| 147 |
tol_angle_rad = np.deg2rad(tol_angle_deg)
|
| 148 |
|
| 149 |
+
total_score = 0.0
|
| 150 |
+
position_matches = 0
|
| 151 |
+
assignment_correct = 0
|
| 152 |
+
|
| 153 |
for target in target_edges:
|
| 154 |
tx1, ty1 = target['v1']
|
| 155 |
tx2, ty2 = target['v2']
|
| 156 |
t_mid = ((tx1 + tx2) / 2.0, (ty1 + ty2) / 2.0)
|
| 157 |
t_angle = np.arctan2(ty2 - ty1, tx2 - tx1)
|
| 158 |
+
t_assign = target.get('assignment', 'M')
|
| 159 |
|
| 160 |
for current in current_edges:
|
| 161 |
cx1, cy1 = current['v1']
|
| 162 |
cx2, cy2 = current['v2']
|
| 163 |
c_mid = ((cx1 + cx2) / 2.0, (cy1 + cy2) / 2.0)
|
| 164 |
c_angle = np.arctan2(cy2 - cy1, cx2 - cx1)
|
| 165 |
+
c_assign = current.get('assignment', 'M')
|
| 166 |
|
| 167 |
mid_dist = np.hypot(c_mid[0] - t_mid[0], c_mid[1] - t_mid[1])
|
| 168 |
angle_distance = _angle_diff(c_angle, t_angle)
|
| 169 |
|
| 170 |
if mid_dist <= tol_pos and angle_distance <= tol_angle_rad:
|
| 171 |
+
position_matches += 1
|
| 172 |
+
assign_match = (t_assign == c_assign)
|
| 173 |
+
if assign_match:
|
| 174 |
+
total_score += 1.0
|
| 175 |
+
assignment_correct += 1
|
| 176 |
+
else:
|
| 177 |
+
total_score += 0.5
|
| 178 |
break
|
| 179 |
|
| 180 |
+
coverage = total_score / max(len(target_edges), 1)
|
| 181 |
n_excess = max(0, len(current_edges) - len(target_edges))
|
| 182 |
economy = max(0.0, 1.0 - n_excess / max(len(target_edges), 1))
|
| 183 |
+
assignment_accuracy = (
|
| 184 |
+
assignment_correct / position_matches if position_matches > 0 else 1.0
|
| 185 |
+
)
|
| 186 |
+
return (coverage, economy, assignment_accuracy)
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def check_degree_sanity(graph: CreaseGraph) -> float:
    """
    Measure how many interior vertices have an even number of incident edges
    (an even degree is required for flat-foldability).

    The degree of a vertex is taken as ``len(graph.vertex_edges[vid])`` for
    each id returned by ``graph.interior_vertices()``.

    Returns:
        Share of interior vertices with even degree, in [0, 1].
        1.0 when every interior vertex qualifies, 0.0 when none does.
        1.0 as well when there are no interior vertices (vacuous case).
    """
    interior_ids = graph.interior_vertices()
    if not interior_ids:
        return 1.0

    n_even = 0
    for vertex_id in interior_ids:
        degree = len(graph.vertex_edges[vertex_id])
        if degree % 2 == 0:
            n_even += 1
    return n_even / len(interior_ids)
|
| 207 |
|
| 208 |
|
| 209 |
def check_all_vertices(graph: CreaseGraph) -> dict:
|