Spaces:
Running on Zero
Running on Zero
| """English keyword -> parametric placement spec for the edit path. No model. | |
| Replaces the small-model selector: the edit path is pure Python, instant, and | |
| deterministic. An instruction is parsed into which catalog part to add, how many, | |
| how big, where, whether to rotate, and what color; assembler.assemble_part turns | |
| that into geometry. English-only by design (see CLAUDE.md / wiki). | |
| """ | |
| from __future__ import annotations | |
| import re | |
| from dataclasses import dataclass | |
| from .parts import PARTS, ANCHORS | |
| from .fallback import _COLOR_WORDS, _COLOR_HEX | |
| # Count words. Digits are matched separately. Articles count as 1. | |
| _NUM = {"a": 1, "an": 1, "one": 1, "two": 2, "three": 3, "four": 4, "five": 5, | |
| "six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10} | |
| # Size adjective -> float scale (cube size constant; the block grows). | |
| _SIZE = {"tiny": 0.5, "teeny": 0.5, "mini": 0.6, "small": 0.6, "little": 0.6, | |
| "big": 1.5, "large": 1.5, "bigger": 1.5, | |
| "huge": 2.0, "giant": 2.0, "massive": 2.0, "enormous": 2.0} | |
| # Position word -> anchor override. Single tokens, matched on word boundaries | |
| # (so "back" never fires inside "backpack"). | |
| _ANCHOR = {"behind": "rear", "back": "rear", "rear": "rear", | |
| "front": "front", "forward": "front", "ahead": "front", | |
| "top": "top", "above": "top", "head": "top", "atop": "top", | |
| "under": "bottom", "below": "bottom", "beneath": "bottom", "bottom": "bottom", | |
| "left": "left", "right": "right"} | |
| # Rotation phrases -> (axis, quarter_turns). Checked as substrings, longest first. | |
| _ROT = [(("upside down", "upside-down", "inverted"), ("x", 2)), | |
| (("sideways", "on its side", "tilted"), ("z", 1)), | |
| (("rotated", "turned", "spun"), ("y", 1))] | |
| # Tokens that carry no part identity (articles, sizes, verbs, generic nouns). | |
| _STOP = {"a", "an", "the", "of", "with", "and", "to", "its", "it", "on", "in", | |
| "add", "give", "put", "make", "please", "some", "pair", "set", | |
| "big", "small", "tiny", "huge", "giant", "little", "large"} | |
| def _norm(tok: str) -> str: | |
| """Crude singular: strip a trailing plural 's' so wings/wing, ears/ear match.""" | |
| return tok[:-1] if len(tok) > 3 and tok.endswith("s") else tok | |
def _toks(s: str) -> list[str]:
    """Split *s* into lowercase a-z words and singularise each with _norm.

    Anything that is not an ASCII letter (digits, underscores, punctuation)
    acts as a separator and is dropped.
    """
    words = re.findall(r"[a-z]+", s.lower())
    return list(map(_norm, words))
# Per-part token sets, computed once at import time so that match_part only
# has to do set intersections.
# Tokens of the part id itself (underscores act as word separators).
_PART_IDTOKS: dict[str, set[str]] = {
    pid: set(_toks(pid.replace("_", " "))) for pid in PARTS
}
# Tokens of the part's "desc" text, with stop words removed.
_PART_DESCTOKS: dict[str, set[str]] = {
    pid: set(_toks(spec["desc"])).difference(_STOP) for pid, spec in PARTS.items()
}
def match_part(text: str) -> str | None:
    """Return the best-matching catalog part id for *text*, or None.

    The instruction is tokenised and stripped of stop words. Each part is
    ranked by, in order:

    1. how many of its id tokens appear in the instruction (more is better);
    2. how generic its id is (fewer id tokens wins, so "wing" picks the
       generic `wings` rather than `wing_bat`);
    3. how many of its description tokens appear in the instruction.

    Parts with neither an id hit nor a description hit are not considered.
    On an exact tie the part that comes first in PARTS wins.
    """
    words = set(_toks(text)) - _STOP
    if not words:
        return None

    def rank(pid: str) -> tuple[int, int, int]:
        # (id hits, negated id length, description hits); compared as a tuple.
        id_toks = _PART_IDTOKS[pid]
        return (len(id_toks & words), -len(id_toks), len(_PART_DESCTOKS[pid] & words))

    ranked = ((rank(pid), pid) for pid in PARTS)
    candidates = [(key, pid) for key, pid in ranked if key[0] or key[2]]
    if not candidates:
        return None
    # max() returns the first maximal item, which keeps the earliest part on ties.
    return max(candidates, key=lambda scored: scored[0])[1]
| class PlacementSpec: | |
| part: str | |
| anchor: str | |
| scale: float | |
| count: int | |
| rotation: tuple | None | |
| color: str | None | |
def parse_edit(instruction: str) -> PlacementSpec | None:
    """Turn an English edit instruction into a PlacementSpec.

    Returns None when no catalog part is recognised, so the caller can fall
    back to the generative model. Otherwise each attribute is filled from the
    first keyword that matches, in the iteration order of its lookup table:

    - count: the first standalone digit run wins, else a number word from
      _NUM (articles and "one" are ignored), else 1; clamped to 1..8.
    - scale: first size adjective from _SIZE, else 1.0.
    - anchor: first position word from _ANCHOR, else the part's default
      anchor; an anchor not present in ANCHORS also falls back to the default.
    - rotation: first _ROT group with a phrase found as a substring, else None.
    - color: first _COLOR_WORDS entry with a word found as a substring,
      mapped through _COLOR_HEX, else None.
    """
    text = (instruction or "").lower()
    part = match_part(text)
    if part is None:
        return None

    def has_word(word: str) -> bool:
        # Whole-word match, so e.g. "back" does not fire inside "backpack".
        return re.search(rf"\b{word}\b", text) is not None

    digits = re.search(r"\b(\d+)\b", text)
    if digits:
        count = int(digits.group(1))
    else:
        # Articles and "one" never signal a real multi-count; skip them.
        count = next(
            (n for w, n in _NUM.items() if w not in ("a", "an", "one") and has_word(w)),
            1,
        )
    count = max(1, min(8, count))

    scale = next((s for w, s in _SIZE.items() if has_word(w)), 1.0)

    default_anchor = PARTS[part]["default_anchor"]
    anchor = next((a for w, a in _ANCHOR.items() if has_word(w)), default_anchor)

    # Rotation phrases and color words are plain substring checks.
    rotation = next(
        (rot for phrases, rot in _ROT if any(p in text for p in phrases)),
        None,
    )
    color = next(
        (_COLOR_HEX[col] for col, words in _COLOR_WORDS.items()
         if any(w in text for w in words)),
        None,
    )

    if anchor not in ANCHORS:
        anchor = default_anchor
    return PlacementSpec(part=part, anchor=anchor, scale=scale,
                         count=count, rotation=rotation, color=color)