Spaces:

build-small-hackathon
/

hatchimera

Running on Zero

App Files Files Community

hatchimera / src /buddy_fusion /edit_parser.py

arkai2025

feat(edit): replace small-model selector with keyword + parametric assembler

f2b4b4e 20 days ago

Raw

History Blame Contribute Delete

4.89 kB

	"""English keyword -> parametric placement spec for the edit path. No model.

	Replaces the small-model selector: the edit path is pure Python, instant, and
	deterministic. An instruction is parsed into which catalog part to add, how many,
	how big, where, whether to rotate, and what color; assembler.assemble_part turns
	that into geometry. English-only by design (see CLAUDE.md / wiki).
	"""
	from __future__ import annotations

	import re
	from dataclasses import dataclass

	from .parts import PARTS, ANCHORS
	from .fallback import _COLOR_WORDS, _COLOR_HEX

	# Spelled-out quantities. Numerals are picked up by a separate regex in the
	# parser; the articles "a"/"an" read as a quantity of 1.
	_NUM = {"a": 1, "an": 1,
	        **{word: value for value, word in enumerate(
	            ("one", "two", "three", "four", "five",
	             "six", "seven", "eight", "nine", "ten"), start=1)}}

	# Size adjective -> float scale factor (cube size stays constant; the block
	# grows). Synonyms are grouped by factor; insertion order is kept because the
	# parser stops at the first key it finds in the instruction.
	_SIZE = {word: factor
	         for factor, words in (
	             (0.5, ("tiny", "teeny")),
	             (0.6, ("mini", "small", "little")),
	             (1.5, ("big", "large", "bigger")),
	             (2.0, ("huge", "giant", "massive", "enormous")),
	         )
	         for word in words}

	# Position word -> anchor override. Keys are single tokens that the parser
	# matches on word boundaries, so "back" does not fire inside "backpack".
	_ANCHOR = {word: anchor
	           for anchor, words in (
	               ("rear", ("behind", "back", "rear")),
	               ("front", ("front", "forward", "ahead")),
	               ("top", ("top", "above", "head", "atop")),
	               ("bottom", ("under", "below", "beneath", "bottom")),
	               ("left", ("left",)),
	               ("right", ("right",)),
	           )
	           for word in words}

	# Rotation phrases -> (axis, quarter_turns). The parser tests these as plain
	# substrings and takes the first group that hits.
	_ROT = [
	    (("upside down", "upside-down", "inverted"), ("x", 2)),
	    (("sideways", "on its side", "tilted"), ("z", 1)),
	    (("rotated", "turned", "spun"), ("y", 1)),
	]

	# Tokens that say nothing about which part is meant: articles and glue words,
	# request verbs, generic nouns, and size adjectives.
	_STOP = set(
	    "a an the of with and to its it on in "
	    "add give put make please some pair set "
	    "big small tiny huge giant little large".split()
	)


	def _norm(tok: str) -> str:
	    """Rough singularisation of one token.

	    Drops a final "s" from tokens longer than three characters, so that
	    "wings" and "wing" (or "ears" and "ear") compare equal. Shorter tokens
	    and tokens not ending in "s" come back unchanged.
	    """
	    if len(tok) > 3 and tok.endswith("s"):
	        return tok[:-1]
	    return tok


	def _toks(s: str) -> list[str]:
	    """Split *s* into normalised word tokens.

	    The text is lower-cased, every run of the letters a-z becomes one token
	    (digits, punctuation and underscores act as separators), and each token
	    is passed through _norm. Order and duplicates are preserved.
	    """
	    words = re.findall(r"[a-z]+", s.lower())
	    return list(map(_norm, words))


	# Per-part token sets, computed once at import time.
	# Id tokens come from the part id with underscores treated as spaces.
	_PART_IDTOKS = {
	    pid: set(_toks(" ".join(pid.split("_"))))
	    for pid in PARTS
	}
	# Description tokens come from the part's "desc" text with stop words removed.
	_PART_DESCTOKS = {
	    pid: {tok for tok in _toks(part["desc"]) if tok not in _STOP}
	    for pid, part in PARTS.items()
	}


	def match_part(text: str) -> str | None:
	    """Return the id of the catalog part that best matches *text*, or None.

	    The instruction is tokenised and stripped of stop words. A part is a
	    candidate when at least one of its id tokens or description tokens occurs
	    in the instruction. Candidates are ranked by, in order:

	    1. number of id tokens found in the instruction (more is better);
	    2. number of id tokens the part has (fewer is better, so "wing" picks
	       the generic `wings` rather than `wing_bat`);
	    3. number of description tokens found in the instruction.

	    Exact ties go to the part that comes first in PARTS. None is returned
	    when the instruction has no meaningful tokens or nothing matches.
	    """
	    wanted = set(_toks(text)) - _STOP
	    if not wanted:
	        return None

	    def rank(pid):
	        id_toks = _PART_IDTOKS[pid]
	        return (len(id_toks & wanted),
	                -len(id_toks),
	                len(_PART_DESCTOKS[pid] & wanted))

	    candidates = [
	        pid for pid in PARTS
	        if (_PART_IDTOKS[pid] & wanted) or (_PART_DESCTOKS[pid] & wanted)
	    ]
	    # max() returns the first maximal item, which keeps the first-in-PARTS
	    # tie-break; default=None covers the no-candidate case.
	    return max(candidates, key=rank, default=None)


	@dataclass(frozen=True)
	class PlacementSpec:
	    """Immutable description of one parsed edit: what to add and how.

	    Built by parse_edit. Instances are frozen, so they compare by value and
	    are hashable.
	    """

	    # Catalog part id (a key of PARTS).
	    part: str
	    # Name of the anchor the part attaches to.
	    anchor: str
	    # Size multiplier; parse_edit uses 1.0 when no size adjective is present.
	    scale: float
	    # Number of copies; parse_edit clamps this to 1..8.
	    count: int
	    # (axis, quarter_turns) such as ("x", 2), or None for no rotation.
	    rotation: tuple[str, int] | None
	    # Colour value taken from _COLOR_HEX (presumably a hex string - confirm
	    # against fallback.py), or None when no colour word was found.
	    color: str | None


	def _has_word(text: str, word: str) -> bool:
	    """Return True when *word* occurs in *text* as a whole word or phrase.

	    The match must not be preceded or followed by a word character, so "red"
	    is found in "a red hat" but not in "colored" or "feathered".
	    """
	    return re.search(rf"(?<!\w){re.escape(word)}(?!\w)", text) is not None


	def parse_edit(instruction: str) -> PlacementSpec | None:
	    """Parse an English instruction into a PlacementSpec.

	    Returns None when no catalog part is recognised (the caller then falls
	    back to the generative model). Otherwise each field is resolved
	    independently from the lower-cased text:

	    * count    - the first standalone numeral, else the first number word
	                 from _NUM (in table order), else 1; clamped to 1..8.
	    * scale    - the first size adjective from _SIZE (table order), else 1.0.
	    * anchor   - the first position word from _ANCHOR (table order), else the
	                 part's default anchor; anchors not in ANCHORS fall back to
	                 the default as well.
	    * rotation - the first _ROT group with a phrase contained in the text.
	    * color    - the first _COLOR_WORDS group with a word present as a whole
	                 word, mapped through _COLOR_HEX.

	    A None or empty instruction is treated as an empty string.
	    """
	    t = (instruction or "").lower()
	    part = match_part(t)
	    if part is None:
	        return None

	    max_count = 8

	    # count: an explicit numeral wins, then a number word, otherwise 1.
	    count = 1
	    m = re.search(r"\b(\d+)\b", t)
	    if m:
	        try:
	            count = int(m.group(1))
	        except ValueError:
	            # CPython refuses to convert extremely long digit strings; such
	            # a number would be clamped to the ceiling anyway.
	            count = max_count
	    else:
	        for w, n in _NUM.items():
	            if w in ("a", "an", "one"):
	                continue  # articles don't signal a real multi-count
	            if _has_word(t, w):
	                count = n
	                break
	    count = max(1, min(max_count, count))

	    # scale: first size adjective found, in table order.
	    scale = next((s for w, s in _SIZE.items() if _has_word(t, w)), 1.0)

	    # anchor: first position word found, else the part's own default.
	    default_anchor = PARTS[part]["default_anchor"]
	    anchor = next((a for w, a in _ANCHOR.items() if _has_word(t, w)),
	                  default_anchor)

	    # rotation: phrases are deliberately matched as plain substrings.
	    rotation = next(
	        (rot for phrases, rot in _ROT if any(p in t for p in phrases)),
	        None,
	    )

	    # color: matched on whole words so that e.g. "red" does not fire inside
	    # "colored" or "feathered".
	    # NOTE(review): assumes _COLOR_WORDS entries are complete words/phrases;
	    # confirm none of them rely on substring hits (e.g. "gold" for "golden").
	    color = None
	    for col, words in _COLOR_WORDS.items():
	        if any(_has_word(t, w) for w in words):
	            color = _COLOR_HEX[col]
	            break

	    # Guard against an override that this catalog does not define.
	    if anchor not in ANCHORS:
	        anchor = default_anchor
	    return PlacementSpec(part=part, anchor=anchor, scale=scale,
	                         count=count, rotation=rotation, color=color)