ThreeGen / llm.py
bolajiev
Sprint 5: hierarchical GroupNode with layout engine
cf56b0c
Raw
History Blame Contribute Delete
22.5 kB
"""
Small-model layer. Default: Qwen/Qwen2.5-Coder-3B-Instruct (the safety net).
Swap MODEL_ID to Qwen/Qwen2.5-Coder-1.5B-Instruct or -0.5B-Instruct (smaller,
stronger "build small" pitch) via env var. The model only ever emits the
scene JSON defined in scene.py — never raw Three.js.
UPGRADE PATH (recommended for reliability): replace the free-form generate()
with grammar-constrained decoding (outlines / xgrammar / llguidance) bound to
the Scene schema, so output is *guaranteed* valid JSON. See CLAUDE_CODE_BRIEF.md.
"""
from __future__ import annotations
import json
import logging
import os
import threading
# Model identifier handed to transformers' from_pretrained(); the MODEL_ID
# environment variable overrides the built-in fallback.
MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen2.5-Coder-3B-Instruct")
# Maximum user-prompt length in characters; run_llm() truncates anything longer.
# Read from the MAX_PROMPT_CHARS environment variable and parsed as an int.
MAX_PROMPT_CHARS = int(os.getenv("MAX_PROMPT_CHARS", "500"))
# Module-level logger named after this module.
log = logging.getLogger(__name__)
# System prompt sent as the first chat message by run_llm(). It tells the model
# to emit only JSON and spells out the scene-graph vocabulary: primitive
# objects, extrude shapes, text3d, the badge_with_text template, material
# presets, and group/layout nodes. Every character of this string is part of
# the model input, so edit it deliberately.
SYSTEM = """You convert a short scene description into a JSON scene graph for a 3D renderer.
Output ONLY valid JSON. No markdown, no comments, no prose, no code fences.
Schema — primitive object:
{
"background": "#RRGGBB",
"objects": [
{
"shape": "box|sphere|cylinder|cone|torus|torusKnot|plane|tetrahedron|icosahedron|dodecahedron|octahedron|capsule|ring|circle|tube|roundedBox",
"position": [x, y, z],
"rotation": [x, y, z],
"scale": [x, y, z],
"color": "#RRGGBB",
"material": "standard|basic|phong|wireframe",
"preset": "gold|chrome|glass|neon|matte|plastic",
"metalness": 0.0-1.0,
"roughness": 0.0-1.0,
"emissive": "#RRGGBB",
"params": {}
}
],
"lights": [
{"type": "ambient|directional|point", "color": "#RRGGBB", "intensity": 1.0, "position": [x, y, z]}
],
"animation": {"type": "none|rotate|float|orbit", "speed": 1.0, "axis": "x|y|z"}
}
For stars, badges, shields, hearts, hexagons, coins, logos, or emblems use an EXTRUDE object instead:
{
"type": "extrude",
"shape": "star|heart|hexagon|badge|shield",
"depth": 0.2,
"bevel": true,
"color": "#RRGGBB",
"preset": "gold|chrome|glass|neon|matte|plastic",
"metalness": 0.0-1.0,
"roughness": 0.0-1.0,
"emissive": "#RRGGBB",
"position": [x, y, z]
}
For 3D text (words, numbers, short labels) use a TEXT3D object (Latin chars only, max 24 chars):
{
"type": "text3d",
"text": "HELLO",
"size": 0.8,
"depth": 0.2,
"bevel": true,
"color": "#RRGGBB",
"preset": "gold|chrome|glass|neon|matte|plastic",
"metalness": 0.0-1.0,
"roughness": 0.0-1.0,
"position": [x, y, z]
}
Use text3d for: single words, numbers, initials, short labels (max 2 text objects per scene).
For a badge/emblem with text on it, use the badge_with_text TEMPLATE — never place text3d manually on an extrude:
{
"background": "#0d1117",
"template": {
"name": "badge_with_text",
"shape": "star|shield|hexagon|badge|heart",
"text": "WORD",
"color_badge": "#RRGGBB",
"color_text": "#RRGGBB",
"metalness": 0.0-1.0,
"roughness": 0.0-1.0,
"preset_badge": "gold|chrome|glass|neon|matte|plastic"
},
"lights": [...],
"animation": {...}
}
The template positions and scales the text automatically — never set text position or size.
Material preset notes:
- gold/chrome: use for shiny metallic looks; color overrides the base hue
- glass: transparent refractive; color adds a tint
- neon: glowing edge; set color to the desired glow color
- matte: no shine, soft diffuse
- plastic: slight sheen, clearcoat
Omit preset to use metalness/roughness directly.
To arrange multiple objects together use a GROUP node:
{
"type": "group",
"layout": "row|column|stack|grid|none",
"gap": 0.3,
"cols": 3,
"position": [x, y, z],
"rotation": [x, y, z],
"scale": [x, y, z],
"children": [ ...any node types, including nested groups... ]
}
Layout rules:
- "row": children spaced along x-axis, auto-centered
- "column" or "stack": children spaced along y-axis, auto-centered (first child = bottom)
- "grid": children tiled on x-z plane, cols wide
- "none": children keep their own positions
When layout is set, DO NOT set child positions — the compiler places them automatically.
Groups can be nested. Use groups for: rows of items, stacked arrangements, any scene needing multiple evenly-spaced objects.
Keep scenes small (1-6 objects total). Center the composition near the origin."""
# Few-shot demonstrations: alternating user / assistant chat messages that
# run_llm() splices between the SYSTEM message and the real user prompt.
# Each assistant turn is produced with json.dumps(), so the example the model
# sees is always a syntactically valid, single-line JSON string.
FEWSHOT = [
# --- Primitive objects with explicit positions ---
{"role": "user", "content": "a glowing green icosahedron floating in the dark"},
{"role": "assistant", "content": json.dumps({
"background": "#05060a",
"objects": [{
"shape": "icosahedron", "color": "#39ff14", "material": "standard",
"metalness": 0.2, "roughness": 0.3, "emissive": "#0a3a0a",
"position": [0, 0, 0], "params": {"radius": 0.8}
}],
"lights": [
{"type": "ambient", "intensity": 0.4},
{"type": "point", "color": "#39ff14", "intensity": 2.0, "position": [2, 2, 2]}
],
"animation": {"type": "float", "speed": 1.2, "axis": "y"}
})},
{"role": "user", "content": "three glowing cubes stacked into a tower"},
{"role": "assistant", "content": json.dumps({
"background": "#0b0e14",
"objects": [
{"shape": "box", "color": "#39ff14", "emissive": "#0a3a0a", "material": "standard",
"metalness": 0.2, "roughness": 0.3, "position": [0, 0.8, 0]},
{"shape": "box", "color": "#39ff14", "emissive": "#0a3a0a", "material": "standard",
"metalness": 0.2, "roughness": 0.3, "position": [0, 0.0, 0]},
{"shape": "box", "color": "#39ff14", "emissive": "#0a3a0a", "material": "standard",
"metalness": 0.2, "roughness": 0.3, "position": [0, -0.8, 0]},
],
"lights": [
{"type": "ambient", "intensity": 0.5},
{"type": "directional", "intensity": 1.1, "position": [5, 8, 6]},
],
"animation": {"type": "rotate", "speed": 1.0, "axis": "y"},
})},
{"role": "user", "content": "a red sphere floating above a flat grey plane"},
{"role": "assistant", "content": json.dumps({
"background": "#1a1a2e",
"objects": [
{"shape": "sphere", "color": "#ff4444", "emissive": "#330000", "material": "standard",
"metalness": 0.1, "roughness": 0.5, "position": [0, 0.8, 0], "params": {"radius": 0.5}},
{"shape": "plane", "color": "#555566", "material": "standard",
"metalness": 0.0, "roughness": 0.9, "position": [0, -0.2, 0],
"rotation": [-1.5708, 0, 0], "params": {"width": 6, "height": 6}},
],
"lights": [
{"type": "ambient", "intensity": 0.4},
{"type": "directional", "color": "#ffffff", "intensity": 1.2, "position": [5, 8, 6]},
],
"animation": {"type": "float", "speed": 0.8, "axis": "y"},
})},
# NOTE(review): this example emits a `"type": "stack"` node with an `axis`
# key. The SYSTEM prompt only describes `"type": "group"` with
# `"layout": "stack"` — confirm scene.py still accepts this form.
{"role": "user", "content": "a classic burger"},
{"role": "assistant", "content": json.dumps({
"background": "#1a1209",
"objects": [{
"type": "stack",
"axis": "y",
"gap": 0.02,
"children": [
{"shape": "sphere", "color": "#c8a96e", "params": {"radius": 0.45}},
{"shape": "cylinder", "color": "#3a8a3a", "params": {"radiusTop": 0.52, "radiusBottom": 0.52, "height": 0.1}},
{"shape": "cylinder", "color": "#5a3a1a", "params": {"radiusTop": 0.5, "radiusBottom": 0.5, "height": 0.18}},
{"shape": "cylinder", "color": "#c8a96e", "params": {"radiusTop": 0.52, "radiusBottom": 0.55, "height": 0.32}},
]
}],
"lights": [
{"type": "ambient", "intensity": 0.5},
{"type": "directional", "intensity": 1.3, "position": [5, 8, 6]},
],
"animation": {"type": "rotate", "speed": 0.8, "axis": "y"},
})},
# NOTE(review): the `snowman` template is not described in the SYSTEM prompt
# (only `badge_with_text` is) — presumably defined in scene.py; confirm.
{"role": "user", "content": "build me a snowman"},
{"role": "assistant", "content": json.dumps({
"background": "#0d1b2a",
"template": {"name": "snowman", "color_body": "#e8e8e8", "color_hat": "#1a1a1a"},
"lights": [
{"type": "ambient", "intensity": 0.5},
{"type": "directional", "intensity": 1.3, "position": [5, 8, 6]},
],
"animation": {"type": "rotate", "speed": 0.5, "axis": "y"},
})},
{"role": "user", "content": "a cute capsule character with a rounded body"},
{"role": "assistant", "content": json.dumps({
"background": "#0d1b2a",
"objects": [
{"shape": "capsule", "color": "#ff6b6b", "emissive": "#330000", "material": "standard",
"metalness": 0.1, "roughness": 0.5, "position": [0, 0, 0],
"params": {"radius": 0.35, "length": 0.6}},
{"shape": "sphere", "color": "#ffe8cc", "material": "standard",
"metalness": 0.0, "roughness": 0.8, "position": [0, 0.72, 0],
"params": {"radius": 0.28}},
],
"lights": [
{"type": "ambient", "intensity": 0.5},
{"type": "directional", "intensity": 1.3, "position": [5, 8, 6]},
],
"animation": {"type": "float", "speed": 1.0, "axis": "y"},
})},
# NOTE(review): second example using the `"type": "stack"` / `axis` form
# rather than the group/layout form described in SYSTEM — confirm intended.
{"role": "user", "content": "a stack of soft rounded boxes"},
{"role": "assistant", "content": json.dumps({
"background": "#0b0e14",
"objects": [{
"type": "stack", "axis": "y", "gap": 0.08,
"children": [
{"shape": "roundedBox", "color": "#ff9f43", "material": "standard",
"metalness": 0.1, "roughness": 0.4,
"params": {"width": 1.2, "height": 0.4, "depth": 1.2, "radius": 0.12}},
{"shape": "roundedBox", "color": "#ee5a24", "material": "standard",
"metalness": 0.1, "roughness": 0.4,
"params": {"width": 1.0, "height": 0.4, "depth": 1.0, "radius": 0.12}},
{"shape": "roundedBox", "color": "#c0392b", "material": "standard",
"metalness": 0.1, "roughness": 0.4,
"params": {"width": 0.8, "height": 0.4, "depth": 0.8, "radius": 0.12}},
],
}],
"lights": [
{"type": "ambient", "intensity": 0.5},
{"type": "directional", "intensity": 1.3, "position": [5, 8, 6]},
],
"animation": {"type": "rotate", "speed": 0.8, "axis": "y"},
})},
# --- Extrude objects, with explicit metalness/roughness or a material preset ---
{"role": "user", "content": "a gold star badge"},
{"role": "assistant", "content": json.dumps({
"background": "#1a1205",
"objects": [{
"type": "extrude", "shape": "star", "depth": 0.3, "bevel": True,
"color": "#ffd700", "material": "standard",
"metalness": 0.9, "roughness": 0.2, "emissive": "#332200",
"position": [0, 0, 0],
}],
"lights": [
{"type": "ambient", "intensity": 0.3},
{"type": "directional", "color": "#fff8e0", "intensity": 2.0, "position": [3, 5, 4]},
],
"animation": {"type": "rotate", "speed": 0.6, "axis": "y"},
})},
{"role": "user", "content": "a shield emblem"},
{"role": "assistant", "content": json.dumps({
"background": "#0d1117",
"objects": [{
"type": "extrude", "shape": "shield", "depth": 0.25, "bevel": True,
"color": "#3a6bc4", "material": "standard",
"metalness": 0.5, "roughness": 0.3, "emissive": "#061833",
"position": [0, 0, 0],
}],
"lights": [
{"type": "ambient", "intensity": 0.4},
{"type": "directional", "color": "#ffffff", "intensity": 1.8, "position": [4, 6, 3]},
],
"animation": {"type": "rotate", "speed": 0.5, "axis": "y"},
})},
{"role": "user", "content": "a glass star badge"},
{"role": "assistant", "content": json.dumps({
"background": "#0a0a18",
"objects": [{
"type": "extrude", "shape": "star", "depth": 0.25, "bevel": True,
"color": "#aaddff", "preset": "glass",
"position": [0, 0, 0],
}],
"lights": [
{"type": "ambient", "intensity": 0.6},
{"type": "directional", "color": "#ffffff", "intensity": 2.0, "position": [5, 8, 4]},
{"type": "point", "color": "#88ccff", "intensity": 1.5, "position": [-3, 2, 3]},
],
"animation": {"type": "rotate", "speed": 0.5, "axis": "y"},
})},
{"role": "user", "content": "a neon glowing shield"},
{"role": "assistant", "content": json.dumps({
"background": "#050508",
"objects": [{
"type": "extrude", "shape": "shield", "depth": 0.2, "bevel": True,
"color": "#ff2266", "preset": "neon",
"position": [0, 0, 0],
}],
"lights": [
{"type": "ambient", "intensity": 0.1},
{"type": "point", "color": "#ff2266", "intensity": 3.0, "position": [0, 1, 2]},
],
"animation": {"type": "float", "speed": 1.0, "axis": "y"},
})},
{"role": "user", "content": "a chrome hexagon"},
{"role": "assistant", "content": json.dumps({
"background": "#1a1a2e",
"objects": [{
"type": "extrude", "shape": "hexagon", "depth": 0.3, "bevel": True,
"color": "#ffffff", "preset": "chrome",
"position": [0, 0, 0],
}],
"lights": [
{"type": "ambient", "intensity": 0.4},
{"type": "directional", "color": "#ffffff", "intensity": 2.5, "position": [4, 6, 3]},
{"type": "point", "color": "#88aaff", "intensity": 1.0, "position": [-3, 2, 2]},
],
"animation": {"type": "rotate", "speed": 0.6, "axis": "y"},
})},
# --- text3d objects ---
{"role": "user", "content": "the word HELLO in 3D"},
{"role": "assistant", "content": json.dumps({
"background": "#0b0e14",
"objects": [{
"type": "text3d", "text": "HELLO",
"size": 0.8, "depth": 0.2, "bevel": True,
"color": "#88ccff", "metalness": 0.3, "roughness": 0.4,
"position": [0, 0, 0],
}],
"lights": [
{"type": "ambient", "intensity": 0.4},
{"type": "directional", "color": "#ffffff", "intensity": 2.0, "position": [5, 8, 5]},
],
"animation": {"type": "rotate", "speed": 0.5, "axis": "y"},
})},
{"role": "user", "content": "a gold 3D number 1"},
{"role": "assistant", "content": json.dumps({
"background": "#1a1205",
"objects": [{
"type": "text3d", "text": "1",
"size": 1.2, "depth": 0.3, "bevel": True,
"color": "#ffd700", "preset": "gold",
"position": [0, 0, 0],
}],
"lights": [
{"type": "ambient", "intensity": 0.3},
{"type": "directional", "color": "#fff8e0", "intensity": 2.5, "position": [4, 6, 4]},
],
"animation": {"type": "rotate", "speed": 0.5, "axis": "y"},
})},
# --- badge_with_text template (no "objects" key; the template stands in for it) ---
{"role": "user", "content": "a star badge with the text PRO"},
{"role": "assistant", "content": json.dumps({
"background": "#0d1117",
"template": {
"name": "badge_with_text",
"shape": "star",
"text": "PRO",
"color_badge": "#3a6bc4",
"color_text": "#ffffff",
"metalness": 0.7,
"roughness": 0.2,
},
"lights": [
{"type": "ambient", "intensity": 0.4},
{"type": "directional", "color": "#ffffff", "intensity": 2.0, "position": [3, 5, 4]},
],
"animation": {"type": "rotate", "speed": 0.5, "axis": "y"},
})},
{"role": "user", "content": "a shield badge with the text NEW"},
{"role": "assistant", "content": json.dumps({
"background": "#0d1117",
"template": {
"name": "badge_with_text",
"shape": "shield",
"text": "NEW",
"color_badge": "#22aa66",
"color_text": "#ffffff",
"metalness": 0.5,
"roughness": 0.3,
},
"lights": [
{"type": "ambient", "intensity": 0.4},
{"type": "directional", "color": "#ffffff", "intensity": 2.0, "position": [4, 6, 4]},
],
"animation": {"type": "rotate", "speed": 0.5, "axis": "y"},
})},
# --- group nodes with a layout; children carry no "position" of their own ---
{"role": "user", "content": "a row of three coins"},
{"role": "assistant", "content": json.dumps({
"background": "#1a1205",
"objects": [{
"type": "group",
"layout": "row",
"gap": 0.4,
"children": [
{"type": "extrude", "shape": "badge", "depth": 0.08, "bevel": True,
"color": "#ffd700", "preset": "gold"},
{"type": "extrude", "shape": "badge", "depth": 0.08, "bevel": True,
"color": "#ffd700", "preset": "gold"},
{"type": "extrude", "shape": "badge", "depth": 0.08, "bevel": True,
"color": "#ffd700", "preset": "gold"},
],
}],
"lights": [
{"type": "ambient", "intensity": 0.3},
{"type": "directional", "color": "#fff8e0", "intensity": 2.5, "position": [4, 6, 4]},
],
"animation": {"type": "rotate", "speed": 0.5, "axis": "y"},
})},
{"role": "user", "content": "a tall trophy: a star on a stacked column"},
{"role": "assistant", "content": json.dumps({
"background": "#1a1205",
"objects": [{
"type": "group",
"layout": "stack",
"gap": 0.1,
"children": [
{"shape": "cylinder", "color": "#ffd700", "preset": "gold",
"params": {"radiusTop": 0.08, "radiusBottom": 0.18, "height": 1.2}},
{"type": "extrude", "shape": "star", "depth": 0.2, "bevel": True,
"color": "#ffd700", "preset": "gold"},
],
}],
"lights": [
{"type": "ambient", "intensity": 0.3},
{"type": "directional", "color": "#fff8e0", "intensity": 2.5, "position": [4, 6, 4]},
],
"animation": {"type": "rotate", "speed": 0.5, "axis": "y"},
})},
{"role": "user", "content": "three cubes in a row"},
{"role": "assistant", "content": json.dumps({
"background": "#0b0e14",
"objects": [{
"type": "group",
"layout": "row",
"gap": 0.3,
"children": [
{"shape": "box", "color": "#ff6b6b", "material": "standard",
"metalness": 0.1, "roughness": 0.5},
{"shape": "box", "color": "#ffd700", "material": "standard",
"metalness": 0.1, "roughness": 0.5},
{"shape": "box", "color": "#6bffb8", "material": "standard",
"metalness": 0.1, "roughness": 0.5},
],
}],
"lights": [
{"type": "ambient", "intensity": 0.5},
{"type": "directional", "intensity": 1.3, "position": [5, 8, 6]},
],
"animation": {"type": "rotate", "speed": 0.6, "axis": "y"},
})},
]
# Lazily populated by _load(): tokenizer, model and device string.
# All three stay None until the first successful load.
_tok = _model = _device = None
# Held by _load() around its check-and-load so concurrent callers serialise.
_lock = threading.Lock()
def _load():
    """Return ``(tokenizer, model, device)``, loading them on first use.

    The objects are cached in the module globals ``_tok``, ``_model`` and
    ``_device``. The whole check-and-load runs under ``_lock`` so that
    concurrent first callers do not load the model twice. ``torch`` and
    ``transformers`` are imported here rather than at module import time, so
    they are only needed once a model is actually requested.
    """
    global _tok, _model, _device
    with _lock:
        if _model is not None:
            # Already loaded by an earlier call; hand back the cached objects.
            return _tok, _model, _device

        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        log.info("Loading model %s", MODEL_ID)
        # Use the GPU when torch reports one, otherwise fall back to CPU.
        if torch.cuda.is_available():
            _device = "cuda"
        else:
            _device = "cpu"
        _tok = AutoTokenizer.from_pretrained(MODEL_ID)
        # Weights are loaded in bfloat16 and then moved to the chosen device.
        weights = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, torch_dtype=torch.bfloat16
        )
        _model = weights.to(_device)
        log.info("Model loaded on %s", _device)
        return _tok, _model, _device
def run_llm(prompt: str, max_new_tokens: int = 512) -> str:
    """Run the chat model on *prompt* and return the newly generated text.

    The prompt is cut to ``MAX_PROMPT_CHARS`` characters (with a warning) and
    then sent as the final user turn after the ``SYSTEM`` message and the
    ``FEWSHOT`` examples. Generation samples with temperature 0.4 and
    top-p 0.9, so repeated calls may differ.

    Args:
        prompt: Scene description from the user.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded completion only (prompt tokens removed, special tokens
        skipped). It is expected to be a JSON object, but nothing here parses
        or validates it.
    """
    if len(prompt) > MAX_PROMPT_CHARS:
        prompt = prompt[:MAX_PROMPT_CHARS]
        log.warning("Prompt truncated to %d chars", MAX_PROMPT_CHARS)

    tok, model, device = _load()

    # system prompt -> few-shot pairs -> the actual request
    conversation = [{"role": "system", "content": SYSTEM}]
    conversation.extend(FEWSHOT)
    conversation.append({"role": "user", "content": prompt})

    rendered = tok.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True)
    inputs = tok(rendered, return_tensors="pt").to(device)
    prompt_len = inputs.input_ids.shape[1]

    sequences = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.4,
        top_p=0.9,
        pad_token_id=tok.eos_token_id,
    )
    # generate() returns prompt + completion; keep only the completion tokens.
    completion_ids = sequences[0][prompt_len:]
    result = tok.decode(completion_ids, skip_special_tokens=True)
    log.info("Generated %d chars for prompt: %.60s", len(result), prompt)
    return result
def mock_scene_json(prompt: str) -> str:
    """Build a scene JSON string from keywords in *prompt*, without a model.

    Intended for building and testing the UI without a GPU. The prompt is
    lower-cased (``None`` or empty is treated as an empty string) and scanned
    with plain substring checks. For colour, material, shape and animation
    the first matching rule wins; otherwise a default applies. A prompt
    containing "tree" replaces the single object with a fixed cone-on-cylinder
    tree.

    Returns:
        A JSON string with ``background``, ``objects``, ``lights`` and
        ``animation`` keys.
    """
    text = (prompt or "").lower()

    def pick(rules, fallback):
        # Value of the first rule having any keyword present in the prompt.
        for keywords, value in rules:
            if any(word in text for word in keywords):
                return value
        return fallback

    color = pick(
        (
            (("neon", "green"), "#39ff14"),
            (("red",), "#ff4444"),
            (("gold", "yellow"), "#ffcc33"),
            (("purple", "violet"), "#9b59ff"),
        ),
        "#88ccff",
    )
    material = pick(
        (
            (("glass",), "phong"),
            (("wire",), "wireframe"),
        ),
        "standard",
    )
    # Order matters: "torusknot"/"knot" must be tried before plain "torus".
    shape = pick(
        (
            (("torusknot", "knot"), "torusKnot"),
            (("torus", "donut"), "torus"),
            (("sphere", "ball"), "sphere"),
            (("cone",), "cone"),
            (("cylinder",), "cylinder"),
            (("ico",), "icosahedron"),
            (("dodeca",), "dodecahedron"),
            (("octa",), "octahedron"),
            (("cube",), "box"),
        ),
        "box",
    )
    anim = pick(
        (
            (("float",), "float"),
            (("still", "static"), "none"),
            (("orbit",), "orbit"),
        ),
        "rotate",
    )

    if "tree" in text:
        # Fixed two-part tree: green cone canopy over a brown trunk.
        objects = [
            {"shape": "cone", "color": "#2e8b57", "position": [0, 0.6, 0],
             "params": {"radius": 0.7, "height": 1.4}},
            {"shape": "cylinder", "color": "#8b5a2b", "position": [0, -0.4, 0],
             "params": {"radiusTop": 0.15, "radiusBottom": 0.2, "height": 0.8}},
        ]
    else:
        objects = [{"shape": shape, "color": color, "material": material,
                    "position": [0, 0, 0]}]

    return json.dumps({
        "background": "#0b0e14",
        "objects": objects,
        "lights": [
            {"type": "ambient", "intensity": 0.5},
            {"type": "directional", "intensity": 1.1, "position": [5, 8, 6]},
        ],
        "animation": {"type": anim, "speed": 1.0, "axis": "y"},
    })