""" Small-model layer. Default: Qwen/Qwen2.5-Coder-1.5B-Instruct. Swap MODEL_ID to Qwen2.5-Coder-0.5B-Instruct (smaller, stronger "build small" pitch) or -3B-Instruct (safety net) via env var. The model only ever emits the scene JSON defined in scene.py — never raw Three.js. UPGRADE PATH (recommended for reliability): replace the free-form generate() with grammar-constrained decoding (outlines / xgrammar / llguidance) bound to the Scene schema, so output is *guaranteed* valid JSON. See CLAUDE_CODE_BRIEF.md. """ from __future__ import annotations import json import logging import os import threading MODEL_ID = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-Coder-3B-Instruct") MAX_PROMPT_CHARS = int(os.environ.get("MAX_PROMPT_CHARS", "500")) log = logging.getLogger(__name__) SYSTEM = """You convert a short scene description into a JSON scene graph for a 3D renderer. Output ONLY valid JSON. No markdown, no comments, no prose, no code fences. Schema — primitive object: { "background": "#RRGGBB", "objects": [ { "shape": "box|sphere|cylinder|cone|torus|torusKnot|plane|tetrahedron|icosahedron|dodecahedron|octahedron|capsule|ring|circle|tube|roundedBox", "position": [x, y, z], "rotation": [x, y, z], "scale": [x, y, z], "color": "#RRGGBB", "material": "standard|basic|phong|wireframe", "preset": "gold|chrome|glass|neon|matte|plastic", "metalness": 0.0-1.0, "roughness": 0.0-1.0, "emissive": "#RRGGBB", "params": {} } ], "lights": [ {"type": "ambient|directional|point", "color": "#RRGGBB", "intensity": 1.0, "position": [x, y, z]} ], "animation": {"type": "none|rotate|float|orbit", "speed": 1.0, "axis": "x|y|z"} } For stars, badges, shields, hearts, hexagons, coins, logos, or emblems use an EXTRUDE object instead: { "type": "extrude", "shape": "star|heart|hexagon|badge|shield", "depth": 0.2, "bevel": true, "color": "#RRGGBB", "preset": "gold|chrome|glass|neon|matte|plastic", "metalness": 0.0-1.0, "roughness": 0.0-1.0, "emissive": "#RRGGBB", "position": [x, y, z] } For 3D text (words, numbers, short labels) use a TEXT3D object (Latin chars only, max 24 chars): { "type": "text3d", "text": "HELLO", "size": 0.8, "depth": 0.2, "bevel": true, "color": "#RRGGBB", "preset": "gold|chrome|glass|neon|matte|plastic", "metalness": 0.0-1.0, "roughness": 0.0-1.0, "position": [x, y, z] } Use text3d for: single words, numbers, initials, short labels (max 2 text objects per scene). For a badge/emblem with text on it, use the badge_with_text TEMPLATE — never place text3d manually on an extrude: { "background": "#0d1117", "template": { "name": "badge_with_text", "shape": "star|shield|hexagon|badge|heart", "text": "WORD", "color_badge": "#RRGGBB", "color_text": "#RRGGBB", "metalness": 0.0-1.0, "roughness": 0.0-1.0, "preset_badge": "gold|chrome|glass|neon|matte|plastic" }, "lights": [...], "animation": {...} } The template positions and scales the text automatically — never set text position or size. Material preset notes: - gold/chrome: use for shiny metallic looks; color overrides the base hue - glass: transparent refractive; color adds a tint - neon: glowing edge; set color to the desired glow color - matte: no shine, soft diffuse - plastic: slight sheen, clearcoat Omit preset to use metalness/roughness directly. To arrange multiple objects together use a GROUP node: { "type": "group", "layout": "row|column|stack|grid|none", "gap": 0.3, "cols": 3, "position": [x, y, z], "rotation": [x, y, z], "scale": [x, y, z], "children": [ ...any node types, including nested groups... ] } Layout rules: - "row": children spaced along x-axis, auto-centered - "column" or "stack": children spaced along y-axis, auto-centered (first child = bottom) - "grid": children tiled on x-z plane, cols wide - "none": children keep their own positions When layout is set, DO NOT set child positions — the compiler places them automatically. Groups can be nested. Use groups for: rows of items, stacked arrangements, any scene needing multiple evenly-spaced objects. Keep scenes small (1-6 objects total). Center the composition near the origin.""" FEWSHOT = [ {"role": "user", "content": "a glowing green icosahedron floating in the dark"}, {"role": "assistant", "content": json.dumps({ "background": "#05060a", "objects": [{ "shape": "icosahedron", "color": "#39ff14", "material": "standard", "metalness": 0.2, "roughness": 0.3, "emissive": "#0a3a0a", "position": [0, 0, 0], "params": {"radius": 0.8} }], "lights": [ {"type": "ambient", "intensity": 0.4}, {"type": "point", "color": "#39ff14", "intensity": 2.0, "position": [2, 2, 2]} ], "animation": {"type": "float", "speed": 1.2, "axis": "y"} })}, {"role": "user", "content": "three glowing cubes stacked into a tower"}, {"role": "assistant", "content": json.dumps({ "background": "#0b0e14", "objects": [ {"shape": "box", "color": "#39ff14", "emissive": "#0a3a0a", "material": "standard", "metalness": 0.2, "roughness": 0.3, "position": [0, 0.8, 0]}, {"shape": "box", "color": "#39ff14", "emissive": "#0a3a0a", "material": "standard", "metalness": 0.2, "roughness": 0.3, "position": [0, 0.0, 0]}, {"shape": "box", "color": "#39ff14", "emissive": "#0a3a0a", "material": "standard", "metalness": 0.2, "roughness": 0.3, "position": [0, -0.8, 0]}, ], "lights": [ {"type": "ambient", "intensity": 0.5}, {"type": "directional", "intensity": 1.1, "position": [5, 8, 6]}, ], "animation": {"type": "rotate", "speed": 1.0, "axis": "y"}, })}, {"role": "user", "content": "a red sphere floating above a flat grey plane"}, {"role": "assistant", "content": json.dumps({ "background": "#1a1a2e", "objects": [ {"shape": "sphere", "color": "#ff4444", "emissive": "#330000", "material": "standard", "metalness": 0.1, "roughness": 0.5, "position": [0, 0.8, 0], "params": {"radius": 0.5}}, {"shape": "plane", "color": "#555566", "material": "standard", "metalness": 0.0, "roughness": 0.9, "position": [0, -0.2, 0], "rotation": [-1.5708, 0, 0], "params": {"width": 6, "height": 6}}, ], "lights": [ {"type": "ambient", "intensity": 0.4}, {"type": "directional", "color": "#ffffff", "intensity": 1.2, "position": [5, 8, 6]}, ], "animation": {"type": "float", "speed": 0.8, "axis": "y"}, })}, {"role": "user", "content": "a classic burger"}, {"role": "assistant", "content": json.dumps({ "background": "#1a1209", "objects": [{ "type": "stack", "axis": "y", "gap": 0.02, "children": [ {"shape": "sphere", "color": "#c8a96e", "params": {"radius": 0.45}}, {"shape": "cylinder", "color": "#3a8a3a", "params": {"radiusTop": 0.52, "radiusBottom": 0.52, "height": 0.1}}, {"shape": "cylinder", "color": "#5a3a1a", "params": {"radiusTop": 0.5, "radiusBottom": 0.5, "height": 0.18}}, {"shape": "cylinder", "color": "#c8a96e", "params": {"radiusTop": 0.52, "radiusBottom": 0.55, "height": 0.32}}, ] }], "lights": [ {"type": "ambient", "intensity": 0.5}, {"type": "directional", "intensity": 1.3, "position": [5, 8, 6]}, ], "animation": {"type": "rotate", "speed": 0.8, "axis": "y"}, })}, {"role": "user", "content": "build me a snowman"}, {"role": "assistant", "content": json.dumps({ "background": "#0d1b2a", "template": {"name": "snowman", "color_body": "#e8e8e8", "color_hat": "#1a1a1a"}, "lights": [ {"type": "ambient", "intensity": 0.5}, {"type": "directional", "intensity": 1.3, "position": [5, 8, 6]}, ], "animation": {"type": "rotate", "speed": 0.5, "axis": "y"}, })}, {"role": "user", "content": "a cute capsule character with a rounded body"}, {"role": "assistant", "content": json.dumps({ "background": "#0d1b2a", "objects": [ {"shape": "capsule", "color": "#ff6b6b", "emissive": "#330000", "material": "standard", "metalness": 0.1, "roughness": 0.5, "position": [0, 0, 0], "params": {"radius": 0.35, "length": 0.6}}, {"shape": "sphere", "color": "#ffe8cc", "material": "standard", "metalness": 0.0, "roughness": 0.8, "position": [0, 0.72, 0], "params": {"radius": 0.28}}, ], "lights": [ {"type": "ambient", "intensity": 0.5}, {"type": "directional", "intensity": 1.3, "position": [5, 8, 6]}, ], "animation": {"type": "float", "speed": 1.0, "axis": "y"}, })}, {"role": "user", "content": "a stack of soft rounded boxes"}, {"role": "assistant", "content": json.dumps({ "background": "#0b0e14", "objects": [{ "type": "stack", "axis": "y", "gap": 0.08, "children": [ {"shape": "roundedBox", "color": "#ff9f43", "material": "standard", "metalness": 0.1, "roughness": 0.4, "params": {"width": 1.2, "height": 0.4, "depth": 1.2, "radius": 0.12}}, {"shape": "roundedBox", "color": "#ee5a24", "material": "standard", "metalness": 0.1, "roughness": 0.4, "params": {"width": 1.0, "height": 0.4, "depth": 1.0, "radius": 0.12}}, {"shape": "roundedBox", "color": "#c0392b", "material": "standard", "metalness": 0.1, "roughness": 0.4, "params": {"width": 0.8, "height": 0.4, "depth": 0.8, "radius": 0.12}}, ], }], "lights": [ {"type": "ambient", "intensity": 0.5}, {"type": "directional", "intensity": 1.3, "position": [5, 8, 6]}, ], "animation": {"type": "rotate", "speed": 0.8, "axis": "y"}, })}, {"role": "user", "content": "a gold star badge"}, {"role": "assistant", "content": json.dumps({ "background": "#1a1205", "objects": [{ "type": "extrude", "shape": "star", "depth": 0.3, "bevel": True, "color": "#ffd700", "material": "standard", "metalness": 0.9, "roughness": 0.2, "emissive": "#332200", "position": [0, 0, 0], }], "lights": [ {"type": "ambient", "intensity": 0.3}, {"type": "directional", "color": "#fff8e0", "intensity": 2.0, "position": [3, 5, 4]}, ], "animation": {"type": "rotate", "speed": 0.6, "axis": "y"}, })}, {"role": "user", "content": "a shield emblem"}, {"role": "assistant", "content": json.dumps({ "background": "#0d1117", "objects": [{ "type": "extrude", "shape": "shield", "depth": 0.25, "bevel": True, "color": "#3a6bc4", "material": "standard", "metalness": 0.5, "roughness": 0.3, "emissive": "#061833", "position": [0, 0, 0], }], "lights": [ {"type": "ambient", "intensity": 0.4}, {"type": "directional", "color": "#ffffff", "intensity": 1.8, "position": [4, 6, 3]}, ], "animation": {"type": "rotate", "speed": 0.5, "axis": "y"}, })}, {"role": "user", "content": "a glass star badge"}, {"role": "assistant", "content": json.dumps({ "background": "#0a0a18", "objects": [{ "type": "extrude", "shape": "star", "depth": 0.25, "bevel": True, "color": "#aaddff", "preset": "glass", "position": [0, 0, 0], }], "lights": [ {"type": "ambient", "intensity": 0.6}, {"type": "directional", "color": "#ffffff", "intensity": 2.0, "position": [5, 8, 4]}, {"type": "point", "color": "#88ccff", "intensity": 1.5, "position": [-3, 2, 3]}, ], "animation": {"type": "rotate", "speed": 0.5, "axis": "y"}, })}, {"role": "user", "content": "a neon glowing shield"}, {"role": "assistant", "content": json.dumps({ "background": "#050508", "objects": [{ "type": "extrude", "shape": "shield", "depth": 0.2, "bevel": True, "color": "#ff2266", "preset": "neon", "position": [0, 0, 0], }], "lights": [ {"type": "ambient", "intensity": 0.1}, {"type": "point", "color": "#ff2266", "intensity": 3.0, "position": [0, 1, 2]}, ], "animation": {"type": "float", "speed": 1.0, "axis": "y"}, })}, {"role": "user", "content": "a chrome hexagon"}, {"role": "assistant", "content": json.dumps({ "background": "#1a1a2e", "objects": [{ "type": "extrude", "shape": "hexagon", "depth": 0.3, "bevel": True, "color": "#ffffff", "preset": "chrome", "position": [0, 0, 0], }], "lights": [ {"type": "ambient", "intensity": 0.4}, {"type": "directional", "color": "#ffffff", "intensity": 2.5, "position": [4, 6, 3]}, {"type": "point", "color": "#88aaff", "intensity": 1.0, "position": [-3, 2, 2]}, ], "animation": {"type": "rotate", "speed": 0.6, "axis": "y"}, })}, {"role": "user", "content": "the word HELLO in 3D"}, {"role": "assistant", "content": json.dumps({ "background": "#0b0e14", "objects": [{ "type": "text3d", "text": "HELLO", "size": 0.8, "depth": 0.2, "bevel": True, "color": "#88ccff", "metalness": 0.3, "roughness": 0.4, "position": [0, 0, 0], }], "lights": [ {"type": "ambient", "intensity": 0.4}, {"type": "directional", "color": "#ffffff", "intensity": 2.0, "position": [5, 8, 5]}, ], "animation": {"type": "rotate", "speed": 0.5, "axis": "y"}, })}, {"role": "user", "content": "a gold 3D number 1"}, {"role": "assistant", "content": json.dumps({ "background": "#1a1205", "objects": [{ "type": "text3d", "text": "1", "size": 1.2, "depth": 0.3, "bevel": True, "color": "#ffd700", "preset": "gold", "position": [0, 0, 0], }], "lights": [ {"type": "ambient", "intensity": 0.3}, {"type": "directional", "color": "#fff8e0", "intensity": 2.5, "position": [4, 6, 4]}, ], "animation": {"type": "rotate", "speed": 0.5, "axis": "y"}, })}, {"role": "user", "content": "a star badge with the text PRO"}, {"role": "assistant", "content": json.dumps({ "background": "#0d1117", "template": { "name": "badge_with_text", "shape": "star", "text": "PRO", "color_badge": "#3a6bc4", "color_text": "#ffffff", "metalness": 0.7, "roughness": 0.2, }, "lights": [ {"type": "ambient", "intensity": 0.4}, {"type": "directional", "color": "#ffffff", "intensity": 2.0, "position": [3, 5, 4]}, ], "animation": {"type": "rotate", "speed": 0.5, "axis": "y"}, })}, {"role": "user", "content": "a shield badge with the text NEW"}, {"role": "assistant", "content": json.dumps({ "background": "#0d1117", "template": { "name": "badge_with_text", "shape": "shield", "text": "NEW", "color_badge": "#22aa66", "color_text": "#ffffff", "metalness": 0.5, "roughness": 0.3, }, "lights": [ {"type": "ambient", "intensity": 0.4}, {"type": "directional", "color": "#ffffff", "intensity": 2.0, "position": [4, 6, 4]}, ], "animation": {"type": "rotate", "speed": 0.5, "axis": "y"}, })}, {"role": "user", "content": "a row of three coins"}, {"role": "assistant", "content": json.dumps({ "background": "#1a1205", "objects": [{ "type": "group", "layout": "row", "gap": 0.4, "children": [ {"type": "extrude", "shape": "badge", "depth": 0.08, "bevel": True, "color": "#ffd700", "preset": "gold"}, {"type": "extrude", "shape": "badge", "depth": 0.08, "bevel": True, "color": "#ffd700", "preset": "gold"}, {"type": "extrude", "shape": "badge", "depth": 0.08, "bevel": True, "color": "#ffd700", "preset": "gold"}, ], }], "lights": [ {"type": "ambient", "intensity": 0.3}, {"type": "directional", "color": "#fff8e0", "intensity": 2.5, "position": [4, 6, 4]}, ], "animation": {"type": "rotate", "speed": 0.5, "axis": "y"}, })}, {"role": "user", "content": "a tall trophy: a star on a stacked column"}, {"role": "assistant", "content": json.dumps({ "background": "#1a1205", "objects": [{ "type": "group", "layout": "stack", "gap": 0.1, "children": [ {"shape": "cylinder", "color": "#ffd700", "preset": "gold", "params": {"radiusTop": 0.08, "radiusBottom": 0.18, "height": 1.2}}, {"type": "extrude", "shape": "star", "depth": 0.2, "bevel": True, "color": "#ffd700", "preset": "gold"}, ], }], "lights": [ {"type": "ambient", "intensity": 0.3}, {"type": "directional", "color": "#fff8e0", "intensity": 2.5, "position": [4, 6, 4]}, ], "animation": {"type": "rotate", "speed": 0.5, "axis": "y"}, })}, {"role": "user", "content": "three cubes in a row"}, {"role": "assistant", "content": json.dumps({ "background": "#0b0e14", "objects": [{ "type": "group", "layout": "row", "gap": 0.3, "children": [ {"shape": "box", "color": "#ff6b6b", "material": "standard", "metalness": 0.1, "roughness": 0.5}, {"shape": "box", "color": "#ffd700", "material": "standard", "metalness": 0.1, "roughness": 0.5}, {"shape": "box", "color": "#6bffb8", "material": "standard", "metalness": 0.1, "roughness": 0.5}, ], }], "lights": [ {"type": "ambient", "intensity": 0.5}, {"type": "directional", "intensity": 1.3, "position": [5, 8, 6]}, ], "animation": {"type": "rotate", "speed": 0.6, "axis": "y"}, })}, ] _tok = None _model = None _device = None _lock = threading.Lock() def _load(): global _tok, _model, _device with _lock: if _model is None: import torch from transformers import AutoModelForCausalLM, AutoTokenizer log.info("Loading model %s", MODEL_ID) _device = "cuda" if torch.cuda.is_available() else "cpu" _tok = AutoTokenizer.from_pretrained(MODEL_ID) _model = AutoModelForCausalLM.from_pretrained( MODEL_ID, torch_dtype=torch.bfloat16 ).to(_device) log.info("Model loaded on %s", _device) return _tok, _model, _device def run_llm(prompt: str, max_new_tokens: int = 512) -> str: """Generate raw text (expected to be a JSON object) from the model.""" if len(prompt) > MAX_PROMPT_CHARS: prompt = prompt[:MAX_PROMPT_CHARS] log.warning("Prompt truncated to %d chars", MAX_PROMPT_CHARS) tok, model, device = _load() messages = [{"role": "system", "content": SYSTEM}, *FEWSHOT, {"role": "user", "content": prompt}] text = tok.apply_chat_template( messages, tokenize=False, add_generation_prompt=True) inputs = tok(text, return_tensors="pt").to(device) out = model.generate( **inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.4, top_p=0.9, pad_token_id=tok.eos_token_id, ) gen = out[0][inputs.input_ids.shape[1]:] result = tok.decode(gen, skip_special_tokens=True) log.info("Generated %d chars for prompt: %.60s", len(result), prompt) return result def mock_scene_json(prompt: str) -> str: """Deterministic, no-GPU generator so the UI can be built/tested fast.""" p = (prompt or "").lower() color = "#88ccff" if "neon" in p or "green" in p: color = "#39ff14" elif "red" in p: color = "#ff4444" elif "gold" in p or "yellow" in p: color = "#ffcc33" elif "purple" in p or "violet" in p: color = "#9b59ff" material = "standard" if "glass" in p: material = "phong" elif "wire" in p: material = "wireframe" shape = "box" for key, val in [ ("torusknot", "torusKnot"), ("knot", "torusKnot"), ("torus", "torus"), ("donut", "torus"), ("sphere", "sphere"), ("ball", "sphere"), ("cone", "cone"), ("cylinder", "cylinder"), ("ico", "icosahedron"), ("dodeca", "dodecahedron"), ("octa", "octahedron"), ("cube", "box"), ]: if key in p: shape = val break anim = "rotate" if "float" in p: anim = "float" elif "still" in p or "static" in p: anim = "none" elif "orbit" in p: anim = "orbit" objects = [{"shape": shape, "color": color, "material": material, "position": [0, 0, 0]}] if "tree" in p: objects = [ {"shape": "cone", "color": "#2e8b57", "position": [0, 0.6, 0], "params": {"radius": 0.7, "height": 1.4}}, {"shape": "cylinder", "color": "#8b5a2b", "position": [0, -0.4, 0], "params": {"radiusTop": 0.15, "radiusBottom": 0.2, "height": 0.8}}, ] scene = { "background": "#0b0e14", "objects": objects, "lights": [ {"type": "ambient", "intensity": 0.5}, {"type": "directional", "intensity": 1.1, "position": [5, 8, 6]}, ], "animation": {"type": anim, "speed": 1.0, "axis": "y"}, } return json.dumps(scene)