<!-- Spaces: Running on Zero -->
<!-- The doctype is required: without it browsers fall back to quirks mode, which
     changes how the full-height flex layout below is sized. -->
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>DiffusionGemma · Text → 3D Asset</title>
<style>
  /* ---------- Theme tokens ---------- */
  :root {
    --bg: #0b0e14;
    --panel: #121722;
    --panel-2: #0f141d;
    --border: #222a38;
    --text: #e6edf3;
    --muted: #8b97a7;
    --accent: #7c5cff;
    --accent-2: #18c29c;
    --amber: #f5c451;
    --green: #2ea043;
  }

  /* ---------- Page shell: a full-height column (header / main / footer) ---------- */
  * {
    box-sizing: border-box;
  }
  html,
  body {
    margin: 0;
    height: 100%;
  }
  body {
    background: radial-gradient(1200px 600px at 70% -10%, #1a2030 0%, var(--bg) 55%);
    color: var(--text);
    font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif;
    display: flex;
    flex-direction: column;
    height: 100vh;
    overflow: hidden;
  }

  /* ---------- Header and status pill ---------- */
  header {
    padding: 14px 22px;
    border-bottom: 1px solid var(--border);
    display: flex;
    align-items: center;
    gap: 14px;
    flex: 0 0 auto;
  }
  header .logo {
    font-size: 22px;
  }
  header h1 {
    font-size: 16px;
    margin: 0;
    font-weight: 650;
    letter-spacing: 0.2px;
  }
  header p {
    margin: 0;
    color: var(--muted);
    font-size: 12.5px;
  }
  .pill {
    margin-left: auto;
    font-size: 12px;
    color: var(--muted);
    border: 1px solid var(--border);
    border-radius: 999px;
    padding: 5px 12px;
    display: flex;
    align-items: center;
    gap: 8px;
    white-space: nowrap;
  }
  /* Status dot: grey when idle, amber while live, green when done, red on error. */
  .dot {
    width: 8px;
    height: 8px;
    border-radius: 50%;
    background: var(--muted);
  }
  .dot.live {
    background: var(--amber);
    box-shadow: 0 0 8px var(--amber);
  }
  .dot.done {
    background: var(--green);
    box-shadow: 0 0 8px var(--green);
  }
  .dot.err {
    background: #f85149;
    box-shadow: 0 0 8px #f85149;
  }

  /* ---------- Two-column workspace ---------- */
  main {
    flex: 1 1 auto;
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 14px;
    padding: 14px 18px;
    min-height: 0;
  }
  .panel {
    background: var(--panel);
    border: 1px solid var(--border);
    border-radius: 12px;
    display: flex;
    flex-direction: column;
    min-height: 0;
    overflow: hidden;
  }
  .panel .cap {
    padding: 9px 14px;
    border-bottom: 1px solid var(--border);
    font-size: 12.5px;
    font-weight: 600;
    color: var(--muted);
    display: flex;
    align-items: center;
    gap: 8px;
  }
  .panel .cap .sub {
    font-weight: 400;
    color: #5d6b7a;
  }

  /* ---------- Left panel: source lines with churn / diff highlights ---------- */
  #code {
    flex: 1 1 auto;
    overflow: auto;
    margin: 0;
    padding: 12px 0;
    font-family: "SF Mono", ui-monospace, "JetBrains Mono", Menlo, Consolas, monospace;
    font-size: 12px;
    line-height: 1.55;
    background: var(--panel-2);
  }
  #code .ln {
    padding: 0 14px;
    white-space: pre-wrap;
    word-break: break-word;
    min-height: 1.55em;
    border-left: 3px solid transparent;
  }
  /* Transient amber flash on lines that changed since the previous frame. */
  #code .ln.live {
    animation: flash 0.6s ease-out;
    background: rgba(245, 196, 81, 0.05);
  }
  /* Persistent green marker on lines that differ from the previous round. */
  #code .ln.diff {
    background: rgba(46, 160, 67, 0.13);
    border-left-color: var(--green);
  }
  @keyframes flash {
    from {
      background: rgba(245, 196, 81, 0.42);
    }
    to {
      background: rgba(245, 196, 81, 0.05);
    }
  }
  #code::-webkit-scrollbar,
  .scroll::-webkit-scrollbar {
    width: 9px;
    height: 9px;
  }
  #code::-webkit-scrollbar-thumb,
  .scroll::-webkit-scrollbar-thumb {
    background: #2a3344;
    border-radius: 8px;
  }

  /* ---------- Right panel: preview iframe ---------- */
  #preview {
    flex: 1 1 auto;
    border: 0;
    width: 100%;
    background: #fff;
  }

  /* ---------- Bottom dock: prompt, buttons, sliders, chips, history ---------- */
  footer {
    flex: 0 0 auto;
    border-top: 1px solid var(--border);
    padding: 12px 18px;
    background: var(--panel-2);
  }
  .row {
    display: flex;
    gap: 12px;
    align-items: flex-start;
  }
  textarea#prompt {
    flex: 1 1 auto;
    resize: none;
    height: 72px;
    background: var(--panel);
    color: var(--text);
    border: 1px solid var(--border);
    border-radius: 10px;
    padding: 11px 13px;
    font-size: 14px;
    font-family: inherit;
  }
  /* The accent border stands in for the removed focus outline. */
  textarea#prompt:focus {
    outline: none;
    border-color: var(--accent);
  }
  .btns {
    display: flex;
    flex-direction: column;
    gap: 8px;
    width: 150px;
  }
  button {
    font-family: inherit;
    font-size: 13.5px;
    font-weight: 600;
    border-radius: 10px;
    padding: 9px 12px;
    cursor: pointer;
    border: 1px solid var(--border);
  }
  button.primary {
    background: linear-gradient(180deg, #8a6bff, #6b48f0);
    color: #fff;
    border: 0;
  }
  button.primary:disabled {
    opacity: 0.5;
    cursor: not-allowed;
  }
  button.ghost {
    background: transparent;
    color: var(--muted);
  }
  button.ghost:hover:not(:disabled) {
    color: var(--text);
    border-color: #36405230;
  }
  button.ghost:disabled {
    opacity: 0.4;
    cursor: not-allowed;
  }
  .meta {
    display: flex;
    gap: 16px;
    align-items: center;
    flex-wrap: wrap;
    margin-top: 11px;
    color: var(--muted);
    font-size: 12px;
  }
  .meta label {
    display: flex;
    align-items: center;
    gap: 7px;
    white-space: nowrap;
  }
  .meta input[type="range"] {
    accent-color: var(--accent);
    width: 120px;
  }
  .meta input[type="checkbox"] {
    accent-color: var(--accent);
    width: 15px;
    height: 15px;
  }
  .meta .val {
    color: var(--text);
    font-variant-numeric: tabular-nums;
    min-width: 30px;
  }
  .chips {
    display: flex;
    gap: 8px;
    flex-wrap: wrap;
    margin-top: 11px;
  }
  .chip {
    font-size: 12px;
    color: var(--muted);
    border: 1px solid var(--border);
    border-radius: 999px;
    padding: 5px 11px;
    cursor: pointer;
    background: transparent;
  }
  .chip:hover {
    color: var(--text);
    border-color: var(--accent);
  }
  .history {
    margin-top: 10px;
    display: flex;
    gap: 8px;
    flex-wrap: wrap;
    max-height: 46px;
    overflow: auto;
  }
  .turn {
    font-size: 11.5px;
    color: var(--muted);
    border: 1px solid var(--border);
    border-radius: 8px;
    padding: 3px 9px;
  }
  .turn b {
    color: var(--accent-2);
  }
</style>
| </head> | |
<body>
  <header>
    <span class="logo" aria-hidden="true">🌫️→🧊</span>
    <div>
      <h1>DiffusionGemma · Text → 3D Asset</h1>
      <p>Describe a game asset and a block-diffusion LLM draws an SVG by denoising — every token updates at once. Watch the SVG take shape (left) while it's extruded into a spinning 3D object (right).</p>
    </div>
    <span class="pill"><span class="dot" id="statusDot" aria-hidden="true"></span><span id="statusText">idle</span></span>
  </header>
  <main>
    <!-- Left: the streamed source, re-rendered on every frame by the script. -->
    <section class="panel" aria-label="Model's view">
      <div class="cap">🧠 Model's view — diffusion canvas <span class="sub" id="capInfo"></span></div>
      <div class="scroll" id="code"></div>
    </section>
    <!-- Right: the script fills this iframe through its srcdoc attribute. -->
    <section class="panel" aria-label="Live 3D scene">
      <div class="cap">🧊 Live 3D scene <span class="sub">drag to orbit</span></div>
      <iframe id="preview" title="Live 3D preview of the generated asset" sandbox="allow-scripts allow-same-origin"></iframe>
    </section>
  </main>
  <footer>
    <div class="row">
      <!-- A placeholder is not an accessible name, so the textarea also carries aria-label. -->
      <textarea id="prompt" placeholder="Describe a game asset… e.g. 'a glowing health potion in a round flask' then tweak: 'make it metallic gold', 'add a glowing rim', 'simpler shape'" aria-label="Asset description or tweak"></textarea>
      <div class="btns">
        <button class="primary" id="buildBtn" type="button">Build / Tweak</button>
        <button class="ghost" id="resetBtn" type="button">Reset</button>
      </div>
    </div>
    <div class="chips" id="chips"></div>
    <div class="meta">
      <label>tokens <input id="maxTokens" type="range" min="2048" max="4096" step="256" value="2048"><span class="val" id="maxTokensV">2048</span></label>
      <label>iterations/block <input id="maxIters" type="range" min="8" max="120" step="8" value="64"><span class="val" id="maxItersV">64</span></label>
      <label>anim delay <input id="delay" type="range" min="0" max="0.3" step="0.02" value="0"><span class="val" id="delayV">0.0s</span></label>
      <label><input id="fullDenoise" type="checkbox"> run all denoising steps (no early stop)</label>
      <label><input id="warmStart" type="checkbox" checked> tweak in place (diffuse from current page, not noise)</label>
      <span class="history" id="history"></span>
    </div>
  </footer>
| <script type="module"> | |
| import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js"; | |
// Shorthand for document.getElementById.
const $ = (id) => document.getElementById(id);

// Element handles looked up once and reused by the functions below.
const codeEl = $("code");
const preview = $("preview");
const statusDot = $("statusDot");
const statusText = $("statusText");
const capInfo = $("capInfo");
const buildBtn = $("buildBtn");
const resetBtn = $("resetBtn");
const promptEl = $("prompt");

// Mutable session state.
let client = null;        // connected client, created on first use
let busy = false;         // true while a generation is in flight
let messages = [];        // [{role, content}] confirmed conversation
let prevFrameLines = [];  // lines rendered on the previous streaming frame (live churn diff)
let lastFinalLines = [];  // lines of the previous round's final source (tweak diff)

// Example game-asset prompts offered as clickable chips.
const EXAMPLES = [
  "A legendary fantasy sword with a glowing blue gemstone in the crossguard and an ornate golden hilt.",
  "A wooden treasure chest overflowing with gold coins, with iron banding and a heavy lock.",
  "A bubbling red health potion in a round glass flask with a cork and a heart label.",
  "A cute retro spaceship with twin fins, a round cockpit window, and orange thruster flames.",
  "A blocky 8-bit style mushroom power-up with white spots on a red cap.",
  "A heraldic knight's shield with a roaring lion crest and a diagonal blue-and-silver split.",
  "A friendly boxy robot companion with an antenna, glowing eyes, and a chest panel.",
  "A golden ornate skeleton key with a clover-shaped bow and intricate teeth.",
];
| // ---------- helpers ---------- | |
/**
 * Escape text for insertion as HTML element content.
 *
 * The ampersand is replaced first so the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param {string} s raw text (coerced with String()).
 * @returns {string} text with `&`, `<` and `>` replaced by character references.
 */
function esc(s) {
  return String(s)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}
/**
 * Escape text for use inside a double-quoted HTML attribute value.
 *
 * @param {string} s raw text (coerced with String()).
 * @returns {string} text with `&` and `"` replaced by character references.
 */
function escAttr(s) {
  return String(s)
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;");
}
/**
 * Update the header status pill.
 *
 * @param {string} kind modifier class for the dot ("live", "done", "err"); falsy for the plain dot.
 * @param {string} text label shown next to the dot.
 */
function setStatus(kind, text) {
  const classes = ["dot"];
  if (kind) classes.push(kind);
  statusDot.className = classes.join(" ");
  statusText.textContent = text;
}
/**
 * Find which lines of `b` belong to a longest common subsequence of `a` and `b`.
 *
 * @param {string[]} a previous lines.
 * @param {string[]} b current lines.
 * @returns {Set<number>} indices into `b` that are matched (unchanged).
 */
function unchangedSet(a, b) {
  const rows = a.length;
  const cols = b.length;

  // lcs[r][c] = LCS length of a[r..] and b[c..]; the extra row and column stay 0.
  const lcs = [];
  for (let r = 0; r <= rows; r++) lcs.push(new Int32Array(cols + 1));
  for (let r = rows - 1; r >= 0; r--) {
    const here = lcs[r];
    const below = lcs[r + 1];
    for (let c = cols - 1; c >= 0; c--) {
      if (a[r] === b[c]) here[c] = below[c + 1] + 1;
      else here[c] = Math.max(below[c], here[c + 1]);
    }
  }

  // Walk the table from the top-left, recording each matched position in `b`.
  const kept = new Set();
  let r = 0;
  let c = 0;
  while (r < rows && c < cols) {
    if (a[r] === b[c]) {
      kept.add(c);
      r += 1;
      c += 1;
      continue;
    }
    // On a tie, advance in `a` first.
    if (lcs[r + 1][c] >= lcs[r][c + 1]) r += 1;
    else c += 1;
  }
  return kept;
}
/**
 * Render `source` into the code panel, one <div class="ln"> per line.
 *
 * A line missing from the LCS against `diffAgainst` gets the persistent "diff"
 * class; otherwise a line missing from the LCS against `liveAgainst` gets the
 * transient "live" class. The diff highlight takes precedence.
 *
 * @param {string} source text to display.
 * @param {{liveAgainst?: string[]|null, diffAgainst?: string[]|null}} [opts]
 * @returns {string[]} the lines of `source`, for use as the next baseline.
 */
function renderCode(source, { liveAgainst = null, diffAgainst = null } = {}) {
  const lines = source.split("\n");
  const stableVsLive = liveAgainst ? unchangedSet(liveAgainst, lines) : null;
  const stableVsDiff = diffAgainst ? unchangedSet(diffAgainst, lines) : null;

  const rows = [];
  for (let idx = 0; idx < lines.length; idx++) {
    let cls = "ln";
    if (stableVsDiff && !stableVsDiff.has(idx)) {
      cls += " diff"; // persistent tweak diff
    } else if (stableVsLive && !stableVsLive.has(idx)) {
      cls += " live"; // transient churn flash
    }
    // An empty line still needs some content in its row.
    rows.push(`<div class="${cls}">${esc(lines[idx]) || " "}</div>`);
  }
  codeEl.innerHTML = rows.join("");
  return lines;
}
/**
 * Repair a possibly front-mangled SVG so the viewer receives one <svg>…</svg> element.
 *
 * - If an "<svg" opener exists (any letter case), everything before it is dropped.
 * - Otherwise, if a known child element (path, rect, circle, …) is found, a canonical
 *   wrapper with a default viewBox is prepended at that element. (The original note
 *   says warm-start diffusion often eats the leading "<" of "<svg".)
 * - Everything after the last "</svg>" is dropped; if there is none, one is appended.
 *
 * Matching uses case-insensitive regexes on the original string. Taking indices from
 * `src.toLowerCase()` is unsafe because lowercasing can change the string length
 * (e.g. U+0130), which shifts the indices and can cut off the "<" of "<svg".
 *
 * NOTE(review): the original comment says this mirrors the server's extract_svg;
 * that function is not visible here, so confirm the two still agree.
 *
 * @param {string} src raw model output.
 * @returns {string} SVG markup trimmed or wrapped as described above.
 */
function normalizeSvg(src) {
  const open = src.search(/<svg/i);
  if (open !== -1) {
    src = src.slice(open);
  } else {
    const m = src.match(/<(?:path|rect|circle|ellipse|polygon|polyline|line|g|defs)\b/i);
    if (m) src = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">\n' + src.slice(m.index);
  }

  // Index of the LAST closing tag, regardless of letter case.
  let close = -1;
  for (const hit of src.matchAll(/<\/svg>/gi)) close = hit.index;

  if (close !== -1) src = src.slice(0, close + "</svg>".length);
  else src = src.replace(/\s+$/, "") + "\n</svg>";
  return src;
}
/**
 * Build the complete HTML document (for the preview iframe's srcdoc) that extrudes
 * an SVG into a Three.js scene.
 *
 * The harness is fixed, hand-written code; only the SVG text varies. Three.js is
 * imported from esm.sh through an import map.
 *
 * Security: the SVG is embedded as a JS string literal inside an inline <script>.
 * JSON.stringify does not escape "<", so an SVG containing "</script>" (or "<!--")
 * would end the script early and inject markup into the iframe. Every "<" is
 * therefore written as the \u003c escape, which decodes back to "<" when the inner
 * script is parsed.
 *
 * @param {string} svg SVG markup to display.
 * @returns {string} a self-contained HTML document.
 */
function viewerHtml(svg) {
  // String(...) keeps the old behaviour for values JSON.stringify maps to undefined.
  const svgLiteral = String(JSON.stringify(svg)).replace(/</g, "\\u003c");
  return (
    '<!DOCTYPE html><html><head><meta charset="utf-8">' +
    '<style>html,body{margin:0;height:100%;overflow:hidden;background:#0b0e14;}canvas{display:block;}</style>' +
    '<script type="importmap">{"imports":{"three":"https://esm.sh/three@0.161.0","three/addons/":"https://esm.sh/three@0.161.0/examples/jsm/"}}<\/script>' +
    '</head><body><script type="module">' +
    'import * as THREE from "three";' +
    'import { SVGLoader } from "three/addons/loaders/SVGLoader.js";' +
    'import { OrbitControls } from "three/addons/controls/OrbitControls.js";' +
    'const SVG = ' + svgLiteral + ';' +
    'const scene = new THREE.Scene(); scene.background = new THREE.Color(0x0b0e14);' +
    'const camera = new THREE.PerspectiveCamera(40, innerWidth/innerHeight, 0.1, 100000);' +
    'const renderer = new THREE.WebGLRenderer({antialias:true}); renderer.setPixelRatio(devicePixelRatio); renderer.setSize(innerWidth, innerHeight);' +
    'document.body.appendChild(renderer.domElement);' +
    'scene.add(new THREE.AmbientLight(0xffffff, 0.85));' +
    'const d1 = new THREE.DirectionalLight(0xffffff, 1.0); d1.position.set(0.5, 0.8, 1); scene.add(d1);' +
    'const d2 = new THREE.DirectionalLight(0x88aaff, 0.4); d2.position.set(-1, -0.5, 0.5); scene.add(d2);' +
    'const controls = new OrbitControls(camera, renderer.domElement); controls.enableDamping = true;' +
    'const group = new THREE.Group();' +
    // A parse failure yields an empty scene instead of an exception.
    'let data; try { data = new SVGLoader().parse(SVG); } catch (e) { data = { paths: [] }; }' +
    // Keep only filled paths and pre-collect all shapes so depth can be sized to the 2D art.
    'const filled = data.paths.filter(function(p){ var f=(p.userData&&p.userData.style&&p.userData.style.fill); return f && f!=="none"; });' +
    'const tmp = new THREE.Box3();' +
    'const entries = [];' +
    'for (const path of filled) {' +
    ' const fill = (path.userData && path.userData.style && path.userData.style.fill) || "#cccccc";' +
    ' let shapes = []; try { shapes = SVGLoader.createShapes ? SVGLoader.createShapes(path) : path.toShapes(true); } catch (e) { try { shapes = path.toShapes(true); } catch (_) {} }' +
    ' if (shapes.length) entries.push({ fill: fill, shapes: shapes });' +
    '}' +
    // The 2D extent drives a shallow, art-relative extrusion thickness.
    'for (const e of entries) for (const s of e.shapes) { const g0 = new THREE.ShapeGeometry(s); g0.computeBoundingBox(); tmp.union(g0.boundingBox); g0.dispose(); }' +
    'const flat = tmp.getSize(new THREE.Vector3());' +
    'const span = Math.max(flat.x, flat.y) || 100;' +
    'const depth = span * 0.11;' +
    'const relief = depth * 0.32;' +
    'const n = entries.length;' +
    'let layer = 0;' +
    'for (const e of entries) {' +
    ' const mat = new THREE.MeshStandardMaterial({ color: new THREE.Color(e.fill), metalness: 0.1, roughness: 0.65, side: THREE.DoubleSide, polygonOffset: true, polygonOffsetFactor: -1, polygonOffsetUnits: -1 });' +
    // Total layer separation is capped at `relief` regardless of path count, so components stay packed.
    ' const zOff = n > 1 ? (layer / (n - 1)) * relief : 0;' +
    ' for (const shape of e.shapes) {' +
    ' try {' +
    ' const geo = new THREE.ExtrudeGeometry(shape, { depth: depth, bevelEnabled: true, bevelThickness: depth*0.15, bevelSize: depth*0.1, bevelSegments: 2 });' +
    ' const mesh = new THREE.Mesh(geo, mat); mesh.position.z = zOff; mesh.renderOrder = layer; group.add(mesh);' +
    ' } catch (e2) {}' +
    ' }' +
    ' layer++;' +
    '}' +
    // SVG's y axis points down; flip it for the scene.
    'group.scale.y = -1;' +
    'const box = new THREE.Box3().setFromObject(group);' +
    'const center = box.getCenter(new THREE.Vector3());' +
    'if (isFinite(center.x)) group.position.sub(center);' +
    'scene.add(group);' +
    // Place the camera far enough back to frame the larger of width and height.
    'const size = box.getSize(new THREE.Vector3());' +
    'const frameDim = Math.max(size.x, size.y) || 100;' +
    'const dist = (frameDim / 2) / Math.tan((camera.fov * Math.PI / 180) / 2) * 1.3;' +
    'camera.position.set(0, 0, dist);' +
    'camera.lookAt(0, 0, 0); controls.update();' +
    'addEventListener("resize", () => { camera.aspect = innerWidth/innerHeight; camera.updateProjectionMatrix(); renderer.setSize(innerWidth, innerHeight); });' +
    // Gentle bounded oscillation shows relief depth but never rotates to the thin edge.
    'let t = 0;' +
    'function animate(){ requestAnimationFrame(animate); t += 0.012; group.rotation.y = Math.sin(t) * 0.5; controls.update(); renderer.render(scene, camera); }' +
    'animate();' +
    '<\/script></body></html>'
  );
}
/**
 * Load the 3D viewer for `source` into the preview iframe.
 * Empty or whitespace-only input clears the iframe instead.
 *
 * @param {string} source SVG text (possibly mangled; it is normalized first).
 */
function renderPreview(source) {
  const hasArt = Boolean(source) && source.trim() !== "";
  preview.srcdoc = hasArt ? viewerHtml(normalizeSvg(source)) : "";
}
| // ---------- generation ---------- | |
/**
 * Return the shared client, connecting to this page's origin on first use.
 *
 * @returns {Promise<object>} the connected client.
 */
async function ensureClient() {
  if (client) return client;
  setStatus("", "connecting…");
  client = await Client.connect(window.location.origin);
  return client;
}
/**
 * Submit the prompt to the "/generate" endpoint and stream frames into the UI.
 *
 * Frames arrive as JSON strings in `ev.data[0]`, each with a `kind`:
 *   - "draft" / "commit": re-render the code panel with a live flash; the 3D preview
 *     is rebuilt only on "commit", since each rebuild reloads the viewer document.
 *   - "done": final source; rendered with a persistent diff against the previous round.
 *   - "error": the message is shown in the code panel and streaming stops.
 *
 * On success the turn is appended to `messages` and the prompt is cleared. Any
 * other outcome leaves the status pill in the error state, including a stream that
 * ends with neither a "done" nor an "error" frame. No-op while another run is in
 * flight or when the prompt is empty.
 */
async function run() {
  if (busy) return;
  const prompt = promptEl.value.trim();
  if (!prompt) { promptEl.focus(); return; }

  busy = true;
  buildBtn.disabled = true;
  const isTweak = messages.length > 0;
  const startLabel = isTweak ? "tweaking…" : "diffusing…";
  setStatus("live", startLabel);
  prevFrameLines = [];

  try {
    const c = await ensureClient();
    // ensureClient() may have switched the pill to "connecting…"; restore it.
    setStatus("live", startLabel);

    const payload = {
      prompt,
      history_json: JSON.stringify(messages),
      max_new_tokens: parseInt($("maxTokens").value, 10),
      max_iters: parseInt($("maxIters").value, 10),
      full_denoise: $("fullDenoise").checked,
      anim_delay: parseFloat($("delay").value),
      warm_start: $("warmStart").checked,
    };

    let finalSource = "";
    let failed = false; // set when the server reported an error explicitly
    const sub = c.submit("/generate", payload);
    for await (const ev of sub) {
      if (ev.type === "data") {
        const frame = JSON.parse(ev.data[0]);
        if (frame.kind === "error") {
          failed = true;
          setStatus("err", "error");
          renderCode("/* " + frame.message + " */");
          break;
        }
        if (frame.kind === "done") {
          finalSource = frame.source;
          // Persistent green highlight of what changed vs the previous round.
          prevFrameLines = renderCode(finalSource, { diffAgainst: isTweak ? lastFinalLines : null });
          renderPreview(finalSource);
          setStatus("done", "done");
          continue;
        }
        // Draft or commit frame: the code panel churns on every frame, but the preview
        // is rebuilt only on committed blocks to avoid perpetual blank flashing.
        const src = frame.source || "";
        prevFrameLines = renderCode(src, { liveAgainst: prevFrameLines });
        if (frame.kind === "commit") renderPreview(src);
        capInfo.textContent = `block ${frame.block} · step ${frame.step}/${frame.max_iters} · ${frame.canvas} tokens update simultaneously`;
        setStatus("live", `${frame.kind === "draft" ? "diffusing" : "committed"} · block ${frame.block} · step ${frame.step}`);
      } else if (ev.type === "status" && ev.stage === "error") {
        failed = true;
        setStatus("err", "error");
        break;
      }
    }

    if (finalSource) {
      messages.push({ role: "user", content: prompt });
      messages.push({ role: "assistant", content: finalSource });
      lastFinalLines = finalSource.split("\n");
      promptEl.value = "";
      renderHistory();
    } else if (!failed) {
      // The stream ended without a final frame; don't leave the pill on "live".
      setStatus("err", "no result");
    }
  } catch (e) {
    setStatus("err", "error");
    renderCode("/* connection error: " + (e && e.message ? e.message : e) + " */");
  } finally {
    busy = false;
    buildBtn.disabled = false;
  }
}
/**
 * Rebuild the history strip: one pill per user turn, labelled "build" for the first
 * and "tweak N" afterwards, with the prompt truncated to 40 characters.
 */
function renderHistory() {
  const pills = [];
  let turnNo = 0;
  for (const msg of messages) {
    if (msg.role !== "user") continue;
    const label = turnNo === 0 ? "build" : "tweak " + turnNo;
    const snippet = esc(msg.content.slice(0, 40));
    const ellipsis = msg.content.length > 40 ? "…" : "";
    pills.push(`<span class="turn"><b>${label}</b> · ${snippet}${ellipsis}</span>`);
    turnNo += 1;
  }
  $("history").innerHTML = pills.join("");
}
/**
 * Return the page to its initial state: drop the conversation and diff baselines,
 * empty both panels and the history strip, and clear the caption, status and prompt.
 */
function reset() {
  // Session state
  messages = [];
  prevFrameLines = [];
  lastFinalLines = [];

  // Panels and history strip
  codeEl.innerHTML = "";
  renderPreview("");
  $("history").innerHTML = "";

  // Caption, status pill, prompt box
  capInfo.textContent = "";
  setStatus("", "idle");
  promptEl.value = "";
}
// ---------- wiring ----------
buildBtn.addEventListener("click", run);
resetBtn.addEventListener("click", reset);

// Cmd/Ctrl+Enter in the prompt box starts a run.
promptEl.addEventListener("keydown", (e) => {
  if (e.key === "Enter" && (e.metaKey || e.ctrlKey)) { e.preventDefault(); run(); }
});

// Each slider mirrors its value into the sibling "<id>V" label. The delay slider
// steps by 0.02, so it needs two decimals: with one, 0.02 would read "0.0s".
const SLIDER_FORMATS = [
  ["maxTokens", (v) => v],
  ["maxIters", (v) => v],
  ["delay", (v) => (+v).toFixed(2) + "s"],
];
for (const [id, fmt] of SLIDER_FORMATS) {
  const el = $(id), out = $(id + "V");
  const sync = () => { out.textContent = fmt(el.value); };
  el.addEventListener("input", sync);
  // Sync once on load so the label matches a slider value the browser restored.
  sync();
}

// Example chips: one delegated listener on the container fills the prompt.
$("chips").innerHTML = EXAMPLES.map((e, i) => `<button class="chip" data-i="${i}">${esc(e.slice(0, 42))}…</button>`).join("");
$("chips").addEventListener("click", (e) => {
  // closest() finds the chip even if the click lands on a descendant of the button,
  // and returns null for clicks in the gaps between chips.
  const chip = e.target.closest(".chip");
  if (!chip) return;
  const i = chip.getAttribute("data-i");
  if (i !== null) { promptEl.value = EXAMPLES[+i]; promptEl.focus(); }
});

renderPreview("");
setStatus("", "idle");
| </script> | |
| </body> | |
| </html> | |