<!--
  diffusiongemma-3d-gen / index.html
  merve's picture
  merve HF Staff
  Add Gemma Diffusion 3D asset builder with google/diffusiongemma-26B-A4B-it
  7f3234f verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>DiffusionGemma · Text → 3D Asset</title>
<style>
:root {
--bg: #0b0e14;
--panel: #121722;
--panel-2: #0f141d;
--border: #222a38;
--text: #e6edf3;
--muted: #8b97a7;
--accent: #7c5cff;
--accent-2: #18c29c;
--amber: #f5c451;
--green: #2ea043;
}
* { box-sizing: border-box; }
html, body { margin: 0; height: 100%; }
body {
background: radial-gradient(1200px 600px at 70% -10%, #1a2030 0%, var(--bg) 55%);
color: var(--text);
font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif;
display: flex; flex-direction: column; height: 100vh; overflow: hidden;
}
header {
padding: 14px 22px; border-bottom: 1px solid var(--border);
display: flex; align-items: center; gap: 14px; flex: 0 0 auto;
}
header .logo { font-size: 22px; }
header h1 { font-size: 16px; margin: 0; font-weight: 650; letter-spacing: .2px; }
header p { margin: 0; color: var(--muted); font-size: 12.5px; }
.pill {
margin-left: auto; font-size: 12px; color: var(--muted);
border: 1px solid var(--border); border-radius: 999px; padding: 5px 12px;
display: flex; align-items: center; gap: 8px; white-space: nowrap;
}
.dot { width: 8px; height: 8px; border-radius: 50%; background: var(--muted); }
.dot.live { background: var(--amber); box-shadow: 0 0 8px var(--amber); }
.dot.done { background: var(--green); box-shadow: 0 0 8px var(--green); }
.dot.err { background: #f85149; box-shadow: 0 0 8px #f85149; }
main { flex: 1 1 auto; display: grid; grid-template-columns: 1fr 1fr; gap: 14px; padding: 14px 18px; min-height: 0; }
.panel { background: var(--panel); border: 1px solid var(--border); border-radius: 12px; display: flex; flex-direction: column; min-height: 0; overflow: hidden; }
.panel .cap { padding: 9px 14px; border-bottom: 1px solid var(--border); font-size: 12.5px; font-weight: 600; color: var(--muted); display: flex; align-items: center; gap: 8px; }
.panel .cap .sub { font-weight: 400; color: #5d6b7a; }
/* Code / diffusion view */
#code { flex: 1 1 auto; overflow: auto; margin: 0; padding: 12px 0; font-family: "SF Mono", ui-monospace, "JetBrains Mono", Menlo, Consolas, monospace; font-size: 12px; line-height: 1.55; background: var(--panel-2); }
#code .ln { padding: 0 14px; white-space: pre-wrap; word-break: break-word; min-height: 1.55em; border-left: 3px solid transparent; }
#code .ln.live { animation: flash .6s ease-out; background: rgba(245,196,81,.05); }
#code .ln.diff { background: rgba(46,160,67,.13); border-left-color: var(--green); }
@keyframes flash {
0% { background: rgba(245,196,81,.42); }
100% { background: rgba(245,196,81,.05); }
}
#code::-webkit-scrollbar, .scroll::-webkit-scrollbar { width: 9px; height: 9px; }
#code::-webkit-scrollbar-thumb, .scroll::-webkit-scrollbar-thumb { background: #2a3344; border-radius: 8px; }
/* Website preview */
#preview { flex: 1 1 auto; border: 0; width: 100%; background: #fff; }
/* Bottom dock */
footer { flex: 0 0 auto; border-top: 1px solid var(--border); padding: 12px 18px; background: var(--panel-2); }
.row { display: flex; gap: 12px; align-items: flex-start; }
textarea#prompt {
flex: 1 1 auto; resize: none; height: 72px; background: var(--panel); color: var(--text);
border: 1px solid var(--border); border-radius: 10px; padding: 11px 13px; font-size: 14px; font-family: inherit;
}
textarea#prompt:focus { outline: none; border-color: var(--accent); }
.btns { display: flex; flex-direction: column; gap: 8px; width: 150px; }
button { font-family: inherit; font-size: 13.5px; font-weight: 600; border-radius: 10px; padding: 9px 12px; cursor: pointer; border: 1px solid var(--border); }
button.primary { background: linear-gradient(180deg, #8a6bff, #6b48f0); color: #fff; border: 0; }
button.primary:disabled { opacity: .5; cursor: not-allowed; }
button.ghost { background: transparent; color: var(--muted); }
button.ghost:hover:not(:disabled) { color: var(--text); border-color: #36405230; }
button.ghost:disabled { opacity: .4; cursor: not-allowed; }
.meta { display: flex; gap: 16px; align-items: center; flex-wrap: wrap; margin-top: 11px; color: var(--muted); font-size: 12px; }
.meta label { display: flex; align-items: center; gap: 7px; white-space: nowrap; }
.meta input[type="range"] { accent-color: var(--accent); width: 120px; }
.meta input[type="checkbox"] { accent-color: var(--accent); width: 15px; height: 15px; }
.meta .val { color: var(--text); font-variant-numeric: tabular-nums; min-width: 30px; }
.chips { display: flex; gap: 8px; flex-wrap: wrap; margin-top: 11px; }
.chip { font-size: 12px; color: var(--muted); border: 1px solid var(--border); border-radius: 999px; padding: 5px 11px; cursor: pointer; background: transparent; }
.chip:hover { color: var(--text); border-color: var(--accent); }
.history { margin-top: 10px; display: flex; gap: 8px; flex-wrap: wrap; max-height: 46px; overflow: auto; }
.turn { font-size: 11.5px; color: var(--muted); border: 1px solid var(--border); border-radius: 8px; padding: 3px 9px; }
.turn b { color: var(--accent-2); }
</style>
</head>
<body>
<!-- Page header: logo, title, short description, and the status pill.
     #statusDot and #statusText are rewritten by setStatus() in the module script below. -->
<header>
<span class="logo">🌫️→🧊</span>
<div>
<h1>DiffusionGemma · Text → 3D Asset</h1>
<p>Describe a game asset and a block-diffusion LLM draws an SVG by denoising — every token updates at once. Watch the SVG take shape (left) while it's extruded into a spinning 3D object (right).</p>
</div>
<!-- The dot's extra class (live / done / err) selects its colour via the .dot.* rules in the stylesheet. -->
<span class="pill"><span class="dot" id="statusDot"></span><span id="statusText">idle</span></span>
</header>
<!-- Two side-by-side panels: the streamed source with per-line highlights (left)
     and the 3D preview frame (right). -->
<main>
  <section class="panel">
    <!-- #capInfo is filled with block / step progress text while frames stream in. -->
    <div class="cap">🧠 Model's view — diffusion canvas <span class="sub" id="capInfo"></span></div>
    <div id="code" class="scroll"></div>
  </section>
  <section class="panel">
    <div class="cap">🧊 Live 3D scene <span class="sub">drag to orbit</span></div>
    <!-- The frame's document is supplied through srcdoc by renderPreview();
         the title gives the frame an accessible name for assistive technology. -->
    <iframe id="preview" title="Live 3D preview of the generated asset" sandbox="allow-scripts allow-same-origin"></iframe>
  </section>
</main>
<!-- Bottom dock: prompt box, Build / Reset buttons, example chips (filled by script),
     generation settings, and the per-turn history strip (filled by script). -->
<footer>
  <div class="row">
    <!-- The placeholder is only a hint; aria-label provides the control's accessible name. -->
    <textarea id="prompt" aria-label="Asset description" placeholder="Describe a game asset… e.g. 'a glowing health potion in a round flask' then tweak: 'make it metallic gold', 'add a glowing rim', 'simpler shape'"></textarea>
    <div class="btns">
      <button type="button" class="primary" id="buildBtn">Build / Tweak</button>
      <button type="button" class="ghost" id="resetBtn">Reset</button>
    </div>
  </div>
  <div class="chips" id="chips"></div>
  <div class="meta">
    <!-- Each slider has a sibling readout whose id is the slider id plus "V". -->
    <label>tokens <input type="range" id="maxTokens" min="2048" max="4096" step="256" value="2048"><span class="val" id="maxTokensV">2048</span></label>
    <label>iterations/block <input type="range" id="maxIters" min="8" max="120" step="8" value="64"><span class="val" id="maxItersV">64</span></label>
    <label>anim delay <input type="range" id="delay" min="0" max="0.3" step="0.02" value="0"><span class="val" id="delayV">0.0s</span></label>
    <label><input type="checkbox" id="fullDenoise"> run all denoising steps (no early stop)</label>
    <label><input type="checkbox" id="warmStart" checked> tweak in place (diffuse from current SVG, not noise)</label>
    <span class="history" id="history"></span>
  </div>
</footer>
<script type="module">
import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
// Shorthand for document.getElementById.
const $ = (id) => document.getElementById(id);
// Cached references to the elements the script reads or updates repeatedly.
const codeEl = $("code"), preview = $("preview");
const statusDot = $("statusDot"), statusText = $("statusText"), capInfo = $("capInfo");
const buildBtn = $("buildBtn"), resetBtn = $("resetBtn"), promptEl = $("prompt");
// Client instance; stays null until ensureClient() connects on first use.
let client = null;
// True while run() is in flight; run() returns immediately when it is set.
let busy = false;
let messages = []; // [{role, content}] confirmed conversation
let prevFrameLines = []; // lines shown on the previous streaming frame (live churn diff)
let lastFinalLines = []; // lines of the previous round's final source (tweak diff)
// Game-asset prompts — the model designs the SVG itself; the fixed viewer then extrudes it to 3D.
const EXAMPLES = [
"A legendary fantasy sword with a glowing blue gemstone in the crossguard and an ornate golden hilt.",
"A wooden treasure chest overflowing with gold coins, with iron banding and a heavy lock.",
"A bubbling red health potion in a round glass flask with a cork and a heart label.",
"A cute retro spaceship with twin fins, a round cockpit window, and orange thruster flames.",
"A blocky 8-bit style mushroom power-up with white spots on a red cap.",
"A heraldic knight's shield with a roaring lion crest and a diagonal blue-and-silver split.",
"A friendly boxy robot companion with an antenna, glowing eyes, and a chest panel.",
"A golden ornate skeleton key with a clover-shaped bow and intricate teeth.",
];
// ---------- helpers ----------
// Escape &, < and > so arbitrary text can be inserted as HTML text content.
function esc(s) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  return s.replace(/[&<>]/g, (ch) => entities[ch]);
}
// Escape & and " so text can sit inside a double-quoted HTML attribute value.
function escAttr(s) {
  return s.replace(/[&"]/g, (ch) => (ch === "&" ? "&amp;" : "&quot;"));
}
// Update the header status pill: `kind` (e.g. "live", "done", "err", or "" for neutral)
// becomes an extra class on the dot, and `text` becomes the visible label.
function setStatus(kind, text) {
  const classes = ["dot"];
  if (kind) classes.push(kind);
  statusDot.className = classes.join(" ");
  statusText.textContent = text;
}
// Line-level longest common subsequence of `a` and `b`.
// Returns a Set of the indices into `b` that belong to that subsequence,
// i.e. the lines of `b` treated as unchanged relative to `a`.
function unchangedSet(a, b) {
  const rows = a.length, cols = b.length;
  const width = cols + 1;
  // lcs[r * width + c] holds the LCS length of a[r..] and b[c..];
  // the extra last row and column stay 0 and act as the base case.
  const lcs = new Int32Array((rows + 1) * width);
  for (let r = rows - 1; r >= 0; r--) {
    for (let c = cols - 1; c >= 0; c--) {
      const here = r * width + c;
      if (a[r] === b[c]) lcs[here] = lcs[here + width + 1] + 1;
      else lcs[here] = Math.max(lcs[here + width], lcs[here + 1]);
    }
  }
  // Walk the table from the top-left, recording each matched position in `b`.
  const kept = new Set();
  let r = 0, c = 0;
  while (r < rows && c < cols) {
    if (a[r] === b[c]) {
      kept.add(c);
      r += 1;
      c += 1;
      continue;
    }
    const skipA = lcs[(r + 1) * width + c];
    const skipB = lcs[r * width + c + 1];
    // On ties, advance in `a` first.
    if (skipA >= skipB) r += 1;
    else c += 1;
  }
  return kept;
}
// Paint `source` into the code panel, one <div class="ln"> per line, and return the lines.
//   liveAgainst: previous frame's lines; lines not matched against it get the "live" class.
//   diffAgainst: previous round's final lines; lines not matched get the "diff" class.
// "diff" takes precedence over "live" when both would apply to the same line.
function renderCode(source, { liveAgainst = null, diffAgainst = null } = {}) {
  const lines = source.split("\n");
  const stableLive = liveAgainst ? unchangedSet(liveAgainst, lines) : null;
  const stableDiff = diffAgainst ? unchangedSet(diffAgainst, lines) : null;
  const rows = [];
  lines.forEach((text, idx) => {
    const changedDiff = stableDiff !== null && !stableDiff.has(idx);
    const changedLive = stableLive !== null && !stableLive.has(idx);
    let cls = "ln";
    if (changedDiff) cls += " diff"; // persistent tweak diff
    else if (changedLive) cls += " live"; // transient churn flash
    // An empty line still needs content so the row is not rendered empty.
    rows.push('<div class="' + cls + '">' + (esc(text) || "&nbsp;") + "</div>");
  });
  codeEl.innerHTML = rows.join("");
  return lines;
}
// Repair a (possibly front-mangled) SVG string before it is handed to the viewer.
//   1. If an "<svg" opening exists (any letter case), drop everything before it.
//   2. Otherwise, if a known child element (path, rect, circle, …) exists, wrap the text
//      from that element onward in a canonical <svg> with a default viewBox.
//   3. Cut everything after the last "</svg>", or append one if none is present.
// The source comment this replaces states that warm-start diffusion often eats the leading
// "<" of "<svg" and that this mirrors the server's extract_svg.
//
// All searches run as case-insensitive regexes on the ORIGINAL string. Searching in
// src.toLowerCase() and slicing src with the result is unsafe: lower-casing can change the
// string's length (e.g. U+0130 becomes two code units), which shifts every later index.
function normalizeSvg(src) {
  const open = src.search(/<svg/i);
  if (open !== -1) {
    src = src.slice(open);
  } else {
    const m = src.match(/<(?:path|rect|circle|ellipse|polygon|polyline|line|g|defs)\b/i);
    if (m) src = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">\n' + src.slice(m.index);
  }
  // Index of the LAST closing tag, so nested or repeated </svg> keep the outermost one.
  let close = -1;
  for (const hit of src.matchAll(/<\/svg>/gi)) close = hit.index;
  if (close !== -1) src = src.slice(0, close + 6); // 6 === "</svg>".length
  else src = src.replace(/\s+$/, "") + "\n</svg>";
  return src;
}
// Build the complete HTML document (as a string) for the preview iframe's srcdoc.
// The document is a fixed, hand-written Three.js viewer: it parses the given SVG with
// SVGLoader, extrudes every filled path into a shallow 3D mesh, frames the camera on the
// result, and gently oscillates it. Only the SVG text varies between calls; Three.js is
// imported from the CDN URLs in the import map.
//
//   svg: SVG markup string (model output after normalizeSvg).
//   returns: HTML source string.
//
// The SVG is embedded as a JS string literal inside an inline <script>. JSON.stringify does
// not escape "<", so a "</script>" (or "<!--") inside the SVG would end the script element
// early and let following markup run as HTML/script in the frame. Every "<" is therefore
// written as the escape \u003c, which the JS parser turns back into "<" at runtime.
function viewerHtml(svg) {
  return (
    '<!DOCTYPE html><html><head><meta charset="utf-8">' +
    '<style>html,body{margin:0;height:100%;overflow:hidden;background:#0b0e14;}canvas{display:block;}</style>' +
    '<script type="importmap">{"imports":{"three":"https://esm.sh/three@0.161.0","three/addons/":"https://esm.sh/three@0.161.0/examples/jsm/"}}<\/script>' +
    '</head><body><script type="module">' +
    'import * as THREE from "three";' +
    'import { SVGLoader } from "three/addons/loaders/SVGLoader.js";' +
    'import { OrbitControls } from "three/addons/controls/OrbitControls.js";' +
    'const SVG = ' + JSON.stringify(svg).replace(/</g, "\\u003c") + ';' +
    'const scene = new THREE.Scene(); scene.background = new THREE.Color(0x0b0e14);' +
    'const camera = new THREE.PerspectiveCamera(40, innerWidth/innerHeight, 0.1, 100000);' +
    'const renderer = new THREE.WebGLRenderer({antialias:true}); renderer.setPixelRatio(devicePixelRatio); renderer.setSize(innerWidth, innerHeight);' +
    'document.body.appendChild(renderer.domElement);' +
    'scene.add(new THREE.AmbientLight(0xffffff, 0.85));' +
    'const d1 = new THREE.DirectionalLight(0xffffff, 1.0); d1.position.set(0.5, 0.8, 1); scene.add(d1);' +
    'const d2 = new THREE.DirectionalLight(0x88aaff, 0.4); d2.position.set(-1, -0.5, 0.5); scene.add(d2);' +
    'const controls = new OrbitControls(camera, renderer.domElement); controls.enableDamping = true;' +
    'const group = new THREE.Group();' +
    // A parse failure degrades to an empty scene instead of throwing.
    'let data; try { data = new SVGLoader().parse(SVG); } catch (e) { data = { paths: [] }; }' +
    // Keep only filled paths and pre-collect all shapes so we can size depth to the 2D art.
    'const filled = data.paths.filter(function(p){ var f=(p.userData&&p.userData.style&&p.userData.style.fill); return f && f!=="none"; });' +
    'const tmp = new THREE.Box3();' +
    'const entries = [];' +
    'for (const path of filled) {' +
    ' const fill = (path.userData && path.userData.style && path.userData.style.fill) || "#cccccc";' +
    ' let shapes = []; try { shapes = SVGLoader.createShapes ? SVGLoader.createShapes(path) : path.toShapes(true); } catch (e) { try { shapes = path.toShapes(true); } catch (_) {} }' +
    ' if (shapes.length) entries.push({ fill: fill, shapes: shapes });' +
    '}' +
    // 2D extent drives a shallow, art-relative extrusion thickness.
    'for (const e of entries) for (const s of e.shapes) { const g0 = new THREE.ShapeGeometry(s); g0.computeBoundingBox(); tmp.union(g0.boundingBox); g0.dispose(); }' +
    'const flat = tmp.getSize(new THREE.Vector3());' +
    'const span = Math.max(flat.x, flat.y) || 100;' +
    'const depth = span * 0.11;' +
    'const relief = depth * 0.32;' +
    'const n = entries.length;' +
    'let layer = 0;' +
    'for (const e of entries) {' +
    ' const mat = new THREE.MeshStandardMaterial({ color: new THREE.Color(e.fill), metalness: 0.1, roughness: 0.65, side: THREE.DoubleSide, polygonOffset: true, polygonOffsetFactor: -1, polygonOffsetUnits: -1 });' +
    // Total layer separation is capped at `relief` regardless of path count, so components stay packed.
    ' const zOff = n > 1 ? (layer / (n - 1)) * relief : 0;' +
    ' for (const shape of e.shapes) {' +
    ' try {' +
    ' const geo = new THREE.ExtrudeGeometry(shape, { depth: depth, bevelEnabled: true, bevelThickness: depth*0.15, bevelSize: depth*0.1, bevelSegments: 2 });' +
    ' const mesh = new THREE.Mesh(geo, mat); mesh.position.z = zOff; mesh.renderOrder = layer; group.add(mesh);' +
    ' } catch (e2) {}' +
    ' }' +
    ' layer++;' +
    '}' +
    // Negative y scale mirrors the group vertically.
    'group.scale.y = -1;' +
    'const box = new THREE.Box3().setFromObject(group);' +
    'const center = box.getCenter(new THREE.Vector3());' +
    'if (isFinite(center.x)) group.position.sub(center);' +
    'scene.add(group);' +
    'const size = box.getSize(new THREE.Vector3());' +
    'const frameDim = Math.max(size.x, size.y) || 100;' +
    // Camera distance at which frameDim fills the vertical field of view, plus 30% margin.
    'const dist = (frameDim / 2) / Math.tan((camera.fov * Math.PI / 180) / 2) * 1.3;' +
    'camera.position.set(0, 0, dist);' +
    'camera.lookAt(0, 0, 0); controls.update();' +
    'addEventListener("resize", () => { camera.aspect = innerWidth/innerHeight; camera.updateProjectionMatrix(); renderer.setSize(innerWidth, innerHeight); });' +
    // Gentle bounded oscillation shows relief depth but never rotates to the thin edge.
    'let t = 0;' +
    'function animate(){ requestAnimationFrame(animate); t += 0.012; group.rotation.y = Math.sin(t) * 0.5; controls.update(); renderer.render(scene, camera); }' +
    'animate();' +
    '<\/script></body></html>'
  );
}
// Load the 3D viewer for `source` into the preview iframe.
// An empty, missing, or whitespace-only source blanks the frame instead.
function renderPreview(source) {
  const hasContent = Boolean(source) && source.trim() !== "";
  preview.srcdoc = hasContent ? viewerHtml(normalizeSvg(source)) : "";
}
// ---------- generation ----------
// Return the shared client, connecting to this page's own origin on first use.
// While the first connection is being made the status pill reads "connecting…".
async function ensureClient() {
  if (client) return client;
  setStatus("", "connecting…");
  client = await Client.connect(window.location.origin);
  return client;
}
// Run one generation round for the text in the prompt box.
// Submits the prompt, the confirmed conversation and the slider / checkbox settings to the
// "/generate" endpoint, then consumes the streamed frames:
//   - "draft" / "commit" frames repaint the code panel with live-churn highlights;
//     only "commit" frames rebuild the 3D preview;
//   - a "done" frame paints the final source (diffed against the previous round when this
//     is a tweak), rebuilds the preview, and is recorded in the conversation afterwards;
//   - an "error" frame or an error status event stops the round.
// Does nothing when a round is already in flight or the prompt is empty.
//
// Both Build and Reset are disabled for the duration of the round. Leaving Reset enabled
// would let the user clear the conversation mid-stream, after which the still-running
// stream would keep repainting the panels and append its result to the just-cleared history.
async function run() {
  if (busy) return;
  const prompt = promptEl.value.trim();
  if (!prompt) { promptEl.focus(); return; }
  busy = true; buildBtn.disabled = true; resetBtn.disabled = true;
  // A non-empty conversation means this round modifies an existing asset.
  const isTweak = messages.length > 0;
  setStatus("live", isTweak ? "tweaking…" : "diffusing…");
  prevFrameLines = [];
  try {
    const c = await ensureClient();
    const payload = {
      prompt,
      history_json: JSON.stringify(messages),
      max_new_tokens: parseInt($("maxTokens").value, 10),
      max_iters: parseInt($("maxIters").value, 10),
      full_denoise: $("fullDenoise").checked,
      anim_delay: parseFloat($("delay").value),
      warm_start: $("warmStart").checked,
    };
    let finalSource = "";
    const sub = c.submit("/generate", payload);
    for await (const ev of sub) {
      if (ev.type === "data") {
        // Each data event carries one JSON-encoded frame as its first element.
        const frame = JSON.parse(ev.data[0]);
        if (frame.kind === "error") { setStatus("err", "error"); renderCode("/* " + frame.message + " */"); break; }
        if (frame.kind === "done") {
          finalSource = frame.source;
          // Persistent green highlight of what changed vs the previous round.
          prevFrameLines = renderCode(finalSource, { diffAgainst: isTweak ? lastFinalLines : null });
          renderPreview(finalSource);
          setStatus("done", "done");
          continue;
        }
        // draft / commit frame: the code panel churns live every frame, but the 3D
        // preview reloads Three.js from the CDN on each refresh — so only rebuild it on
        // committed blocks (not every denoising draft) to avoid perpetual blank flashing.
        const src = frame.source || "";
        prevFrameLines = renderCode(src, { liveAgainst: prevFrameLines });
        if (frame.kind === "commit") renderPreview(src);
        capInfo.textContent = `block ${frame.block} · step ${frame.step}/${frame.max_iters} · ${frame.canvas} tokens update simultaneously`;
        setStatus("live", `${frame.kind === "draft" ? "diffusing" : "committed"} · block ${frame.block} · step ${frame.step}`);
      } else if (ev.type === "status" && ev.stage === "error") {
        setStatus("err", "error"); break;
      }
    }
    // Only a round that produced a final source becomes part of the conversation.
    if (finalSource) {
      messages.push({ role: "user", content: prompt });
      messages.push({ role: "assistant", content: finalSource });
      lastFinalLines = finalSource.split("\n");
      promptEl.value = "";
      renderHistory();
    }
  } catch (e) {
    setStatus("err", "error");
    renderCode("/* connection error: " + (e && e.message ? e.message : e) + " */");
  } finally {
    // Always release the controls, whether the round finished, failed, or threw.
    busy = false; buildBtn.disabled = false; resetBtn.disabled = false;
  }
}
// Rebuild the history strip: one pill per user turn, labelled "build" for the first
// and "tweak N" afterwards, followed by the first 40 characters of that prompt.
function renderHistory() {
  const pills = [];
  let turnNo = 0;
  for (const m of messages) {
    if (m.role !== "user") continue;
    const label = turnNo === 0 ? "build" : "tweak " + turnNo;
    const tail = m.content.length > 40 ? "…" : "";
    pills.push(`<span class="turn"><b>${label}</b> · ${esc(m.content.slice(0, 40))}${tail}</span>`);
    turnNo += 1;
  }
  $("history").innerHTML = pills.join("");
}
// Return the app to its initial state: forget the conversation and diff baselines,
// empty both panels, the history strip, the caption and the prompt, and show "idle".
function reset() {
  // Conversation and diff state.
  lastFinalLines = [];
  prevFrameLines = [];
  messages = [];
  // Visible UI.
  for (const el of [codeEl, $("history")]) el.innerHTML = "";
  capInfo.textContent = "";
  promptEl.value = "";
  renderPreview("");
  setStatus("", "idle");
}
// ---------- wiring ----------
// Buttons, plus Ctrl/Cmd+Enter in the prompt box as a shortcut for Build.
buildBtn.addEventListener("click", run);
resetBtn.addEventListener("click", reset);
promptEl.addEventListener("keydown", (e) => {
  if (e.key === "Enter" && (e.metaKey || e.ctrlKey)) { e.preventDefault(); run(); }
});
// Mirror each slider into its readout element (the slider id plus "V").
// The delay slider moves in steps of 0.02, so it is shown with two decimals; one decimal
// would make several adjacent positions read the same and show 0.02 / 0.04 as "0.0s".
// sync() also runs once up front so the readout matches the slider's actual value at load.
for (const [id, fmt] of [["maxTokens", (v) => v], ["maxIters", (v) => v], ["delay", (v) => (+v).toFixed(2) + "s"]]) {
  const el = $(id), out = $(id + "V");
  const sync = () => { out.textContent = fmt(el.value); };
  el.addEventListener("input", sync);
  sync();
}
// Example chips: each shows the first 42 characters of a prompt and stores its index in
// data-i; one delegated click handler copies the full prompt into the prompt box.
$("chips").innerHTML = EXAMPLES.map((e, i) => `<button class="chip" data-i="${i}">${esc(e.slice(0, 42))}…</button>`).join("");
$("chips").addEventListener("click", (e) => {
  const i = e.target.getAttribute("data-i");
  // Clicks on the container itself carry no data-i and are ignored.
  if (i !== null) { promptEl.value = EXAMPLES[+i]; promptEl.focus(); }
});
// Initial paint: blank preview, idle status.
renderPreview("");
setStatus("", "idle");
</script>
</body>
</html>