<!--
multimodalart's picture
multimodalart HF Staff
Upload index.html with huggingface_hub
d94b53d verified
Raw
History Blame Contribute Delete
8.19 kB
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>LiveEdit · Realtime</title>
<style>
:root { --bg:#0e0f13; --panel:#171922; --line:#2a2e3a; --fg:#e8e8ee; --accent:#c084fc; --good:#86efac; }
* { box-sizing: border-box; }
body { margin:0; background:var(--bg); color:var(--fg); font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Helvetica,Arial,sans-serif; }
.wrap { max-width:1100px; margin:0 auto; padding:24px 18px 60px; }
h1 { font-size:1.5rem; margin:0 0 4px; }
.sub { color:#9aa0ad; font-size:.95rem; margin:0 0 18px; line-height:1.5; }
.sub a { color:var(--accent); }
.controls { display:flex; gap:10px; flex-wrap:wrap; align-items:center; margin-bottom:16px; }
input[type=text] { flex:1; min-width:260px; background:var(--panel); border:1px solid var(--line); color:var(--fg); padding:12px 14px; border-radius:10px; font-size:1rem; }
button { background:var(--accent); color:#1a1024; border:0; padding:12px 18px; border-radius:10px; font-size:1rem; font-weight:600; cursor:pointer; }
button:disabled { opacity:.5; cursor:not-allowed; }
.timer { font-family:ui-monospace,Menlo,monospace; color:var(--good); background:#11210f; border:1px solid #234; padding:8px 12px; border-radius:8px; display:none; }
.grid { display:grid; grid-template-columns:1fr 1fr; gap:14px; }
.card { background:var(--panel); border:1px solid var(--line); border-radius:14px; overflow:hidden; }
.card h2 { font-size:.85rem; text-transform:uppercase; letter-spacing:.05em; color:#9aa0ad; margin:0; padding:10px 14px; border-bottom:1px solid var(--line); }
.media { aspect-ratio:832/480; background:#000; display:flex; align-items:center; justify-content:center; }
.media video, .media img { width:100%; height:100%; object-fit:cover; display:block; }
.placeholder { color:#5a6070; font-size:.9rem; }
.status { margin-top:14px; color:#9aa0ad; font-size:.9rem; min-height:1.2em; }
@media (max-width:780px){ .grid{ grid-template-columns:1fr; } }
</style>
</head>
<body>
<div class="wrap">
<h1>🌀 StreamDiffusionV2 · Realtime Webcam Diffusion</h1>
<p class="sub">
Live demo of <a href="https://streamdiffusionv2.github.io/" target="_blank">StreamDiffusionV2</a>
(MLSys 2026 Best Paper) on Wan2.1-T2V-1.3B. It streams your webcam through a causal video-diffusion
model with a <b>sink-token rolling KV cache</b> &mdash; built for <i>continuous</i> streaming, so it
keeps flowing without the window-shift burst. Type a style prompt, click <b>Start</b> to grab ZeroGPU
for ~60s. <b>Change the prompt anytime</b> &mdash; it updates the live stream.
</p>
<div class="controls">
<input id="instruction" type="text" placeholder="style prompt · e.g. psychedelic neon dream · van gogh · cyberpunk city" />
<button id="startBtn">▶ Start session</button>
<span id="timer" class="timer"><span id="count">58</span>s</span>
</div>
<div class="grid">
<div class="card">
<h2>Your webcam</h2>
<div class="media"><video id="cam" autoplay muted playsinline></video></div>
</div>
<div class="card">
<h2>Edited (live)</h2>
<div class="media"><img id="out" alt="" /><span id="outPh" class="placeholder">edited stream appears here</span></div>
</div>
</div>
<div id="status" class="status"></div>
</div>
<canvas id="grab" width="640" height="360" style="display:none"></canvas>
<script type="module">
import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
// --- DOM handles (ids are declared in the page markup) ----------------------
// <video> that previews the raw webcam stream.
const camEl = document.getElementById("cam");
// <img> whose src is replaced with each edited frame.
const outEl = document.getElementById("out");
// Placeholder text shown in the output card until the first frame is displayed.
const outPh = document.getElementById("outPh");
const startBtn = document.getElementById("startBtn");
// Text input holding the style prompt.
const instr = document.getElementById("instruction");
// Countdown badge (timerEl) and the number inside it (countEl).
const timerEl = document.getElementById("timer");
const countEl = document.getElementById("count");
const statusEl = document.getElementById("status");
// Hidden canvas used to rasterise webcam frames before JPEG encoding.
const grab = document.getElementById("grab");
const gctx = grab.getContext("2d");
// --- Session state ------------------------------------------------------------
// Gradio client, created on first use by ensureClient().
let client = null;
// Webcam MediaStream, acquired by ensureCam().
let stream = null;
// Interval ids owned by startCapture() / startCountdown(); cleared in stopAll().
let captureTimer = null;
let countdownTimer = null;
// True from the moment a session is being set up until stopAll() runs.
let running = false;
const FPS = 30; // webcam frames sent per second (backend drops backlog)
// Starting value of the on-screen countdown.
// NOTE(review): presumably chosen to sit just under the ~60s ZeroGPU window
// mentioned in the page copy; confirm against the backend.
const SESSION_SECONDS = 58;
// --- low-latency player: StreamDiffusionV2 streams steadily (~14fps), so we do
// NOT jitter-buffer. Keep a tiny queue and aggressively drop the backlog so the
// preview always shows the most recent edit (minimal action->reaction delay).
// Frames pushed by the Start click handler, consumed by playLoop().
let playQueue = [];
const MAX_QUEUE = 3; // ~0.2s worth; drop older frames past this
// requestAnimationFrame timestamp (ms) at which the last frame was displayed.
let lastShown = 0;
/**
 * Display loop, re-armed on every animation frame.
 *
 * Trims the queue to its newest MAX_QUEUE entries, then shows at most one
 * queued frame per 1/30 s. Showing the first frame also hides the placeholder.
 *
 * @param {number} ts  Timestamp supplied by requestAnimationFrame (ms).
 */
function playLoop(ts){
  // Discard everything but the most recent frames so the preview never lags.
  if (playQueue.length > MAX_QUEUE) {
    playQueue = playQueue.slice(-MAX_QUEUE);
  }

  const minGapMs = 1000 / 30;
  const dueForNext = ts - lastShown >= minGapMs;
  if (playQueue.length && dueForNext) {
    const nextFrame = playQueue.shift();
    outEl.src = nextFrame;
    outPh.style.display = "none";
    lastShown = ts;
  }

  requestAnimationFrame(playLoop);
}

// Kick the loop off once; it keeps itself alive from then on.
requestAnimationFrame(playLoop);
// Replace the text of the status line with `t`.
function setStatus(t){
  statusEl.textContent = t;
}
/**
 * Return the shared Gradio client, connecting to this page's own origin the
 * first time it is needed.
 *
 * @returns {Promise<object>} The connected client (also cached in `client`).
 */
async function ensureClient(){
  if (client) return client;
  client = await Client.connect(window.location.origin);
  return client;
}
/**
 * Acquire the webcam and attach it to the preview <video>.
 *
 * Reuses the cached MediaStream while it is still active; otherwise requests a
 * new video-only stream with an 832x480 constraint and caches it in `stream`.
 *
 * Throws an Error with a readable message when the browser exposes no
 * navigator.mediaDevices.getUserMedia (for example on a non-secure origin),
 * and otherwise propagates whatever getUserMedia rejects with (permission
 * denied, no camera, and so on).
 */
async function ensureCam(){
  // A cached stream whose tracks have all ended (camera unplugged, permission
  // revoked) would leave the preview black, so only reuse it while it is live.
  if (stream && stream.active) return;
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    throw new Error("Webcam access is unavailable — open this page over HTTPS in a browser that supports getUserMedia.");
  }
  stream = await navigator.mediaDevices.getUserMedia({ video: { width: 832, height: 480 }, audio: false });
  camEl.srcObject = stream;
  // play() can reject (e.g. autoplay policy); the element already has the
  // autoplay attribute, so a rejection here is deliberately ignored.
  await camEl.play().catch(()=>{});
}
/**
 * POST the current prompt text to the backend's /instruction endpoint as JSON
 * ({ instruction: string }); an empty input is sent as "".
 *
 * Best-effort: this never throws, so callers can await it without a try/catch.
 * Failures (network error or non-2xx response) are logged to the console
 * instead of being dropped silently.
 */
async function sendInstruction(){
  try {
    const res = await fetch("/instruction", {
      method:"POST", headers:{ "Content-Type":"application/json" },
      body: JSON.stringify({ instruction: instr.value || "" })
    });
    // fetch only rejects on network failure; HTTP errors must be checked by hand.
    if (!res.ok) console.warn(`Prompt update rejected: HTTP ${res.status}`);
  } catch(e){
    console.warn("Prompt update failed:", e);
  }
}
/**
 * Start sending webcam frames to the backend at FPS frames per second.
 *
 * On each tick the current video frame is drawn onto the scratch canvas
 * (scaled to the canvas size), encoded as JPEG at quality 0.6 and POSTed to
 * /frame. The interval id is stored in `captureTimer` so stopAll() can cancel it.
 */
function startCapture(){
  // Calling this twice must not orphan the earlier interval.
  if (captureTimer) clearInterval(captureTimer);

  const timerId = setInterval(() => {
    // videoWidth stays 0 until the camera has delivered its first frame.
    if (!camEl.videoWidth) return;
    gctx.drawImage(camEl, 0, 0, grab.width, grab.height);
    grab.toBlob(async (blob) => {
      // Encoding is asynchronous: skip the upload if capture was stopped (or
      // restarted) while this frame was being encoded.
      if (!blob || captureTimer !== timerId) return;
      // A frame that fails to upload is simply dropped; the next tick sends a newer one.
      try { await fetch("/frame", { method:"POST", body: blob }); } catch(e){}
    }, "image/jpeg", 0.6);
  }, 1000 / FPS);

  captureTimer = timerId;
}
/**
 * Tear down a session: clear the running flag, cancel the capture and
 * countdown intervals, hide the countdown badge and restore the Start button.
 */
function stopAll(){
  running = false;

  if (captureTimer) {
    clearInterval(captureTimer);
    captureTimer = null;
  }
  if (countdownTimer) {
    clearInterval(countdownTimer);
    countdownTimer = null;
  }

  // Return the controls to their idle appearance.
  timerEl.style.display = "none";
  startBtn.disabled = false;
  startBtn.textContent = "▶ Start session";
}
/**
 * Show the countdown badge and tick it down once per second, starting from
 * SESSION_SECONDS. The display never goes below 0, and the interval cancels
 * itself once the count reaches zero. The interval id is kept in `countdownTimer`.
 */
function startCountdown(){
  let remaining = SESSION_SECONDS;

  const tick = () => {
    remaining -= 1;
    countEl.textContent = Math.max(0, remaining);
    if (remaining <= 0) clearInterval(countdownTimer);
  };

  countEl.textContent = remaining;
  timerEl.style.display = "inline-block";
  countdownTimer = setInterval(tick, 1000);
}
// While a session is running, forward every prompt edit to the backend, both
// per keystroke ("input") and when the edit is committed ("change").
const pushPromptIfLive = () => { if (running) sendInstruction(); };
for (const evtName of ["change", "input"]) {
  instr.addEventListener(evtName, pushPromptIfLive);
}
// Start button: runs one complete session.
//   1. Acquire the webcam and the Gradio client.
//   2. Send the prompt and submit /run_session.
//   3. Consume the resulting message stream: the "__READY__" sentinel starts
//      frame capture and the countdown; every other payload is queued for
//      display by playLoop().
// Whatever happens, stopAll() restores the idle UI afterwards.
startBtn.addEventListener("click", async () => {
  if (running) return;
  startBtn.disabled = true;

  let editedFrames = 0;

  // Runs when the backend reports that the GPU has been allocated; webcam
  // frames are only captured and uploaded from this point on.
  const goLive = async () => {
    startBtn.textContent = "● Live";
    await sendInstruction();
    startCapture();
    startCountdown();
    setStatus("ZeroGPU acquired — streaming your webcam through LiveEdit…");
  };

  try {
    setStatus("Requesting webcam…");
    await ensureCam();
    setStatus("Connecting…");
    await ensureClient();

    running = true;
    playQueue = [];
    startBtn.textContent = "◌ Acquiring ZeroGPU…";
    await sendInstruction();
    setStatus("Queued for ZeroGPU — webcam streaming starts once the GPU is acquired…");

    const session = client.submit("/run_session", {});
    for await (const message of session) {
      // Only "data" messages with a non-null first element carry a payload.
      const carriesPayload = message.type === "data" && message.data && message.data[0] != null;
      if (!carriesPayload) continue;

      const payload = message.data[0];
      if (payload === "__READY__") {
        await goLive();
        continue;
      }

      // Hand the frame to playLoop(), which decides when to show it.
      playQueue.push(payload);
      editedFrames += 1;
      if (editedFrames % 12 === 0) setStatus(`Streaming… ${editedFrames} edited frames`);
    }

    if (editedFrames) {
      setStatus("Session ended. Click Start to run another ~60s session.");
    } else {
      setStatus("Session ended before any frames were produced — try again.");
    }
  } catch (e) {
    const detail = e && e.message ? e.message : e;
    setStatus("Error: " + detail);
  } finally {
    stopAll();
  }
});
</script>
</body>
</html>