<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>LiveEdit · Realtime</title>
<style>
:root { --bg:#0e0f13; --panel:#171922; --line:#2a2e3a; --fg:#e8e8ee; --accent:#c084fc; --good:#86efac; }
* { box-sizing: border-box; }
body { margin:0; background:var(--bg); color:var(--fg); font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Helvetica,Arial,sans-serif; }
.wrap { max-width:1100px; margin:0 auto; padding:24px 18px 60px; }
h1 { font-size:1.5rem; margin:0 0 4px; }
.sub { color:#9aa0ad; font-size:.95rem; margin:0 0 18px; line-height:1.5; }
.sub a { color:var(--accent); }
.controls { display:flex; gap:10px; flex-wrap:wrap; align-items:center; margin-bottom:16px; }
input[type=text] { flex:1; min-width:260px; background:var(--panel); border:1px solid var(--line); color:var(--fg); padding:12px 14px; border-radius:10px; font-size:1rem; }
button { background:var(--accent); color:#1a1024; border:0; padding:12px 18px; border-radius:10px; font-size:1rem; font-weight:600; cursor:pointer; }
button:disabled { opacity:.5; cursor:not-allowed; }
.timer { font-family:ui-monospace,Menlo,monospace; color:var(--good); background:#11210f; border:1px solid #234; padding:8px 12px; border-radius:8px; display:none; }
.grid { display:grid; grid-template-columns:1fr 1fr; gap:14px; }
.card { background:var(--panel); border:1px solid var(--line); border-radius:14px; overflow:hidden; }
.card h2 { font-size:.85rem; text-transform:uppercase; letter-spacing:.05em; color:#9aa0ad; margin:0; padding:10px 14px; border-bottom:1px solid var(--line); }
.media { aspect-ratio:832/480; background:#000; display:flex; align-items:center; justify-content:center; }
.media video, .media img { width:100%; height:100%; object-fit:cover; display:block; }
.placeholder { color:#5a6070; font-size:.9rem; }
.status { margin-top:14px; color:#9aa0ad; font-size:.9rem; min-height:1.2em; }
@media (max-width:780px){ .grid{ grid-template-columns:1fr; } }
</style>
</head>
<body>
<div class="wrap">
<h1>🌀 StreamDiffusionV2 · Realtime Webcam Diffusion</h1>
<p class="sub">
Live demo of <a href="https://streamdiffusionv2.github.io/" target="_blank">StreamDiffusionV2</a>
(MLSys 2026 Best Paper) on Wan2.1-T2V-1.3B. It streams your webcam through a causal video-diffusion
model with a <b>sink-token rolling KV cache</b> — built for <i>continuous</i> streaming, so it
keeps flowing without the window-shift burst. Type a style prompt, click <b>Start</b> to grab ZeroGPU
for ~60s. <b>Change the prompt anytime</b> — it updates the live stream.
</p>
<div class="controls">
<input id="instruction" type="text" placeholder="style prompt · e.g. psychedelic neon dream · van gogh · cyberpunk city" />
<button id="startBtn">▶ Start session</button>
<span id="timer" class="timer">⏱ <span id="count">58</span>s</span>
</div>
<div class="grid">
<div class="card">
<h2>Your webcam</h2>
<div class="media"><video id="cam" autoplay muted playsinline></video></div>
</div>
<div class="card">
<h2>Edited (live)</h2>
<div class="media"><img id="out" alt="" /><span id="outPh" class="placeholder">edited stream appears here</span></div>
</div>
</div>
<div id="status" class="status"></div>
</div>
<canvas id="grab" width="640" height="360" style="display:none"></canvas>
<script type="module">
import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
// --- DOM handles (ids come from the markup above) ---------------------------
const camEl = document.querySelector("#cam");          // <video> showing the local webcam
const outEl = document.querySelector("#out");          // <img> that displays edited frames
const outPh = document.querySelector("#outPh");        // placeholder text shown before the first frame
const startBtn = document.querySelector("#startBtn");
const instr = document.querySelector("#instruction");  // style-prompt text input
const timerEl = document.querySelector("#timer");      // countdown badge (hidden by default)
const countEl = document.querySelector("#count");      // number inside the countdown badge
const statusEl = document.querySelector("#status");
const grab = document.querySelector("#grab");          // hidden canvas used to encode webcam frames
const gctx = grab.getContext("2d");

// --- Tunables ---------------------------------------------------------------
const FPS = 30; // webcam frames sent per second (backend drops backlog)
const SESSION_SECONDS = 58; // starting value of the on-screen countdown

// --- Mutable session state --------------------------------------------------
let client = null;          // lazily created Gradio client
let stream = null;          // MediaStream from getUserMedia, kept across sessions
let captureTimer = null;    // interval handle for the frame uploader
let countdownTimer = null;  // interval handle for the countdown
let running = false;        // true while a session is in progress
// --- Low-latency preview player ---------------------------------------------
// Per the original design note the backend streams steadily (~14fps), so there
// is deliberately NO jitter buffer here: the queue is kept tiny and anything
// older than the newest MAX_QUEUE frames is discarded, so the preview tracks
// the most recent edit (minimal action->reaction delay).
let playQueue = [];
const MAX_QUEUE = 3; // ~0.2s worth; drop older frames past this
let lastShown = 0;

/**
 * requestAnimationFrame callback that displays queued frames.
 *
 * Trims the queue to its newest MAX_QUEUE entries, shows at most one frame per
 * call (and never faster than 30 per second), then re-schedules itself.
 *
 * @param {number} ts - timestamp passed in by requestAnimationFrame (ms).
 */
function playLoop(ts){
  const minGapMs = 1000 / 30;

  // Drop the stale head of the queue, keeping only the newest MAX_QUEUE frames.
  const overflow = playQueue.length - MAX_QUEUE;
  if (overflow > 0) {
    playQueue = playQueue.slice(overflow);
  }

  const hasFrame = playQueue.length > 0;
  if (hasFrame && ts - lastShown >= minGapMs) {
    outEl.src = playQueue.shift();
    outPh.style.display = "none"; // hide the placeholder text once a frame is shown
    lastShown = ts;
  }

  requestAnimationFrame(playLoop);
}
requestAnimationFrame(playLoop);
/**
 * Replace the status line's text.
 * @param {string} t - message to display (set as plain text, not HTML).
 */
function setStatus(t){
  statusEl.textContent = t;
}
/**
 * Return the shared Gradio client, connecting to this page's origin on first use.
 * A failed connect leaves `client` unset, so the next call tries again.
 *
 * @returns {Promise<object>} the connected client instance.
 */
async function ensureClient(){
  if (client) return client;
  client = await Client.connect(window.location.origin);
  return client;
}
/**
 * Acquire the webcam once and attach it to the preview <video>.
 *
 * Subsequent calls are no-ops while `stream` is set, so the camera permission
 * prompt is shown at most once per page load.
 *
 * @returns {Promise<void>}
 * @throws {Error} if the browser exposes no `navigator.mediaDevices.getUserMedia`
 *   (for example on a non-secure origin). Rejections from getUserMedia itself
 *   (permission denied, no camera) propagate unchanged.
 */
async function ensureCam(){
  if (stream) return;

  // Without this check the failure surfaces as an opaque
  // "Cannot read properties of undefined" TypeError in the status line.
  const devices = navigator.mediaDevices;
  if (!devices || typeof devices.getUserMedia !== "function") {
    throw new Error("Webcam access is unavailable in this browser or context (HTTPS or localhost is required).");
  }

  stream = await devices.getUserMedia({ video: { width: 832, height: 480 }, audio: false });
  camEl.srcObject = stream;
  // play() can reject (e.g. autoplay policy); the element also has `autoplay`,
  // so a rejection here is deliberately ignored.
  await camEl.play().catch(()=>{});
}
/**
 * POST the current style prompt to the backend's /instruction endpoint.
 *
 * Best-effort: a failed update must never abort the session, so this function
 * never rejects. Failures are reported with console.warn instead of being
 * dropped silently, so a prompt that is not taking effect can be diagnosed.
 *
 * @returns {Promise<void>}
 */
async function sendInstruction(){
  try {
    const res = await fetch("/instruction", {
      method:"POST", headers:{ "Content-Type":"application/json" },
      body: JSON.stringify({ instruction: instr.value || "" })
    });
    // fetch only rejects on network errors; HTTP error statuses resolve normally.
    if (!res.ok) console.warn(`Instruction update rejected: HTTP ${res.status}`);
  } catch (err) {
    console.warn("Instruction update failed:", err);
  }
}
/**
 * Start uploading webcam frames to /frame at FPS frames per second.
 *
 * Each tick draws the current <video> frame onto the hidden canvas (scaled to
 * the canvas size), encodes it as JPEG at quality 0.6 and POSTs the blob.
 * The interval handle is stored in `captureTimer` so stopAll() can cancel it.
 *
 * Safe to call more than once: any previously running capture interval is
 * cleared first, so a repeated call cannot leave an orphaned timer that keeps
 * posting frames after stopAll().
 *
 * NOTE(review): the frame is stretched to the canvas dimensions without
 * preserving aspect ratio; confirm the canvas size matches what the backend
 * expects.
 */
function startCapture(){
  if (captureTimer) clearInterval(captureTimer);
  captureTimer = setInterval(() => {
    // videoWidth stays 0 until the camera has delivered its first frame.
    if (!camEl.videoWidth) return;
    gctx.drawImage(camEl, 0, 0, grab.width, grab.height);
    grab.toBlob(async (blob) => {
      if (!blob) return;
      // Individual frame uploads are best-effort: a dropped frame is simply
      // superseded by the next tick, so errors are intentionally not surfaced
      // (logging here would fire up to FPS times per second).
      try { await fetch("/frame", { method:"POST", body: blob }); } catch(e){}
    }, "image/jpeg", 0.6);
  }, 1000 / FPS);
}
/**
 * End the current session on the client side.
 *
 * Cancels the frame-upload and countdown intervals (if any), marks the session
 * as not running, hides the countdown badge and restores the Start button.
 * The webcam stream and the Gradio client are left in place for reuse.
 */
function stopAll(){
  if (captureTimer) {
    clearInterval(captureTimer);
    captureTimer = null;
  }
  if (countdownTimer) {
    clearInterval(countdownTimer);
    countdownTimer = null;
  }
  running = false;

  // Put the controls back into their idle state.
  Object.assign(startBtn, { disabled: false, textContent: "▶ Start session" });
  timerEl.style.display = "none";
}
/**
 * Show the countdown badge and tick it down once per second from
 * SESSION_SECONDS to 0. The display is clamped at 0 and the interval cancels
 * itself when it gets there.
 *
 * Safe to call more than once: a countdown that is already running is
 * cancelled first, so two intervals never fight over the same display. When
 * the countdown finishes on its own, `countdownTimer` is reset to null rather
 * than left holding a dead handle.
 */
function startCountdown(){
  if (countdownTimer) clearInterval(countdownTimer);

  let r = SESSION_SECONDS;
  countEl.textContent = r;
  timerEl.style.display = "inline-block";

  const handle = setInterval(() => {
    r -= 1;
    countEl.textContent = Math.max(0, r);
    if (r <= 0) {
      clearInterval(handle);
      // Only reset the shared handle if it still refers to this countdown.
      if (countdownTimer === handle) countdownTimer = null;
    }
  }, 1000);
  countdownTimer = handle;
}
// Forward prompt edits to the backend, but only while a session is running.
// Both the commit event ("change") and the per-edit event ("input") are wired.
for (const eventName of ["change", "input"]) {
  instr.addEventListener(eventName, () => {
    if (running) sendInstruction();
  });
}
// Start-button handler: runs one full session from webcam acquisition to teardown.
// Sequence: acquire webcam -> connect client -> send prompt -> submit /run_session ->
// consume the job's messages until it ends -> report the outcome -> stopAll().
// Any error along the way is shown in the status line; cleanup always runs.
startBtn.addEventListener("click", async () => {
// Ignore clicks while a session is active; the button is also disabled below.
if (running) return;
startBtn.disabled = true;
try {
setStatus("Requesting webcam…");
await ensureCam();
setStatus("Connecting…");
await ensureClient();
// From here on, prompt edits are forwarded by the input/change listeners.
running = true;
// Discard any frames left over from a previous session.
playQueue = [];
startBtn.textContent = "◌ Acquiring ZeroGPU…";
await sendInstruction();
setStatus("Queued for ZeroGPU — webcam streaming starts once the GPU is acquired…");
const job = client.submit("/run_session", {});
// Number of edited frames received so far in this session.
let frames = 0;
for await (const msg of job) {
// Only "data" messages with a non-null first element are of interest.
if (msg.type !== "data" || !msg.data || msg.data[0] == null) continue;
const payload = msg.data[0];
if (payload === "__READY__") {
// GPU is now allocated — only now start capturing & sending frames.
startBtn.textContent = "● Live";
// Re-send the prompt in case it was edited while waiting in the queue.
await sendInstruction();
startCapture();
startCountdown();
setStatus("ZeroGPU acquired — streaming your webcam through LiveEdit…");
continue;
}
// Any other payload is treated as a frame: playLoop later assigns it to the
// output <img>'s src (presumably an image URL or data URI — confirm against the backend).
playQueue.push(payload); // playLoop drains this queue and paces the actual display
frames += 1;
// Refresh the status text only every 12th frame.
if (frames % 12 === 0) setStatus(`Streaming… ${frames} edited frames`);
}
// The job's message stream has ended: report whether anything was produced.
setStatus(frames ? "Session ended. Click Start to run another ~60s session."
: "Session ended before any frames were produced — try again.");
} catch (e) {
// Prefer the error's message; fall back to the thrown value itself.
setStatus("Error: " + (e && e.message ? e.message : e));
} finally {
// Always stop timers and re-enable the Start button, on success or failure.
stopAll();
}
});
</script>
</body>
</html>
|