Qwen3.5-0.8B-WebGPU / index.html
Xenova's picture
Xenova HF Staff
Update index.html
c543fa9 verified
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Qwen3.5-0.8B WebGPU demo</title>
<style>
body {
font-family: "Inter", "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
gap: 12px;
padding: 14px;
margin: 0;
min-height: 100vh;
min-height: 100svh;
box-sizing: border-box;
background: linear-gradient(180deg, #f8fafc 0%, #eef2ff 100%);
color: #0f172a;
}
h1 {
margin: 0;
font-size: 2rem;
font-weight: 700;
letter-spacing: -0.02em;
color: #111827;
text-align: center;
}
/* Card that stacks the instruction row above the response row. */
.io-areas {
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 8px;
  width: min(92vw, 760px);
  padding: 10px 0;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  background: rgba(255, 255, 255, 0.9);
  box-shadow: 0 8px 24px rgba(15, 23, 42, 0.08);
}
.row-main {
display: grid;
grid-template-columns: minmax(0, 1fr) auto;
width: min(100%, 720px);
gap: 10px;
align-items: flex-end;
}
.field-group {
display: flex;
flex-direction: column;
gap: 4px;
align-items: stretch;
}
.instruction-group {
min-width: 0;
}
.response-group {
display: grid;
grid-template-columns: minmax(0, 1fr) auto;
width: min(100%, 720px);
gap: 10px;
align-items: flex-end;
max-width: none;
}
textarea {
width: 100%;
min-height: 2.2em;
padding: 7px 9px;
border: 1px solid #d1d5db;
border-radius: 8px;
font-size: 13px;
line-height: 1.35;
color: #111827;
background-color: #ffffff;
box-sizing: border-box;
}
/* Custom focus ring for text inputs. */
textarea:focus,
select:focus {
  /*
   * A transparent outline is invisible in normal rendering but is repainted
   * in a system colour in forced-colors (high contrast) mode, where the
   * box-shadow below is removed. `outline: none` would leave keyboard users
   * without any focus indicator there.
   */
  outline: 2px solid transparent;
  border-color: #6366f1;
  box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.15);
}
textarea[readonly] {
background-color: #f9fafb;
}
.wide-textarea {
width: 100%;
}
.single-line {
resize: none;
overflow: hidden;
}
.response-area {
min-height: calc(1.35em * 3 + 14px);
max-height: calc(1.35em * 3 + 14px);
resize: none;
overflow-y: auto;
}
.control-group {
display: flex;
align-items: center;
justify-content: flex-end;
gap: 8px;
}
.control-spacer {
width: 84px;
visibility: hidden;
}
#videoFeed {
display: block;
width: 100%;
height: 100%;
border-radius: 10px;
object-fit: cover;
}
#videoContainer {
position: relative;
width: min(92vw, 640px);
aspect-ratio: 4 / 3;
border: 1px solid #cbd5e1;
background-color: #000;
border-radius: 12px;
margin: 0 auto;
overflow: hidden;
box-shadow: 0 12px 30px rgba(15, 23, 42, 0.16);
}
#loadingOverlay {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
display: none;
justify-content: center;
align-items: center;
background-color: rgba(0, 0, 0, 0.7);
z-index: 10;
border-radius: 10px;
color: #ffffff;
font-size: 1.1rem;
font-weight: 600;
backdrop-filter: blur(1px);
}
#prefillIndicator {
position: absolute;
top: 10px;
right: 10px;
display: none;
align-items: center;
gap: 8px;
z-index: 11;
padding: 7px 10px;
border-radius: 999px;
background: rgba(17, 24, 39, 0.72);
color: #fff;
font-size: 12px;
font-weight: 600;
backdrop-filter: blur(2px);
}
#prefillIndicator.active {
display: inline-flex;
}
/* Pill button in the video's top-left corner; hidden until the container is hovered or focused. */
#sourceToggleButton {
  position: absolute;
  top: 10px;
  left: 10px;
  z-index: 12;
  padding: 6px 10px;
  border: 1px solid rgba(255, 255, 255, 0.28);
  border-radius: 999px;
  background: rgba(17, 24, 39, 0.72);
  color: #fff;
  font-size: 12px;
  font-weight: 600;
  cursor: pointer;
  backdrop-filter: blur(2px);
  opacity: 0;
  pointer-events: none;
  transition: opacity 0.18s ease;
}
#sourceToggleButton:hover:not(:disabled) {
  background: rgba(31, 41, 55, 0.82);
}
#sourceToggleButton:disabled {
  opacity: 0.45;
  cursor: not-allowed;
}
#videoContainer:hover #sourceToggleButton,
#videoContainer:focus-within #sourceToggleButton {
  opacity: 1;
  pointer-events: auto;
}
/*
 * The reveal rule above carries two ID selectors and therefore out-ranks
 * `#sourceToggleButton:disabled`; without this override a disabled button
 * would render fully opaque (i.e. look enabled) while the container is hovered.
 */
#videoContainer:hover #sourceToggleButton:disabled,
#videoContainer:focus-within #sourceToggleButton:disabled {
  opacity: 0.45;
}
.spinner {
width: 12px;
height: 12px;
border: 2px solid rgba(255, 255, 255, 0.35);
border-top-color: #ffffff;
border-radius: 50%;
animation: spin 0.8s linear infinite;
}
@keyframes spin {
to {
transform: rotate(360deg);
}
}
#startButton {
min-width: 84px;
padding: 8px 14px;
font-size: 14px;
font-weight: 600;
cursor: pointer;
border: none;
border-radius: 8px;
color: white;
transition:
transform 0.1s ease,
box-shadow 0.2s ease;
}
#startButton:hover:not(:disabled) {
transform: translateY(-1px);
box-shadow: 0 6px 16px rgba(15, 23, 42, 0.2);
}
#startButton.start {
background-color: #16a34a;
}
#startButton.stop {
background-color: #dc2626;
}
label {
font-weight: 600;
color: #374151;
font-size: 13px;
}
.hidden {
display: none;
}
@media (max-width: 640px) {
body {
padding: 10px;
}
h1 {
font-size: 1.2rem;
text-align: center;
}
.row-main {
grid-template-columns: 1fr;
align-items: stretch;
}
.response-group {
grid-template-columns: 1fr;
align-items: stretch;
}
.control-group {
justify-content: flex-start;
}
.control-spacer {
display: none;
}
}
</style>
</head>
<body>
<h1>Qwen3.5-0.8B WebGPU demo</h1>
<!-- Live preview plus its overlays; the hidden canvas below is the frame-capture surface used by the script. -->
<div id="videoContainer">
  <video id="videoFeed" autoplay playsinline></video>
  <button id="sourceToggleButton" type="button">Use video file</button>
  <input id="videoFileInput" class="hidden" type="file" accept="video/*">
  <div id="prefillIndicator">
    <span class="spinner"></span>
    <span>Processing image</span>
  </div>
  <div id="loadingOverlay">Loading model (~850MB)...</div>
</div>
<canvas id="canvas" class="hidden"></canvas>
<!-- Instruction input with the Start/Stop control, followed by the read-only response box. -->
<div class="io-areas">
  <div class="row-main">
    <div class="field-group instruction-group">
      <label for="instructionText">Instruction:</label>
      <textarea
        id="instructionText"
        class="wide-textarea single-line"
        name="Instruction"
        rows="1"
      ></textarea>
    </div>
    <div class="control-group">
      <!-- Explicit type: a bare <button> defaults to "submit" and would submit any form later wrapped around it. -->
      <button id="startButton" class="start" type="button">Start</button>
    </div>
  </div>
  <div class="field-group response-group">
    <div class="field-group">
      <label for="responseText">Response:</label>
      <textarea
        id="responseText"
        class="wide-textarea response-area"
        name="Response"
        rows="3"
        readonly
        placeholder="Response will appear here..."
      ></textarea>
    </div>
    <!-- Invisible spacer so the response column matches the instruction column's width. -->
    <div class="control-spacer" aria-hidden="true"></div>
  </div>
</div>
<script type="module">
import {
AutoProcessor,
Qwen3_5ForConditionalGeneration,
RawImage,
TextStreamer,
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.5";
// Model repository id passed to both the processor and model loaders.
const MODEL_ID = "onnx-community/Qwen3.5-0.8B-ONNX";
// Captured frames wider than this are downscaled before being handed to the processor.
const CAPTURE_MAX_WIDTH = 800;
// Cached DOM handles used throughout the script.
const video = document.getElementById("videoFeed");
const canvas = document.getElementById("canvas");
const instructionText = document.getElementById("instructionText");
const responseText = document.getElementById("responseText");
const startButton = document.getElementById("startButton");
const loadingOverlay = document.getElementById("loadingOverlay");
const prefillIndicator = document.getElementById("prefillIndicator");
const sourceToggleButton = document.getElementById("sourceToggleButton");
const videoFileInput = document.getElementById("videoFileInput");
// Default prompt shown in the instruction box.
instructionText.value = "Briefly describe what you see (2 sentences max).";
// Active webcam stream from getUserMedia, or null when none is held.
let stream = null;
// Object URL of the currently loaded video file, or null.
let fileObjectUrl = null;
// True between Start and Stop; the processing loop keeps iterating while this is set.
let isProcessing = false;
// Set once the processor and model have finished loading.
let isModelReady = false;
// Current frame source: "webcam" or "file".
let sourceMode = "webcam";
// Populated by initModel().
let processor = null;
let model = null;
/** Returns true once the video element reports non-zero intrinsic dimensions. */
function hasVideoFrame() {
  const { videoWidth, videoHeight } = video;
  return videoWidth > 0 && videoHeight > 0;
}
/**
 * Reports whether the selected source mode has something attached:
 * a `src` on the video element in file mode, a held stream otherwise.
 */
function hasActiveInput() {
  const attachedSource = sourceMode === "file" ? video.src : stream;
  return Boolean(attachedSource);
}
/** Sets the toggle's label to the source the user would switch to. */
function updateSourceToggleButton() {
  let label = "Use webcam";
  if (sourceMode === "webcam") {
    label = "Use video file";
  }
  sourceToggleButton.textContent = label;
}
/** Enables the Start button only when the model is loaded and a source is attached. */
function updateStartAvailability() {
  const canStart = isModelReady && hasActiveInput();
  setStartButtonEnabled(canStart);
}
function setResponse(text) {
responseText.value = text;
}
/** Shows (flex) or hides the "Loading model" overlay drawn over the video. */
function setLoading(isLoading) {
  const display = isLoading ? "flex" : "none";
  loadingOverlay.style.display = display;
}
/** Adds or removes the `active` class that reveals the "Processing image" badge. */
function setPrefillProcessing(isProcessingImage) {
  const { classList } = prefillIndicator;
  if (isProcessingImage) {
    classList.add("active");
  } else {
    classList.remove("active");
  }
}
/** Disables or re-enables the source toggle button (the only control affected here). */
function setControlsDisabled(disabled) {
sourceToggleButton.disabled = disabled;
}
/**
 * Switches the main button between its "Start" and "Stop" appearance.
 * Any `mode` other than "start" selects the Stop appearance.
 */
function setStartButtonMode(mode) {
  const showStart = mode === "start";
  startButton.textContent = showStart ? "Start" : "Stop";
  startButton.classList.remove(showStart ? "stop" : "start");
  startButton.classList.add(showStart ? "start" : "stop");
}
/** Toggles the main button's `disabled` state and the matching inline opacity and cursor. */
function setStartButtonEnabled(enabled) {
  startButton.disabled = !enabled;
  const { style } = startButton;
  style.opacity = enabled ? "1" : "0.6";
  style.cursor = enabled ? "pointer" : "not-allowed";
}
/**
 * Loads the processor and then the model for MODEL_ID on the WebGPU device,
 * storing them in the module-level `processor` and `model` variables and
 * reporting progress through the response box.
 *
 * The loading overlay is shown for the duration of the call and is always
 * hidden again, including when a load rejects. Errors propagate to the caller.
 */
async function initModel() {
  setLoading(true);
  try {
    setResponse("Loading processor...");
    processor = await AutoProcessor.from_pretrained(MODEL_ID);
    setResponse("Processor loaded. Loading model...");
    model = await Qwen3_5ForConditionalGeneration.from_pretrained(MODEL_ID, {
      // Per-component dtype selection.
      dtype: {
        embed_tokens: "q4",
        vision_encoder: "fp16",
        decoder_model_merged: "q4",
      },
      device: "webgpu",
    });
    setResponse("Model loaded. Initializing camera...");
  } finally {
    // Hide the overlay on failure too, so this function never leaves it stuck on screen.
    setLoading(false);
  }
}
/**
 * Requests a video-only webcam stream and makes it the active source.
 *
 * On success: stops any previously held stream, revokes a leftover video-file
 * object URL, attaches the new stream to the video element, switches
 * `sourceMode` to "webcam" and refreshes the toggle label and Start button.
 * On failure: logs the error, reports it in the response box and via `alert`.
 *
 * @returns {Promise<boolean>} true when the camera is attached, false otherwise.
 */
async function initCamera() {
  try {
    // `navigator.mediaDevices` is undefined outside secure contexts; fail with a
    // descriptive error instead of a "cannot read properties of undefined" TypeError.
    if (!navigator.mediaDevices?.getUserMedia) {
      throw new DOMException(
        "navigator.mediaDevices.getUserMedia is unavailable",
        "NotSupportedError",
      );
    }
    const newStream = await navigator.mediaDevices.getUserMedia({
      video: true,
      audio: false,
    });
    // Release the tracks of a stream we were already holding so the old
    // capture does not stay open once its reference is overwritten.
    if (stream && stream !== newStream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    stream = newStream;
    if (fileObjectUrl) {
      URL.revokeObjectURL(fileObjectUrl);
      fileObjectUrl = null;
    }
    // Drop any file source before attaching the live stream.
    video.removeAttribute("src");
    video.srcObject = stream;
    sourceMode = "webcam";
    updateSourceToggleButton();
    updateStartAvailability();
    setResponse("Camera access granted. Ready to start.");
    return true;
  } catch (err) {
    console.error("Error accessing camera:", err);
    setResponse(
      `Error accessing camera: ${err.name} - ${err.message}. Please ensure permissions are granted and you are on HTTPS or localhost.`,
    );
    alert(
      `Error accessing camera: ${err.name}. Make sure you've granted permission and are on HTTPS or localhost.`,
    );
    return false;
  }
}
/** Stops every track of the held webcam stream (if any) and clears the reference. */
function stopWebcamStream() {
  if (!stream) return;
  for (const track of stream.getTracks()) {
    track.stop();
  }
  stream = null;
}
/**
 * Makes a user-selected video file the active source.
 *
 * Stops the webcam, swaps in a fresh object URL for `file` (revoking the
 * previous one), plays it looped and muted, then refreshes the UI. A rejected
 * `play()` is only logged. Does nothing when `file` is falsy.
 */
async function switchToVideoFile(file) {
  if (!file) return;

  stopWebcamStream();
  if (fileObjectUrl) URL.revokeObjectURL(fileObjectUrl);
  fileObjectUrl = URL.createObjectURL(file);
  sourceMode = "file";

  video.srcObject = null;
  Object.assign(video, { src: fileObjectUrl, loop: true, muted: true });

  try {
    await video.play();
  } catch (playError) {
    console.warn("Autoplay blocked for video file:", playError);
  }

  updateSourceToggleButton();
  updateStartAvailability();
  setResponse(`Using video file: ${file.name}`);
}
/** Tries to activate the webcam; if that fails, re-evaluates the Start button state. */
async function switchToWebcam() {
  if (await initCamera()) return;
  updateStartAvailability();
}
/**
 * Draws the current video frame onto the hidden canvas, downscaled so its
 * width does not exceed CAPTURE_MAX_WIDTH (never upscaled), and wraps the
 * canvas contents in a RawImage.
 *
 * @returns the captured RawImage, or null when the video has no frame yet.
 */
function captureImage() {
  if (!hasVideoFrame()) {
    console.warn("Video stream not ready for capture.");
    return null;
  }

  const { videoWidth: sourceWidth, videoHeight: sourceHeight } = video;
  // Shrink-only factor: capped at 1 so small sources keep their native size.
  const scale = Math.min(1, CAPTURE_MAX_WIDTH / sourceWidth);
  const toCanvasSize = (length) => Math.max(1, Math.round(length * scale));
  canvas.width = toCanvasSize(sourceWidth);
  canvas.height = toCanvasSize(sourceHeight);

  const ctx = canvas.getContext("2d", { willReadFrequently: true });
  ctx.imageSmoothingEnabled = false;
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
  return RawImage.fromCanvas(canvas);
}
/**
 * Runs one generation pass for a single image plus text instruction and
 * streams the decoded output into the response box.
 *
 * The chat template is applied with `enable_thinking: false` in the tokenizer
 * kwargs. The "Processing image" badge is shown from just before generation
 * until the first streamed chunk arrives, and is always cleared on exit.
 *
 * @param imgElement  image passed to the processor (the caller supplies a RawImage).
 * @param instruction user prompt text.
 */
async function runInference(imgElement, instruction) {
  const userTurn = {
    role: "user",
    content: [{ type: "image" }, { type: "text", text: instruction }],
  };
  const prompt = processor.apply_chat_template([userTurn], {
    add_generation_prompt: true,
    tokenizer_kwargs: { enable_thinking: false },
  });

  const preprocessStart = performance.now();
  const inputs = await processor(prompt, [imgElement]);
  const preprocessMs = performance.now() - preprocessStart;
  console.log(`Preprocessing time: ${preprocessMs.toFixed(2)} ms`);

  let sawFirstToken = false;
  const onToken = (token) => {
    if (sawFirstToken) {
      responseText.value += token;
      return;
    }
    // First chunk: hide the badge and overwrite whatever the box showed before.
    setPrefillProcessing(false);
    responseText.value = token.trimStart();
    sawFirstToken = true;
  };

  setPrefillProcessing(true);
  try {
    const streamer = new TextStreamer(processor.tokenizer, {
      skip_prompt: true,
      skip_special_tokens: true,
      callback_function: onToken,
    });
    await model.generate({
      ...inputs,
      do_sample: false,
      max_new_tokens: 128,
      streamer,
    });
  } finally {
    setPrefillProcessing(false);
  }
}
/**
 * Performs one capture-and-infer iteration: grabs the current frame and runs
 * inference with the instruction currently in the text box. Returns at once
 * when processing is not active; capture and inference failures are reported
 * in the response box rather than thrown.
 */
async function sendData() {
  if (!isProcessing) return;

  const prompt = instructionText.value;
  const frame = captureImage();
  if (!frame) {
    setResponse("Capture failed");
    return;
  }

  try {
    await runInference(frame, prompt);
  } catch (inferenceError) {
    console.error(inferenceError);
    setResponse(`Error: ${inferenceError.message}`);
  }
}
// True while an invocation of processingLoop() is still running. Kept separate
// from `isProcessing` because an iteration that was in flight when Stop was
// pressed continues until its generation finishes.
let processingLoopActive = false;

/**
 * Repeats sendData() for as long as `isProcessing` is set.
 *
 * Only one loop runs at a time: if Start is pressed while an earlier loop is
 * still finishing its last iteration, this call returns immediately and the
 * earlier loop carries on (it re-reads `isProcessing` after each iteration),
 * so two generations are never issued concurrently.
 *
 * Each iteration ends by yielding to the event loop through a timer. Without
 * it, an iteration that resolves immediately (for example a failed capture)
 * would keep the loop spinning on microtasks only, and the Stop click could
 * never be dispatched.
 */
async function processingLoop() {
  if (processingLoopActive) return;
  processingLoopActive = true;
  try {
    while (isProcessing) {
      await sendData();
      await new Promise((resolve) => setTimeout(resolve, 0));
    }
  } finally {
    processingLoopActive = false;
  }
}
/**
 * Start-button action. Checks, in order: model loaded, a source attached,
 * not already processing, and a video frame available. The first failing
 * check reports its message (the "already processing" case is silent) and
 * aborts; otherwise the UI flips to Stop mode and the processing loop is
 * launched without being awaited.
 */
function handleStart() {
  let blocker = null;
  if (!(isModelReady && processor && model)) {
    blocker = "Demo is not ready yet. Please wait.";
  } else if (!hasActiveInput()) {
    blocker = "No video source selected. Use webcam or choose a video file.";
  } else if (isProcessing) {
    return;
  } else if (!hasVideoFrame()) {
    blocker = "Video is not ready yet.";
  }
  if (blocker !== null) {
    setResponse(blocker);
    return;
  }

  isProcessing = true;
  setStartButtonMode("stop");
  setControlsDisabled(true);
  setResponse("Processing started...");
  processingLoop();
}
/**
 * Stop-button action: clears the processing flag, restores the Start
 * appearance, re-enables the source toggle and hides the badge. The response
 * box is only overwritten when it still begins with the start-up message.
 */
function handleStop() {
  isProcessing = false;
  setStartButtonMode("start");
  setControlsDisabled(false);
  setPrefillProcessing(false);

  const stillShowsStartMessage = responseText.value.startsWith("Processing started...");
  if (stillShowsStartMessage) {
    setResponse("Processing stopped.");
  }
}
// The main button acts as Stop while processing and as Start otherwise.
startButton.addEventListener("click", () => {
  const action = isProcessing ? handleStop : handleStart;
  action();
});
// In webcam mode the toggle opens the file picker; in file mode it switches back to the webcam.
sourceToggleButton.addEventListener("click", async () => {
  if (sourceMode !== "webcam") {
    await switchToWebcam();
    return;
  }
  videoFileInput.click();
});
// Load the picked file, then clear the input's value once the switch has completed.
videoFileInput.addEventListener("change", async (event) => {
  const input = event.target;
  const pickedFile = input.files?.[0];
  if (pickedFile) {
    await switchToVideoFile(pickedFile);
    input.value = "";
  }
});
// Re-evaluate the Start button whenever the video element fires "loadeddata".
video.addEventListener("loadeddata", () => updateStartAvailability());
// Page bootstrap: verify WebGPU, load the model, then attach the camera.
window.addEventListener("DOMContentLoaded", async () => {
  updateSourceToggleButton();
  setStartButtonEnabled(false);

  // `navigator.gpu` existing is not enough: requestAdapter() resolves to null
  // when no usable adapter is available. Checking here avoids starting the
  // model download only to fail afterwards.
  let adapter = null;
  try {
    adapter = navigator.gpu ? await navigator.gpu.requestAdapter() : null;
  } catch (adapterError) {
    console.warn("WebGPU adapter request failed:", adapterError);
  }
  if (!adapter) {
    const warningElement = document.createElement("p");
    warningElement.textContent = "WebGPU is not available in this browser.";
    warningElement.setAttribute("role", "alert");
    warningElement.style.color = "red";
    warningElement.style.textAlign = "center";
    // Insert after the video container rather than inside it: the container
    // has `overflow: hidden` and the video fills its full height, so a
    // paragraph placed after the video inside the container is clipped away.
    video.parentNode.after(warningElement);
    setResponse("WebGPU is not available in this browser.");
    return;
  }

  try {
    await initModel();
    isModelReady = true;
    await initCamera();
    updateStartAvailability();
  } catch (error) {
    console.error("Initialization error:", error);
    setLoading(false);
    setResponse(`Initialization failed: ${error.message}`);
    alert(`Initialization failed: ${error.message}`);
  }
});
// Release the camera and any video-file object URL when the page is unloading.
window.addEventListener("beforeunload", () => {
  stopWebcamStream();
  if (!fileObjectUrl) return;
  URL.revokeObjectURL(fileObjectUrl);
});
</script>
</body>
</html>