import {
  AutoProcessor,
  Qwen3_5ForConditionalGeneration,
  RawImage,
  TextStreamer,
  InterruptableStoppingCriteria,
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.6";

/* ─── State ─── */
let processor = null;
let model = null;
let conversationImage = null; // single RawImage used across the conversation
// { raw: RawImage, dataURL: string, name: string } | null
// NOTE: despite its name, `dataURL` holds an object URL (URL.createObjectURL).
let attachedImage = null;
let isGenerating = false;
let pastKeyValues = null; // cached KV from previous generation
let imageGridThw = null; // cached image_grid_thw from initial image inputs
let promptHistory = ""; // raw prompt text built up across turns
const stoppingCriteria = new InterruptableStoppingCriteria();

/** Edge length (px) every attached image is resized to before being handed to the processor. */
const ATTACH_IMAGE_SIZE = 448;

/* ─── Wait for fonts, then reveal ─── */
document.fonts.ready.then(() => document.body.classList.add("ready"));

/* ─── DOM refs ─── */
const $ = (id) => document.getElementById(id);
const $loaderTx = $("loaderText");
const $messages = $("chatMessages");
const $input = $("msgInput");
const $btnSend = $("btnSend");
const $btnLoad = $("btnLoad");
const $btnReset = $("btnReset");
const $btnAttach = $("btnAttach");
const $fileInput = $("fileInput");
const $imgPrev = $("imagePreview");
const $imgThumb = $("imageThumb");
const $imgName = $("imageName");
const $btnRemImg = $("btnRemoveImage");
const $errBanner = $("errorBanner");
const $reasoning = $("reasoningToggle");
const $modelSelect = $("modelSelect");
const $modelSizeLabel = $("modelSizeLabel");
const $btnModelArrow = $("btnModelArrow");

/* ─── Model selector ─── */
// The <select> ignores pointer events except while it is explicitly opened
// through the arrow button; it is switched back on change/blur.
$btnModelArrow.addEventListener("click", () => {
  $modelSelect.style.pointerEvents = "auto";
  $modelSelect.focus();
  $modelSelect.showPicker?.();
});

$modelSelect.addEventListener("change", () => {
  $modelSizeLabel.textContent = $modelSelect.selectedOptions[0].textContent;
  $modelSelect.style.pointerEvents = "none";
});

$modelSelect.addEventListener("blur", () => {
  $modelSelect.style.pointerEvents = "none";
});

/* ─── Screen switching ─── */
/**
 * Marks the `.screen` element whose id equals `id` as active and
 * deactivates every other `.screen` element.
 *
 * @param {string} id - id of the screen element to activate.
 */
function showScreen(id) {
  for (const screen of document.querySelectorAll(".screen")) {
    screen.classList.toggle("active", screen.id === id);
  }
}

/* ─── Model loading ─── */
$btnLoad.addEventListener("click", async () => {
  showScreen("loading");
  try {
    const modelId = $modelSelect.value;
    const sizeLabel = $modelSizeLabel.textContent;

    $loaderTx.textContent = "Loading processor…";
    processor = await AutoProcessor.from_pretrained(modelId);

    $loaderTx.textContent = "Loading model weights…";
    model = await Qwen3_5ForConditionalGeneration.from_pretrained(modelId, {
      dtype: {
        embed_tokens: "q4",
        vision_encoder: "fp16",
        decoder_model_merged: "q4",
      },
      device: "webgpu",
    });

    $loaderTx.textContent = "Ready!";
    document.querySelector(".chat-header-title").textContent =
      `Qwen 3.5 Vision · ${sizeLabel}`;
    // Leave the "Ready!" text visible briefly before switching screens.
    setTimeout(() => showScreen("chat"), 400);
  } catch (err) {
    console.error(err);
    $loaderTx.textContent = "Failed to load model";
    document.querySelector(".loader-sub").textContent = err.message;
    document.querySelector(".loader-ring").style.borderTopColor = "var(--red)";
  }
});

/* ─── Image attachment ─── */
$btnAttach.addEventListener("click", () => {
  if ($btnAttach.disabled) return;
  $fileInput.click();
});

$fileInput.addEventListener("change", async (e) => {
  const file = e.target.files?.[0];
  if (!file) return;

  const objectURL = URL.createObjectURL(file);
  try {
    const raw = await RawImage.read(objectURL);
    const resized = await raw.resize(ATTACH_IMAGE_SIZE, ATTACH_IMAGE_SIZE);

    // A still-pending attachment was never sent, so nothing else displays
    // its URL once the thumbnail is replaced below — release it.
    if (attachedImage) URL.revokeObjectURL(attachedImage.dataURL);

    attachedImage = { raw: resized, dataURL: objectURL, name: file.name };
    $imgThumb.src = objectURL;
    $imgName.textContent = file.name;
    $imgPrev.classList.add("visible");
    updateSendBtn();
  } catch (err) {
    // Unreadable/unsupported file: surface it instead of leaving an
    // unhandled rejection, and release the URL that will never be shown.
    console.error(err);
    URL.revokeObjectURL(objectURL);
    $errBanner.textContent = `Could not read image: ${err.message}`;
    $errBanner.classList.add("visible");
  } finally {
    // Always clear the input so picking the same file again fires "change".
    $fileInput.value = "";
  }
});

$btnRemImg.addEventListener("click", () => {
  // The user discarded an unsent attachment; its object URL is not used
  // by any chat message, so it can be released here.
  if (attachedImage) URL.revokeObjectURL(attachedImage.dataURL);
  clearAttachment();
});

/**
 * Drops the pending attachment and hides/clears the preview strip.
 * Does not revoke the attachment's object URL: after a send, the same URL
 * is still displayed by the rendered chat message.
 */
function clearAttachment() {
  attachedImage = null;
  $imgPrev.classList.remove("visible");
  $imgThumb.src = "";
  $imgName.textContent = "";
  updateSendBtn();
}

/* ─── Input handling ─── */
$input.addEventListener("input", () => {
  // Reset first so scrollHeight reflects the content, then grow up to 140px.
  $input.style.height = "auto";
  $input.style.height = `${Math.min($input.scrollHeight, 140)}px`;
  updateSendBtn();
});
$input.addEventListener("keydown", (e) => {
  // Enter sends; Shift+Enter keeps the default behaviour.
  if (e.key === "Enter" && !e.shiftKey) {
    e.preventDefault();
    if (!isGenerating) sendMessage();
  }
});

// The send button doubles as a stop button while a generation is running.
$btnSend.addEventListener("click", () => {
  if (isGenerating) {
    stoppingCriteria.interrupt();
  } else {
    sendMessage();
  }
});

/**
 * Syncs the send button with the current state: always enabled and styled
 * as "stopping" while generating, otherwise enabled only when there is
 * text or a pending attachment.
 */
function updateSendBtn() {
  if (isGenerating) {
    $btnSend.disabled = false;
    $btnSend.classList.add("stopping");
    return;
  }
  $btnSend.classList.remove("stopping");
  $btnSend.disabled = !$input.value.trim() && !attachedImage;
}

/**
 * Calls `dispose()` on every value of a key/value cache object.
 *
 * @param {object|null|undefined} cache - cache whose values expose `dispose()`.
 */
function disposeCacheTensors(cache) {
  if (!cache) return;
  for (const tensor of Object.values(cache)) {
    tensor.dispose();
  }
}

/** Disposes the cached key/value tensors of the previous turn, if any. */
function disposePastKeyValues() {
  if (pastKeyValues) {
    disposeCacheTensors(pastKeyValues);
    pastKeyValues = null;
  }
}

/* ─── Reset ─── */
// Bumped on every reset so that a generation started before the reset can
// tell it belongs to a discarded conversation.
let conversationEpoch = 0;

// sendMessage() removes the element matching ".welcome-msg", so the welcome
// text must live inside such a wrapper.
// NOTE(review): the inner heading/paragraph tags are a reconstruction —
// confirm against the page's initial markup and stylesheet.
const WELCOME_HTML = `
  <div class="welcome-msg">
    <h2>Start a conversation</h2>
    <p>
      Optionally attach an image, then type your message.<br />
      The model runs entirely in your browser.
    </p>
  </div>
`;

$btnReset.addEventListener("click", () => {
  conversationEpoch++;

  // Release object URLs that are about to lose their last on-screen use.
  if (attachedImage) URL.revokeObjectURL(attachedImage.dataURL);
  for (const imgEl of $messages.querySelectorAll(".msg-image")) {
    URL.revokeObjectURL(imgEl.src);
  }

  conversationImage = null;
  attachedImage = null;

  if (isGenerating) {
    // The in-flight generate() call may still be reading the cache, so do
    // not dispose it here; just drop the reference and ask generation to
    // stop. sendMessage() resets the stopping criteria when it unwinds.
    pastKeyValues = null;
    stoppingCriteria.interrupt();
  } else {
    disposePastKeyValues();
    stoppingCriteria.reset();
  }

  imageGridThw = null;
  promptHistory = "";
  $imgPrev.classList.remove("visible");
  $btnAttach.disabled = false;
  $messages.innerHTML = WELCOME_HTML;
  $errBanner.classList.remove("visible");
  $input.value = "";
  $input.style.height = "auto";
  updateSendBtn();
});

/* ─── Chat logic ─── */
// Reasoning delimiters used in the assistant turn of the prompt and looked
// for in the streamed output.
const THINK_OPEN = "<think>";
const THINK_CLOSE = "</think>";

/** Removes the transition bookkeeping once a collapse/expand animation ends. */
function finishThinkingTransition(thinkingEl) {
  thinkingEl.addEventListener(
    "transitionend",
    () => {
      thinkingEl.classList.remove("collapsing");
      thinkingEl.style.maxHeight = "";
    },
    { once: true },
  );
}

/** Animates the thinking block closed and flips the chevron to "▶". */
function collapseThinking(thinkingEl, chevron) {
  // Pin the current height first so the transition has a start value.
  thinkingEl.style.maxHeight = `${thinkingEl.scrollHeight}px`;
  thinkingEl.classList.add("collapsing");
  requestAnimationFrame(() => {
    thinkingEl.classList.add("collapsed");
  });
  finishThinkingTransition(thinkingEl);
  chevron.textContent = "▶";
}

/** Animates the thinking block open and flips the chevron to "▼". */
function expandThinking(thinkingEl, chevron) {
  thinkingEl.classList.add("collapsing");
  thinkingEl.classList.remove("collapsed");
  thinkingEl.style.maxHeight = `${thinkingEl.scrollHeight}px`;
  finishThinkingTransition(thinkingEl);
  chevron.textContent = "▼";
}

/**
 * Inserts a collapsible "Thinking" label + body right before `contentEl`.
 *
 * @param {Element} contentEl - the message's `.msg-content` element.
 * @returns {{ thinkingEl: HTMLDivElement, chevron: HTMLSpanElement }}
 */
function createThinkingBlock(contentEl) {
  const label = document.createElement("div");
  label.className = "msg-thinking-label";

  const chevron = document.createElement("span");
  chevron.className = "msg-thinking-chevron";
  chevron.textContent = "▼";
  label.append(chevron, " Thinking");

  const thinkingEl = document.createElement("div");
  thinkingEl.className = "msg-thinking";
  contentEl.before(label, thinkingEl);

  label.addEventListener("click", () => {
    if (thinkingEl.classList.contains("collapsed")) {
      expandThinking(thinkingEl, chevron);
    } else {
      collapseThinking(thinkingEl, chevron);
    }
  });

  return { thinkingEl, chevron };
}

/** Strips leading newlines and any `<|im_end|>` markers from answer text. */
function cleanAnswerText(text) {
  return text.replace(/^\n+/, "").replaceAll("<|im_end|>", "");
}

/**
 * Sends the current input (and pending attachment, if any) as a user turn,
 * streams the assistant reply into a new message element, and updates the
 * cached key/values and prompt history for the next turn.
 *
 * No-op while a generation is running or when there is nothing to send.
 * Generation failures are logged and shown in the error banner.
 */
async function sendMessage() {
  if (isGenerating) return;

  const text = $input.value.trim();
  if (!text && !attachedImage) return;

  $errBanner.classList.remove("visible");

  // Clear welcome
  $messages.querySelector(".welcome-msg")?.remove();

  // Capture attached image before clearing
  const img = attachedImage;
  if (img) conversationImage = img.raw;

  // Render user message in the UI
  appendMessage("user", text, img?.dataURL);

  // Clear input fields
  $input.value = "";
  $input.style.height = "auto";
  clearAttachment();

  // Disable image attach for the rest of this conversation if we just used one
  if (conversationImage) {
    $btnAttach.disabled = true;
  }

  // Start generating
  isGenerating = true;
  updateSendBtn();
  const epoch = conversationEpoch;

  const assistantEl = appendMessage("assistant", "", null, true);
  const contentEl = assistantEl.querySelector(".msg-content");

  try {
    // Build prompt manually (can't use apply_chat_template with PKV approach)
    const isFirstTurn = promptHistory === "";
    const enableThinking = $reasoning.checked;

    let userPrompt = "<|im_start|>user\n";
    if (img?.raw) {
      userPrompt += "<|vision_start|><|image_pad|><|vision_end|>";
    }
    userPrompt += `${text}<|im_end|>\n`;
    // The assistant turn opens a reasoning block; pre-filling an empty,
    // already-closed block is how reasoning is switched off.
    userPrompt += enableThinking
      ? `<|im_start|>assistant\n${THINK_OPEN}\n`
      : `<|im_start|>assistant\n${THINK_OPEN}\n\n${THINK_CLOSE}\n\n`;

    let generateArgs;
    if (img?.raw) {
      // Image attached: must do a full encode (no PKV reuse possible)
      // Rebuild the full prompt including any prior conversation
      const fullPrompt = (isFirstTurn ? "" : `${promptHistory}\n`) + userPrompt;
      const inputs = await processor(fullPrompt, img.raw);

      // Cache image_grid_thw for future PKV continuation turns
      if (inputs.image_grid_thw) {
        imageGridThw = inputs.image_grid_thw;
      }

      // Discard past key values — image changes the encoded sequence
      disposePastKeyValues();
      generateArgs = { ...inputs };
    } else if (isFirstTurn) {
      // First turn, text only: full encode, no image
      const inputs = await processor(userPrompt);
      generateArgs = { ...inputs };
    } else {
      // Continuation: use past_key_values, no image re-encoding
      const inputs = await processor(`${promptHistory}\n${userPrompt}`);
      generateArgs = { ...inputs, past_key_values: pastKeyValues };

      // Pass image_grid_thw if we had an image earlier
      if (imageGridThw) {
        generateArgs.image_grid_thw = imageGridThw;
      }
    }

    let fullText = "";
    let thinkingDone = !enableThinking;
    let tokenCount = 0;
    let startTime = null;

    // Collapsible thinking block, only when reasoning is enabled
    const thinking = enableThinking ? createThinkingBlock(contentEl) : null;

    const streamer = new TextStreamer(processor.tokenizer, {
      skip_prompt: true,
      skip_special_tokens: !enableThinking,
      token_callback_function: () => {
        if (!startTime) startTime = performance.now();
        tokenCount++;
      },
      callback_function: (token) => {
        fullText += token;

        if (thinkingDone) {
          contentEl.textContent = cleanAnswerText(fullText);
        } else {
          // Look for the end-of-reasoning marker in everything seen so far
          // (it may be split across streamed chunks).
          const endIdx = fullText.indexOf(THINK_CLOSE);
          if (endIdx === -1) {
            thinking.thinkingEl.textContent = fullText;
          } else {
            thinkingDone = true;
            thinking.thinkingEl.textContent = fullText.slice(0, endIdx).trim();
            // From here on fullText only tracks the visible answer.
            fullText = fullText.slice(endIdx + THINK_CLOSE.length);
            contentEl.textContent = cleanAnswerText(fullText);

            // Auto-collapse thinking with animation
            collapseThinking(thinking.thinkingEl, thinking.chevron);
          }
        }

        $messages.scrollTop = $messages.scrollHeight;
      },
    });

    const result = await model.generate({
      ...generateArgs,
      max_new_tokens: enableThinking ? 2048 : 512,
      do_sample: true,
      streamer,
      stopping_criteria: stoppingCriteria,
      return_dict_in_generate: true,
    });

    if (epoch !== conversationEpoch) {
      // The conversation was reset while generating: this result belongs
      // to a discarded chat and must not repopulate the fresh state.
      disposeCacheTensors(result.past_key_values);
      imageGridThw = null;
      return;
    }

    // Update past key values for next turn
    pastKeyValues = result.past_key_values;

    // Decode the full sequence to maintain prompt history
    const [fullSequenceText] = processor.batch_decode(result.sequences, {
      skip_special_tokens: false,
    });
    promptHistory = fullSequenceText;

    // Show generation stats
    if (tokenCount > 0 && startTime) {
      const elapsed = (performance.now() - startTime) / 1000;
      const tps = (tokenCount / elapsed).toFixed(1);
      const statsEl = document.createElement("div");
      statsEl.className = "msg-stats";
      statsEl.textContent = `${tokenCount} tokens · ${tps} tok/s · ${elapsed.toFixed(1)}s`;
      assistantEl.appendChild(statsEl);
    }

    assistantEl.classList.remove("generating");
  } catch (err) {
    console.error(err);
    // After a reset the failed message is already gone and the error is
    // about a conversation the user threw away — don't show a banner.
    if (epoch !== conversationEpoch) return;
    assistantEl.remove();
    $errBanner.textContent = `Generation error: ${err.message}`;
    $errBanner.classList.add("visible");
  } finally {
    // Runs on success, failure and the early return above, so the UI can
    // never get stuck in the "generating" state.
    isGenerating = false;
    stoppingCriteria.reset();
    updateSendBtn();
    $messages.scrollTop = $messages.scrollHeight;
  }
}

/* ─── Render helpers ─── */
/**
 * Appends a chat message element to the message list and scrolls to it.
 *
 * @param {string} role - "user" renders the label "You"; any other value renders "Qwen 3.5".
 * @param {string} text - message text (ignored when `generating` is true).
 * @param {string|null|undefined} imageDataURL - URL of an image to show above the text, if any.
 * @param {boolean} [generating=false] - when true, adds the "generating"
 *   class and shows animated placeholder dots instead of `text`.
 * @returns {HTMLDivElement} the created message element.
 */
function appendMessage(role, text, imageDataURL, generating = false) {
  const el = document.createElement("div");
  el.className = `msg ${role}${generating ? " generating" : ""}`;

  const roleEl = document.createElement("div");
  roleEl.className = "msg-role";
  roleEl.textContent = role === "user" ? "You" : "Qwen 3.5";
  el.appendChild(roleEl);

  if (imageDataURL) {
    const img = document.createElement("img");
    img.className = "msg-image";
    img.src = imageDataURL;
    img.alt = "attached";
    el.appendChild(img);
  }

  const content = document.createElement("div");
  content.className = "msg-content";
  if (generating) {
    const dots = document.createElement("span");
    dots.className = "thinking-dots";
    for (let i = 0; i < 3; i++) {
      dots.appendChild(document.createElement("span"));
    }
    content.appendChild(dots);
  } else {
    content.textContent = text;
  }
  el.appendChild(content);

  $messages.appendChild(el);
  $messages.scrollTop = $messages.scrollHeight;
  return el;
}