Spaces:
Running
Running
| import { | |
| AutoProcessor, | |
| Qwen3_5ForConditionalGeneration, | |
| RawImage, | |
| TextStreamer, | |
| InterruptableStoppingCriteria, | |
| } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.6"; | |
/* ─── State ─── */

// Model handles; both stay null until the load handler assigns them.
let model = null;
let processor = null;

// Image bookkeeping.
/** Pending, not-yet-sent attachment: { raw: RawImage, dataURL: string, name: string } | null */
let attachedImage = null;
/** Single RawImage used across the conversation. */
let conversationImage = null;
/** image_grid_thw cached from the initial image inputs. */
let imageGridThw = null;

// Generation bookkeeping.
/** True while a generation is in flight. */
let isGenerating = false;
/** Cached KV from the previous generation. */
let pastKeyValues = null;
/** Raw prompt text built up across turns. */
let promptHistory = "";
/** Shared stopping criteria, interrupted by the send button while generating. */
const stoppingCriteria = new InterruptableStoppingCriteria();
/* ─── Wait for fonts, then reveal ─── */
// The body gets its "ready" class only once document.fonts.ready has resolved.
document.fonts.ready.then(() => {
  const { classList } = document.body;
  classList.add("ready");
});
/* ─── DOM refs ─── */
/** Shorthand for document.getElementById. */
const $ = (elementId) => document.getElementById(elementId);

// Model picker and loader text (used by the model selector and load handlers).
const $btnLoad = $("btnLoad");
const $modelSelect = $("modelSelect");
const $modelSizeLabel = $("modelSizeLabel");
const $btnModelArrow = $("btnModelArrow");
const $loaderTx = $("loaderText");

// Conversation area.
const $messages = $("chatMessages");
const $errBanner = $("errorBanner");
const $btnReset = $("btnReset");
const $reasoning = $("reasoningToggle");

// Message composer.
const $input = $("msgInput");
const $btnSend = $("btnSend");
const $btnAttach = $("btnAttach");
const $fileInput = $("fileInput");

// Attachment preview.
const $imgPrev = $("imagePreview");
const $imgThumb = $("imageThumb");
const $imgName = $("imageName");
const $btnRemImg = $("btnRemoveImage");
/* ─── Model selector ─── */
// The <select> only accepts pointer events between an arrow-button click and
// the following change/blur.
const lockModelSelect = () => {
  $modelSelect.style.pointerEvents = "none";
};

$btnModelArrow.addEventListener("click", () => {
  $modelSelect.style.pointerEvents = "auto";
  $modelSelect.focus();
  // showPicker is called only when the browser provides it.
  $modelSelect.showPicker?.();
});

$modelSelect.addEventListener("change", () => {
  // Mirror the chosen option's text into the size label.
  const [chosenOption] = $modelSelect.selectedOptions;
  $modelSizeLabel.textContent = chosenOption.textContent;
  lockModelSelect();
});

$modelSelect.addEventListener("blur", () => lockModelSelect());
/* ─── Screen switching ─── */
/**
 * Mark the `.screen` element whose id equals `id` as active and clear the
 * "active" class from every other `.screen` element.
 *
 * @param {string} id - Element id of the screen to show.
 */
function showScreen(id) {
  for (const screen of document.querySelectorAll(".screen")) {
    const isTarget = screen.id === id;
    screen.classList.toggle("active", isTarget);
  }
}
/* ─── Model loading ─── */
// Clicking "load" switches to the loading screen, fetches the processor and
// then the model for the selected id, and finally reveals the chat screen.
// On failure the loading screen stays visible and shows the error.
$btnLoad.addEventListener("click", async () => {
  showScreen("loading");
  try {
    const modelId = $modelSelect.value;
    const sizeLabel = $modelSizeLabel.textContent;

    $loaderTx.textContent = "Loading processor…";
    processor = await AutoProcessor.from_pretrained(modelId);

    $loaderTx.textContent = "Loading model weights…";
    model = await Qwen3_5ForConditionalGeneration.from_pretrained(modelId, {
      dtype: {
        embed_tokens: "q4",
        vision_encoder: "fp16",
        decoder_model_merged: "q4",
      },
      device: "webgpu",
    });

    $loaderTx.textContent = "Ready!";
    document.querySelector(".chat-header-title").textContent =
      `Qwen 3.5 Vision · ${sizeLabel}`;
    // Leave "Ready!" visible for a moment before switching screens.
    setTimeout(() => showScreen("chat"), 400);
  } catch (err) {
    console.error(err);
    $loaderTx.textContent = "Failed to load model";
    // A rejection value is not guaranteed to be an Error instance.
    document.querySelector(".loader-sub").textContent =
      err?.message ?? String(err);
    document.querySelector(".loader-ring").style.borderTopColor = "var(--red)";
  }
});
/* ─── Image attachment ─── */
// The attach button forwards to the hidden file input unless it is disabled.
$btnAttach.addEventListener("click", () => {
  if ($btnAttach.disabled) return;
  $fileInput.click();
});

// Read the chosen file, resize it to 448x448, and stage it as the pending
// attachment. Failures are reported in the error banner instead of surfacing
// as an unhandled rejection.
$fileInput.addEventListener("change", async (e) => {
  const file = e.target.files?.[0];
  if (!file) return;

  // Despite the `dataURL` property name this is an object URL for the file.
  const objectURL = URL.createObjectURL(file);
  try {
    const raw = await RawImage.read(objectURL);
    const resized = await raw.resize(448, 448);

    const previousURL = attachedImage?.dataURL;
    attachedImage = { raw: resized, dataURL: objectURL, name: file.name };
    $imgThumb.src = objectURL;
    $imgName.textContent = file.name;
    $imgPrev.classList.add("visible");

    // A replaced, never-sent attachment was only shown in the thumbnail,
    // which now points at the new URL, so the old one can be released.
    if (previousURL) URL.revokeObjectURL(previousURL);

    updateSendBtn();
  } catch (err) {
    console.error(err);
    // Nothing references the URL when reading or resizing failed.
    URL.revokeObjectURL(objectURL);
    $errBanner.textContent = `Could not load image: ${err?.message ?? String(err)}`;
    $errBanner.classList.add("visible");
  } finally {
    // Clear the input so picking the same file again still fires "change".
    $fileInput.value = "";
  }
});

$btnRemImg.addEventListener("click", clearAttachment);
/**
 * Drop the pending attachment: empty the preview's name and thumbnail, hide
 * the preview container, clear `attachedImage`, then refresh the send button.
 */
function clearAttachment() {
  $imgName.textContent = "";
  $imgThumb.src = "";
  $imgPrev.classList.remove("visible");
  attachedImage = null;
  // Must run last: the send button state depends on `attachedImage`.
  updateSendBtn();
}
/* ─── Input handling ─── */
// Auto-grow the textarea with its content, capped at 140px.
$input.addEventListener("input", () => {
  // Reset first so scrollHeight reflects the content, not the previous height.
  $input.style.height = "auto";
  $input.style.height = `${Math.min($input.scrollHeight, 140)}px`;
  updateSendBtn();
});

// Enter sends; Shift+Enter keeps the default behavior.
$input.addEventListener("keydown", (e) => {
  // Enter pressed while an IME composition is active confirms the
  // composition; it must neither be swallowed nor send the message.
  if (e.key !== "Enter" || e.shiftKey || e.isComposing) return;
  e.preventDefault();
  if (!isGenerating) sendMessage();
});

// The send button doubles as a stop button while a generation is running.
$btnSend.addEventListener("click", () => {
  if (isGenerating) {
    stoppingCriteria.interrupt();
  } else {
    sendMessage();
  }
});
/**
 * Sync the send button with the current state.
 *
 * While generating, the button is enabled and carries the "stopping" class.
 * Otherwise the class is removed and the button is disabled only when there
 * is neither non-blank input text nor a pending attachment.
 */
function updateSendBtn() {
  const { classList } = $btnSend;
  if (!isGenerating) {
    classList.remove("stopping");
    const hasText = $input.value.trim() !== "";
    $btnSend.disabled = !(hasText || attachedImage);
    return;
  }
  $btnSend.disabled = false;
  classList.add("stopping");
}
/**
 * Call dispose() on every value held in `pastKeyValues`, then clear the
 * cache. Does nothing when no cache is present.
 */
function disposePastKeyValues() {
  if (!pastKeyValues) return;
  Object.values(pastKeyValues).forEach((cached) => {
    cached.dispose();
  });
  pastKeyValues = null;
}
/* ─── Reset ─── */
// Clear the conversation: images, cached KV, prompt history, the message
// list, the error banner and the composer.
$btnReset.addEventListener("click", () => {
  if (isGenerating) {
    // Resetting mid-generation would dispose the KV cache handed to the
    // running generate() call, and sendMessage would then write its stale
    // results back into the fresh conversation. Stop the generation instead;
    // the conversation can be reset once it has finished.
    stoppingCriteria.interrupt();
    return;
  }

  conversationImage = null;
  // Also empties the preview thumbnail and file name, not just its visibility.
  clearAttachment();
  disposePastKeyValues();
  stoppingCriteria.reset();
  imageGridThw = null;
  promptHistory = "";

  $btnAttach.disabled = false;
  $messages.innerHTML = `
    <div class="welcome-msg">
      <h3>Start a conversation</h3>
      <p>Optionally attach an image, then type your message.<br>The model runs entirely in your browser.</p>
    </div>`;
  $errBanner.classList.remove("visible");
  $input.value = "";
  $input.style.height = "auto";
  updateSendBtn();
});
/* ─── Chat logic ─── */

/**
 * Animate the thinking block closed and point its chevron to the right.
 *
 * @param {HTMLElement} thinkingEl - The `.msg-thinking` element.
 * @param {HTMLElement} chevron - The chevron span inside the thinking label.
 */
function collapseThinkingBlock(thinkingEl, chevron) {
  // Pin the current height so the CSS transition has a start value.
  thinkingEl.style.maxHeight = `${thinkingEl.scrollHeight}px`;
  thinkingEl.classList.add("collapsing");
  requestAnimationFrame(() => {
    thinkingEl.classList.add("collapsed");
  });
  thinkingEl.addEventListener(
    "transitionend",
    () => {
      thinkingEl.classList.remove("collapsing");
      thinkingEl.style.maxHeight = "";
    },
    { once: true },
  );
  chevron.textContent = "▶";
}

/**
 * Animate the thinking block open and point its chevron down.
 *
 * @param {HTMLElement} thinkingEl - The `.msg-thinking` element.
 * @param {HTMLElement} chevron - The chevron span inside the thinking label.
 */
function expandThinkingBlock(thinkingEl, chevron) {
  thinkingEl.classList.add("collapsing");
  thinkingEl.classList.remove("collapsed");
  thinkingEl.style.maxHeight = `${thinkingEl.scrollHeight}px`;
  thinkingEl.addEventListener(
    "transitionend",
    () => {
      thinkingEl.classList.remove("collapsing");
      thinkingEl.style.maxHeight = "";
    },
    { once: true },
  );
  chevron.textContent = "▼";
}

/**
 * Send the composer's text (and pending image, if any) to the model and
 * stream the reply into a new assistant message.
 *
 * Does nothing while a generation is already running or when there is neither
 * text nor an attachment. Generation errors are logged, the placeholder
 * assistant message is removed, and the error banner is shown. The generating
 * state is always cleared afterwards.
 *
 * @returns {Promise<void>}
 */
async function sendMessage() {
  if (isGenerating) return;
  const text = $input.value.trim();
  if (!text && !attachedImage) return;
  $errBanner.classList.remove("visible");

  // Clear welcome
  $messages.querySelector(".welcome-msg")?.remove();

  // Capture attached image before clearing
  const img = attachedImage;
  if (img) conversationImage = img.raw;

  // Render user message in the UI
  appendMessage("user", text, img?.dataURL);

  // Clear input fields
  $input.value = "";
  $input.style.height = "auto";
  clearAttachment();

  // Disable image attach for the rest of this conversation if we just used one
  if (conversationImage) {
    $btnAttach.disabled = true;
  }

  // Start generating
  isGenerating = true;
  updateSendBtn();
  const assistantEl = appendMessage("assistant", "", null, true);
  const contentEl = assistantEl.querySelector(".msg-content");

  try {
    // Build prompt manually (can't use apply_chat_template with PKV approach)
    const isFirstTurn = promptHistory === "";
    const enableThinking = $reasoning.checked;

    // Build the user turn
    let userPrompt = "<|im_start|>user\n";
    if (img?.raw) {
      userPrompt += "<|vision_start|><|image_pad|><|vision_end|>";
    }
    userPrompt += `${text}<|im_end|>\n`;
    // With reasoning off, the assistant turn is pre-filled with an empty
    // think block.
    userPrompt += enableThinking
      ? "<|im_start|>assistant\n<think>\n"
      : "<|im_start|>assistant\n<think>\n\n</think>\n\n";

    let generateArgs;
    if (img?.raw) {
      // Image attached: must do a full encode (no PKV reuse possible)
      // Rebuild the full prompt including any prior conversation
      const fullPrompt = (isFirstTurn ? "" : `${promptHistory}\n`) + userPrompt;
      const inputs = await processor(fullPrompt, img.raw);
      // Cache image_grid_thw for future PKV continuation turns
      if (inputs.image_grid_thw) {
        imageGridThw = inputs.image_grid_thw;
      }
      // Discard past key values — image changes the encoded sequence
      disposePastKeyValues();
      generateArgs = { ...inputs };
    } else if (isFirstTurn) {
      // First turn, text only: full encode, no image
      const inputs = await processor(userPrompt);
      generateArgs = { ...inputs };
    } else {
      // Continuation: use past_key_values, no image re-encoding
      const inputs = await processor(`${promptHistory}\n${userPrompt}`);
      generateArgs = { ...inputs, past_key_values: pastKeyValues };
      // Pass image_grid_thw if we had an image earlier
      if (imageGridThw) {
        generateArgs.image_grid_thw = imageGridThw;
      }
    }

    const THINK_END = "</think>";
    // Text shown in the answer area: leading newlines and <|im_end|> removed.
    const toVisibleAnswer = (raw) =>
      raw.replace(/^\n+/, "").replace(/<\|im_end\|>/g, "");

    let fullText = "";
    let thinkingDone = !enableThinking;
    let thinkingEl = null;
    let chevron = null;
    let tokenCount = 0;
    let startTime = null;

    if (enableThinking) {
      // Add collapsible thinking block before the content area
      const thinkingLabel = document.createElement("div");
      thinkingLabel.className = "msg-thinking-label";
      chevron = document.createElement("span");
      chevron.className = "msg-thinking-chevron";
      chevron.textContent = "▼";
      thinkingLabel.append(chevron, " Thinking");
      thinkingEl = document.createElement("div");
      thinkingEl.className = "msg-thinking";
      contentEl.before(thinkingLabel, thinkingEl);

      // Clicking the label toggles the block.
      thinkingLabel.addEventListener("click", () => {
        if (thinkingEl.classList.contains("collapsed")) {
          expandThinkingBlock(thinkingEl, chevron);
        } else {
          collapseThinkingBlock(thinkingEl, chevron);
        }
      });
    }

    const streamer = new TextStreamer(processor.tokenizer, {
      skip_prompt: true,
      skip_special_tokens: !enableThinking,
      token_callback_function: () => {
        // Timing starts at the first generated token.
        if (!startTime) startTime = performance.now();
        tokenCount++;
      },
      callback_function: (token) => {
        if (thinkingDone) {
          fullText += token;
          contentEl.textContent = toVisibleAnswer(fullText);
        } else {
          // Search the accumulated text plus this chunk, because the
          // </think> marker may be split across chunks.
          const combined = fullText + token;
          const endIdx = combined.indexOf(THINK_END);
          if (endIdx === -1) {
            fullText = combined;
            thinkingEl.textContent = fullText;
          } else {
            thinkingDone = true;
            thinkingEl.textContent = combined.slice(0, endIdx).trim();
            // Everything after the marker belongs to the answer.
            fullText = combined.slice(endIdx + THINK_END.length);
            contentEl.textContent = toVisibleAnswer(fullText);
            // Auto-collapse thinking with animation
            collapseThinkingBlock(thinkingEl, chevron);
          }
        }
        $messages.scrollTop = $messages.scrollHeight;
      },
    });

    const result = await model.generate({
      ...generateArgs,
      max_new_tokens: enableThinking ? 2048 : 512,
      do_sample: true,
      streamer,
      stopping_criteria: stoppingCriteria,
      return_dict_in_generate: true,
    });

    // Update past key values for next turn
    pastKeyValues = result.past_key_values;

    // Decode the full sequence to maintain prompt history
    promptHistory = processor.batch_decode(result.sequences, {
      skip_special_tokens: false,
    })[0];

    // Show generation stats
    if (tokenCount > 0 && startTime) {
      const elapsed = (performance.now() - startTime) / 1000;
      const tps = (tokenCount / elapsed).toFixed(1);
      const statsEl = document.createElement("div");
      statsEl.className = "msg-stats";
      statsEl.textContent = `${tokenCount} tokens · ${tps} tok/s · ${elapsed.toFixed(1)}s`;
      assistantEl.appendChild(statsEl);
    }
    assistantEl.classList.remove("generating");
  } catch (err) {
    console.error(err);
    assistantEl.remove();
    // A rejection value is not guaranteed to be an Error instance.
    $errBanner.textContent = `Generation error: ${err?.message ?? String(err)}`;
    $errBanner.classList.add("visible");
  } finally {
    // Runs even if the error handling above throws, so the UI can never stay
    // locked in the "generating" state.
    isGenerating = false;
    stoppingCriteria.reset();
    updateSendBtn();
    $messages.scrollTop = $messages.scrollHeight;
  }
}
/* ─── Render helpers ─── */
/**
 * Build a chat message element, append it to the message list, and scroll
 * the list to the bottom.
 *
 * @param {string} role - "user" is labelled "You"; anything else "Qwen 3.5".
 * @param {string} text - Message text; ignored when `generating` is true.
 * @param {string | null | undefined} imageDataURL - Optional image source.
 * @param {boolean} [generating=false] - Show placeholder dots instead of text
 *   and add the "generating" class.
 * @returns {HTMLElement} The appended message element.
 */
function appendMessage(role, text, imageDataURL, generating = false) {
  const make = (tag, className) => {
    const node = document.createElement(tag);
    node.className = className;
    return node;
  };

  const wrapper = make(
    "div",
    generating ? `msg ${role} generating` : `msg ${role}`,
  );

  const label = make("div", "msg-role");
  label.textContent = role === "user" ? "You" : "Qwen 3.5";
  wrapper.appendChild(label);

  if (imageDataURL) {
    const picture = make("img", "msg-image");
    picture.src = imageDataURL;
    picture.alt = "attached";
    wrapper.appendChild(picture);
  }

  const body = make("div", "msg-content");
  if (!generating) {
    body.textContent = text;
  } else {
    // Three empty spans inside the placeholder.
    const dots = make("span", "thinking-dots");
    let remaining = 3;
    while (remaining-- > 0) {
      dots.appendChild(document.createElement("span"));
    }
    body.appendChild(dots);
  }
  wrapper.appendChild(body);

  $messages.appendChild(wrapper);
  $messages.scrollTop = $messages.scrollHeight;
  return wrapper;
}