import {
AutoProcessor,
Qwen3_5ForConditionalGeneration,
RawImage,
TextStreamer,
InterruptableStoppingCriteria,
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.6";
/* ─── State ─── */
// Module-level state shared by the event handlers and sendMessage() in this file.
let processor = null; // assigned by the load handler (AutoProcessor.from_pretrained)
let model = null; // assigned by the load handler (Qwen3_5ForConditionalGeneration.from_pretrained)
let conversationImage = null; // single RawImage used across the conversation
let attachedImage = null; // { raw: RawImage, dataURL: string (from URL.createObjectURL), name: string } | null
let isGenerating = false; // true while sendMessage() is producing a reply
let pastKeyValues = null; // cached KV from previous generation
let imageGridThw = null; // cached image_grid_thw from initial image inputs
let promptHistory = ""; // raw prompt text built up across turns
// Shared stop signal: passed to model.generate() and interrupted by the send/stop button.
const stoppingCriteria = new InterruptableStoppingCriteria();
/* ─── Wait for fonts, then reveal ─── */
// Once document.fonts.ready resolves, tag <body> with the "ready" class.
document.fonts.ready.then(() => {
  const { classList } = document.body;
  classList.add("ready");
});
/* ─── DOM refs ─── */
// Shorthand for document.getElementById.
const $ = (id) => document.getElementById(id);
// Elements are looked up once at module load and reused by the handlers below.
const $loaderTx = $("loaderText"); // status text updated while the model loads
const $messages = $("chatMessages"); // container that chat messages are appended to
const $input = $("msgInput"); // message text input
const $btnSend = $("btnSend"); // sends a message, or interrupts generation while one is running
const $btnLoad = $("btnLoad"); // starts processor/model loading
const $btnReset = $("btnReset"); // clears the conversation state and message list
const $btnAttach = $("btnAttach"); // opens the file picker via $fileInput
const $fileInput = $("fileInput"); // file input whose "change" event delivers the image
const $imgPrev = $("imagePreview"); // attachment preview, shown via the "visible" class
const $imgThumb = $("imageThumb"); // thumbnail whose src is set to the attachment URL
const $imgName = $("imageName"); // label showing the attached file name
const $btnRemImg = $("btnRemoveImage"); // removes the pending attachment
const $errBanner = $("errorBanner"); // error banner, shown via the "visible" class
const $reasoning = $("reasoningToggle"); // its `checked` state enables thinking mode
const $modelSelect = $("modelSelect"); // its `value` is used as the model id to load
const $modelSizeLabel = $("modelSizeLabel"); // mirrors the text of the selected model option
const $btnModelArrow = $("btnModelArrow"); // opens the model picker
/* ─── Model selector ─── */
// $modelSelect only accepts pointer input between a click on the arrow button
// and the next "change" or "blur" event.
const setModelSelectPointerEvents = (mode) => {
  $modelSelect.style.pointerEvents = mode;
};

// Arrow button: make the select interactive, focus it, and open its picker
// where the browser supports showPicker().
$btnModelArrow.addEventListener("click", () => {
  setModelSelectPointerEvents("auto");
  $modelSelect.focus();
  $modelSelect.showPicker?.();
});

// Selection changed: copy the chosen option's text into the size label, then lock again.
$modelSelect.addEventListener("change", () => {
  const chosenOption = $modelSelect.selectedOptions[0];
  $modelSizeLabel.textContent = chosenOption.textContent;
  setModelSelectPointerEvents("none");
});

// Focus lost without a change: lock again.
$modelSelect.addEventListener("blur", () => {
  setModelSelectPointerEvents("none");
});
/* ─── Screen switching ─── */
/**
 * Mark exactly the screen with the given id as active.
 * Every element matching ".screen" gets its "active" class switched on when
 * its id equals `id` and switched off otherwise.
 * @param {string} id - id of the screen element to activate.
 */
function showScreen(id) {
  for (const screen of document.querySelectorAll(".screen")) {
    const isTarget = screen.id === id;
    screen.classList.toggle("active", isTarget);
  }
}
/* ─── Model loading ─── */
// Load button: switch to the loading screen, load the processor and then the
// model for the selected model id, and move on to the chat screen. Any failure
// is reported on the loading screen itself.
$btnLoad.addEventListener("click", async () => {
  const setStatus = (message) => {
    $loaderTx.textContent = message;
  };

  showScreen("loading");
  try {
    const { value: model_id } = $modelSelect;
    const { textContent: sizeLabel } = $modelSizeLabel;

    setStatus("Loading processor…");
    processor = await AutoProcessor.from_pretrained(model_id);

    setStatus("Loading model weights…");
    const loadOptions = {
      dtype: {
        embed_tokens: "q4",
        vision_encoder: "fp16",
        decoder_model_merged: "q4",
      },
      device: "webgpu",
    };
    model = await Qwen3_5ForConditionalGeneration.from_pretrained(
      model_id,
      loadOptions,
    );

    setStatus("Ready!");
    const headerTitle = document.querySelector(".chat-header-title");
    headerTitle.textContent = `Qwen 3.5 Vision · ${sizeLabel}`;
    // Leave the "Ready!" status on screen briefly before switching to the chat.
    setTimeout(() => {
      showScreen("chat");
    }, 400);
  } catch (err) {
    console.error(err);
    setStatus("Failed to load model");
    const subtitle = document.querySelector(".loader-sub");
    subtitle.textContent = err.message;
    const ring = document.querySelector(".loader-ring");
    ring.style.borderTopColor = "var(--red)";
  }
});
/* ─── Image attachment ─── */
// Attach button: forward the click to the file input.
$btnAttach.addEventListener("click", () => {
  if ($btnAttach.disabled) return;
  $fileInput.click();
});

// File chosen: decode it, resize it to 448×448 and stage it as the pending
// attachment. Decoding failures are reported in the error banner instead of
// surfacing as an unhandled promise rejection.
$fileInput.addEventListener("change", async (e) => {
  const file = e.target.files?.[0];
  if (!file) return;
  const dataURL = URL.createObjectURL(file);
  try {
    const raw = await RawImage.read(dataURL);
    const resized = await raw.resize(448, 448);
    // Read the previous attachment only after the awaits: if it was sent or
    // removed in the meantime it is already null and its URL must stay alive.
    const replaced = attachedImage;
    attachedImage = { raw: resized, dataURL, name: file.name };
    $imgThumb.src = dataURL;
    $imgName.textContent = file.name;
    $imgPrev.classList.add("visible");
    // A replaced, never-sent attachment was only referenced by the thumbnail,
    // which now points at the new URL, so its object URL can be released.
    if (replaced && replaced.dataURL !== dataURL) {
      URL.revokeObjectURL(replaced.dataURL);
    }
    updateSendBtn();
  } catch (err) {
    console.error(err);
    // Nothing references the URL of a file that failed to decode.
    URL.revokeObjectURL(dataURL);
    $errBanner.textContent =
      "Could not read image: " + (err?.message ?? String(err));
    $errBanner.classList.add("visible");
  } finally {
    // Always clear the input so choosing the same file again fires "change".
    $fileInput.value = "";
  }
});

// Remove button: drop the pending attachment.
$btnRemImg.addEventListener("click", clearAttachment);
/**
 * Drop the pending image attachment and reset its preview UI:
 * clears `attachedImage`, empties the thumbnail source and file-name label,
 * hides the preview, and refreshes the send button state.
 */
function clearAttachment() {
  // Clear the state first so updateSendBtn() below sees no attachment.
  attachedImage = null;
  Object.assign($imgThumb, { src: "" });
  Object.assign($imgName, { textContent: "" });
  $imgPrev.classList.remove("visible");
  updateSendBtn();
}
/* ─── Input handling ─── */
// Auto-grow the input up to 140px and keep the send button state in sync.
$input.addEventListener("input", () => {
  $input.style.height = "auto";
  $input.style.height = Math.min($input.scrollHeight, 140) + "px";
  updateSendBtn();
});

// Enter sends the message; Shift+Enter keeps the default behavior.
$input.addEventListener("keydown", (e) => {
  // While an IME composition is active, Enter confirms the candidate text and
  // must not submit. keyCode 229 covers browsers that report the composition
  // keydown without setting isComposing.
  if (e.isComposing || e.keyCode === 229) return;
  if (e.key === "Enter" && !e.shiftKey) {
    e.preventDefault();
    if (!isGenerating) sendMessage();
  }
});

// The send button doubles as a stop button while a reply is being generated.
$btnSend.addEventListener("click", () => {
  if (isGenerating) {
    stoppingCriteria.interrupt();
  } else {
    sendMessage();
  }
});
/**
 * Sync the send button with the current state.
 * While generating, the button is enabled and carries the "stopping" class.
 * Otherwise the class is removed and the button is disabled unless there is
 * non-blank input text or a pending image attachment.
 */
function updateSendBtn() {
  const { classList } = $btnSend;
  if (!isGenerating) {
    classList.remove("stopping");
    const hasText = Boolean($input.value.trim());
    const hasImage = Boolean(attachedImage);
    $btnSend.disabled = !(hasText || hasImage);
    return;
  }
  $btnSend.disabled = false;
  classList.add("stopping");
}
/**
 * Release the cached key/value state, if any.
 * Calls dispose() on every value stored in `pastKeyValues`, then clears the
 * cache reference. Does nothing when no cache is held.
 */
function disposePastKeyValues() {
  if (!pastKeyValues) return;
  Object.values(pastKeyValues).forEach((cached) => {
    cached.dispose();
  });
  pastKeyValues = null;
}
/* ─── Reset ─── */
// Reset button: discard the whole conversation (image, KV cache, prompt
// history, rendered messages) and return the chat to its initial state.
$btnReset.addEventListener("click", () => {
  // A running generation still uses the KV cache and writes pastKeyValues and
  // promptHistory when it finishes; resetting underneath it would dispose
  // tensors in use and then be overwritten. Stop the generation first.
  if (isGenerating) return;

  conversationImage = null;
  // Clears attachedImage and the whole preview (thumbnail and name included).
  clearAttachment();
  disposePastKeyValues();
  stoppingCriteria.reset();
  imageGridThw = null;
  promptHistory = "";
  $btnAttach.disabled = false;
  // The wrapper must carry the "welcome-msg" class: sendMessage() removes the
  // welcome text by querying for that class when the first message is sent.
  // NOTE(review): inner markup is minimal; align it with the page's initial
  // welcome markup if that differs.
  $messages.innerHTML = `
<div class="welcome-msg">
<p>Start a conversation</p>
<p>Optionally attach an image, then type your message.</p>
<p>The model runs entirely in your browser.</p>
</div>
`;
  $errBanner.classList.remove("visible");
  $input.value = "";
  $input.style.height = "auto";
  updateSendBtn();
});
/* ─── Chat logic ─── */
/**
 * Animate a thinking panel closed.
 * Pins the panel's current height, marks it "collapsing", adds "collapsed" on
 * the next animation frame, and clears the temporary class and height on the
 * first "transitionend" event.
 * @param {HTMLElement} panel - the ".msg-thinking" element.
 */
function collapseThinkingPanel(panel) {
  panel.style.maxHeight = panel.scrollHeight + "px";
  panel.classList.add("collapsing");
  requestAnimationFrame(() => {
    panel.classList.add("collapsed");
  });
  panel.addEventListener(
    "transitionend",
    () => {
      panel.classList.remove("collapsing");
      panel.style.maxHeight = "";
    },
    { once: true },
  );
}

/**
 * Animate a thinking panel open.
 * Marks it "collapsing", removes "collapsed", sets the height to the content
 * height, and clears the temporary class and height on the first
 * "transitionend" event.
 * @param {HTMLElement} panel - the ".msg-thinking" element.
 */
function expandThinkingPanel(panel) {
  panel.classList.add("collapsing");
  panel.classList.remove("collapsed");
  panel.style.maxHeight = panel.scrollHeight + "px";
  panel.addEventListener(
    "transitionend",
    () => {
      panel.classList.remove("collapsing");
      panel.style.maxHeight = "";
    },
    { once: true },
  );
}

/**
 * Send the current input (text and/or pending image) and stream the reply.
 *
 * Renders the user message, builds the chat prompt by hand, runs
 * model.generate() with a streaming callback that fills the assistant message
 * (and, in thinking mode, a collapsible reasoning panel), then stores the
 * returned KV cache and decoded sequence for the next turn.
 *
 * Does nothing when a generation is already running or when there is neither
 * text nor an attachment. Errors raised while generating are logged and shown
 * in the error banner; they are not rethrown.
 * @returns {Promise<void>}
 */
async function sendMessage() {
  if (isGenerating) return;
  const text = $input.value.trim();
  if (!text && !attachedImage) return;

  // Tag that closes the reasoning section in the generated text.
  const THINK_END = "</think>";

  $errBanner.classList.remove("visible");
  // Clear welcome
  $messages.querySelector(".welcome-msg")?.remove();

  // Capture attached image before clearing
  const img = attachedImage;
  if (img) conversationImage = img.raw;
  // Render user message in the UI
  appendMessage("user", text, img?.dataURL);
  // Clear input fields
  $input.value = "";
  $input.style.height = "auto";
  clearAttachment();
  // Disable image attach for the rest of this conversation once one was used
  if (conversationImage) {
    $btnAttach.disabled = true;
  }

  // Start generating
  isGenerating = true;
  updateSendBtn();
  const assistantEl = appendMessage("assistant", "", null, true);
  const contentEl = assistantEl.querySelector(".msg-content");

  try {
    // Build prompt manually (can't use apply_chat_template with PKV approach)
    const isFirstTurn = promptHistory === "";
    const enableThinking = $reasoning.checked;

    // User turn, optionally prefixed by the image placeholder tokens.
    let userPrompt = "<|im_start|>user\n";
    if (img?.raw) {
      userPrompt += "<|vision_start|><|image_pad|><|vision_end|>";
    }
    userPrompt += `${text}<|im_end|>\n`;
    // Generation prompt: an open <think> block lets the model reason; an
    // already-closed empty block makes it answer directly.
    userPrompt += enableThinking
      ? "<|im_start|>assistant\n<think>\n"
      : "<|im_start|>assistant\n<think>\n\n</think>\n\n";

    let generateArgs;
    if (img?.raw) {
      // Image attached: must do a full encode (no PKV reuse possible).
      // Rebuild the full prompt including any prior conversation.
      const fullPrompt = (isFirstTurn ? "" : promptHistory + "\n") + userPrompt;
      const inputs = await processor(fullPrompt, img.raw);
      // Cache image_grid_thw for future PKV continuation turns
      if (inputs.image_grid_thw) {
        imageGridThw = inputs.image_grid_thw;
      }
      // Discard past key values — image changes the encoded sequence
      disposePastKeyValues();
      generateArgs = { ...inputs };
    } else if (isFirstTurn) {
      // First turn, text only: full encode, no image
      const inputs = await processor(userPrompt);
      generateArgs = { ...inputs };
    } else {
      // Continuation: use past_key_values, no image re-encoding
      const inputs = await processor(promptHistory + "\n" + userPrompt);
      generateArgs = { ...inputs, past_key_values: pastKeyValues };
      // Pass image_grid_thw if we had an image earlier
      if (imageGridThw) {
        generateArgs.image_grid_thw = imageGridThw;
      }
    }

    let fullText = "";
    let thinkingDone = !enableThinking;
    let thinkingEl = null;
    let chevron = null;
    let tokenCount = 0;
    let startTime = null;

    if (enableThinking) {
      // Add collapsible thinking block before the content area
      const thinkingLabel = document.createElement("div");
      thinkingLabel.className = "msg-thinking-label";
      chevron = document.createElement("span");
      chevron.className = "msg-thinking-chevron";
      chevron.textContent = "▼";
      thinkingLabel.append(chevron, " Thinking");
      thinkingEl = document.createElement("div");
      thinkingEl.className = "msg-thinking";
      contentEl.before(thinkingLabel, thinkingEl);
      // Clicking the label toggles the panel.
      thinkingLabel.addEventListener("click", () => {
        if (thinkingEl.classList.contains("collapsed")) {
          expandThinkingPanel(thinkingEl);
          chevron.textContent = "▼";
        } else {
          collapseThinkingPanel(thinkingEl);
          chevron.textContent = "▶";
        }
      });
    }

    // Show the answer text without leading newlines or end-of-turn markers.
    const renderAnswer = () => {
      contentEl.textContent = fullText
        .replace(/^\n+/, "")
        .replace(/<\|im_end\|>/g, "");
    };

    const streamer = new TextStreamer(processor.tokenizer, {
      skip_prompt: true,
      skip_special_tokens: !enableThinking,
      token_callback_function: () => {
        if (!startTime) startTime = performance.now();
        tokenCount++;
      },
      callback_function: (token) => {
        fullText += token;
        if (thinkingDone) {
          renderAnswer();
        } else {
          // Still reasoning: look for the end-of-thinking boundary in the
          // accumulated text (it may be split across streamed chunks).
          const endIdx = fullText.indexOf(THINK_END);
          if (endIdx === -1) {
            thinkingEl.textContent = fullText;
          } else {
            thinkingDone = true;
            thinkingEl.textContent = fullText.slice(0, endIdx).trim();
            // Everything after the boundary is the start of the answer.
            fullText = fullText.slice(endIdx + THINK_END.length);
            renderAnswer();
            // Auto-collapse thinking with animation
            collapseThinkingPanel(thinkingEl);
            chevron.textContent = "▶";
          }
        }
        $messages.scrollTop = $messages.scrollHeight;
      },
    });

    const result = await model.generate({
      ...generateArgs,
      max_new_tokens: enableThinking ? 2048 : 512,
      do_sample: true,
      streamer,
      stopping_criteria: stoppingCriteria,
      return_dict_in_generate: true,
    });

    // Update past key values for next turn
    pastKeyValues = result.past_key_values;
    // Decode the full sequence to maintain prompt history
    promptHistory = processor.batch_decode(result.sequences, {
      skip_special_tokens: false,
    })[0];

    // Show generation stats
    if (tokenCount > 0 && startTime) {
      const elapsed = (performance.now() - startTime) / 1000;
      const tps = (tokenCount / elapsed).toFixed(1);
      const statsEl = document.createElement("div");
      statsEl.className = "msg-stats";
      statsEl.textContent = `${tokenCount} tokens · ${tps} tok/s · ${elapsed.toFixed(1)}s`;
      assistantEl.appendChild(statsEl);
    }
    assistantEl.classList.remove("generating");
  } catch (err) {
    console.error(err);
    assistantEl.remove();
    $errBanner.textContent =
      "Generation error: " + (err?.message ?? String(err));
    $errBanner.classList.add("visible");
  } finally {
    // Always leave the UI usable, even if the error reporting above throws.
    isGenerating = false;
    stoppingCriteria.reset();
    updateSendBtn();
    $messages.scrollTop = $messages.scrollHeight;
  }
}
/* ─── Render helpers ─── */
/**
 * Build a chat message element, append it to the message list and scroll the
 * list to the bottom.
 * @param {string} role - "user" is labelled "You"; any other role "Qwen 3.5".
 * @param {string} text - message text; ignored when `generating` is truthy.
 * @param {?string} imageDataURL - when truthy, an image with this source is
 *   rendered above the text.
 * @param {boolean} [generating=false] - render three placeholder dots in the
 *   content area and add the "generating" class to the message.
 * @returns {HTMLElement} the appended message element.
 */
function appendMessage(role, text, imageDataURL, generating = false) {
  const make = (tag, className) => {
    const node = document.createElement(tag);
    node.className = className;
    return node;
  };

  const wrapper = make(
    "div",
    generating ? `msg ${role} generating` : `msg ${role}`,
  );

  const label = make("div", "msg-role");
  label.textContent = role === "user" ? "You" : "Qwen 3.5";
  wrapper.appendChild(label);

  if (imageDataURL) {
    const picture = make("img", "msg-image");
    picture.src = imageDataURL;
    picture.alt = "attached";
    wrapper.appendChild(picture);
  }

  const body = make("div", "msg-content");
  if (!generating) {
    body.textContent = text;
  } else {
    // Placeholder shown until the first streamed text replaces it.
    const dots = make("span", "thinking-dots");
    let remaining = 3;
    while (remaining-- > 0) {
      dots.appendChild(document.createElement("span"));
    }
    body.appendChild(dots);
  }
  wrapper.appendChild(body);

  $messages.appendChild(wrapper);
  $messages.scrollTop = $messages.scrollHeight;
  return wrapper;
}