<!-- violet_test.html - WebLLM raw completions + Transformers.js AutoTokenizer token audit -->
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8" />
<title>Violet Test (WebLLM + Token IDs)</title>
<style>
body {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
max-width: 1500px;
margin: 0 auto;
padding: 18px;
background: #111;
color: #d8ffd8;
}
.row { display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
button {
background: #00ff7b;
color: #000;
border: none;
padding: 10px 14px;
font-size: 14px;
cursor: pointer;
border-radius: 6px;
}
button:disabled { opacity: 0.5; cursor: not-allowed; }
.output {
background: #000;
padding: 14px;
margin: 12px 0;
border: 1px solid #00ff7b;
white-space: pre-wrap;
font-size: 11px;
line-height: 1.35;
max-height: 820px;
overflow-y: auto;
border-radius: 8px;
}
.hint { color: #9affff; font-size: 12px; opacity: 0.9; }
.warn { color: #ffcf6a; }
.err { color: #ff6a6a; }
.ok { color: #00ff7b; }
.pill {
display: inline-block;
padding: 2px 8px;
border-radius: 999px;
border: 1px solid #2d2d2d;
background: #151515;
color: #cfcfcf;
font-size: 11px;
}
input, textarea {
background: #0b0b0b;
color: #d8ffd8;
border: 1px solid #2d2d2d;
padding: 8px 10px;
border-radius: 6px;
font-size: 12px;
}
label { font-size: 12px; opacity: 0.9; }
details { margin: 8px 0; }
summary { cursor: pointer; color: #9affff; }
textarea { width: 520px; height: 64px; }
.small { font-size: 11px; opacity: 0.9; }
</style>
</head>
<body>
<h1>🔬 Violet Test (Raw + Token IDs)</h1>
<div class="row">
<button id="loadBtn">Load Model</button>
<button id="rawBtn" disabled>Run Raw Completion (DPO Prompt)</button>
<button id="tokenBtn" disabled>Tokenizer Audit (specials)</button>
<button id="streamBtn" disabled>Stream Raw Completion</button>
<button id="clearBtn">Clear</button>
<span class="pill" id="statusPill">status: idle</span>
</div>
<div class="row" style="margin-top:10px">
<label>Model ID</label>
<input id="modelId" value="violet-q3f16_1-MLC" size="22" />
<label>Max tokens</label>
<input id="maxTokens" type="number" value="160" min="1" max="4096" />
<label>Temp</label>
<input id="temp" type="number" value="0" min="0" max="2" step="0.05" />
<label>Top-p</label>
<input id="topP" type="number" value="1" min="0" max="1" step="0.05" />
<label>Rep pen</label>
<input id="repPen" type="number" value="1.15" min="0.5" max="2" step="0.05" />
<label class="small">
<input id="stopAfterMoodLine" type="checkbox" />
stop after the first line following &lt;|violet_mood|&gt;
</label>
</div>
<div class="row" style="margin-top:10px; align-items:flex-start;">
<div>
<label>Stop strings (one per line)</label><br/>
<textarea id="stopStrs"><|endoftext|>
<|user|>
<|system|></textarea>
<div class="small">These are applied by <b>string-match</b> postprocessing (best-effort; WebLLM build dependent).</div>
</div>
<div>
<label>Stop token IDs (comma-separated)</label><br/>
<input id="stopTokenIds" value="0,24000,24001" size="28" />
<div class="small">These are applied as <b>token-id stops</b> if supported by your WebLLM build.</div>
</div>
<div>
<label>Question</label><br/>
<input id="question" value="What is your name?" size="36" />
<div class="small">Edits the user question in the DPO prompt.</div>
</div>
</div>
<details>
<summary>Notes</summary>
<div class="hint">
- Generation uses <b>engine.completions.create</b> (raw completion), not chat (so conv_template shouldn’t inject roles).<br/>
- Token IDs come from <b>Transformers.js AutoTokenizer</b> loading <code>/models/&lt;modelId&gt;/tokenizer.json</code>.<br/>
- If your WebLLM build ignores <code>stop_token_ids</code> / <code>stop</code>, we still show where EOS would have been via the tokenizer diff.
</div>
</details>
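<details>
<summary>Example: minimal raw completion call (sketch)</summary>
<div class="hint">
A minimal sketch of the call this page issues, assuming a WebLLM build whose
<code>engine.completions.create</code> honors OpenAI-style <code>stop</code> strings;
the prompt shown is shortened for illustration.
</div>
<pre class="small">const reply = await engine.completions.create({
  prompt: "&lt;|system|&gt;\n...\n&lt;|user|&gt;\nWhat is your name?\n&lt;|violet_mood|&gt;\n",
  max_tokens: 160,
  temperature: 0,
  top_p: 1,
  stop: ["&lt;|endoftext|&gt;", "&lt;|user|&gt;", "&lt;|system|&gt;"]
});
console.log(reply.choices?.[0]?.text ?? "");</pre>
</details>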
<div id="output" class="output">Ready...</div>
<script type="module">
import * as webllm from "https://esm.run/@mlc-ai/web-llm@0.2.80";
import { AutoTokenizer, env } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.1.1/+esm";
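// Pinned versions: WebLLM 0.2.80 drives generation; Transformers.js 3.1.1 provides an
// independent tokenizer, so token audits work even if the engine build ignores stops.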
const outputEl = document.getElementById("output");
const loadBtn = document.getElementById("loadBtn");
const rawBtn = document.getElementById("rawBtn");
const tokenBtn = document.getElementById("tokenBtn");
const streamBtn = document.getElementById("streamBtn");
const clearBtn = document.getElementById("clearBtn");
const statusPill = document.getElementById("statusPill");
const modelIdEl = document.getElementById("modelId");
const maxTokensEl = document.getElementById("maxTokens");
const tempEl = document.getElementById("temp");
const topPEl = document.getElementById("topP");
const repPenEl = document.getElementById("repPen");
const stopStrsEl = document.getElementById("stopStrs");
const stopTokenIdsEl = document.getElementById("stopTokenIds");
const questionEl = document.getElementById("question");
const stopAfterMoodLineEl = document.getElementById("stopAfterMoodLine");
let engine = null;
let tokenizer = null;
let MODEL_ROOT = null;
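// Persona system prompt, inserted verbatim into the DPO-format prompt built below.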
const SYSTEM_PROMPT = `A Young Lady's Card of Particulars
(To be observed in all replies and discourse)
I am Miss Violet Hartwell, a young lady of Kensington, London, living in the year of our Lord 1899.
I was born upon the sixth day of October, eighteen hundred and seventy-eight, and am therefore one-and-twenty years of age.
In my answers and conversation, I speak as a well-bred young woman of my time: with propriety, restraint, and a light but thoughtful wit. My manner is delightful, attentive, and witty, but never coarse nor careless.
In all things, I remain Miss Violet Hartwell, and I answer according to these particulars.`;
function setStatus(text, cls = "") {
statusPill.textContent = `status: ${text}`;
statusPill.className = "pill " + cls;
}
function log(msg = "") {
outputEl.textContent += msg + "\n";
outputEl.scrollTop = outputEl.scrollHeight;
}
function section(title) {
log("\n" + "=".repeat(98));
log(title);
log("=".repeat(98));
}
clearBtn.addEventListener("click", () => (outputEl.textContent = ""));
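// JSON.stringify with a replacer so BigInt, Map, and Set values (which appear in some
// tokenizer and engine objects) serialize instead of throwing.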
function jstr(x, space = 0) {
return JSON.stringify(
x,
(_k, v) => {
if (typeof v === "bigint") return v.toString();
if (v instanceof Map) return Object.fromEntries(v);
if (v instanceof Set) return Array.from(v);
return v;
},
space
);
}
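// Debug helpers: make whitespace visible, dump code points, and flag special-token strings.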
function visualizeNewlines(s) {
return s.replace(/\r/g, "\\r").replace(/\n/g, "\\n\n");
}
function charCodeDump(s, limit = 220) {
const arr = Array.from(s.slice(0, limit));
return arr.map((c, i) => {
const code = c.codePointAt(0);
const vis =
c === "\n" ? "\\n" :
c === "\r" ? "\\r" :
c === "\t" ? "\\t" :
c === " " ? "␠" :
c;
return `${i.toString().padStart(3, " ")} U+${code.toString(16).toUpperCase().padStart(4,"0")} '${vis}'`;
}).join("\n");
}
function findSpecialStrings(s) {
const specials = ["<|system|>","<|user|>","<|assistant|>","<|endoftext|>","<|violet_mood|>"];
const hits = {};
for (const sp of specials) hits[sp] = s.includes(sp);
return hits;
}
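// Exact training-format prompt: system block, user question, then the <|violet_mood|>
// header plus newline; the model's completion begins immediately after that boundary.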
function buildDpoPrompt(question) {
return `<|system|>\n${SYSTEM_PROMPT}\n<|user|>\n${question}\n<|violet_mood|>\n`;
}
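// Load the tokenizer from the locally served model folder; remote lookups are disabled
// so nothing is fetched from the Hugging Face Hub.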
async function loadTokenizerForModel(modelId, baseUrl) {
if (tokenizer) return tokenizer;
section("🧠 Loading tokenizer via Transformers.js AutoTokenizer");
env.allowRemoteModels = false;
env.allowLocalModels = true;
// Transformers.js resolves local models as `${env.localModelPath}/${modelId}/...`,
// so point localModelPath at the models root and let from_pretrained append the id.
env.localModelPath = `${baseUrl}/models/`;
tokenizer = await AutoTokenizer.from_pretrained(modelId);
log("✅ Tokenizer loaded.");
return tokenizer;
}
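// Transformers.js may hand back input_ids as a plain array, an ONNX Runtime tensor
// (ort_tensor.cpuData as a TypedArray, an array, or an index-keyed object), or something
// exposing toArray()/tolist(), depending on version. Normalize all of them.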
function extractIds(input_ids) {
if (Array.isArray(input_ids)) return input_ids.slice();
const ot = input_ids?.ort_tensor;
const cpu = ot?.cpuData;
if (cpu) {
if (ArrayBuffer.isView(cpu)) return Array.from(cpu);
if (Array.isArray(cpu)) return cpu.slice();
if (typeof cpu === "object") {
const keys = Object.keys(cpu).sort((a,b) => Number(a) - Number(b));
return keys.map(k => cpu[k]);
}
}
if (typeof input_ids?.toArray === "function") return input_ids.toArray();
if (typeof input_ids?.tolist === "function") return input_ids.tolist();
throw new Error("extractIds: unrecognized input_ids structure: " + jstr(input_ids).slice(0, 300));
}
function normalizeIdsToNumber(ids) {
return ids.map(x => {
if (typeof x === "number") return x;
if (typeof x === "bigint") return Number(x);
if (typeof x === "string") return Number(x);
return Number(x);
});
}
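// Encode with add_special_tokens: false so the only specials present are the ones
// written literally into the text.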
async function encodeIds(text) {
const enc = await tokenizer(text, { add_special_tokens: false });
const rawIds = extractIds(enc.input_ids);
return normalizeIdsToNumber(rawIds);
}
function decodeIds(ids) {
return tokenizer.decode(ids, { skip_special_tokens: false });
}
async function encodeDecodeAudit(text) {
section("🧷 TOKENIZER ENCODE/DECODE AUDIT (Transformers.js)");
log("Input text (JSON escaped):");
log(jstr(text));
if (!tokenizer) {
log("⚠️ Tokenizer not loaded yet. Click 'Load Model' first.");
return;
}
const ids = await encodeIds(text);
log("\nToken IDs (Number):");
log(jstr(ids));
const decoded = decodeIds(ids);
log("\nDecode(Encode(text)):");
log(jstr(decoded));
log("\nPer-token decoded pieces (first 240):");
for (let i = 0; i < Math.min(ids.length, 240); i++) {
const idNum = ids[i];
const piece = decodeIds([idNum]);
const shown = piece.replace(/\n/g, "\\n").replace(/\r/g, "\\r").replace(/\t/g, "\\t");
log(`${i.toString().padStart(4, " ")} id=${String(idNum).padStart(6, " ")} piece="${shown}"`);
}
log("\nSpecial-string presence in INPUT:");
log(jstr(findSpecialStrings(text), 2));
}
function parseStopStrings() {
const lines = stopStrsEl.value.split("\n").map(s => s.trim()).filter(Boolean);
return lines;
}
function parseStopTokenIds() {
const raw = stopTokenIdsEl.value.split(",").map(s => s.trim()).filter(Boolean);
const ids = raw.map(x => Number(x)).filter(x => Number.isFinite(x));
return ids;
}
function buildCompletionReq(prompt) {
const max_tokens = Number(maxTokensEl.value || 160);
const temperature = Number(tempEl.value || 0);
const top_p = Number(topPEl.value || 1);
const repetition_penalty = Number(repPenEl.value || 1.0);
const ignore_eos = false;
const stop = parseStopStrings();
const stop_token_ids = parseStopTokenIds();
// Different WebLLM builds use different key names.
// We attach a few common variants; unsupported ones should be ignored.
return {
prompt,
max_tokens,
temperature,
top_p,
repetition_penalty,
ignore_eos,
// variants:
stop, // OpenAI-style
stop_str: stop, // MLC-style sometimes
stop_token_ids, // MLC-style
stopTokenIds: stop_token_ids
};
}
function maybeTruncateAfterMoodLine(prompt, outputText) {
if (!stopAfterMoodLineEl.checked) return outputText;
// We want everything up to the end of the first line AFTER the <|violet_mood|>\n boundary.
// Since the prompt already ends with "<|violet_mood|>\n", the output begins right after it.
// Skip any leading newlines so a response like "\nI am..." still yields its first real
// line, letting you see whether the model ever emits a mood label.
let start = 0;
while (start < outputText.length && (outputText[start] === "\n" || outputText[start] === "\r")) start++;
const idxNL = outputText.indexOf("\n", start);
if (idxNL === -1) return outputText;
return outputText.slice(0, idxNL + 1);
}
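// WebLLM's completion API returns text, not token ids. Recover the continuation's ids by
// encoding the prompt alone and prompt+output, then slicing off the shared prefix. BPE
// merges at the boundary can make this re-encoding differ from the ids the engine actually
// sampled, which is why the prefix check below is logged rather than assumed.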
async function tokenDiffAudit(prompt, outputText) {
section("🧮 TOKEN-DIFF AUDIT (prompt vs prompt+output)");
const prompt_ids = await encodeIds(prompt);
const joined_ids = await encodeIds(prompt + outputText);
log(`prompt_ids: ${prompt_ids.length} tokens`);
log(`joined_ids: ${joined_ids.length} tokens`);
// prefix check
let isPrefix = true;
for (let i = 0; i < prompt_ids.length; i++) {
if (prompt_ids[i] !== joined_ids[i]) { isPrefix = false; break; }
}
log(`prompt is prefix of joined? ${isPrefix}`);
const cont = joined_ids.slice(prompt_ids.length);
log(`continuation_ids: ${cont.length} tokens`);
const firstN = cont.slice(0, 40);
const lastN = cont.slice(Math.max(0, cont.length - 40));
log("\nFirst continuation tokens (id => piece):");
for (let i = 0; i < firstN.length; i++) {
const id = firstN[i];
const piece = decodeIds([id]).replace(/\n/g,"\\n").replace(/\r/g,"\\r").replace(/\t/g,"\\t");
log(`${String(i).padStart(3," ")} ${String(id).padStart(6," ")} "${piece}"`);
}
log("\nLast continuation tokens (id => piece):");
for (let i = 0; i < lastN.length; i++) {
const id = lastN[i];
const piece = decodeIds([id]).replace(/\n/g,"\\n").replace(/\r/g,"\\r").replace(/\t/g,"\\t");
const j = cont.length - lastN.length + i;
log(`${String(j).padStart(3," ")} ${String(id).padStart(6," ")} "${piece}"`);
}
const stopIds = new Set(parseStopTokenIds());
const hits = [];
for (let i = 0; i < cont.length; i++) {
if (stopIds.has(cont[i])) hits.push({ i, id: cont[i], piece: decodeIds([cont[i]]) });
}
log(`\nStop-id hits in continuation (among [${Array.from(stopIds).join(",")}]):`);
log(jstr(hits.map(h => ({ ...h, piece: h.piece.replace(/\n/g,"\\n") })), 2));
log(`\nContinuation begins with EOS(0)? ${cont[0] === 0}`);
log(`Continuation begins with <|violet_mood|>(24005)? ${cont[0] === 24005}`);
}
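// Full audit pipeline: log the prompt, run the completion (blocking or streaming), then
// audit the output text, the joined prompt+output, and the token-level diff.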
async function rawCompletion(prompt, { stream = false } = {}) {
section(stream ? "🧪 RAW COMPLETION (STREAMING)" : "🧪 RAW COMPLETION");
log("Prompt (JSON escaped):");
log(jstr(prompt));
log("\nPrompt with visible newlines:");
log(visualizeNewlines(prompt));
log("\nPrompt length:");
log(`chars=${prompt.length}`);
log("\nPrompt tail (last 220 chars, JSON escaped):");
log(jstr(prompt.slice(-220)));
await encodeDecodeAudit(prompt);
const req = buildCompletionReq(prompt);
log("\nCompletion request (BigInt-safe):");
log(jstr(req, 2));
if (!stream) {
const t0 = performance.now();
const reply = await engine.completions.create(req);
const t1 = performance.now();
const choice = reply.choices?.[0] || {};
let text = choice.text ?? "";
// optional truncation to inspect mood-line behavior
text = maybeTruncateAfterMoodLine(prompt, text);
section("📤 RAW OUTPUT");
log("Finish reason: " + (choice.finish_reason ?? "<none>"));
log(`Elapsed: ${(t1 - t0).toFixed(1)} ms`);
log("\nOutput (JSON escaped):");
log(jstr(text));
log("\nOutput with visible newlines:");
log(visualizeNewlines(text));
log("\nOutput length:");
log(`chars=${text.length}`);
log("\nOutput head (first 220 chars, JSON escaped):");
log(jstr(text.slice(0, 220)));
log("\nOutput tail (last 220 chars, JSON escaped):");
log(jstr(text.slice(-220)));
log("\nChar code dump (first 220 chars):");
log(charCodeDump(text, 220));
log("\nSpecial-string presence in OUTPUT:");
log(jstr(findSpecialStrings(text), 2));
await encodeDecodeAudit(text);
section("🧵 PROMPT+OUTPUT JOINED (for DPO alignment)");
const joined = prompt + text;
log("Joined tail (last 260 chars, JSON escaped):");
log(jstr(joined.slice(-260)));
await encodeDecodeAudit(joined);
await tokenDiffAudit(prompt, text);
section("📎 LINE DUMP");
const lines = text.split("\n");
log(`Total lines: ${lines.length}`);
for (let i = 0; i < Math.min(lines.length, 80); i++) {
log(`${String(i).padStart(3, " ")}: ${jstr(lines[i])}`);
}
return text;
}
// STREAMING mode (build-dependent)
section("📡 STREAM START");
let full = "";
let i = 0;
const gen = await engine.completions.create({ ...req, stream: true });
for await (const chunk of gen) {
const delta = chunk?.choices?.[0]?.text ?? "";
full += delta;
log(`${String(i).padStart(4,"0")} (${delta.length} chars): ${jstr(delta)}`);
i++;
if (i > 4000) break;
}
full = maybeTruncateAfterMoodLine(prompt, full);
section("📤 STREAM FULL OUTPUT");
log(jstr(full));
log("\nWith visible newlines:");
log(visualizeNewlines(full));
log("\nSpecials:");
log(jstr(findSpecialStrings(full), 2));
await encodeDecodeAudit(full);
await tokenDiffAudit(prompt, full);
return full;
}
// --- load model --------------------------------------------------------
loadBtn.addEventListener("click", async () => {
loadBtn.disabled = true;
outputEl.textContent = "";
setStatus("loading", "warn");
try {
section("📦 LOADING MODEL");
const modelId = modelIdEl.value.trim();
const baseUrl = window.location.origin;
MODEL_ROOT = `${baseUrl}/models/${modelId}/`;
const modelLib = `${baseUrl}/models/${modelId}/${modelId}-webgpu.wasm`;
log(`Model ID : ${modelId}`);
log(`Model Root: ${MODEL_ROOT}`);
log(`Model Lib : ${modelLib}`);
// Fetch logger
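// Traces every artifact request (wasm, weight shards, tokenizer files). The wrapper
// stays installed for the page's lifetime; reload the page to restore the original.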
const originalFetch = globalThis.fetch.bind(globalThis);
globalThis.fetch = async (...args) => {
const url = typeof args[0] === "string" ? args[0] : args[0]?.url;
log(`🌐 FETCH: ${url}`);
const res = await originalFetch(...args);
log(`✅ FETCH: ${url} ${res.status} ${(res.headers.get("content-type")||"")}`);
return res;
};
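// Point WebLLM at the locally served weights and compiled wasm; vram_required_MB is advisory.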
const appConfig = {
model_list: [{
model_id: modelId,
model: MODEL_ROOT,
model_lib: modelLib,
vram_required_MB: 2048,
low_resource_required: false
}],
use_web_worker: false
};
engine = new webllm.MLCEngine({
appConfig,
logLevel: "INFO",
initProgressCallback: (p) => {
const pct = (p.progress * 100).toFixed(1);
log(` Progress: ${pct}% ${p.text ? "- " + p.text : ""}`);
},
useWebWorker: false
});
await engine.reload(modelId);
// Load tokenizer from the same local model folder
await loadTokenizerForModel(modelId, baseUrl);
log("\n✅ Model + tokenizer loaded.");
setStatus("ready", "ok");
rawBtn.disabled = false;
tokenBtn.disabled = false;
streamBtn.disabled = false;
} catch (err) {
setStatus("error", "err");
log("\n❌ ERROR while loading model/tokenizer:");
log(String(err));
log(err?.stack || "");
loadBtn.disabled = false;
}
});
// --- tokenizer specials button ----------------------------------------
tokenBtn.addEventListener("click", async () => {
tokenBtn.disabled = true;
setStatus("token-audit", "warn");
try {
section("🧪 TOKENIZER QUICK SPECIALS CHECK");
section("🧪 DIRECT-ID DECODE CHECK");
for (const id of [0, 1, 174, 24000, 24001, 24002, 24005]) {
const piece = decodeIds([id]);
log(`id=${id} => ${jstr(piece)}`);
}
const samples = [
"<|system|>",
"<|user|>",
"<|violet_mood|>",
"<|assistant|>",
"<|endoftext|>",
"\n",
"X",
"Y",
"<|system|>\nX\n<|user|>\nY\n<|violet_mood|>\n"
];
for (const s of samples) {
log("\n---");
await encodeDecodeAudit(s);
}
setStatus("ready", "ok");
} catch (err) {
setStatus("error", "err");
log("\n❌ Tokenizer audit error:");
log(String(err));
log(err?.stack || "");
} finally {
tokenBtn.disabled = false;
}
});
// --- raw completion ----------------------------------------------------
rawBtn.addEventListener("click", async () => {
rawBtn.disabled = true;
setStatus("running", "warn");
try {
const q = questionEl.value || "What is your name?";
section("🧾 BUILDING EXACT DPO PROMPT");
const prompt = buildDpoPrompt(q);
log("Question: " + q);
await rawCompletion(prompt, { stream: false });
setStatus("ready", "ok");
} catch (err) {
setStatus("error", "err");
log("\n❌ Raw completion error:");
log(String(err));
log(err?.stack || "");
} finally {
rawBtn.disabled = false;
}
});
streamBtn.addEventListener("click", async () => {
streamBtn.disabled = true;
setStatus("streaming", "warn");
try {
const q = questionEl.value || "What is your name?";
section("🧾 BUILDING EXACT DPO PROMPT");
const prompt = buildDpoPrompt(q);
log("Question: " + q);
await rawCompletion(prompt, { stream: true });
setStatus("ready", "ok");
} catch (err) {
setStatus("error", "err");
log("\n❌ Streaming error (if unsupported, use non-stream):");
log(String(err));
log(err?.stack || "");
} finally {
streamBtn.disabled = false;
}
});
log("Ready. Click 'Load Model' → then run 'Tokenizer Audit' and/or 'Run Raw Completion'.");
</script>
</body>
</html>