blessingmwiti's picture
Sanitize streamed local model output
d256fda
import { pipeline, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.0/dist/transformers.min.js";
// Model identifier handed to pipeline() by loadModel.
const MODEL_ID = "onnx-community/Qwen2.5-Coder-1.5B-Instruct";
// dtype values loadModel tries, in this order, until one loads successfully.
const DTYPE_CANDIDATES = ["q4f16", "q4", "fp16"];
// Text-generation pipeline created by loadModel; null until a load succeeds.
let generator = null;
// Set to true by loadModel once a pipeline has been created.
let isLoaded = false;
// The entry of DTYPE_CANDIDATES that loaded successfully; null before that.
let activeDtype = null;
// Promise of the load currently in progress, shared by concurrent callers of
// loadModel(); null whenever no load is running.
let pendingLoad = null;

/**
 * Load the text-generation pipeline on WebGPU.
 *
 * Each dtype in DTYPE_CANDIDATES is tried in order; the first one that loads
 * is kept in `generator` and recorded in `activeDtype`. Calling this again
 * after a successful load is a no-op. Calls made while a load is still running
 * wait for that same load instead of starting a second one (only the first
 * caller's `onProgress` receives events in that case).
 *
 * @param {(event: object) => void} [onProgress] Optional callback. Receives
 *   `{ status: "attempt", dtype }` before each attempt and every pipeline
 *   progress event with the current `dtype` merged in.
 * @returns {Promise<void>} Resolves once a pipeline has been created.
 * @throws {Error} When every dtype fails; the message lists each failure and
 *   `cause` holds the last underlying error.
 */
export async function loadModel(onProgress) {
  if (isLoaded) return;
  if (!pendingLoad) {
    // Clear the slot on success *and* failure so a failed load can be retried.
    pendingLoad = loadFirstWorkingDtype(onProgress).finally(() => {
      pendingLoad = null;
    });
  }
  await pendingLoad;
}

// Try every candidate dtype in order and keep the first pipeline that loads.
async function loadFirstWorkingDtype(onProgress) {
  const errors = [];
  let lastError;
  for (const dtype of DTYPE_CANDIDATES) {
    try {
      onProgress?.({ status: "attempt", dtype });
      generator = await pipeline("text-generation", MODEL_ID, {
        dtype,
        device: "webgpu",
        progress_callback: (progress) => onProgress?.({ ...progress, dtype }),
      });
      activeDtype = dtype;
      isLoaded = true;
      return;
    } catch (error) {
      // A failed dtype is not fatal: record it and fall through to the next.
      console.error(`Model load failed for dtype=${dtype}`, error);
      errors.push(`${dtype}: ${formatError(error)}`);
      lastError = error;
    }
  }
  throw new Error(`All WebGPU dtype attempts failed. ${errors.join(" | ")}`, {
    cause: lastError,
  });
}
/**
 * Generate source code for `prompt` with the loaded model.
 *
 * Tokens are forwarded to `onToken` exactly as the streamer emits them
 * (unsanitized). The final text is passed through stripMarkdownCodeFence
 * before being handed to `onComplete` and returned.
 *
 * @param {string} prompt User request describing the code to write.
 * @param {string} language Target language name, interpolated into the prompts.
 * @param {(token: string) => void} [onToken] Optional per-chunk stream callback.
 * @param {(code: string) => void} [onComplete] Optional callback for the final code.
 * @returns {Promise<string>} The sanitized generated code.
 * @throws {Error} "Model not loaded" when loadModel has not succeeded yet.
 */
export async function generateCode(prompt, language, onToken, onComplete) {
  if (!generator) throw new Error("Model not loaded");

  let streamedText = "";
  const systemPrompt = [
    `You are an expert ${language} programmer.`,
    "Return raw source code only.",
    "Do not use markdown fences.",
    "Do not add explanations, bullet points, headings, comments, or usage notes.",
    "Do not wrap the answer in ```.",
    "The response must be directly executable or pasteable as a source file.",
  ].join(" ");
  const messages = [
    { role: "system", content: systemPrompt },
    {
      role: "user",
      content: `${prompt}\n\nReturn only the ${language} code. No markdown. No comments. No explanation.`,
    },
  ];

  const streamer = new TextStreamer(generator.tokenizer, {
    skip_prompt: true,
    callback_function: (token) => {
      // Keep our own copy of the stream as a fallback for an empty result.
      streamedText += token;
      onToken?.(token);
    },
  });

  const result = await generator(messages, {
    max_new_tokens: 1024,
    do_sample: false,
    streamer,
  });

  const generated = result?.[0]?.generated_text;
  // For chat input the pipeline result is handled as a message list whose last
  // entry is the reply; `?.` guards against an empty list.
  const resultText = Array.isArray(generated)
    ? generated.at(-1)?.content
    : String(generated || "");
  // Fall back to the streamed text when the result is missing, blank, or not
  // a plain string.
  const hasResultText = typeof resultText === "string" && resultText.trim() !== "";
  const fullCodeRaw = hasResultText ? resultText : streamedText;

  const fullCode = stripMarkdownCodeFence(fullCodeRaw);
  onComplete?.(fullCode);
  return fullCode;
}
/**
 * Report whether the current environment exposes `navigator.gpu`.
 *
 * Never throws: a missing or nullish `navigator` (for example in a
 * non-browser host) simply yields `false`.
 *
 * @returns {boolean} True when `navigator.gpu` is present and truthy.
 */
export function isWebGPUSupported() {
  return Boolean(globalThis.navigator?.gpu);
}
/**
 * Get the dtype recorded by the last successful model load.
 *
 * @returns {string|null} The module-level `activeDtype` value; it is
 *   initialised to null and assigned when a load succeeds.
 */
export function getActiveDtype() {
  return activeDtype;
}
/**
 * Turn any thrown value into a short, human-readable string.
 *
 * @param {unknown} error The caught value (Error, string, object, ...).
 * @returns {string} Always a string: "unknown error" for falsy input, the
 *   `message` when there is one, otherwise the best available rendering.
 */
function formatError(error) {
  if (!error) return "unknown error";
  if (error.message) return String(error.message);
  if (typeof error === "string") return error;
  // An Error with an empty message would serialize to "{}"; its string form
  // at least carries the error name (e.g. "TypeError").
  if (error instanceof Error) return String(error);
  try {
    const serialized = JSON.stringify(error);
    // JSON.stringify yields undefined (not a string) for functions and symbols.
    if (serialized !== undefined) return serialized;
  } catch {
    // Not serializable (e.g. circular structure or BigInt): use String() below.
  }
  return String(error);
}
const FENCE = "```";

// Opening fence at the very start of a string: three backticks, an optional
// language tag, then any whitespace up to and including the line break.
const OPENING_FENCE_PATTERN = /^```(?:[a-zA-Z0-9_+#.-]+)?\s*\n?/;

// A line that looks like markdown/prose rather than code: list items,
// headings, or sentences such as "Explanation:" / "This code ...".
// NOTE(review): this also matches lines like "# comment" and " * text", so
// code containing such comment lines is cut at the first one.
const EXPLANATION_LINE_PATTERN =
  /^\s*(?:[-*]\s+|\d+\.\s+|#{1,6}\s+|Explanation\s*:|Steps\s*:|Notes?\s*:|The code\b|This code\b)/i;

/**
 * Reduce model output to bare source code.
 *
 * - Output starting with a fence: the fence line is dropped and everything
 *   from the closing fence onward is discarded.
 * - Prose followed by a complete fenced block (opening fence at the start of
 *   a line, non-empty body, closing fence): the body of that block is used.
 *   Trade-off: code followed by a separate complete fenced block also resolves
 *   to that block's body.
 * - Any other stray fence: everything from the fence onward is discarded.
 *
 * Trailing explanation lines are then removed and the result is trimmed.
 *
 * @param {unknown} text Raw model output; falsy values are treated as "".
 * @returns {string} The extracted code, possibly "".
 */
export function stripMarkdownCodeFence(text) {
  const trimmed = String(text || "").trim();
  if (!trimmed) return "";

  const firstFence = trimmed.indexOf(FENCE);
  if (firstFence < 0) return trimMarkdownExplanation(trimmed);
  if (firstFence === 0) return trimMarkdownExplanation(readFenceBody(trimmed).body);

  const before = trimmed.slice(0, firstFence);
  // A real markdown fence starts its line; backticks in the middle of a line
  // are treated as stray and only truncate the output.
  const lineStart = before.lastIndexOf("\n") + 1;
  const fenceStartsLine = before.slice(lineStart).trim() === "";
  if (fenceStartsLine) {
    const { body, closed } = readFenceBody(trimmed.slice(firstFence));
    if (closed && body.trim() !== "") return trimMarkdownExplanation(body);
  }
  return trimMarkdownExplanation(before);
}

// Split off the opening fence of `fenced` (which must start with one) and
// return the text up to the closing fence, or to the end when it is unclosed.
function readFenceBody(fenced) {
  const opening = fenced.match(OPENING_FENCE_PATTERN);
  const rest = fenced.slice(opening ? opening[0].length : FENCE.length);
  const closingIndex = rest.indexOf(FENCE);
  if (closingIndex < 0) return { body: rest, closed: false };
  return { body: rest.slice(0, closingIndex), closed: true };
}

// Drop the first explanation-looking line and everything after it, then trim.
function trimMarkdownExplanation(text) {
  const lines = String(text || "").split(/\r?\n/);
  const cutIndex = lines.findIndex((line) => EXPLANATION_LINE_PATTERN.test(line));
  const kept = cutIndex === -1 ? lines : lines.slice(0, cutIndex);
  return kept.join("\n").trim();
}
// Mirror the public API onto `window` as global properties.
// Guarded so that evaluating this module where no `window` global exists
// (for example inside a worker) does not throw; the named exports still work.
if (typeof window !== "undefined") {
  Object.assign(window, {
    loadModel,
    generateCode,
    isWebGPUSupported,
    getActiveDtype,
    stripMarkdownCodeFence,
  });
}