import { pipeline, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.0/dist/transformers.min.js";

const MODEL_ID = "onnx-community/Qwen2.5-Coder-1.5B-Instruct";

// dtypes passed to pipeline() one after another, in this order, until one loads.
const DTYPE_CANDIDATES = ["q4f16", "q4", "fp16"];

const FENCE = "```";

// A fence at the very start of the text, with an optional language tag and
// any whitespace that follows it.
const OPENING_FENCE_PATTERN = /^```(?:[a-zA-Z0-9_+#.-]+)?\s*\n?/;

// Lines that look like markdown commentary (list items, headings,
// "Explanation:", "The code ...") rather than source code.
const EXPLANATION_LINE_PATTERN =
  /^\s*(?:[-*]\s+|\d+\.\s+|#{1,6}\s+|Explanation\s*:|Steps\s*:|Notes?\s*:|The code\b|This code\b)/i;

let generator = null;
let isLoaded = false;
let activeDtype = null;

/**
 * Loads the text-generation pipeline on WebGPU, trying each entry of
 * DTYPE_CANDIDATES in order and keeping the first one that loads.
 * Returns immediately if a model has already been loaded.
 *
 * @param {(progress: object) => void} [onProgress] - Optional. Called with
 *   `{ status: "attempt", dtype }` before each attempt and with every progress
 *   event reported by the pipeline, extended with the current `dtype`.
 * @returns {Promise<void>}
 * @throws {Error} When every dtype fails. The message lists each failure and
 *   `cause` holds the error thrown by the last attempt.
 */
export async function loadModel(onProgress) {
  if (isLoaded) return;

  const errors = [];
  let lastError;

  for (const dtype of DTYPE_CANDIDATES) {
    try {
      onProgress?.({ status: "attempt", dtype });
      generator = await pipeline("text-generation", MODEL_ID, {
        dtype,
        device: "webgpu",
        progress_callback: (progress) => onProgress?.({ ...progress, dtype }),
      });
      activeDtype = dtype;
      isLoaded = true;
      return;
    } catch (error) {
      console.error(`Model load failed for dtype=${dtype}`, error);
      errors.push(`${dtype}: ${formatError(error)}`);
      lastError = error;
    }
  }

  // Keep the original exception reachable instead of only its formatted text.
  throw new Error(`All WebGPU dtype attempts failed. ${errors.join(" | ")}`, {
    cause: lastError,
  });
}

/**
 * Generates source code for `prompt` in `language` with the loaded model.
 *
 * @param {string} prompt - The user's request.
 * @param {string} language - Language name interpolated into the prompts.
 * @param {(token: string) => void} [onToken] - Optional. Called with each
 *   piece of text the streamer emits.
 * @param {(code: string) => void} [onComplete] - Optional. Called once with
 *   the final cleaned code.
 * @returns {Promise<string>} The generated code after fence/explanation cleanup.
 * @throws {Error} "Model not loaded" when loadModel() has not succeeded yet.
 */
export async function generateCode(prompt, language, onToken, onComplete) {
  if (!generator) throw new Error("Model not loaded");

  let streamedText = "";

  const messages = [
    {
      role: "system",
      content: [
        `You are an expert ${language} programmer.`,
        "Return raw source code only.",
        "Do not use markdown fences.",
        "Do not add explanations, bullet points, headings, comments, or usage notes.",
        "Do not wrap the answer in ```.",
        "The response must be directly executable or pasteable as a source file.",
      ].join(" "),
    },
    {
      role: "user",
      content: `${prompt}\n\nReturn only the ${language} code. No markdown. No comments. No explanation.`,
    },
  ];

  const streamer = new TextStreamer(generator.tokenizer, {
    skip_prompt: true,
    callback_function: (token) => {
      // Accumulate the stream so it can serve as a fallback result below.
      streamedText += token;
      onToken?.(token);
    },
  });

  const result = await generator(messages, {
    max_new_tokens: 1024,
    do_sample: false,
    streamer,
  });

  // generated_text is handled both as a message array (the last entry's
  // content is used) and as a plain value.
  const generated = result?.[0]?.generated_text;
  const resultText = Array.isArray(generated)
    ? generated.at(-1)?.content
    : String(generated || "");

  // Fall back to the streamed text when the result carries no usable string.
  const hasResultText = typeof resultText === "string" && resultText.trim() !== "";
  const fullCodeRaw = hasResultText ? resultText : streamedText;

  const fullCode = stripMarkdownCodeFence(fullCodeRaw);
  onComplete?.(fullCode);
  return fullCode;
}

/**
 * Reports whether `navigator.gpu` is available.
 *
 * @returns {boolean} False when `navigator` itself does not exist.
 */
export function isWebGPUSupported() {
  return typeof navigator !== "undefined" && Boolean(navigator.gpu);
}

/**
 * @returns {string|null} The dtype of the loaded model, or null if none has
 *   been loaded.
 */
export function getActiveDtype() {
  return activeDtype;
}

/**
 * Turns a caught value into a short human-readable string.
 *
 * @param {unknown} error - Whatever was thrown.
 * @returns {string} The message, the string itself, or a JSON/String dump.
 */
function formatError(error) {
  if (!error) return "unknown error";
  if (error.message) return error.message;
  if (typeof error === "string") return error;
  try {
    return JSON.stringify(error);
  } catch {
    // JSON.stringify throws on cycles and BigInt; fall back to String().
    return String(error);
  }
}

/**
 * Splits text that begins with a fence into the content inside the fence.
 *
 * @param {string} text - Text whose first characters are the fence marker.
 * @returns {{ code: string, isClosed: boolean }} The content after the opening
 *   fence, cut at the next fence when there is one.
 */
function readFencedBlock(text) {
  const opening = text.match(OPENING_FENCE_PATTERN);
  const body = text.slice(opening[0].length);
  const closingIndex = body.indexOf(FENCE);
  return closingIndex >= 0
    ? { code: body.slice(0, closingIndex), isClosed: true }
    : { code: body, isClosed: false };
}

/**
 * Extracts bare source code from model output that may contain markdown
 * fences and trailing commentary.
 *
 * - No fence: the text is passed through trimMarkdownExplanation().
 * - A complete (opened and closed) fenced block, at the start or after some
 *   leading prose: its content is returned as is, because the closing fence
 *   already marks where the code ends.
 * - An unclosed fence at the start: the content after it is passed through
 *   trimMarkdownExplanation().
 * - A lone fence later in the text: the text before it is kept and passed
 *   through trimMarkdownExplanation().
 *
 * @param {string} text - Raw model output.
 * @returns {string} The cleaned code with `\n` line endings, or "" for
 *   empty input.
 */
export function stripMarkdownCodeFence(text) {
  const trimmed = String(text || "").trim();
  if (!trimmed) return "";

  const firstFence = trimmed.indexOf(FENCE);
  if (firstFence < 0) return trimMarkdownExplanation(trimmed);

  const block = readFencedBlock(trimmed.slice(firstFence));
  // Same line-ending normalisation that trimMarkdownExplanation() applies.
  const fencedCode = block.code.replace(/\r\n/g, "\n").trim();

  if (block.isClosed && fencedCode) return fencedCode;
  if (firstFence === 0) return trimMarkdownExplanation(block.code);

  // Stray or empty fence after the code: keep what came before it.
  return trimMarkdownExplanation(trimmed.slice(0, firstFence));
}

/**
 * Drops everything from the first line that looks like markdown commentary
 * (see EXPLANATION_LINE_PATTERN) to the end of the text.
 *
 * NOTE(review): the pattern also matches code lines such as `# comment`,
 * ` * ` JSDoc continuation lines and `- ` YAML items, so it can cut real
 * code. stripMarkdownCodeFence() therefore skips it for closed fenced blocks.
 *
 * @param {string} text - Candidate code.
 * @returns {string} The leading lines before any commentary, trimmed.
 */
function trimMarkdownExplanation(text) {
  const lines = String(text || "").split(/\r?\n/);
  const cutIndex = lines.findIndex((line) => EXPLANATION_LINE_PATTERN.test(line));
  const kept = cutIndex === -1 ? lines : lines.slice(0, cutIndex);
  return kept.join("\n").trim();
}

// Expose the API to non-module scripts; skipped where there is no window.
if (typeof window !== "undefined") {
  Object.assign(window, {
    loadModel,
    generateCode,
    isWebGPUSupported,
    getActiveDtype,
    stripMarkdownCodeFence,
  });
}