/**
 * Web Worker that loads a causal language model with transformers.js and
 * streams generated text back to the page.
 *
 * Incoming messages (`e.data.type`):
 *   - "load"     -> load the tokenizer and model
 *   - "generate" -> generate a response for `instruction` using `params`
 *   - "stop"     -> interrupt the generation currently in progress
 *
 * Outgoing messages (`type`):
 *   - "status"   { message }
 *   - "progress" { percent, file }
 *   - "ready"
 *   - "token"    { text }
 *   - "error"    { message }
 *   - "done"
 */
import {
  env,
  AutoTokenizer,
  AutoModelForCausalLM,
  TextStreamer,
  InterruptableStoppingCriteria,
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3";

// Turn off the library's local-model lookup (see the transformers.js `env` docs).
env.allowLocalModels = false;

const MODEL_ID = "av-codes/Supra-50M-Instruct-ONNX";

let tokenizer = null;
let model = null;
let loading = false;
let generating = false;

// Shared with every generate() call so that a "stop" message can interrupt it.
const stopping = new InterruptableStoppingCriteria();

/**
 * Turn a thrown value into a string that can be posted to the page.
 * Not everything that gets thrown is an `Error`, so `.message` alone may be
 * `undefined`.
 *
 * @param {unknown} err - The caught value.
 * @returns {string} A human-readable description.
 */
function describeError(err) {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Wrap an instruction in the instruction/response prompt template.
 *
 * @param {string} instruction - The user's instruction text.
 * @returns {string} The full prompt, ending right after "### Response:\n".
 */
function formatPrompt(instruction) {
  return (
    "Below is an instruction that describes a task. " +
    "Write a response that appropriately completes the request.\n\n" +
    `### Instruction:\n${instruction}\n\n### Response:\n`
  );
}

/**
 * Build the sampling options for `model.generate` from the caller's params,
 * filling in defaults for anything omitted.
 *
 * @param {object} [params] - Optional overrides: `max_new_tokens`,
 *   `temperature`, `top_k`, `top_p`, `repetition_penalty`.
 * @returns {object} Options with every field populated, plus `do_sample`.
 */
function resolveGenerationOptions(params) {
  const { max_new_tokens, temperature, top_k, top_p, repetition_penalty } =
    params ?? {};

  // `??` rather than `||`: a temperature of 0 is a deliberate request for
  // greedy decoding and must not be replaced by the default.
  const requestedTemperature = temperature ?? 0.7;

  // Derived from the effective temperature, so an omitted temperature samples
  // at the default instead of silently falling back to greedy decoding.
  const doSample = requestedTemperature > 0;

  return {
    // `||` is kept where 0 is not a usable value, so 0 still means "default".
    max_new_tokens: max_new_tokens || 256,
    // When not sampling, never forward a non-positive temperature.
    temperature: doSample ? requestedTemperature : 0.7,
    // `??` so that an explicit top_k of 0 is passed through unchanged.
    top_k: top_k ?? 50,
    top_p: top_p || 0.9,
    repetition_penalty: repetition_penalty || 1.15,
    do_sample: doSample,
  };
}

/**
 * Load the tokenizer and model, reporting status and download progress.
 *
 * Posts "ready" on success and "error" on failure. A call made while a load
 * is already running is ignored; a call made after a successful load just
 * re-posts "ready".
 *
 * @returns {Promise<void>} Resolves when loading has finished or failed.
 */
async function load() {
  if (loading) return;
  if (model && tokenizer) {
    self.postMessage({ type: "ready" });
    return;
  }

  loading = true;
  try {
    self.postMessage({ type: "status", message: "Loading tokenizer..." });
    tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);

    self.postMessage({ type: "status", message: "Loading model (50 MB)..." });
    model = await AutoModelForCausalLM.from_pretrained(MODEL_ID, {
      dtype: "q8",
      progress_callback: (progress) => {
        // Only "progress" events are forwarded; other statuses are dropped.
        if (progress.status === "progress") {
          self.postMessage({
            type: "progress",
            percent: progress.progress,
            file: progress.file,
          });
        }
      },
    });

    self.postMessage({ type: "ready" });
  } catch (err) {
    // Leave the worker in a clean "not loaded" state so a later "load" retries.
    tokenizer = null;
    model = null;
    self.postMessage({ type: "error", message: describeError(err) });
  } finally {
    loading = false;
  }
}

/**
 * Generate a response for `instruction`, streaming text chunks as "token"
 * messages and finishing with "done".
 *
 * Returns without posting anything when the model is not loaded or another
 * generation is already running.
 *
 * @param {string} instruction - The user's instruction text.
 * @param {object} [params] - Optional sampling overrides; see
 *   `resolveGenerationOptions`.
 * @returns {Promise<void>} Resolves once generation has ended.
 */
async function generate(instruction, params) {
  if (!model || !tokenizer || generating) return;

  generating = true;
  stopping.reset();

  try {
    // Tokenizing and streamer setup live inside the try so that a failure
    // here is reported and cannot leave `generating` stuck at true.
    const inputs = tokenizer(formatPrompt(instruction));
    const streamer = new TextStreamer(tokenizer, {
      skip_prompt: true,
      skip_special_tokens: true,
      callback_function: (text) => {
        self.postMessage({ type: "token", text });
      },
    });

    await model.generate({
      ...inputs,
      ...resolveGenerationOptions(params),
      streamer,
      stopping_criteria: [stopping],
    });
  } catch (err) {
    self.postMessage({ type: "error", message: describeError(err) });
  } finally {
    generating = false;
    self.postMessage({ type: "done" });
  }
}

/**
 * Dispatch messages from the page. Unknown message types are ignored.
 */
self.onmessage = (e) => {
  const { type, instruction, params } = e.data ?? {};

  // load() and generate() report their own failures, so their promises are
  // intentionally not awaited here.
  switch (type) {
    case "load":
      void load();
      break;
    case "generate":
      void generate(instruction, params);
      break;
    case "stop":
      stopping.interrupt();
      break;
    default:
      break;
  }
};