Spaces:

build-small-hackathon
/

split-brain-copilot

Running

App Files Files Community

split-brain-copilot / static /engine.js

blessingmwiti's picture

Sanitize streamed local model output

d256fda 1 day ago

history blame contribute delete

4.25 kB

	import { pipeline, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.0/dist/transformers.min.js";

	// Model identifier passed to pipeline() by loadModel.
	const MODEL_ID = "onnx-community/Qwen2.5-Coder-1.5B-Instruct";
	// dtype values loadModel tries, in this order; the first one that loads is kept.
	const DTYPE_CANDIDATES = ["q4f16", "q4", "fp16"];

	// Pipeline instance returned by pipeline(); stays null until loadModel succeeds.
	let generator = null;
	// Set to true after a successful load so that later loadModel calls return immediately.
	let isLoaded = false;
	// The entry of DTYPE_CANDIDATES that loaded successfully; null before any successful load.
	let activeDtype = null;

	/**
	 * Creates the text-generation pipeline on WebGPU, trying every entry of
	 * DTYPE_CANDIDATES in order and keeping the first one that loads.
	 *
	 * Returns immediately when a model has already been loaded. On success the
	 * pipeline is stored in `generator` and the winning dtype in `activeDtype`.
	 *
	 * @param {(info: object) => void} [onProgress] Optional callback. It receives
	 *   `{ status: "attempt", dtype }` before each attempt, and every progress
	 *   object reported by the pipeline loader with the current `dtype` added.
	 * @returns {Promise<void>}
	 * @throws {Error} When every dtype fails. The message lists one
	 *   `dtype: reason` entry per attempt, and `cause` is an AggregateError
	 *   holding the originally thrown values in attempt order.
	 */
	export async function loadModel(onProgress) {
		if (isLoaded) return;

		const summaries = [];
		const causes = [];
		for (const dtype of DTYPE_CANDIDATES) {
			try {
				// Kept inside the try: a throwing callback is handled like a failed load attempt.
				onProgress?.({ status: "attempt", dtype });
				generator = await pipeline("text-generation", MODEL_ID, {
					dtype,
					device: "webgpu",
					progress_callback: (progress) => onProgress?.({ ...progress, dtype }),
				});
				activeDtype = dtype;
				isLoaded = true;
				return;
			} catch (error) {
				console.error(`Model load failed for dtype=${dtype}`, error);
				summaries.push(`${dtype}: ${formatError(error)}`);
				// Keep the raw value too, so callers can inspect stacks via `cause`.
				causes.push(error);
			}
		}

		throw new Error(`All WebGPU dtype attempts failed. ${summaries.join(" | ")}`, {
			cause: new AggregateError(causes, "One error per attempted dtype"),
		});
	}

	/**
	 * Asks the loaded model for source code and returns it with markdown
	 * fences and trailing explanation removed.
	 *
	 * @param {string} prompt Description of the code to produce.
	 * @param {string} language Programming language name interpolated into the
	 *   system and user messages.
	 * @param {(chunk: string) => void} [onToken] Optional callback invoked with
	 *   each piece of text the streamer emits. These pieces are passed through
	 *   unmodified (fences are NOT stripped from them).
	 * @param {(code: string) => void} [onComplete] Optional callback invoked
	 *   once with the final cleaned code.
	 * @returns {Promise<string>} The cleaned code (same value given to `onComplete`).
	 * @throws {Error} "Model not loaded" when no pipeline has been created yet.
	 */
	export async function generateCode(prompt, language, onToken, onComplete) {
		if (!generator) throw new Error("Model not loaded");
		let streamedText = "";

		const messages = [
			{
				role: "system",
				content: [
					`You are an expert ${language} programmer.`,
					"Return raw source code only.",
					"Do not use markdown fences.",
					"Do not add explanations, bullet points, headings, comments, or usage notes.",
					"Do not wrap the answer in ```.",
					"The response must be directly executable or pasteable as a source file.",
				].join(" "),
			},
			{
				role: "user",
				content: `${prompt}\n\nReturn only the ${language} code. No markdown. No comments. No explanation.`,
			},
		];

		const streamer = new TextStreamer(generator.tokenizer, {
			skip_prompt: true,
			callback_function: (token) => {
				// Accumulate a copy so there is a fallback if the pipeline result is empty.
				streamedText += token;
				onToken?.(token);
			},
		});

		const result = await generator(messages, {
			max_new_tokens: 1024,
			do_sample: false,
			streamer,
		});

		// generated_text is handled both as a message list (last entry's content
		// is the reply) and as a plain string.
		const generated = result?.[0]?.generated_text;
		const resultText = Array.isArray(generated)
			? String(generated.at(-1)?.content ?? "")
			: String(generated ?? "");
		// Prefer the pipeline's final text; fall back to what was streamed when it is blank.
		const fullCodeRaw = resultText.trim() ? resultText : streamedText;
		const fullCode = stripMarkdownCodeFence(fullCodeRaw);
		onComplete?.(fullCode);
		return fullCode;
	}

	/**
	 * Feature check for WebGPU.
	 *
	 * @returns {boolean} `true` when `navigator.gpu` is truthy, `false` otherwise.
	 */
	export function isWebGPUSupported() {
		const { gpu } = navigator;
		return !!gpu;
	}

	/**
	 * Reports which dtype the model was loaded with.
	 *
	 * @returns {string|null} The current value of the module-level `activeDtype`:
	 *   `null` until a load succeeds, afterwards the dtype that loaded.
	 */
	export function getActiveDtype() {
		const currentDtype = activeDtype;
		return currentDtype;
	}

	/**
	 * Turns any thrown value into a string suitable for an error summary.
	 *
	 * @param {*} error The caught value; may be an Error, a string, or anything else.
	 * @returns {string} "unknown error" for falsy values, the string itself for
	 *   strings, the stringified `message` when one is present, otherwise the
	 *   JSON form, falling back to `String(error)`. Always a string.
	 */
	function formatError(error) {
		if (!error) return "unknown error";
		if (typeof error === "string") return error;
		// Coerce so a non-string `message` cannot leak out as a non-string result.
		if (error.message) return String(error.message);
		try {
			// JSON.stringify returns undefined (not a string) for functions and symbols.
			return JSON.stringify(error) ?? String(error);
		} catch {
			// JSON.stringify throws on circular structures and BigInt values.
			return String(error);
		}
	}

	/**
	 * Removes markdown code fences and trailing explanatory prose from model
	 * output, leaving only the code.
	 *
	 * - Text starting with a fence (optionally followed by a language tag)
	 *   keeps what lies between that fence and the next one, or everything
	 *   after the opening fence when it is never closed.
	 * - Otherwise, if a fence appears later, only the text before it is kept.
	 * - The result is then cut at the first line that looks like markdown
	 *   explanation and trimmed.
	 *
	 * NOTE(review): when prose precedes an opening fence ("Here is the code:"
	 * followed by a fenced block), the text BEFORE the fence is what survives
	 * the second rule - confirm this is the intended handling.
	 *
	 * @param {*} text Raw model output; falsy values are treated as "".
	 * @returns {string} The cleaned code, or "" when nothing remains.
	 */
	export function stripMarkdownCodeFence(text) {
		const trimmed = String(text || "").trim();
		if (!trimmed) return "";

		let code = trimmed;
		const openingFence = code.match(/^```(?:[a-zA-Z0-9_+#.-]+)?\s*\n?/);
		if (openingFence) {
			code = code.slice(openingFence[0].length);
			const closingIndex = code.indexOf("```");
			if (closingIndex >= 0) code = code.slice(0, closingIndex);
		} else {
			// No leading fence: a later fence is treated as the end of the code.
			const firstFence = code.indexOf("```");
			if (firstFence >= 0) code = code.slice(0, firstFence);
		}

		return trimMarkdownExplanation(code);
	}

	/**
	 * Drops everything from the first explanation-looking line onward.
	 *
	 * A line counts as explanation when, after optional leading whitespace, it
	 * starts with a `-` or `*` bullet, a numbered list marker ("1. "), a
	 * markdown heading ("# " to "###### "), one of the labels "Explanation:",
	 * "Steps:", "Note:" / "Notes:", or the phrases "The code" / "This code"
	 * (case-insensitive).
	 *
	 * @param {*} text Text to scan; falsy values are treated as "".
	 * @returns {string} The lines before the first match, joined with "\n" and trimmed.
	 */
	function trimMarkdownExplanation(text) {
		const lines = String(text || "").split(/\r?\n/);
		// No `g` flag, so `.test()` carries no state between lines.
		const explanationPattern =
			/^\s*(?:[-*]\s+|\d+\.\s+|#{1,6}\s+|Explanation\s*:|Steps\s*:|Notes?\s*:|The code\b|This code\b)/i;

		const cutIndex = lines.findIndex((line) => explanationPattern.test(line));
		const kept = cutIndex === -1 ? lines : lines.slice(0, cutIndex);
		return kept.join("\n").trim();
	}

	// Publish the module's functions as properties of `window`, one by one.
	window.loadModel = loadModel;
	window.generateCode = generateCode;
	window.isWebGPUSupported = isWebGPUSupported;
	window.getActiveDtype = getActiveDtype;
	window.stripMarkdownCodeFence = stripMarkdownCodeFence;