// LFM2-VL-WebGPU / src/context/VLMProvider.tsx
import {
AutoModelForImageTextToText,
AutoProcessor,
RawImage,
TextStreamer,
type ProgressInfo,
type Tensor,
} from "@huggingface/transformers";
import { useCallback, useRef, useState, type PropsWithChildren } from "react";
import { VLMContext, type LoadState } from "./VLMContext";
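// ONNX export of LFM2-VL 450M, fetched from the Hugging Face Hub on first load.
// MODEL_FILE_COUNT is the number of .onnx_data weight files whose download
// progress is averaged below; MAX_NEW_TOKENS caps the length of each caption.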
const MODEL_ID = "onnx-community/LFM2-VL-450M-ONNX";
const MODEL_FILE_COUNT = 3;
const MAX_NEW_TOKENS = 128;
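// One captioning request: a video frame plus the user prompt, with an optional
// callback that receives the partial caption as tokens stream in.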
type CaptionRequest = {
frame: ImageData;
onStream?: (text: string) => void;
prompt: string;
};
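// Instance types inferred from the transformers.js factory functions.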
type ProcessorType = Awaited<ReturnType<typeof AutoProcessor.from_pretrained>>;
type ModelType = Awaited<
ReturnType<typeof AutoModelForImageTextToText.from_pretrained>
>;
const initialLoadState: LoadState = {
error: null,
message: "Downloading...",
progress: 0,
status: "idle",
};
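// Collapse runs of whitespace so streamed fragments join into clean sentences.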
function normalizeText(text: string) {
return text.replace(/\s+/g, " ").trim();
}
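// Extract a human-readable message from an unknown thrown value.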
function getErrorMessage(error: unknown) {
if (error instanceof Error) {
return error.message;
}
return "The model could not be loaded.";
}
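/**
 * Provides model loading state and caption generation to the component tree.
 * The processor and model live in refs so re-renders never re-download the
 * weights.
 */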
export function VLMProvider({ children }: PropsWithChildren) {
const [loadState, setLoadState] = useState(initialLoadState);
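  // Heavy objects are kept in refs rather than state to avoid re-renders; the
  // promise ref dedupes concurrent load calls and the boolean flag guards
  // against overlapping generation requests.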
const processorRef = useRef<ProcessorType | null>(null);
const modelRef = useRef<ModelType | null>(null);
const loadPromiseRef = useRef<Promise<void> | null>(null);
const generationInFlightRef = useRef(false);
const setLoadProgress = useCallback((state: Partial<LoadState>) => {
setLoadState((current) => ({
...current,
...state,
}));
}, []);
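  // Idempotent loader: resolves immediately when the model is already cached,
  // reuses an in-flight load, and fails fast when WebGPU is unavailable.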
const loadModel = useCallback(async () => {
if (processorRef.current && modelRef.current) {
setLoadProgress({
error: null,
message: "Model ready",
progress: 100,
status: "ready",
});
return;
}
if (loadPromiseRef.current) {
return loadPromiseRef.current;
}
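    // transformers.js is configured for the WebGPU backend below, so bail out
    // early with a clear error when the browser does not expose it.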
if (!("gpu" in navigator)) {
const message = "WebGPU is not available in this browser.";
setLoadProgress({
error: message,
message: "WebGPU unavailable",
progress: 0,
status: "error",
});
throw new Error(message);
}
loadPromiseRef.current = (async () => {
      try {
        setLoadProgress({
          error: null,
          message: "Downloading...",
          progress: 0,
          status: "loading",
        });
        const processor = await AutoProcessor.from_pretrained(MODEL_ID);
        processorRef.current = processor;
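        // Each .onnx_data file reports progress independently; average the
        // per-file fractions over the expected file count for a 0-100 total.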
const progressMap = new Map<string, number>();
const progressCallback = (info: ProgressInfo) => {
if (
info.status !== "progress" ||
!info.file.endsWith(".onnx_data") ||
info.total === 0
) {
return;
}
progressMap.set(info.file, info.loaded / info.total);
          const summed = Array.from(progressMap.values()).reduce(
            (sum, value) => sum + value,
            0,
          );
          const totalProgress = (summed / MODEL_FILE_COUNT) * 100;
setLoadProgress({
message: "Downloading...",
progress: totalProgress,
status: "loading",
});
};
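        // Mixed precision per submodule: fp16 for the vision encoder and token
        // embeddings, 4-bit weights with fp16 activations (q4f16) for the
        // merged decoder.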
modelRef.current = await AutoModelForImageTextToText.from_pretrained(
MODEL_ID,
{
device: "webgpu",
dtype: {
vision_encoder: "fp16",
embed_tokens: "fp16",
decoder_model_merged: "q4f16",
},
progress_callback: progressCallback,
},
);
setLoadProgress({
error: null,
message: "Model ready",
progress: 100,
status: "ready",
});
} catch (error) {
const message = getErrorMessage(error);
setLoadProgress({
error: message,
message: "Unable to load model",
progress: 0,
status: "error",
});
throw error;
} finally {
loadPromiseRef.current = null;
}
})();
return loadPromiseRef.current;
}, [setLoadProgress]);
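  // Runs one frame + prompt through the model. Partial captions are pushed to
  // onStream as they decode; the full normalized caption is returned at the end.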
const generateCaption = useCallback(
async ({ frame, onStream, prompt }: CaptionRequest) => {
const processor = processorRef.current;
const model = modelRef.current;
if (!processor || !model || !processor.tokenizer) {
throw new Error("The model is not ready yet.");
}
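      // Drop overlapping requests instead of queueing them; an empty string
      // tells the caller this frame was skipped.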
if (generationInFlightRef.current) {
return "";
}
generationInFlightRef.current = true;
try {
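        // Build a chat-formatted prompt; the image slot is bound to the actual
        // pixels when the frame is passed to the processor below.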
const messages = [
{
content: [
{ type: "image" },
{ text: normalizeText(prompt), type: "text" },
],
role: "user",
},
];
const chatPrompt = processor.apply_chat_template(messages, {
add_generation_prompt: true,
});
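        // ImageData from a canvas is RGBA, hence 4 channels.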
const rawFrame = new RawImage(frame.data, frame.width, frame.height, 4);
const inputs = await processor(rawFrame, chatPrompt, {
add_special_tokens: false,
});
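        // Push decoded tokens to the UI as they arrive, skipping the prompt
        // and special tokens so only new caption text is surfaced.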
let streamedText = "";
const streamer = new TextStreamer(processor.tokenizer, {
callback_function: (text) => {
streamedText += text;
const normalized = normalizeText(streamedText);
if (normalized.length > 0) {
onStream?.(normalized);
}
},
skip_prompt: true,
skip_special_tokens: true,
});
const outputs = (await model.generate({
...inputs,
do_sample: false,
max_new_tokens: MAX_NEW_TOKENS,
repetition_penalty: 1.08,
streamer,
})) as Tensor;
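        // generate() returns prompt + completion token ids; slice off the
        // prompt so only the newly generated tokens are decoded.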
const inputLength = inputs.input_ids.dims.at(-1) ?? 0;
const generated = outputs.slice(null, [inputLength, null]);
const [decoded] = processor.batch_decode(generated, {
skip_special_tokens: true,
});
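        // Prefer the batch-decoded text over the streamed buffer, and emit the
        // final caption once more so the UI ends on the complete text.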
const finalCaption = normalizeText(decoded ?? streamedText);
if (finalCaption.length > 0) {
onStream?.(finalCaption);
}
return finalCaption;
} finally {
generationInFlightRef.current = false;
}
},
[],
);
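  // Expose the load state plus the two actions through context.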
return (
<VLMContext.Provider
value={{
...loadState,
generateCaption,
loadModel,
}}
>
{children}
</VLMContext.Provider>
);
}