Image2Caption / src / model.js
khushalcodiste's picture
feat: added
9a16713
raw
history blame
2.01 kB
import {
Florence2ForConditionalGeneration,
AutoProcessor,
RawImage,
} from "@huggingface/transformers";
// Hugging Face repo id passed to from_pretrained() below.
const MODEL_ID = "onnx-community/Florence-2-base";
// Lazily-initialized singletons; populated on the first loadModel() call.
let model = null;
let processor = null;
/** Supported Florence-2 task tokens */
/**
 * Supported Florence-2 task tokens, keyed by a friendly name.
 * Frozen so callers cannot accidentally mutate the shared mapping.
 */
export const TASKS = Object.freeze({
  caption: "<CAPTION>",
  detailed_caption: "<DETAILED_CAPTION>",
  more_detailed_caption: "<MORE_DETAILED_CAPTION>",
  ocr: "<OCR>",
  ocr_with_region: "<OCR_WITH_REGION>",
  object_detection: "<OD>",
  dense_region_caption: "<DENSE_REGION_CAPTION>",
  region_proposal: "<REGION_PROPOSAL>",
});
/**
 * Lazily load the Florence-2 model and its processor as module singletons.
 *
 * The guard checks BOTH singletons: the previous `if (!model)` check could
 * leave the module half-initialized if `AutoProcessor.from_pretrained` threw
 * after the model finished loading — later calls would then return a null
 * processor and never retry. Checking both makes a failed load retryable.
 *
 * NOTE(review): concurrent first calls can still trigger duplicate loads
 * (no in-flight promise cache) — acceptable if callers serialize startup;
 * confirm against usage.
 *
 * @returns {Promise<{model: object, processor: object}>} The loaded singletons.
 */
export async function loadModel() {
  if (!model || !processor) {
    console.log("Loading Florence-2 model...");
    model = await Florence2ForConditionalGeneration.from_pretrained(MODEL_ID, {
      dtype: "fp32",
    });
    processor = await AutoProcessor.from_pretrained(MODEL_ID);
    console.log("Model loaded.");
  }
  return { model, processor };
}
/**
* Generate text from an image buffer.
* @param {Buffer} imageBuffer - Raw image bytes
* @param {string} task - One of the TASKS keys (default: "caption")
* @param {string|null} textInput - Optional extra text input for the task
* @param {number} maxTokens - Max new tokens to generate
* @returns {Promise<object>} Parsed result from Florence-2
*/
/**
 * Generate text from an image buffer.
 * @param {Buffer} imageBuffer - Raw image bytes
 * @param {string} task - One of the TASKS keys (default: "caption")
 * @param {string|null} textInput - Optional extra text input for the task
 * @param {number} maxTokens - Max new tokens to generate
 * @returns {Promise<object>} Parsed result from Florence-2
 */
export async function generateCaption(
  imageBuffer,
  task = "caption",
  textInput = null,
  maxTokens = 100
) {
  const { model: florence, processor: proc } = await loadModel();

  // Decode the raw bytes into a RawImage the processor can consume.
  const rawImage = await RawImage.fromBlob(new Blob([imageBuffer]));

  // Unknown task keys fall back to plain captioning.
  const token = TASKS[task] || TASKS.caption;
  const fullPrompt = textInput ? token + textInput : token;

  const modelInputs = await proc(rawImage, proc.construct_prompts(fullPrompt));

  const outputIds = await florence.generate({
    ...modelInputs,
    max_new_tokens: maxTokens,
  });

  // Special tokens are kept in the decoded text — the post-processing step
  // presumably relies on them to parse task markers/regions.
  const [decoded] = proc.batch_decode(outputIds, {
    skip_special_tokens: false,
  });

  return proc.post_process_generation(decoded, token, rawImage.size);
}