import {
Florence2ForConditionalGeneration,
AutoProcessor,
RawImage,
} from "@huggingface/transformers";
// Identifier passed to `from_pretrained` for both the model and the processor.
const MODEL_ID = "onnx-community/Florence-2-base";
// Module-level cache: both stay null until loadModel() assigns them, after
// which later loadModel() calls return the same instances.
let model = null;
let processor = null;
/**
 * Supported Florence-2 task tokens.
 *
 * Keys are the short task names; values are the corresponding task prompt
 * tokens as listed in the Florence-2 model card.
 */
export const TASKS = {
  caption: "<CAPTION>",
  detailed_caption: "<DETAILED_CAPTION>",
  more_detailed_caption: "<MORE_DETAILED_CAPTION>",
  ocr: "<OCR>",
  ocr_with_region: "<OCR_WITH_REGION>",
  object_detection: "<OD>",
  dense_region_caption: "<DENSE_REGION_CAPTION>",
  region_proposal: "<REGION_PROPOSAL>",
};
/**
 * Load the Florence-2 model and its processor, caching both at module level.
 *
 * The first successful call loads both from MODEL_ID; later calls return the
 * cached instances without reloading.
 *
 * @returns {Promise<{model: object, processor: object}>} The cached model and
 *   processor instances.
 * @throws Rejects with whatever error `from_pretrained` raises; in that case
 *   nothing is cached and the next call retries the load.
 */
export async function loadModel() {
  // Guard on both: a half-initialized cache must never be handed to callers.
  if (!model || !processor) {
    console.log("Loading Florence-2 model...");
    // The two loads are independent, so run them in parallel. Results are
    // held in locals and published together only after both succeed, so a
    // failed processor load cannot leave `model` set with `processor` null.
    // NOTE: callers that overlap during the first load each trigger their
    // own load; the in-flight promise is not shared.
    const [loadedModel, loadedProcessor] = await Promise.all([
      Florence2ForConditionalGeneration.from_pretrained(MODEL_ID, {
        dtype: "fp32",
      }),
      AutoProcessor.from_pretrained(MODEL_ID),
    ]);
    model = loadedModel;
    processor = loadedProcessor;
    console.log("Model loaded.");
  }
  return { model, processor };
}
/**
* Generate text from an image buffer.
* @param {Buffer} imageBuffer - Raw image bytes
* @param {string} task - One of the TASKS keys (default: "caption")
* @param {string|null} textInput - Optional extra text input for the task
* @param {number} maxTokens - Max new tokens to generate
* @returns {Promise