import Fastify from "fastify";
import multipart from "@fastify/multipart";
import swagger from "@fastify/swagger";
import swaggerUi from "@fastify/swagger-ui";
import cors from "@fastify/cors";
import { generateCaption, loadModel, TASKS } from "./model.js";
const app = Fastify({ logger: true });

// --- Plugins ---
// Per-file upload ceiling handed to the multipart plugin (20 MiB).
const MAX_FILE_SIZE_BYTES = 20 * 1024 * 1024;

// OpenAPI document metadata consumed by @fastify/swagger.
const openapiDocument = {
  info: {
    title: "img3txt — Image Captioning API",
    description:
      "Generate captions, OCR, object detection and more from images using Microsoft Florence-2 (ONNX).",
    version: "1.0.0",
  },
  servers: [{ url: "/" }],
  tags: [
    { name: "caption", description: "Image captioning endpoints" },
    { name: "health", description: "Health check" },
  ],
};

// Swagger UI is mounted under /docs.
const swaggerUiOptions = {
  routePrefix: "/docs",
  uiConfig: { docExpansion: "list", deepLinking: true },
};

// Registration order is kept: cors, multipart, swagger, then swagger-ui.
await app.register(cors);
await app.register(multipart, { limits: { fileSize: MAX_FILE_SIZE_BYTES } });
await app.register(swagger, { openapi: openapiDocument });
await app.register(swaggerUi, swaggerUiOptions);
// --- Schemas ---
// Supported task names, taken from the keys of TASKS. Reused for the /health
// payload, the /caption OpenAPI description, and invalid-task error messages.
const taskEnum = Object.keys(TASKS);
// Small factories for the JSON-schema fragments used by the route definitions.
// Each call returns a fresh object, so no fragment is shared between schemas.

/** Object schema with the given `properties` map. */
const objectSchema = (properties) => ({ type: "object", properties });

/** Object schema that accepts arbitrary keys (used for model output). */
const freeFormObject = () => ({ type: "object", additionalProperties: true });

/** Plain string schema. */
const stringSchema = () => ({ type: "string" });

// Response body of POST /caption: the task that ran plus its raw result.
const captionResponseSchema = objectSchema({
  task: { type: "string", example: "caption" },
  result: freeFormObject(),
});

// Response body of POST /caption/batch: one entry per uploaded file.
const batchResponseSchema = objectSchema({
  results: {
    type: "array",
    items: objectSchema({
      filename: stringSchema(),
      task: stringSchema(),
      result: freeFormObject(),
    }),
  },
});

// Shape of every error payload: `{ error: "<message>" }`.
const errorSchema = objectSchema({
  error: stringSchema(),
});
// --- Routes ---
// Landing page — HF Spaces iframe shows this
// GET / — static landing page describing the API. Hidden from the generated
// OpenAPI document via `schema.hide`.
app.get(
  "/",
  { schema: { hide: true } },
  async (_req, reply) => {
    // Return the reply object: Fastify's async-handler contract requires
    // `return reply` (or `await reply`) when the response is produced with
    // reply.send(), otherwise the handler resolves with `undefined`.
    return reply.type("text/html").send(`
img3txt — Florence-2 Image Captioning API
img3txt
Image captioning, OCR & object detection powered by Florence-2 (ONNX)
Swagger UI
Health Check
POST /caption with form fields:
file — image (required)
task — caption, detailed_caption, more_detailed_caption, ocr, ocr_with_region, object_detection, dense_region_caption, region_proposal
max_tokens — default 100
`);
  }
);
// GET /health — liveness endpoint that also reports the model identifier and
// the list of supported task names.

// Shape of the 200 response.
const healthReplySchema = {
  type: "object",
  properties: {
    status: { type: "string", example: "ok" },
    model: { type: "string" },
    tasks: { type: "array", items: { type: "string" } },
  },
};

/** Builds the health payload; takes no input from the request. */
async function reportHealth() {
  return {
    status: "ok",
    model: "onnx-community/Florence-2-base",
    tasks: taskEnum,
  };
}

app.get(
  "/health",
  {
    schema: {
      tags: ["health"],
      summary: "Health check",
      response: { 200: healthReplySchema },
    },
  },
  reportHealth
);
// POST /caption — run one task against a single uploaded image.
//
// Multipart form fields:
//   file       — the image (required)
//   task       — one of taskEnum; defaults to "caption"
//   text       — optional text forwarded to generateCaption (null when absent)
//   max_tokens — positive integer; defaults to 100
//
// Responds with `{ task, result }`, or 400 `{ error }` when the file is
// missing, the task is unknown, or max_tokens is not a positive integer.
app.post(
  "/caption",
  {
    schema: {
      tags: ["caption"],
      summary: "Generate caption / OCR / detection for a single image",
      description: `Upload an image as multipart form data. Supported tasks: ${taskEnum.join(", ")}`,
      consumes: ["multipart/form-data"],
      response: {
        200: captionResponseSchema,
        400: errorSchema,
      },
    },
  },
  async (req, reply) => {
    const data = await req.file();
    if (!data) {
      return reply.code(400).send({ error: "No file uploaded" });
    }

    // Consume the file stream BEFORE looking at data.fields. The multipart
    // body is parsed serially, so fields that follow the file part only
    // become visible once the file has been read; reading first also
    // guarantees the stream is drained on the 400 paths below.
    const buffer = await data.toBuffer();

    const task = data.fields.task?.value || "caption";
    const textInput = data.fields.text?.value || null;
    const maxTokens = Number.parseInt(data.fields.max_tokens?.value || "100", 10);

    // Check against the key list rather than `TASKS[task]`: a plain property
    // lookup would also accept inherited names such as "constructor".
    if (!taskEnum.includes(task)) {
      return reply
        .code(400)
        .send({ error: `Invalid task. Choose from: ${taskEnum.join(", ")}` });
    }

    // parseInt yields NaN for non-numeric input; reject that along with
    // zero and negative values instead of passing them to the model.
    if (!Number.isInteger(maxTokens) || maxTokens < 1) {
      return reply
        .code(400)
        .send({ error: "max_tokens must be a positive integer" });
    }

    const result = await generateCaption(buffer, task, textInput, maxTokens);
    return { task, result };
  }
);
// POST /caption/batch — run one task against several uploaded images.
//
// Multipart form fields (order-independent, since every part is read before
// any validation happens):
//   <any file parts> — the images (at least one required)
//   task             — one of taskEnum; defaults to "caption"
//   text             — optional text forwarded to generateCaption
//   max_tokens       — positive integer; defaults to 100
//
// Responds with `{ results: [{ filename, task, result }, ...] }` in upload
// order, or 400 `{ error }` when no file is sent, the task is unknown, or
// max_tokens is not a positive integer.
app.post(
  "/caption/batch",
  {
    schema: {
      tags: ["caption"],
      summary: "Generate captions for multiple images",
      description:
        "Upload multiple images as multipart form data. All images share the same task and settings.",
      consumes: ["multipart/form-data"],
      response: {
        200: batchResponseSchema,
        400: errorSchema,
      },
    },
  },
  async (req, reply) => {
    const files = [];
    let task = "caption";
    let textInput = null;
    let maxTokens = 100;

    // Every file is buffered up front because the shared settings may
    // arrive after the files in the multipart body.
    for await (const part of req.parts()) {
      if (part.type === "file") {
        files.push({ filename: part.filename, buffer: await part.toBuffer() });
      } else if (part.fieldname === "task") {
        // Empty values fall back to the defaults, matching POST /caption.
        task = part.value || "caption";
      } else if (part.fieldname === "text") {
        textInput = part.value || null;
      } else if (part.fieldname === "max_tokens") {
        maxTokens = Number.parseInt(part.value || "100", 10);
      }
    }

    if (files.length === 0) {
      return reply.code(400).send({ error: "No files uploaded" });
    }

    // Check against the key list rather than `TASKS[task]`: a plain property
    // lookup would also accept inherited names such as "constructor".
    if (!taskEnum.includes(task)) {
      return reply
        .code(400)
        .send({ error: `Invalid task. Choose from: ${taskEnum.join(", ")}` });
    }

    // parseInt yields NaN for non-numeric input; reject that along with
    // zero and negative values instead of passing them to the model.
    if (!Number.isInteger(maxTokens) || maxTokens < 1) {
      return reply
        .code(400)
        .send({ error: "max_tokens must be a positive integer" });
    }

    // Images are processed one at a time, in the order they were uploaded.
    const results = [];
    for (const { filename, buffer } of files) {
      const result = await generateCaption(buffer, task, textInput, maxTokens);
      results.push({ filename, task, result });
    }
    return { results };
  }
);
// --- Start ---
// Listening port: taken from the PORT environment variable, else 7860.
const PORT = process.env.PORT || 7860;

// The model is loaded before the server begins accepting connections.
await loadModel();

// Bind on all interfaces; a failed bind is logged and ends the process
// with exit code 1.
try {
  await app.listen({ host: "0.0.0.0", port: PORT });
} catch (listenError) {
  app.log.error(listenError);
  process.exit(1);
}