Spaces:

d3evil4
/

Image2Caption

Running

App · Files · Community

Image2Caption / src /server.js

khushalcodiste's picture

feat: added

a022cd7 about 1 month ago

6.62 kB

	import Fastify from "fastify";
	import multipart from "@fastify/multipart";
	import swagger from "@fastify/swagger";
	import swaggerUi from "@fastify/swagger-ui";
	import cors from "@fastify/cors";
	import { generateCaption, loadModel, TASKS } from "./model.js";

	// Single Fastify instance used by every plugin and route in this module;
	// created with Fastify's built-in logger switched on.
	const app = Fastify({ logger: true });

	// --- Plugins ---
	// Per-file upload ceiling handed to the multipart plugin: 20 * 1024 * 1024 bytes.
	const multipartOptions = {
	  limits: { fileSize: 20 * 1024 * 1024 },
	};

	await app.register(cors);
	await app.register(multipart, multipartOptions);

	// Metadata for the generated OpenAPI document: title/description/version,
	// a single root-relative server entry, and the two tags the routes use.
	const openapiDocument = {
	  info: {
	    title: "img3txt — Image Captioning API",
	    description:
	      "Generate captions, OCR, object detection and more from images using Microsoft Florence-2 (ONNX).",
	    version: "1.0.0",
	  },
	  servers: [{ url: "/" }],
	  tags: [
	    { name: "caption", description: "Image captioning endpoints" },
	    { name: "health", description: "Health check" },
	  ],
	};

	// Options for the Swagger UI plugin, which is mounted under the /docs prefix.
	const swaggerUiOptions = {
	  routePrefix: "/docs",
	  uiConfig: { docExpansion: "list", deepLinking: true },
	};

	await app.register(swagger, { openapi: openapiDocument });

	await app.register(swaggerUi, swaggerUiOptions);

	// --- Schemas ---
	// Names of all supported tasks, taken from the keys of the imported TASKS map.
	const taskEnum = Object.keys(TASKS);

	// Fragment builders: every call returns a brand-new literal, so no schema
	// object is shared between the response schemas below.
	const stringField = () => ({ type: "string" });
	const freeFormObject = () => ({ type: "object", additionalProperties: true });

	// 200 body of POST /caption: the task that ran plus its free-form result.
	const captionResponseSchema = {
	  type: "object",
	  properties: {
	    task: { ...stringField(), example: "caption" },
	    result: freeFormObject(),
	  },
	};

	// One entry of the batch response: source filename, task and free-form result.
	const batchItemSchema = {
	  type: "object",
	  properties: {
	    filename: stringField(),
	    task: stringField(),
	    result: freeFormObject(),
	  },
	};

	// 200 body of POST /caption/batch: a `results` array of per-file entries.
	const batchResponseSchema = {
	  type: "object",
	  properties: {
	    results: { type: "array", items: batchItemSchema },
	  },
	};

	// Error body shared by the 400 responses: a single `error` message string.
	const errorSchema = {
	  type: "object",
	  properties: { error: stringField() },
	};

	// --- Routes ---

	// Landing page — HF Spaces iframe shows this
	// GET / — serves a static HTML page linking to the Swagger UI and the health
	// check and summarising the POST /caption form fields. `hide: true` keeps the
	// route out of the generated API documentation.
	app.get("/", { schema: { hide: true } }, async (_req, reply) => {
	  // An async handler that answers through reply.send() should return the reply
	  // object so Fastify does not also try to send the promise's resolved value.
	  // The template literal below is emitted verbatim, leading whitespace included.
	  return reply.type("text/html").send(`<!DOCTYPE html>
	<html lang="en"><head><meta charset="utf-8">
	<meta name="viewport" content="width=device-width,initial-scale=1">
	<title>img3txt — Florence-2 Image Captioning API</title>
	<style>
	*{margin:0;padding:0;box-sizing:border-box}
	body{font-family:system-ui,sans-serif;background:#0f172a;color:#e2e8f0;display:flex;align-items:center;justify-content:center;min-height:100vh}
	.card{background:#1e293b;border-radius:16px;padding:2.5rem;max-width:520px;width:90%;text-align:center;box-shadow:0 25px 50px rgba(0,0,0,.4)}
	h1{font-size:1.8rem;margin-bottom:.5rem}
	.sub{color:#94a3b8;margin-bottom:1.5rem}
	.btn{display:inline-block;padding:.75rem 1.5rem;background:#3b82f6;color:#fff;border-radius:8px;text-decoration:none;font-weight:600;margin:.25rem}
	.btn:hover{background:#2563eb}
	.tasks{margin-top:1.5rem;text-align:left;background:#0f172a;border-radius:8px;padding:1rem}
	.tasks code{color:#38bdf8}
	</style></head><body>
	<div class="card">
	<h1>img3txt</h1>
	<p class="sub">Image captioning, OCR & object detection powered by Florence-2 (ONNX)</p>
	<a class="btn" href="/docs">Swagger UI</a>
	<a class="btn" href="/health">Health Check</a>
	<div class="tasks">
	<p><strong>POST /caption</strong> with form fields:</p>
	<ul style="margin:.5rem 0 0 1.2rem;color:#94a3b8">
	<li><code>file</code> — image (required)</li>
	<li><code>task</code> — caption, detailed_caption, more_detailed_caption, ocr, ocr_with_region, object_detection, dense_region_caption, region_proposal</li>
	<li><code>max_tokens</code> — default 100</li>
	</ul>
	</div>
	</div></body></html>`);
	});

	// GET /health — replies with a constant "ok" status, the hard-coded model
	// identifier and the list of task names held in taskEnum.
	const healthRouteOptions = {
	  schema: {
	    tags: ["health"],
	    summary: "Health check",
	    response: {
	      200: {
	        type: "object",
	        properties: {
	          status: { type: "string", example: "ok" },
	          model: { type: "string" },
	          tasks: { type: "array", items: { type: "string" } },
	        },
	      },
	    },
	  },
	};

	app.get("/health", healthRouteOptions, async function healthHandler() {
	  const payload = {
	    status: "ok",
	    model: "onnx-community/Florence-2-base",
	    tasks: taskEnum,
	  };
	  return payload;
	});

	// POST /caption — runs one task on a single uploaded image.
	//
	// Multipart form fields read by the handler:
	//   (file part)  the image; required
	//   task         one of the TASKS keys; defaults to "caption"
	//   text         optional text forwarded to generateCaption; null when absent/empty
	//   max_tokens   positive integer forwarded to generateCaption; defaults to 100
	//
	// Replies 400 with { error } when no file is uploaded or when task / max_tokens
	// is invalid; otherwise resolves to { task, result }.
	app.post(
	  "/caption",
	  {
	    schema: {
	      tags: ["caption"],
	      summary: "Generate caption / OCR / detection for a single image",
	      description: `Upload an image as multipart form data. Supported tasks: ${taskEnum.join(", ")}`,
	      consumes: ["multipart/form-data"],
	      response: {
	        200: captionResponseSchema,
	        400: errorSchema,
	      },
	    },
	  },
	  async (req, reply) => {
	    const data = await req.file();
	    if (!data) {
	      return reply.code(400).send({ error: "No file uploaded" });
	    }

	    // NOTE(review): data.fields presumably only holds form fields that were sent
	    // before the file part — confirm against the @fastify/multipart docs.
	    const task = data.fields.task?.value || "caption";
	    const textInput = data.fields.text?.value || null;
	    const maxTokens = Number.parseInt(data.fields.max_tokens?.value || "100", 10);

	    // Check membership in the own-key list rather than TASKS[task], so inherited
	    // names such as "constructor" or "toString" are rejected.
	    if (!taskEnum.includes(task)) {
	      return reply
	        .code(400)
	        .send({ error: `Invalid task. Choose from: ${taskEnum.join(", ")}` });
	    }

	    // Reject NaN (unparseable input), zero and negative values before they reach the model.
	    if (!Number.isInteger(maxTokens) || maxTokens <= 0) {
	      return reply
	        .code(400)
	        .send({ error: "Invalid max_tokens. Must be a positive integer" });
	    }

	    // The upload is only buffered once the form fields are known to be valid.
	    const buffer = await data.toBuffer();
	    const result = await generateCaption(buffer, task, textInput, maxTokens);

	    return { task, result };
	  }
	);

	// POST /caption/batch — runs the same task on every uploaded image.
	//
	// Multipart parts read by the handler:
	//   (file parts)  one or more images; at least one is required
	//   task          one of the TASKS keys; defaults to "caption"
	//   text          optional text forwarded to generateCaption; null when absent/empty
	//   max_tokens    positive integer forwarded to generateCaption; defaults to 100
	// When a field is repeated, the last occurrence wins.
	//
	// Replies 400 with { error } when no file is uploaded or when task / max_tokens
	// is invalid; otherwise resolves to { results: [{ filename, task, result }, ...] }
	// in upload order.
	app.post(
	  "/caption/batch",
	  {
	    schema: {
	      tags: ["caption"],
	      summary: "Generate captions for multiple images",
	      description:
	        "Upload multiple images as multipart form data. All images share the same task and settings.",
	      consumes: ["multipart/form-data"],
	      response: {
	        200: batchResponseSchema,
	        400: errorSchema,
	      },
	    },
	  },
	  async (req, reply) => {
	    const files = [];
	    let task = "caption";
	    let textInput = null;
	    let maxTokens = 100;

	    // Every file part is buffered while iterating; blank field values fall back
	    // to the same defaults the single-image /caption route uses.
	    for await (const part of req.parts()) {
	      if (part.type === "file") {
	        files.push({ filename: part.filename, buffer: await part.toBuffer() });
	      } else if (part.fieldname === "task") {
	        task = part.value || "caption";
	      } else if (part.fieldname === "text") {
	        textInput = part.value || null;
	      } else if (part.fieldname === "max_tokens") {
	        maxTokens = Number.parseInt(part.value || "100", 10);
	      }
	    }

	    if (files.length === 0) {
	      return reply.code(400).send({ error: "No files uploaded" });
	    }

	    // Check membership in the own-key list rather than TASKS[task], so inherited
	    // names such as "constructor" or "toString" are rejected.
	    if (!taskEnum.includes(task)) {
	      return reply
	        .code(400)
	        .send({ error: `Invalid task. Choose from: ${taskEnum.join(", ")}` });
	    }

	    // Reject NaN (unparseable input), zero and negative values before they reach the model.
	    if (!Number.isInteger(maxTokens) || maxTokens <= 0) {
	      return reply
	        .code(400)
	        .send({ error: "Invalid max_tokens. Must be a positive integer" });
	    }

	    // Images are processed one at a time, in the order they were uploaded.
	    const results = [];
	    for (const { filename, buffer } of files) {
	      const result = await generateCaption(buffer, task, textInput, maxTokens);
	      results.push({ filename, task, result });
	    }

	    return { results };
	  }
	);

	// --- Start ---
	// Listening port: taken from the PORT environment variable when it is set and
	// non-empty, otherwise 7860. Environment values are strings, so coerce to a number.
	const PORT = Number(process.env.PORT || 7860);

	// Pre-load model then start server. Both steps are awaited inside one try block
	// so that a failure in either is logged through the app logger and ends the
	// process with exit code 1.
	try {
	  await loadModel();
	  await app.listen({ host: "0.0.0.0", port: PORT });
	} catch (err) {
	  app.log.error(err);
	  process.exit(1);
	}