Spaces:
Running
Running
| import Fastify from "fastify"; | |
| import multipart from "@fastify/multipart"; | |
| import swagger from "@fastify/swagger"; | |
| import swaggerUi from "@fastify/swagger-ui"; | |
| import cors from "@fastify/cors"; | |
| import { generateCaption, loadModel, TASKS } from "./model.js"; | |
// --- Server instance ---
const app = Fastify({ logger: true });

// --- Plugins ---

// Value handed to @fastify/multipart as `limits.fileSize` (20 * 1024 * 1024).
const MAX_UPLOAD_BYTES = 20 * 1024 * 1024;

// OpenAPI document metadata served by @fastify/swagger.
const openapiDocument = {
  info: {
    title: "img3txt — Image Captioning API",
    description:
      "Generate captions, OCR, object detection and more from images using Microsoft Florence-2 (ONNX).",
    version: "1.0.0",
  },
  servers: [{ url: "/" }],
  tags: [
    { name: "caption", description: "Image captioning endpoints" },
    { name: "health", description: "Health check" },
  ],
};

// Options for the Swagger UI plugin; the UI is mounted under /docs.
const swaggerUiOptions = {
  routePrefix: "/docs",
  uiConfig: { docExpansion: "list", deepLinking: true },
};

// Registration order is kept as-is: cors, multipart, swagger, swagger-ui.
await app.register(cors);
await app.register(multipart, { limits: { fileSize: MAX_UPLOAD_BYTES } });
await app.register(swagger, { openapi: openapiDocument });
await app.register(swaggerUi, swaggerUiOptions);

// --- Schemas ---

// Task names accepted by the API: the keys of TASKS from ./model.js.
const taskEnum = Object.keys(TASKS);
/**
 * Build an `object` JSON schema with the given properties.
 * @param {Record<string, object>} properties - property name -> sub-schema
 * @returns {{ type: "object", properties: Record<string, object> }}
 */
const objectWith = (properties) => ({ type: "object", properties });

/**
 * Build a fresh free-form object schema (any additional properties allowed).
 * A new object is returned on every call so schemas never share references.
 * @returns {{ type: "object", additionalProperties: true }}
 */
const looseObject = () => ({ type: "object", additionalProperties: true });

/**
 * Build a plain string schema.
 * @returns {{ type: "string" }}
 */
const stringField = () => ({ type: "string" });

// 200 response of POST /caption: the task that ran plus its free-form result.
const captionResponseSchema = objectWith({
  task: { type: "string", example: "caption" },
  result: looseObject(),
});

// 200 response of POST /caption/batch: one entry per uploaded file.
const batchResponseSchema = objectWith({
  results: {
    type: "array",
    items: objectWith({
      filename: stringField(),
      task: stringField(),
      result: looseObject(),
    }),
  },
});

// Error payload used for 400 responses.
const errorSchema = objectWith({
  error: stringField(),
});
// --- Routes ---

// Static landing page markup. It contains no interpolation, so it is defined
// once at module level instead of inside the request handler.
const LANDING_PAGE_HTML = `<!DOCTYPE html>
<html lang="en"><head><meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>img3txt — Florence-2 Image Captioning API</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}
body{font-family:system-ui,sans-serif;background:#0f172a;color:#e2e8f0;display:flex;align-items:center;justify-content:center;min-height:100vh}
.card{background:#1e293b;border-radius:16px;padding:2.5rem;max-width:520px;width:90%;text-align:center;box-shadow:0 25px 50px rgba(0,0,0,.4)}
h1{font-size:1.8rem;margin-bottom:.5rem}
.sub{color:#94a3b8;margin-bottom:1.5rem}
.btn{display:inline-block;padding:.75rem 1.5rem;background:#3b82f6;color:#fff;border-radius:8px;text-decoration:none;font-weight:600;margin:.25rem}
.btn:hover{background:#2563eb}
.tasks{margin-top:1.5rem;text-align:left;background:#0f172a;border-radius:8px;padding:1rem}
.tasks code{color:#38bdf8}
</style></head><body>
<div class="card">
<h1>img3txt</h1>
<p class="sub">Image captioning, OCR & object detection powered by Florence-2 (ONNX)</p>
<a class="btn" href="/docs">Swagger UI</a>
<a class="btn" href="/health">Health Check</a>
<div class="tasks">
<p><strong>POST /caption</strong> with form fields:</p>
<ul style="margin:.5rem 0 0 1.2rem;color:#94a3b8">
<li><code>file</code> — image (required)</li>
<li><code>task</code> — caption, detailed_caption, more_detailed_caption, ocr, ocr_with_region, object_detection, dense_region_caption, region_proposal</li>
<li><code>max_tokens</code> — default 100</li>
</ul>
</div>
</div></body></html>`;

// Landing page — HF Spaces iframe shows this.
// The route is hidden from the generated OpenAPI document (`hide: true`).
app.get(
  "/",
  { schema: { hide: true } },
  async (req, reply) => {
    // Return the reply so the async handler does not resolve with `undefined`
    // after the response was already sent; this matches how the other
    // handlers in this file use `return reply...send(...)`.
    return reply.type("text/html").send(LANDING_PAGE_HTML);
  }
);
// Health check: reports a fixed status, the model identifier and the task list.
app.get(
  "/health",
  {
    schema: {
      tags: ["health"],
      summary: "Health check",
      response: {
        200: {
          type: "object",
          properties: {
            status: { type: "string", example: "ok" },
            model: { type: "string" },
            tasks: { type: "array", items: { type: "string" } },
          },
        },
      },
    },
  },
  async function healthCheck() {
    const payload = {
      status: "ok",
      model: "onnx-community/Florence-2-base",
      tasks: taskEnum,
    };
    return payload;
  }
);
/**
 * POST /caption — run one task on a single uploaded image.
 *
 * Multipart form fields read by the handler:
 *   - file part          the image (required)
 *   - task               one of `taskEnum`; defaults to "caption"
 *   - text               optional text input forwarded to generateCaption
 *   - max_tokens         positive integer; defaults to 100
 *
 * Responds 400 with `{ error }` when no file is uploaded, the task is not
 * one of `taskEnum`, or max_tokens is not a positive integer. Otherwise
 * responds with `{ task, result }`.
 */
app.post(
  "/caption",
  {
    schema: {
      tags: ["caption"],
      summary: "Generate caption / OCR / detection for a single image",
      description: `Upload an image as multipart form data. Supported tasks: ${taskEnum.join(", ")}`,
      consumes: ["multipart/form-data"],
      response: {
        200: captionResponseSchema,
        400: errorSchema,
      },
    },
  },
  async (req, reply) => {
    const data = await req.file();
    if (!data) {
      return reply.code(400).send({ error: "No file uploaded" });
    }

    // NOTE(review): `data.fields` may only contain fields that arrived before
    // the file part in the request body — confirm against @fastify/multipart.
    const task = data.fields.task?.value || "caption";
    const textInput = data.fields.text?.value || null;
    const maxTokens = Number.parseInt(data.fields.max_tokens?.value || "100", 10);

    // Check membership in the list of own keys rather than `TASKS[task]`, so
    // inherited names such as "constructor" or "toString" are rejected.
    if (!taskEnum.includes(task)) {
      return reply
        .code(400)
        .send({ error: `Invalid task. Choose from: ${taskEnum.join(", ")}` });
    }

    // parseInt yields NaN for non-numeric input; reject it (and non-positive
    // values) here instead of forwarding it to the model.
    if (!Number.isInteger(maxTokens) || maxTokens < 1) {
      return reply
        .code(400)
        .send({ error: "max_tokens must be a positive integer" });
    }

    const buffer = await data.toBuffer();
    const result = await generateCaption(buffer, task, textInput, maxTokens);
    return { task, result };
  }
);
/**
 * POST /caption/batch — run the same task on every uploaded image.
 *
 * Multipart parts read by the handler:
 *   - any file part      buffered in memory together with its filename
 *   - task               one of `taskEnum`; defaults to "caption"
 *   - text               optional text input forwarded to generateCaption
 *   - max_tokens         positive integer; defaults to 100
 *
 * Responds 400 with `{ error }` when no files are uploaded, the task is not
 * one of `taskEnum`, or max_tokens is not a positive integer. Otherwise
 * responds with `{ results: [{ filename, task, result }, ...] }` in upload
 * order.
 */
app.post(
  "/caption/batch",
  {
    schema: {
      tags: ["caption"],
      summary: "Generate captions for multiple images",
      description:
        "Upload multiple images as multipart form data. All images share the same task and settings.",
      consumes: ["multipart/form-data"],
      response: {
        200: batchResponseSchema,
        400: errorSchema,
      },
    },
  },
  async (req, reply) => {
    const parts = req.parts();
    const files = [];
    let task = "caption";
    let textInput = null;
    let maxTokens = 100;

    for await (const part of parts) {
      if (part.type === "file") {
        // Each file is buffered as it is encountered in the part stream.
        files.push({ filename: part.filename, buffer: await part.toBuffer() });
      } else if (part.fieldname === "task") {
        task = part.value;
      } else if (part.fieldname === "text") {
        textInput = part.value;
      } else if (part.fieldname === "max_tokens") {
        // An empty value falls back to the default, as in POST /caption.
        maxTokens = Number.parseInt(part.value || "100", 10);
      }
    }

    if (files.length === 0) {
      return reply.code(400).send({ error: "No files uploaded" });
    }

    // Check membership in the list of own keys rather than `TASKS[task]`, so
    // inherited names such as "constructor" or "toString" are rejected.
    if (!taskEnum.includes(task)) {
      return reply
        .code(400)
        .send({ error: `Invalid task. Choose from: ${taskEnum.join(", ")}` });
    }

    // parseInt yields NaN for non-numeric input; reject it (and non-positive
    // values) here instead of forwarding it to the model.
    if (!Number.isInteger(maxTokens) || maxTokens < 1) {
      return reply
        .code(400)
        .send({ error: "max_tokens must be a positive integer" });
    }

    // Images are processed one at a time, in upload order.
    const results = [];
    for (const f of files) {
      const result = await generateCaption(f.buffer, task, textInput, maxTokens);
      results.push({ filename: f.filename, task, result });
    }
    return { results };
  }
);
// --- Start ---

// Port comes from the PORT environment variable when set, otherwise 7860.
const PORT = process.env.PORT || 7860;

// The model is loaded first, so the server only starts listening afterwards.
await loadModel();

try {
  await app.listen({ host: "0.0.0.0", port: PORT });
} catch (err) {
  // Failing to bind is fatal: log the error and exit with a non-zero code.
  app.log.error(err);
  process.exit(1);
}