Spaces:

videopix
/

image2caption

Sleeping

App Files Files Community

videopix committed on Dec 2, 2025

Commit

256d97e

verified ·

1 Parent(s): 2afc806

Update app_working_api.py

Browse files

Files changed (1) hide show

app_working_api.py +114 -24

app_working_api.py CHANGED Viewed

@@ -3,43 +3,53 @@ import asyncio
 import threading
 import time
 from fastapi import FastAPI, File, UploadFile
-from fastapi.responses import JSONResponse
 from PIL import Image
 import torch
 from transformers import AutoProcessor, AutoModelForCausalLM
 import requests
-app = FastAPI(title="Image Caption API")
-# Load model once at startup
 device = "cuda" if torch.cuda.is_available() else "cpu"
-processor = AutoProcessor.from_pretrained(
-    "microsoft/Florence-2-base",
-    trust_remote_code=True
-)
-model = AutoModelForCausalLM.from_pretrained(
-    "microsoft/Florence-2-base",
-    trust_remote_code=True
-).to(device).eval()
-# A lock to allow multiple requests safely
-inference_lock = asyncio.Lock()
-def caption_image(image: Image.Image) -> str:
     inputs = processor(
         text="<MORE_DETAILED_CAPTION>",
         images=image,
-        return_tensors="pt",
     ).to(device)
     output_ids = model.generate(
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
         max_new_tokens=256,
-        num_beams=3,
     )
     decoded = processor.batch_decode(output_ids, skip_special_tokens=False)[0]
@@ -47,22 +57,28 @@ def caption_image(image: Image.Image) -> str:
     parsed = processor.post_process_generation(
         decoded,
         task="<MORE_DETAILED_CAPTION>",
-        image_size=(image.width, image.height),
     )
     return parsed["<MORE_DETAILED_CAPTION>"]
 @app.post("/img2caption")
 async def img2caption(file: UploadFile = File(...)):
     try:
-        # Read image
         data = await file.read()
         image = Image.open(io.BytesIO(data)).convert("RGB")
-        # Protect inference in async server
-        async with inference_lock:
-            caption = caption_image(image)
         return {"caption": caption}
@@ -70,6 +86,80 @@ async def img2caption(file: UploadFile = File(...)):
         return JSONResponse({"error": str(e)}, status_code=500)
-@app.get("/health")
-async def health():
-    return {"status": "ok"}

 import threading
 import time
 from fastapi import FastAPI, File, UploadFile
+from fastapi.responses import JSONResponse, HTMLResponse
 from PIL import Image
 import torch
 from transformers import AutoProcessor, AutoModelForCausalLM
 import requests
+# ---------------------------------------------------
+# FastAPI App
+# ---------------------------------------------------
+app = FastAPI(title="Florence Image Caption API")
+# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Lazy load model on first request (prevents HF timeout)
+# Both globals stay None until load_model() has filled them in.
+processor = None
+model = None
+# Held by the /img2caption handler around load_model() so that the
+# first-use load is not started by two requests at once.
+model_lock = asyncio.Lock()
+async def load_model():
+    """Load the Florence-2 processor and model the first time they are needed.
+
+    Fills in the module-level ``processor`` and ``model`` globals. Once
+    ``model`` is set, later calls return immediately. The ``/img2caption``
+    handler awaits this while holding ``model_lock``.
+
+    The blocking ``from_pretrained`` calls run in the default executor so
+    the event loop is not stalled while the weights are fetched and
+    initialised.
+    """
+    global processor, model
+    if model is not None:
+        return
+
+    def _load():
+        # Blocking work: executed in a worker thread, not on the event loop.
+        loaded_processor = AutoProcessor.from_pretrained(
+            "microsoft/Florence-2-base",
+            trust_remote_code=True
+        )
+        loaded_model = AutoModelForCausalLM.from_pretrained(
+            "microsoft/Florence-2-base",
+            trust_remote_code=True
+        ).to(device).eval()
+        return loaded_processor, loaded_model
+
+    loop = asyncio.get_running_loop()
+    # Publish both globals together, only after both loads have succeeded,
+    # so a failed model load cannot leave a processor without a model.
+    processor, model = await loop.run_in_executor(None, _load)
+def run_caption(image: Image.Image) -> str:
+    """Generate a detailed caption for ``image``.
+
+    Uses the module-level ``processor`` and ``model``. Both start out as
+    ``None``, so ``load_model()`` must have completed before this is called.
+
+    Args:
+        image: The picture to describe.
+
+    Returns:
+        The value stored under ``"<MORE_DETAILED_CAPTION>"`` in the
+        post-processed generation result.
+    """
+    # Build the model inputs from the task prompt and the image, then move
+    # them to ``device``.
     inputs = processor(
         text="<MORE_DETAILED_CAPTION>",
         images=image,
+        return_tensors="pt"
     ).to(device)
+    # Beam search with 3 beams, capped at 256 newly generated tokens.
     output_ids = model.generate(
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
         max_new_tokens=256,
+        num_beams=3
     )
+    # Special tokens are kept in the decoded text (presumably required by
+    # post_process_generation; confirm). Only the first sequence is used.
     decoded = processor.batch_decode(output_ids, skip_special_tokens=False)[0]
+    # Parse the raw text for this task, passing the image's (width, height).
     parsed = processor.post_process_generation(
         decoded,
         task="<MORE_DETAILED_CAPTION>",
+        image_size=(image.width, image.height)
     )
     return parsed["<MORE_DETAILED_CAPTION>"]
+# ---------------------------------------------------
+# API Endpoint
+# ---------------------------------------------------
 @app.post("/img2caption")
 async def img2caption(file: UploadFile = File(...)):
+    # Accepts one uploaded file, opens it as an RGB image and responds with
+    # {"caption": ...}; the error path responds with {"error": ...} and
+    # HTTP status 500.
     try:
+        # Ensure model is loaded
+        async with model_lock:
+            await load_model()
+        # Read and convert image
         data = await file.read()
         image = Image.open(io.BytesIO(data)).convert("RGB")
+        # Caption
+        # NOTE(review): run_caption() is a plain synchronous call inside this
+        # coroutine, so it presumably blocks the event loop while the caption
+        # is generated; confirm and consider offloading it.
+        caption = run_caption(image)
         return {"caption": caption}
+        # NOTE(review): no ``except`` clause is visible between these two
+        # returns in this diff view. The line that binds ``e`` is presumably
+        # an elided unchanged line; confirm against the full file.
         return JSONResponse({"error": str(e)}, status_code=500)
+# ---------------------------------------------------
+# Simple HTML UI
+# ---------------------------------------------------
+@app.get("/", response_class=HTMLResponse)
+def ui():
+    # Serves one static page: a file picker, an image preview and a button
+    # that POSTs the chosen file to /img2caption and shows the result.
+    #
+    # Text received from the server is written with textContent rather than
+    # innerHTML so it can never be parsed as markup. A failed request or a
+    # payload with neither "caption" nor "error" shows a readable message
+    # instead of "undefined" or an unhandled promise rejection.
+    return """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Image Caption Generator</title>
+    <style>
+        body { font-family: Arial; max-width: 650px; margin: 40px auto; }
+        h2 { text-align: center; }
+        #preview {
+            width: 100%; margin-top: 15px; display: none;
+            border-radius: 8px;
+        }
+        #captionBox {
+            margin-top: 20px; padding: 15px;
+            background: #eee; border-radius: 6px; display: none;
+        }
+        button {
+            padding: 12px; width: 100%; margin-top: 10px;
+            background: #4A90E2; color: white; border: none;
+            border-radius: 6px; cursor: pointer; font-size: 16px;
+        }
+        button:hover { background: #357ABD; }
+    </style>
+</head>
+<body>
+    <h2>Image Caption Generator</h2>
+    <input type="file" id="imageInput" accept="image/*">
+    <img id="preview">
+    <button onclick="generateCaption()">Generate Caption</button>
+    <div id="captionBox"></div>
+<script>
+    const imageInput = document.getElementById("imageInput");
+    const preview = document.getElementById("preview");
+    const captionBox = document.getElementById("captionBox");
+    imageInput.onchange = () => {
+        const f = imageInput.files[0];
+        if (f) {
+            preview.src = URL.createObjectURL(f);
+            preview.style.display = "block";
+        }
+    };
+    async function generateCaption() {
+        const f = imageInput.files[0];
+        if (!f) {
+            alert("Upload an image first");
+            return;
+        }
+        const form = new FormData();
+        form.append("file", f);
+        captionBox.style.display = "block";
+        captionBox.textContent = "Generating caption...";
+        try {
+            const res = await fetch("/img2caption", {
+                method: "POST",
+                body: form
+            });
+            const data = await res.json();
+            captionBox.textContent =
+                data.caption || data.error || "No caption returned";
+        } catch (err) {
+            captionBox.textContent = "Request failed: " + err;
+        }
+    }
+</script>
+</body>
+</html>
+"""
+def keep_alive():
+    # Currently a no-op. It is called once from the ``__main__`` guard just
+    # before the server is started.
+    pass
+if __name__ == "__main__":
+    # Script entry point: announce the launch, call keep_alive(), then serve
+    # ``app`` with uvicorn on all interfaces, port 7860.
+    import uvicorn
+    print("🚀 Launching Fast img2caption API")
+    keep_alive()
+    uvicorn.run(app, host="0.0.0.0", port=7860)