Spaces:

videopix
/

image2caption

Sleeping

App Files Files Community

videopix committed on Dec 1, 2025

Commit

b4b4755

verified ·

1 Parent(s): 866a0cc

Update app.py

Browse files

Files changed (1) hide show

app.py +160 -17

app.py CHANGED Viewed

@@ -1,12 +1,22 @@
-import gradio as gr
-import torch
 from PIL import Image
 from transformers import AutoProcessor, AutoModelForCausalLM
-# Choose device
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load Florence-2 Base model
 processor = AutoProcessor.from_pretrained(
     "microsoft/Florence-2-base",
     trust_remote_code=True
@@ -17,10 +27,10 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True
 ).to(device).eval()
-def generate_caption(image):
-    if not isinstance(image, Image.Image):
-        image = Image.fromarray(image)
     inputs = processor(
         text="<MORE_DETAILED_CAPTION>",
         images=image,
@@ -31,7 +41,7 @@ def generate_caption(image):
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
         max_new_tokens=256,
-        num_beams=3,
     )
     decoded = processor.batch_decode(output_ids, skip_special_tokens=False)[0]
@@ -44,13 +54,146 @@ def generate_caption(image):
     return parsed["<MORE_DETAILED_CAPTION>"]
-# Gradio interface
-io = gr.Interface(
-    fn=generate_caption,
-    inputs=gr.Image(label="Upload Image"),
-    outputs=gr.Textbox(label="Generated Caption", lines=3),
-    title="Image to Caption Generator",
-    description="Upload an image and get a detailed AI-generated caption."
-)
-io.launch(debug=True)

+import io
+import asyncio
+import threading
+import time
+from fastapi import FastAPI, File, UploadFile
+from fastapi.responses import JSONResponse, HTMLResponse
+from fastapi.staticfiles import StaticFiles
 from PIL import Image
+import torch
 from transformers import AutoProcessor, AutoModelForCausalLM
+import requests
+app = FastAPI(title="Image Caption API")
+# -------------------------
+# Load Model
+# -------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
 processor = AutoProcessor.from_pretrained(
     "microsoft/Florence-2-base",
     trust_remote_code=True
     trust_remote_code=True
 ).to(device).eval()
+inference_lock = asyncio.Lock()
+def caption_image(image: Image.Image) -> str:
     inputs = processor(
         text="<MORE_DETAILED_CAPTION>",
         images=image,
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
         max_new_tokens=256,
+        num_beams=3
     )
     decoded = processor.batch_decode(output_ids, skip_special_tokens=False)[0]
     return parsed["<MORE_DETAILED_CAPTION>"]
+# -------------------------
+# API Endpoint
+# -------------------------
+@app.post("/img2caption")
+async def img2caption(file: UploadFile = File(...)):
+    try:
+        data = await file.read()
+        image = Image.open(io.BytesIO(data)).convert("RGB")
+        async with inference_lock:
+            caption = caption_image(image)
+        return {"caption": caption}
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+# -------------------------
+# HTML UI
+# -------------------------
+@app.get("/", response_class=HTMLResponse)
+def ui():
+    return """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Image Caption Generator</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            max-width: 650px;
+            margin: 40px auto;
+            padding: 20px;
+            background: #fafafa;
+        }
+        h2 {
+            text-align: center;
+        }
+        #preview {
+            width: 100%;
+            margin-top: 15px;
+            display: none;
+            border-radius: 8px;
+        }
+        #captionBox {
+            margin-top: 20px;
+            padding: 15px;
+            background: #eee;
+            border-radius: 6px;
+            display: none;
+        }
+        button {
+            padding: 12px 20px;
+            margin-top: 10px;
+            width: 100%;
+            background: #4A90E2;
+            color: white;
+            font-size: 16px;
+            border: none;
+            border-radius: 6px;
+            cursor: pointer;
+        }
+        button:hover {
+            background: #357ABD;
+        }
+    </style>
+</head>
+<body>
+    <h2>Image to Caption Generator</h2>
+    <input type="file" id="imageInput" accept="image/*">
+    <img id="preview">
+    <button onclick="generateCaption()">Generate Caption</button>
+    <div id="captionBox"></div>
+<script>
+    const imgInput = document.getElementById("imageInput");
+    const preview = document.getElementById("preview");
+    const captionBox = document.getElementById("captionBox");
+    imgInput.onchange = () => {
+        const file = imgInput.files[0];
+        if (file) {
+            preview.src = URL.createObjectURL(file);
+            preview.style.display = "block";
+        }
+    };
+    async function generateCaption() {
+        const file = imgInput.files[0];
+        if (!file) {
+            alert("Please upload an image.");
+            return;
+        }
+        const formData = new FormData();
+        formData.append("file", file);
+        captionBox.style.display = "block";
+        captionBox.innerHTML = "Generating caption...";
+        const response = await fetch("/img2caption", {
+            method: "POST",
+            body: formData
+        });
+        const result = await response.json();
+        captionBox.innerHTML = result.caption || result.error;
+    }
+</script>
+</body>
+</html>
+"""
+# -------------------------
+# Keep HF Space alive
+# -------------------------
+SPACE_URL = "https://YOUR-SPACE-NAME.hf.space/health"
+def keep_alive():
+    while True:
+        try:
+            requests.get(SPACE_URL, timeout=5)
+        except:
+            pass
+        time.sleep(240)
+threading.Thread(target=keep_alive, daemon=True).start()
+@app.get("/health")
+def health():
+    return {"status": "ok"}