Spaces:

videopix
/

image2caption

Running

App Files Files Community

videopix committed on Dec 1, 2025

Commit

859e47e

verified ·

1 Parent(s): b4b4755

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -38

app.py CHANGED Viewed

@@ -4,17 +4,19 @@ import threading
 import time
 from fastapi import FastAPI, File, UploadFile
 from fastapi.responses import JSONResponse, HTMLResponse
-from fastapi.staticfiles import StaticFiles
 from PIL import Image
 import torch
 from transformers import AutoProcessor, AutoModelForCausalLM
 import requests
-app = FastAPI(title="Image Caption API")
-# -------------------------
-# Load Model
-# -------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
 processor = AutoProcessor.from_pretrained(
@@ -27,10 +29,12 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True
 ).to(device).eval()
 inference_lock = asyncio.Lock()
 def caption_image(image: Image.Image) -> str:
     inputs = processor(
         text="<MORE_DETAILED_CAPTION>",
         images=image,
@@ -44,7 +48,9 @@ def caption_image(image: Image.Image) -> str:
         num_beams=3
     )
-    decoded = processor.batch_decode(output_ids, skip_special_tokens=False)[0]
     parsed = processor.post_process_generation(
         decoded,
@@ -55,45 +61,47 @@ def caption_image(image: Image.Image) -> str:
     return parsed["<MORE_DETAILED_CAPTION>"]
-# -------------------------
-# API Endpoint
-# -------------------------
 @app.post("/img2caption")
 async def img2caption(file: UploadFile = File(...)):
     try:
         data = await file.read()
         image = Image.open(io.BytesIO(data)).convert("RGB")
         async with inference_lock:
             caption = caption_image(image)
         return {"caption": caption}
     except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-# -------------------------
-# HTML UI
-# -------------------------
 @app.get("/", response_class=HTMLResponse)
 def ui():
     return """
 <!DOCTYPE html>
 <html>
 <head>
-    <title>Image Caption Generator</title>
     <style>
         body {
-            font-family: Arial, sans-serif;
             max-width: 650px;
             margin: 40px auto;
             padding: 20px;
             background: #fafafa;
         }
-        h2 {
-            text-align: center;
-        }
         #preview {
             width: 100%;
             margin-top: 15px;
@@ -108,27 +116,26 @@ def ui():
             display: none;
         }
         button {
-            padding: 12px 20px;
             margin-top: 10px;
             width: 100%;
-            background: #4A90E2;
             color: white;
-            font-size: 16px;
             border: none;
             border-radius: 6px;
             cursor: pointer;
         }
         button:hover {
-            background: #357ABD;
         }
     </style>
 </head>
 <body>
-    <h2>Image to Caption Generator</h2>
     <input type="file" id="imageInput" accept="image/*">
     <img id="preview">
     <button onclick="generateCaption()">Generate Caption</button>
@@ -151,24 +158,23 @@ def ui():
     async function generateCaption() {
         const file = imgInput.files[0];
         if (!file) {
-            alert("Please upload an image.");
             return;
         }
-        const formData = new FormData();
-        formData.append("file", file);
         captionBox.style.display = "block";
         captionBox.innerHTML = "Generating caption...";
-        const response = await fetch("/img2caption", {
             method: "POST",
-            body: formData
         });
-        const result = await response.json();
-        captionBox.innerHTML = result.caption || result.error;
     }
 </script>
@@ -177,17 +183,15 @@ def ui():
 """
-# -------------------------
-# Keep HF Space alive
-# -------------------------
-SPACE_URL = "https://YOUR-SPACE-NAME.hf.space/health"
 def keep_alive():
     while True:
         try:
             requests.get(SPACE_URL, timeout=5)
-        except:
             pass
         time.sleep(240)

 import time
 from fastapi import FastAPI, File, UploadFile
 from fastapi.responses import JSONResponse, HTMLResponse
 from PIL import Image
 import torch
 from transformers import AutoProcessor, AutoModelForCausalLM
 import requests
+# ---------------------------------------------------
+# FastAPI application
+# ---------------------------------------------------
+app = FastAPI(title="Florence Image Caption API")
+# ---------------------------------------------------
+# Load model once
+# ---------------------------------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
 processor = AutoProcessor.from_pretrained(
     trust_remote_code=True
 ).to(device).eval()
+# Concurrency lock so HF Spaces doesn't crash under load
 inference_lock = asyncio.Lock()
 def caption_image(image: Image.Image) -> str:
+    """Generate caption for a single image."""
     inputs = processor(
         text="<MORE_DETAILED_CAPTION>",
         images=image,
         num_beams=3
     )
+    decoded = processor.batch_decode(
+        output_ids, skip_special_tokens=False
+    )[0]
     parsed = processor.post_process_generation(
         decoded,
     return parsed["<MORE_DETAILED_CAPTION>"]
+# ---------------------------------------------------
+# API endpoint
+# ---------------------------------------------------
 @app.post("/img2caption")
 async def img2caption(file: UploadFile = File(...)):
     try:
         data = await file.read()
         image = Image.open(io.BytesIO(data)).convert("RGB")
+        # Protect GPU inference
         async with inference_lock:
             caption = caption_image(image)
         return {"caption": caption}
     except Exception as e:
+        return JSONResponse(
+            {"error": str(e)},
+            status_code=500
+        )
+# ---------------------------------------------------
+# Custom UI (HTML + CSS + JS)
+# ---------------------------------------------------
 @app.get("/", response_class=HTMLResponse)
 def ui():
     return """
 <!DOCTYPE html>
 <html>
 <head>
+    <title>Florence Image Captioning</title>
     <style>
         body {
+            font-family: Arial;
             max-width: 650px;
             margin: 40px auto;
             padding: 20px;
             background: #fafafa;
         }
+        h2 { text-align: center; }
         #preview {
             width: 100%;
             margin-top: 15px;
             display: none;
         }
         button {
+            padding: 12px;
             margin-top: 10px;
             width: 100%;
+            background: #4a90e2;
             color: white;
             border: none;
             border-radius: 6px;
             cursor: pointer;
+            font-size: 16px;
         }
         button:hover {
+            background: #357abd;
         }
     </style>
 </head>
 <body>
+    <h2>Image Caption Generator</h2>
     <input type="file" id="imageInput" accept="image/*">
     <img id="preview">
     <button onclick="generateCaption()">Generate Caption</button>
     async function generateCaption() {
         const file = imgInput.files[0];
         if (!file) {
+            alert("Upload an image first");
             return;
         }
+        const form = new FormData();
+        form.append("file", file);
         captionBox.style.display = "block";
         captionBox.innerHTML = "Generating caption...";
+        const res = await fetch("/img2caption", {
             method: "POST",
+            body: form
         });
+        const data = await res.json();
+        captionBox.innerHTML = data.caption || data.error;
     }
 </script>
 """
+# ---------------------------------------------------
+# Keep-alive system to prevent HF auto-sleep
+# ---------------------------------------------------
 def keep_alive():
     while True:
         try:
             requests.get(SPACE_URL, timeout=5)
+        except Exception:
             pass
         time.sleep(240)