Spaces:

videopix
/

image2caption

Sleeping

App Files Community

videopix committed on Dec 2, 2025

Commit

b53989e

verified ·

1 Parent(s): 735c9b7

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -18

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import asyncio
 import threading
 import time
 from fastapi import FastAPI, File, UploadFile, Header
-from fastapi.responses import JSONResponse, HTMLResponse
 from PIL import Image
 import torch
 from transformers import AutoProcessor, AutoModelForCausalLM
@@ -22,11 +22,10 @@ processor = None
 model = None
 model_lock = asyncio.Lock()
-# Hugging Face token stored in HF Secrets
 HF_TOKEN = os.getenv("img2caption")
 async def load_model():
-    """Load Florence model only when first needed."""
     global processor, model
     if model is None:
@@ -41,7 +40,6 @@ async def load_model():
 def run_caption(image: Image.Image) -> str:
-    """Perform caption generation."""
     inputs = processor(
         text="<MORE_DETAILED_CAPTION>",
         images=image,
@@ -67,43 +65,41 @@ def run_caption(image: Image.Image) -> str:
 # ---------------------------------------------------
-# API Endpoint (Token enforced only when app sends a token)
 # ---------------------------------------------------
-@app.post("/img2caption")
 async def img2caption(
     file: UploadFile = File(...),
     authorization: str = Header(None)
 ):
-    # Apps must send token → enforce check
-    # UI sends no token → skip check → allow
     if authorization is not None:
         if not authorization.startswith("Bearer "):
-            return JSONResponse({"error": "Invalid token format"}, status_code=403)
         token = authorization.replace("Bearer ", "").strip()
         if token != HF_TOKEN:
-            return JSONResponse({"error": "Invalid token"}, status_code=403)
     try:
-        # Ensure model is loaded
         async with model_lock:
             await load_model()
-        # Read and convert image
         data = await file.read()
         image = Image.open(io.BytesIO(data)).convert("RGB")
-        # Caption
         caption = run_caption(image)
-        return {"caption": caption}
     except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
 # ---------------------------------------------------
-# Simple HTML UI (NO token required)
 # ---------------------------------------------------
 @app.get("/", response_class=HTMLResponse)
 def ui():
@@ -173,8 +169,8 @@ def ui():
             body: form
         });
-        const data = await res.json();
-        captionBox.innerHTML = data.caption || data.error;
     }
 </script>

 import threading
 import time
 from fastapi import FastAPI, File, UploadFile, Header
+from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
 from PIL import Image
 import torch
 from transformers import AutoProcessor, AutoModelForCausalLM
 model = None
 model_lock = asyncio.Lock()
+# Hugging Face token stored in Space secrets
 HF_TOKEN = os.getenv("img2caption")
 async def load_model():
     global processor, model
     if model is None:
 def run_caption(image: Image.Image) -> str:
     inputs = processor(
         text="<MORE_DETAILED_CAPTION>",
         images=image,
 # ---------------------------------------------------
+# API Endpoint (Protected only if token is sent)
 # ---------------------------------------------------
+@app.post("/img2caption", response_class=PlainTextResponse)
 async def img2caption(
     file: UploadFile = File(...),
     authorization: str = Header(None)
 ):
+    # If app sends a token → validate it
     if authorization is not None:
         if not authorization.startswith("Bearer "):
+            return PlainTextResponse("Invalid token format", status_code=403)
         token = authorization.replace("Bearer ", "").strip()
         if token != HF_TOKEN:
+            return PlainTextResponse("Invalid token", status_code=403)
     try:
         async with model_lock:
             await load_model()
         data = await file.read()
         image = Image.open(io.BytesIO(data)).convert("RGB")
         caption = run_caption(image)
+        # Return ONLY the caption string, no JSON
+        return caption
     except Exception as e:
+        return PlainTextResponse(f"Error: {str(e)}", status_code=500)
 # ---------------------------------------------------
+# Simple HTML UI (no token required)
 # ---------------------------------------------------
 @app.get("/", response_class=HTMLResponse)
 def ui():
             body: form
         });
+        const text = await res.text();
+        captionBox.innerHTML = text;
     }
 </script>