# Hugging Face Spaces page header (Space status: Sleeping)
import io
import asyncio
import hmac
import threading
import time
import os

from fastapi import FastAPI, File, UploadFile, Header
from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
from PIL import Image
import torch
from transformers import AutoProcessor, AutoModelForCausalLM
import requests
# ---------------------------------------------------
# Application setup
# ---------------------------------------------------
app = FastAPI(title="Florence Image Caption API")

# Use the GPU whenever torch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor and model are loaded lazily on the first request rather than
# at import time (the original note: this prevents an HF startup timeout).
# Both stay None until load_model() has populated them.
processor = model = None

# Held by the request handler while it loads and runs the model.
model_lock = asyncio.Lock()

# Expected bearer token, read from the "img2caption" environment variable
# (per the original note, a Hugging Face token kept in the Space secrets).
# None when the variable is not set.
HF_TOKEN = os.environ.get("img2caption")
async def load_model():
    """Load the Florence-2 processor and model on first use.

    Does nothing when the module-level ``model`` is already set. Otherwise
    the blocking ``from_pretrained`` calls are run in the event loop's
    default executor so the server keeps answering other requests while the
    weights are fetched and deserialized.

    The globals ``processor`` and ``model`` are assigned together, and only
    after both loads succeeded, so a failed load leaves them untouched and
    the next call retries from scratch.

    Callers should hold ``model_lock`` while awaiting this coroutine: it
    yields to the event loop during the load, so unsynchronized concurrent
    calls could each start their own load.

    Raises:
        Exception: whatever ``from_pretrained`` / ``.to(device)`` raise is
            propagated unchanged to the caller.
    """
    global processor, model
    if model is not None:
        return

    model_id = "microsoft/Florence-2-base"

    def _load_blocking():
        # NOTE(review): trust_remote_code=True executes Python code shipped
        # in the model repository; consider pinning a revision.
        loaded_processor = AutoProcessor.from_pretrained(
            model_id,
            trust_remote_code=True,
        )
        loaded_model = AutoModelForCausalLM.from_pretrained(
            model_id,
            trust_remote_code=True,
        ).to(device).eval()
        return loaded_processor, loaded_model

    loop = asyncio.get_running_loop()
    processor, model = await loop.run_in_executor(None, _load_blocking)
def run_caption(image: Image.Image) -> str:
    """Generate a detailed caption for ``image``.

    Uses the module-level ``processor`` and ``model``, which must already be
    loaded (see ``load_model``), and moves the encoded inputs to ``device``.

    Args:
        image: The picture to describe.

    Returns:
        The value stored under the ``<MORE_DETAILED_CAPTION>`` key of the
        processor's post-processed generation output.
    """
    # The same task token is the prompt, the post-processing task name and
    # the key of the result dictionary.
    task = "<MORE_DETAILED_CAPTION>"

    encoded = processor(text=task, images=image, return_tensors="pt")
    encoded = encoded.to(device)

    generated_ids = model.generate(
        input_ids=encoded["input_ids"],
        pixel_values=encoded["pixel_values"],
        max_new_tokens=256,
        num_beams=3,
    )

    # Special tokens are kept in the decoded text that is handed to
    # post_process_generation.
    raw_text = processor.batch_decode(
        generated_ids, skip_special_tokens=False
    )[0]

    width, height = image.width, image.height
    result = processor.post_process_generation(
        raw_text,
        task=task,
        image_size=(width, height),
    )
    return result[task]
# ---------------------------------------------------
# API Endpoint (protected only if a token is sent)
# ---------------------------------------------------
@app.post("/img2caption")
async def img2caption(
    file: UploadFile = File(...),
    authorization: str = Header(None)
):
    """Caption an uploaded image and return the caption as plain text.

    Authentication is optional by design: a request without an
    ``Authorization`` header is accepted (the built-in UI sends none), but
    when the header is present it must be ``Bearer <token>`` with a token
    equal to ``HF_TOKEN``.

    Args:
        file: The uploaded image (multipart form field ``file``).
        authorization: Raw ``Authorization`` header value, or ``None`` when
            the client did not send one.

    Returns:
        A ``PlainTextResponse``: the caption (200), ``"Invalid token
        format"`` / ``"Invalid token"`` (403), or ``"Error: <message>"``
        (500) when reading, decoding or captioning the image fails.
    """
    # If the client sends a token -> validate it.
    if authorization is not None:
        prefix = "Bearer "
        if not authorization.startswith(prefix):
            return PlainTextResponse("Invalid token format", status_code=403)
        # Drop only the leading scheme; str.replace would also remove any
        # later "Bearer " occurrence inside the token itself.
        token = authorization[len(prefix):].strip()
        # With no configured secret every presented token is rejected.
        # Compare in constant time so response timing does not reveal how
        # much of the token matched; bytes are used because compare_digest
        # rejects non-ASCII str arguments.
        if HF_TOKEN is None or not hmac.compare_digest(
            token.encode("utf-8"), HF_TOKEN.encode("utf-8")
        ):
            return PlainTextResponse("Invalid token", status_code=403)

    try:
        # Read and decode before taking the lock so that slow or invalid
        # uploads neither hold up other requests nor trigger a model load.
        data = await file.read()
        image = Image.open(io.BytesIO(data)).convert("RGB")

        async with model_lock:
            await load_model()
            # Inference is blocking; run it in a worker thread so the event
            # loop stays responsive. The lock is still held, so only one
            # caption is generated at a time.
            loop = asyncio.get_running_loop()
            caption = await loop.run_in_executor(None, run_caption, image)

        # Return ONLY the caption string, no JSON: an explicit plain-text
        # response avoids the default JSON encoding of a bare str.
        return PlainTextResponse(caption)
    except Exception as e:
        return PlainTextResponse(f"Error: {str(e)}", status_code=500)
# ---------------------------------------------------
# Simple HTML UI (no token required)
# ---------------------------------------------------
@app.get("/", response_class=HTMLResponse)
def ui() -> str:
    """Return the single-page HTML front end.

    The page lets the user pick an image, previews it, and POSTs it as
    multipart form data (field ``file``) to ``/img2caption`` without an
    ``Authorization`` header. The response body is shown via
    ``textContent`` so that caption or error text is never interpreted as
    HTML.

    Returns:
        The complete HTML document as a string.
    """
    return """
<!DOCTYPE html>
<html>
<head>
    <title>Image Caption Generator</title>
    <style>
        body { font-family: Arial; max-width: 650px; margin: 40px auto; }
        h2 { text-align: center; }
        #preview {
            width: 100%; margin-top: 15px; display: none;
            border-radius: 8px;
        }
        #captionBox {
            margin-top: 20px; padding: 15px;
            background: #eee; border-radius: 6px; display: none;
        }
        button {
            padding: 12px; width: 100%; margin-top: 10px;
            background: #4A90E2; color: white; border: none;
            border-radius: 6px; cursor: pointer; font-size: 16px;
        }
        button:hover { background: #357ABD; }
    </style>
</head>
<body>
    <h2>Image Caption Generator</h2>
    <input type="file" id="imageInput" accept="image/*">
    <img id="preview">
    <button onclick="generateCaption()">Generate Caption</button>
    <div id="captionBox"></div>
    <script>
        const imageInput = document.getElementById("imageInput");
        const preview = document.getElementById("preview");
        const captionBox = document.getElementById("captionBox");
        imageInput.onchange = () => {
            const f = imageInput.files[0];
            if (f) {
                preview.src = URL.createObjectURL(f);
                preview.style.display = "block";
            }
        };
        async function generateCaption() {
            const f = imageInput.files[0];
            if (!f) {
                alert("Upload an image first");
                return;
            }
            const form = new FormData();
            form.append("file", f);
            captionBox.style.display = "block";
            captionBox.textContent = "Generating caption...";
            const res = await fetch("/img2caption", {
                method: "POST",
                body: form
            });
            const text = await res.text();
            captionBox.textContent = text;
        }
    </script>
</body>
</html>
"""
def keep_alive():
    """Placeholder hook called by the script entry point before the server starts.

    Currently a no-op: it performs no work and returns ``None``.
    """
    return None
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when this file is run as a
    # script, not when the module is imported.
    import uvicorn

    # The original message started with a mis-encoded (mojibake) character;
    # a plain ASCII message prints correctly on any console encoding.
    print("Launching Fast img2caption API")
    keep_alive()
    # Listen on all interfaces; 7860 is presumably the port the hosting
    # Space expects (TODO confirm against the deployment config).
    uvicorn.run(app, host="0.0.0.0", port=7860)