Spaces:
Sleeping
Sleeping
File size: 5,132 Bytes
b4b4755 735c9b7 b53989e 8775421 b4b4755 8775421 735c9b7 8775421 859e47e b434da0 859e47e b4b4755 8775421 735c9b7 b434da0 8775421 b53989e 735c9b7 fc19af7 b434da0 b4b4755 8775421 b434da0 8775421 b4b4755 8775421 b434da0 8775421 859e47e b53989e 859e47e b53989e 735c9b7 b53989e 735c9b7 b53989e 735c9b7 b53989e fc19af7 735c9b7 b4b4755 735c9b7 b4b4755 735c9b7 b53989e 735c9b7 b53989e b4b4755 859e47e b53989e 859e47e 735c9b7 b4b4755 b434da0 b4b4755 b434da0 859e47e 735c9b7 b434da0 b4b4755 859e47e b4b4755 b434da0 b4b4755 b434da0 b4b4755 b434da0 859e47e b4b4755 859e47e b434da0 b4b4755 735c9b7 b4b4755 859e47e b4b4755 859e47e b4b4755 b53989e b4b4755 2afc806 b4b4755 735c9b7 2afc806 d02eefa 2afc806 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
import io
import asyncio
import threading
import time
from fastapi import FastAPI, File, UploadFile, Header
from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
from PIL import Image
import torch
from transformers import AutoProcessor, AutoModelForCausalLM
import requests
import os
# ---------------------------------------------------
# FastAPI App
# ---------------------------------------------------
app = FastAPI(title="Florence Image Caption API")

# Use the GPU when torch reports one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor and model start out unset and are filled in by load_model()
# on the first request (lazy loading prevents the HF startup timeout).
processor, model = None, None

# Held by the endpoint while it loads the model and produces a caption.
model_lock = asyncio.Lock()

# Hugging Face token stored in Space secrets, exposed to the process as the
# "img2caption" environment variable; None when the variable is not set.
HF_TOKEN = os.environ.get("img2caption")
async def load_model():
    """Load the Florence-2 processor and model the first time it is needed.

    Does nothing when the module-level ``model`` is already set. Otherwise
    the blocking ``from_pretrained`` calls are run in the event loop's
    default executor so the loop keeps serving other requests while the
    weights are fetched and loaded, and the module-level ``processor`` and
    ``model`` are assigned together once both loads have succeeded.

    Because this coroutine now yields while loading, callers must serialize
    calls to it (the ``/img2caption`` endpoint does so via ``model_lock``).

    Raises:
        Exception: whatever ``from_pretrained`` / ``.to(device)`` raise; in
            that case the globals are left untouched so a later call retries.
    """
    global processor, model
    if model is not None:
        return

    model_id = "microsoft/Florence-2-base"

    def _load_blocking():
        # Runs in a worker thread: both calls are synchronous and slow.
        loaded_processor = AutoProcessor.from_pretrained(
            model_id,
            trust_remote_code=True,
        )
        loaded_model = AutoModelForCausalLM.from_pretrained(
            model_id,
            trust_remote_code=True,
        ).to(device).eval()
        return loaded_processor, loaded_model

    loop = asyncio.get_running_loop()
    # Publish both objects in one step, only after both loaded successfully.
    processor, model = await loop.run_in_executor(None, _load_blocking)
def run_caption(image: Image.Image) -> str:
    """Produce a detailed caption for ``image``.

    Uses the module-level ``processor`` and ``model``, which must already
    have been populated (they are ``None`` until ``load_model`` has run).

    Args:
        image: The picture to describe.

    Returns:
        The value stored under the ``<MORE_DETAILED_CAPTION>`` key of the
        processor's post-processed generation output.
    """
    task = "<MORE_DETAILED_CAPTION>"

    # Encode the task prompt together with the image and move it to `device`.
    encoded = processor(text=task, images=image, return_tensors="pt").to(device)

    generated = model.generate(
        input_ids=encoded["input_ids"],
        pixel_values=encoded["pixel_values"],
        max_new_tokens=256,
        num_beams=3,
    )

    # Special tokens are kept in the decoded text that is handed to
    # post_process_generation.
    raw_text = processor.batch_decode(generated, skip_special_tokens=False)[0]
    size = (image.width, image.height)
    result = processor.post_process_generation(raw_text, task=task, image_size=size)
    return result[task]
# ---------------------------------------------------
# API Endpoint (Protected only if token is sent)
# ---------------------------------------------------
@app.post("/img2caption", response_class=PlainTextResponse)
async def img2caption(
    file: UploadFile = File(...),
    authorization: str = Header(None)
):
    """Caption an uploaded image and return the caption as plain text.

    Args:
        file: The uploaded image.
        authorization: Optional ``Authorization`` header. When present it
            must have the form ``Bearer <token>`` and the token must equal
            ``HF_TOKEN``; when absent the request is accepted without a
            token check.

    Returns:
        The caption string on success; a 403 plain-text response when the
        header is malformed or the token does not match; a 500 plain-text
        response carrying the error message when reading, decoding or
        captioning the image fails.
    """
    # Local import so this block does not depend on a file-level import.
    from secrets import compare_digest

    bearer_prefix = "Bearer "

    # If app sends a token → validate it
    if authorization is not None:
        if not authorization.startswith(bearer_prefix):
            return PlainTextResponse("Invalid token format", status_code=403)
        # Drop only the leading scheme; str.replace would also strip any
        # later "Bearer " occurrences from inside the token itself.
        token = authorization[len(bearer_prefix):].strip()
        # Constant-time comparison of the shared secret. An unset HF_TOKEN
        # rejects every supplied token, exactly as `token != None` did.
        if HF_TOKEN is None or not compare_digest(
            token.encode("utf-8"), HF_TOKEN.encode("utf-8")
        ):
            return PlainTextResponse("Invalid token", status_code=403)

    try:
        async with model_lock:
            await load_model()
            data = await file.read()
            image = Image.open(io.BytesIO(data)).convert("RGB")
            # run_caption is synchronous and slow; run it in the default
            # executor so the event loop stays responsive. The lock is still
            # held, so captions are generated one at a time as before.
            loop = asyncio.get_running_loop()
            caption = await loop.run_in_executor(None, run_caption, image)
        # Return ONLY the caption string, no JSON
        return caption
    except Exception as e:
        return PlainTextResponse(f"Error: {str(e)}", status_code=500)
# ---------------------------------------------------
# Simple HTML UI (no token required)
# ---------------------------------------------------
@app.get("/", response_class=HTMLResponse)
def ui():
    """Serve the single-page upload form.

    The page previews the chosen image, POSTs it to ``/img2caption`` without
    an Authorization header, and shows the response body in the caption box.
    The response is written with ``textContent`` rather than ``innerHTML`` so
    that a caption or error message containing markup is displayed as text
    instead of being parsed as HTML.

    Returns:
        The HTML document as a string.
    """
    return """
<!DOCTYPE html>
<html>
<head>
<title>Image Caption Generator</title>
<style>
body { font-family: Arial; max-width: 650px; margin: 40px auto; }
h2 { text-align: center; }
#preview {
width: 100%; margin-top: 15px; display: none;
border-radius: 8px;
}
#captionBox {
margin-top: 20px; padding: 15px;
background: #eee; border-radius: 6px; display: none;
}
button {
padding: 12px; width: 100%; margin-top: 10px;
background: #4A90E2; color: white; border: none;
border-radius: 6px; cursor: pointer; font-size: 16px;
}
button:hover { background: #357ABD; }
</style>
</head>
<body>
<h2>Image Caption Generator</h2>
<input type="file" id="imageInput" accept="image/*">
<img id="preview">
<button onclick="generateCaption()">Generate Caption</button>
<div id="captionBox"></div>
<script>
const imageInput = document.getElementById("imageInput");
const preview = document.getElementById("preview");
const captionBox = document.getElementById("captionBox");
imageInput.onchange = () => {
const f = imageInput.files[0];
if (f) {
preview.src = URL.createObjectURL(f);
preview.style.display = "block";
}
};
async function generateCaption() {
const f = imageInput.files[0];
if (!f) {
alert("Upload an image first");
return;
}
const form = new FormData();
form.append("file", f);
captionBox.style.display = "block";
captionBox.textContent = "Generating caption...";
const res = await fetch("/img2caption", {
method: "POST",
body: form
});
const text = await res.text();
captionBox.textContent = text;
}
</script>
</body>
</html>
"""
def keep_alive():
    """Do nothing; a placeholder hook that always returns ``None``."""
    return None
if __name__ == "__main__":
    # Script entry point: uvicorn is imported only when the file is executed
    # directly, not when the module is imported.
    import uvicorn

    print("🚀 Launching Fast img2caption API")
    keep_alive()
    # Serve the app on every interface, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")
|