Spaces:
Sleeping
Sleeping
File size: 5,119 Bytes
8dfd66b 0ecd601 bf63b36 0ecd601 8dfd66b 0ecd601 bf63b36 8dfd66b 256d97e 8dfd66b 0ecd601 8dfd66b 256d97e 8dfd66b bf63b36 8dfd66b 256d97e 8dfd66b 256d97e 8dfd66b 256d97e 0ecd601 256d97e 0ecd601 8dfd66b 0ecd601 256d97e 8dfd66b 0ecd601 8dfd66b 0ecd601 256d97e 0ecd601 8dfd66b 0ecd601 8dfd66b 256d97e bf63b36 256d97e bf63b36 8dfd66b 256d97e 0ecd601 8dfd66b 256d97e 8dfd66b bf63b36 8dfd66b bf63b36 8dfd66b 256d97e bf63b36 256d97e bf63b36 256d97e bf63b36 256d97e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import io
import asyncio
import threading
import time
from fastapi import FastAPI, File, UploadFile, Header
from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
from PIL import Image
import torch
from transformers import AutoProcessor, AutoModelForCausalLM
import requests
import os
# ---------------------------------------------------
# FastAPI App
# ---------------------------------------------------
app = FastAPI(title="Florence Image Caption API")

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Both stay None until load_model() fills them in on the first request
# (lazy loading, to prevent an HF timeout).
processor = model = None

# Held by the request handler while it calls load_model().
model_lock = asyncio.Lock()

# Hugging Face token stored in Space secrets, read from the "img2caption"
# environment variable; None when that variable is not set.
HF_TOKEN = os.environ.get("img2caption")
async def load_model():
    """Populate the module-level ``processor`` and ``model`` on first use.

    Does nothing when ``model`` is already set. Otherwise loads the
    Florence-2 base processor and model (remote code trusted), moves the
    model to ``device`` and switches it to eval mode.
    """
    global processor, model

    # Already loaded by an earlier call: nothing to do.
    if model is not None:
        return

    checkpoint = "microsoft/Florence-2-base"
    processor = AutoProcessor.from_pretrained(checkpoint, trust_remote_code=True)

    # Assign the global only once the model is fully prepared, so a failure
    # part-way through leaves ``model`` as None and the next call retries.
    network = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True)
    network = network.to(device)
    model = network.eval()
def run_caption(image: Image.Image) -> str:
    """Generate a detailed caption for ``image`` with the loaded model.

    Relies on the module-level ``processor`` and ``model`` having been
    populated (see ``load_model``) before it is called.

    Args:
        image: The PIL image to describe.

    Returns:
        The value stored under the ``<MORE_DETAILED_CAPTION>`` key of the
        processor's post-processed generation output.
    """
    # Task prompt token; used for the prompt, the post-processing task and
    # the key of the parsed result, so it is bound once.
    task = "<MORE_DETAILED_CAPTION>"

    inputs = processor(text=task, images=image, return_tensors="pt").to(device)

    # Pure inference: disable autograd so no part of the forward pass
    # records a graph or keeps activations alive for a backward pass.
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=256,
            num_beams=3,
        )

    # Special tokens are left in the decoded text (skip_special_tokens=False)
    # before it is handed to post_process_generation.
    decoded = processor.batch_decode(output_ids, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(
        decoded,
        task=task,
        image_size=(image.width, image.height),
    )
    return parsed[task]
# ---------------------------------------------------
# API Endpoint (Protected only if token is sent)
# ---------------------------------------------------
@app.post("/img2caption", response_class=PlainTextResponse)
async def img2caption(
    file: UploadFile = File(...),
    authorization: str = Header(None)
):
    """Caption an uploaded image and return the caption as plain text.

    Authentication is optional: a request without an ``Authorization``
    header is accepted, while a request that sends one must use the
    ``Bearer <token>`` form with a token equal to ``HF_TOKEN``.

    Args:
        file: The uploaded image.
        authorization: Value of the ``Authorization`` header, if sent.

    Returns:
        The caption string on success; a 403 plain-text response for a
        malformed or non-matching token; a 500 plain-text response carrying
        the exception message if loading, decoding or captioning fails.
    """
    bearer_prefix = "Bearer "

    # If the client sends a token, validate it.
    if authorization is not None:
        if not authorization.startswith(bearer_prefix):
            return PlainTextResponse("Invalid token format", status_code=403)
        # Strip only the leading scheme; str.replace would also delete any
        # later "Bearer " occurrence inside the token itself.
        token = authorization[len(bearer_prefix):].strip()
        if token != HF_TOKEN:
            return PlainTextResponse("Invalid token", status_code=403)

    try:
        async with model_lock:
            await load_model()
            data = await file.read()
            image = Image.open(io.BytesIO(data)).convert("RGB")
            # Generation is blocking, so run it in a worker thread to keep
            # the event loop free for other requests. The lock is still
            # held, so only one generation runs at a time.
            loop = asyncio.get_running_loop()
            caption = await loop.run_in_executor(None, run_caption, image)
        # Return ONLY the caption string, no JSON.
        return caption
    except Exception as e:
        return PlainTextResponse(f"Error: {str(e)}", status_code=500)
# ---------------------------------------------------
# Simple HTML UI (no token required)
# ---------------------------------------------------
@app.get("/", response_class=HTMLResponse)
def ui():
    """Return the single-page HTML UI for uploading an image.

    The page posts the chosen file to ``/img2caption`` without an
    ``Authorization`` header and shows the response text. That text is
    written with ``textContent`` so it is displayed literally rather than
    parsed as HTML.
    """
    return """
<!DOCTYPE html>
<html>
<head>
<title>Image Caption Generator</title>
<style>
body { font-family: Arial; max-width: 650px; margin: 40px auto; }
h2 { text-align: center; }
#preview {
width: 100%; margin-top: 15px; display: none;
border-radius: 8px;
}
#captionBox {
margin-top: 20px; padding: 15px;
background: #eee; border-radius: 6px; display: none;
}
button {
padding: 12px; width: 100%; margin-top: 10px;
background: #4A90E2; color: white; border: none;
border-radius: 6px; cursor: pointer; font-size: 16px;
}
button:hover { background: #357ABD; }
</style>
</head>
<body>
<h2>Image Caption Generator</h2>
<input type="file" id="imageInput" accept="image/*">
<img id="preview">
<button onclick="generateCaption()">Generate Caption</button>
<div id="captionBox"></div>
<script>
const imageInput = document.getElementById("imageInput");
const preview = document.getElementById("preview");
const captionBox = document.getElementById("captionBox");
imageInput.onchange = () => {
const f = imageInput.files[0];
if (f) {
preview.src = URL.createObjectURL(f);
preview.style.display = "block";
}
};
async function generateCaption() {
const f = imageInput.files[0];
if (!f) {
alert("Upload an image first");
return;
}
const form = new FormData();
form.append("file", f);
captionBox.style.display = "block";
captionBox.textContent = "Generating caption...";
const res = await fetch("/img2caption", {
method: "POST",
body: form
});
const text = await res.text();
captionBox.textContent = text;
}
</script>
</body>
</html>
"""
def keep_alive():
    """Placeholder hook called before the server starts; does nothing."""
    return None
if __name__ == "__main__":
    # Imported here so uvicorn is only required when run as a script.
    import uvicorn

    print("🚀 Launching Fast img2caption API")
    keep_alive()
    # Serve the app on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)