Spaces:
Sleeping
Sleeping
Auth
Browse files- .env.example +3 -0
- README.md +12 -0
- app.py +66 -9
.env.example
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MONGO_URI=
|
| 2 |
+
MONGO_DB_NAME=image_to_speech
|
| 3 |
+
FIREBASE_API_KEY=
|
README.md
CHANGED
|
@@ -8,3 +8,15 @@ pinned: false
|
|
| 8 |
---
|
| 9 |
|
| 10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 11 |
+
|
| 12 |
+
## API
|
| 13 |
+
|
| 14 |
+
`POST /generate-caption` accepts multipart form-data:
|
| 15 |
+
|
| 16 |
+
- `firebase_id_token` (required)
|
| 17 |
+
- `file` or `files` image field(s), up to 5
|
| 18 |
+
|
| 19 |
+
## Required environment variables
|
| 20 |
+
|
| 21 |
+
- `MONGO_URI`
|
| 22 |
+
- `FIREBASE_API_KEY`
|
app.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
| 1 |
import io
|
|
|
|
| 2 |
import logging
|
| 3 |
import os
|
| 4 |
import re
|
| 5 |
import threading
|
| 6 |
from datetime import datetime, timezone
|
|
|
|
| 7 |
|
| 8 |
# Avoid invalid OMP setting from runtime environment (e.g. empty/non-numeric).
|
| 9 |
_omp_threads = os.getenv("OMP_NUM_THREADS", "").strip()
|
|
@@ -11,14 +13,16 @@ if not _omp_threads.isdigit() or int(_omp_threads) < 1:
|
|
| 11 |
os.environ["OMP_NUM_THREADS"] = "8"
|
| 12 |
|
| 13 |
import torch
|
|
|
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
-
from fastapi import FastAPI, Request, UploadFile
|
| 16 |
from fastapi.exceptions import RequestValidationError
|
| 17 |
from fastapi.responses import JSONResponse
|
| 18 |
from PIL import Image, UnidentifiedImageError
|
| 19 |
from pymongo import MongoClient
|
| 20 |
from pymongo.errors import PyMongoError, ServerSelectionTimeoutError
|
| 21 |
from starlette.datastructures import UploadFile as StarletteUploadFile
|
|
|
|
| 22 |
from transformers import (
|
| 23 |
AutoModelForImageTextToText,
|
| 24 |
AutoModelForSeq2SeqLM,
|
|
@@ -37,6 +41,7 @@ MAX_NEW_TOKENS = 120
|
|
| 37 |
MAX_IMAGES = 5
|
| 38 |
MONGO_URI = (os.getenv("MONGO_URI") or os.getenv("MONGODB_URI") or "").strip().strip('"').strip("'")
|
| 39 |
MONGO_DB_NAME = os.getenv("MONGO_DB_NAME", "image_to_speech")
|
|
|
|
| 40 |
|
| 41 |
CAPTION_PROMPT = (
|
| 42 |
"Act as a professional news reporter delivering a live on-scene report in real time. "
|
|
@@ -55,6 +60,11 @@ PROCESSOR_MAX_LENGTH = 8192
|
|
| 55 |
|
| 56 |
logger = logging.getLogger(__name__)
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
def ok(message: str, data):
|
| 60 |
return JSONResponse(
|
|
@@ -111,7 +121,7 @@ else:
|
|
| 111 |
async def root():
|
| 112 |
return {
|
| 113 |
"success": True,
|
| 114 |
-
"message": "Use POST /generate-caption with form-data
|
| 115 |
"data": None,
|
| 116 |
}
|
| 117 |
|
|
@@ -164,6 +174,16 @@ async def validation_error_handler(_, exc: RequestValidationError):
|
|
| 164 |
return fail("Invalid request payload.", 422)
|
| 165 |
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
@app.exception_handler(Exception)
|
| 168 |
async def unhandled_error_handler(_, exc: Exception):
|
| 169 |
logger.exception("Unhandled server error: %s", exc)
|
|
@@ -175,6 +195,37 @@ def _ensure_db_ready():
|
|
| 175 |
raise AppError(db_init_error, 503)
|
| 176 |
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
def _finalize_caption(raw_text: str, max_sentences: int = CAPTION_MAX_SENTENCES) -> str:
|
| 179 |
text = " ".join(raw_text.split()).strip()
|
| 180 |
if not text:
|
|
@@ -360,11 +411,12 @@ def insert_record(collection, payload: dict) -> str:
|
|
| 360 |
raise AppError("MongoDB insert failed.", 503) from exc
|
| 361 |
|
| 362 |
|
| 363 |
-
async def _parse_images(request: Request) -> list[tuple[str, Image.Image]]:
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
|
|
|
| 368 |
|
| 369 |
uploads: list[UploadFile | StarletteUploadFile] = []
|
| 370 |
for key in ("files", "files[]", "file"):
|
|
@@ -410,9 +462,14 @@ async def _parse_images(request: Request) -> list[tuple[str, Image.Image]]:
|
|
| 410 |
|
| 411 |
|
| 412 |
@app.post("/generate-caption")
|
| 413 |
-
async def generate_caption(request: Request):
|
| 414 |
_ensure_db_ready()
|
| 415 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
|
| 417 |
image_captions = []
|
| 418 |
for filename, image in images:
|
|
|
|
| 1 |
import io
|
| 2 |
+
import asyncio
|
| 3 |
import logging
|
| 4 |
import os
|
| 5 |
import re
|
| 6 |
import threading
|
| 7 |
from datetime import datetime, timezone
|
| 8 |
+
from typing import Optional
|
| 9 |
|
| 10 |
# Avoid invalid OMP setting from runtime environment (e.g. empty/non-numeric).
|
| 11 |
_omp_threads = os.getenv("OMP_NUM_THREADS", "").strip()
|
|
|
|
| 13 |
os.environ["OMP_NUM_THREADS"] = "8"
|
| 14 |
|
| 15 |
import torch
|
| 16 |
+
import requests
|
| 17 |
from dotenv import load_dotenv
|
| 18 |
+
from fastapi import FastAPI, Form, HTTPException, Request, UploadFile
|
| 19 |
from fastapi.exceptions import RequestValidationError
|
| 20 |
from fastapi.responses import JSONResponse
|
| 21 |
from PIL import Image, UnidentifiedImageError
|
| 22 |
from pymongo import MongoClient
|
| 23 |
from pymongo.errors import PyMongoError, ServerSelectionTimeoutError
|
| 24 |
from starlette.datastructures import UploadFile as StarletteUploadFile
|
| 25 |
+
from starlette.exceptions import HTTPException as StarletteHTTPException
|
| 26 |
from transformers import (
|
| 27 |
AutoModelForImageTextToText,
|
| 28 |
AutoModelForSeq2SeqLM,
|
|
|
|
| 41 |
MAX_IMAGES = 5
|
| 42 |
MONGO_URI = (os.getenv("MONGO_URI") or os.getenv("MONGODB_URI") or "").strip().strip('"').strip("'")
|
| 43 |
MONGO_DB_NAME = os.getenv("MONGO_DB_NAME", "image_to_speech")
|
| 44 |
+
FIREBASE_API_KEY = os.getenv("FIREBASE_API_KEY", "").strip().strip('"').strip("'")
|
| 45 |
|
| 46 |
CAPTION_PROMPT = (
|
| 47 |
"Act as a professional news reporter delivering a live on-scene report in real time. "
|
|
|
|
| 60 |
|
| 61 |
logger = logging.getLogger(__name__)
|
| 62 |
|
| 63 |
+
ERRORS = {
|
| 64 |
+
"TOKEN_MISSING": "firebase_id_token is missing",
|
| 65 |
+
"TOKEN_INVALID": "Invalid Firebase token",
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
|
| 69 |
def ok(message: str, data):
|
| 70 |
return JSONResponse(
|
|
|
|
| 121 |
async def root():
|
| 122 |
return {
|
| 123 |
"success": True,
|
| 124 |
+
"message": "Use POST /generate-caption with form-data keys 'firebase_id_token' and 'file' or 'files' (up to 5 images).",
|
| 125 |
"data": None,
|
| 126 |
}
|
| 127 |
|
|
|
|
| 174 |
return fail("Invalid request payload.", 422)
|
| 175 |
|
| 176 |
|
| 177 |
+
@app.exception_handler(HTTPException)
async def fastapi_http_exception_handler(_, exc: HTTPException):
    """Convert a FastAPI ``HTTPException`` into a ``fail(...)`` response.

    The exception's ``detail`` is stringified and passed to ``fail`` together
    with the exception's own status code. The request argument is unused.
    """
    message = str(exc.detail)
    status = exc.status_code
    return fail(message, status)
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
@app.exception_handler(StarletteHTTPException)
async def starlette_http_exception_handler(_, exc: StarletteHTTPException):
    """Convert a Starlette ``HTTPException`` into a ``fail(...)`` response.

    The exception's ``detail`` is stringified and passed to ``fail`` together
    with the exception's own status code. The request argument is unused.
    """
    message = str(exc.detail)
    status = exc.status_code
    return fail(message, status)
|
| 185 |
+
|
| 186 |
+
|
| 187 |
@app.exception_handler(Exception)
|
| 188 |
async def unhandled_error_handler(_, exc: Exception):
|
| 189 |
logger.exception("Unhandled server error: %s", exc)
|
|
|
|
| 195 |
raise AppError(db_init_error, 503)
|
| 196 |
|
| 197 |
|
| 198 |
+
def _ensure_firebase_ready():
    """Raise ``AppError`` with status 503 unless ``FIREBASE_API_KEY`` is non-empty."""
    if FIREBASE_API_KEY:
        return
    raise AppError("FIREBASE_API_KEY is not set.", 503)
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
async def verify_firebase_token(firebase_id_token: str) -> dict:
    """Verify a Firebase ID token using the Identity Toolkit REST API.

    The token is posted to the ``accounts:lookup`` endpoint in a worker
    thread (``asyncio.to_thread``) with a 10 second timeout.

    Args:
        firebase_id_token: ID token supplied by the client; may be empty/None.

    Returns:
        The first entry of the ``users`` list in the lookup response.

    Raises:
        HTTPException: 401 when the token is missing, when the endpoint
            answers with a non-200 status, or when no user is returned;
            503 when the request fails or the response body is not JSON.
        AppError: 503 (via ``_ensure_firebase_ready``) when
            ``FIREBASE_API_KEY`` is not configured.
    """
    if not firebase_id_token:
        raise HTTPException(status_code=401, detail=ERRORS["TOKEN_MISSING"])

    _ensure_firebase_ready()
    url = f"https://identitytoolkit.googleapis.com/v1/accounts:lookup?key={FIREBASE_API_KEY}"
    payload = {"idToken": firebase_id_token}

    try:
        resp = await asyncio.to_thread(requests.post, url, json=payload, timeout=10)
        body = resp.json()
    except (requests.RequestException, ValueError) as exc:
        # Do not echo the exception text to the client (or the log): requests
        # error messages typically embed the request URL, which here carries
        # the API key as a query parameter.
        logger.warning("Firebase token verification request failed: %s", type(exc).__name__)
        raise HTTPException(
            status_code=503,
            detail="Firebase verification service unavailable.",
        ) from exc

    if resp.status_code != 200:
        detail = body.get("error", {}).get("message", ERRORS["TOKEN_INVALID"])
        raise HTTPException(status_code=401, detail=f"Firebase token verification failed: {detail}")

    users = body.get("users", [])
    if not users:
        raise HTTPException(status_code=401, detail="Firebase token verification failed: no user found")
    return users[0]
|
| 227 |
+
|
| 228 |
+
|
| 229 |
def _finalize_caption(raw_text: str, max_sentences: int = CAPTION_MAX_SENTENCES) -> str:
|
| 230 |
text = " ".join(raw_text.split()).strip()
|
| 231 |
if not text:
|
|
|
|
| 411 |
raise AppError("MongoDB insert failed.", 503) from exc
|
| 412 |
|
| 413 |
|
| 414 |
+
async def _parse_images(request: Request, form=None) -> list[tuple[str, Image.Image]]:
|
| 415 |
+
if form is None:
|
| 416 |
+
try:
|
| 417 |
+
form = await request.form()
|
| 418 |
+
except Exception as exc:
|
| 419 |
+
raise AppError("Invalid request payload.", 422) from exc
|
| 420 |
|
| 421 |
uploads: list[UploadFile | StarletteUploadFile] = []
|
| 422 |
for key in ("files", "files[]", "file"):
|
|
|
|
| 462 |
|
| 463 |
|
| 464 |
@app.post("/generate-caption")
|
| 465 |
+
async def generate_caption(request: Request, firebase_id_token: Optional[str] = Form(None)):
|
| 466 |
_ensure_db_ready()
|
| 467 |
+
|
| 468 |
+
form = await request.form()
|
| 469 |
+
token = firebase_id_token or form.get("firebase_id_token")
|
| 470 |
+
await verify_firebase_token(token)
|
| 471 |
+
|
| 472 |
+
images = await _parse_images(request, form=form)
|
| 473 |
|
| 474 |
image_captions = []
|
| 475 |
for filename, image in images:
|