vidhi0405 committed on
Commit
0f2dcd5
·
1 Parent(s): 2925be4
Files changed (3) hide show
  1. .env.example +3 -0
  2. README.md +12 -0
  3. app.py +66 -9
.env.example ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ MONGO_URI=
2
+ MONGO_DB_NAME=image_to_speech
3
+ FIREBASE_API_KEY=
README.md CHANGED
@@ -8,3 +8,15 @@ pinned: false
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
11
+
12
+ ## API
13
+
14
+ `POST /generate-caption` accepts multipart form-data:
15
+
16
+ - `firebase_id_token` (required): Firebase ID token of the caller; a missing or invalid token is rejected with HTTP 401
17
+ - `file` or `files` (`files[]` is also accepted): image upload field(s), up to 5 images per request
18
+
19
+ ## Required environment variables
20
+
21
+ - `MONGO_URI` (`MONGODB_URI` is accepted as a fallback name)
22
+ - `FIREBASE_API_KEY`
app.py CHANGED
@@ -1,9 +1,11 @@
1
  import io
 
2
  import logging
3
  import os
4
  import re
5
  import threading
6
  from datetime import datetime, timezone
 
7
 
8
  # Avoid invalid OMP setting from runtime environment (e.g. empty/non-numeric).
9
  _omp_threads = os.getenv("OMP_NUM_THREADS", "").strip()
@@ -11,14 +13,16 @@ if not _omp_threads.isdigit() or int(_omp_threads) < 1:
11
  os.environ["OMP_NUM_THREADS"] = "8"
12
 
13
  import torch
 
14
  from dotenv import load_dotenv
15
- from fastapi import FastAPI, Request, UploadFile
16
  from fastapi.exceptions import RequestValidationError
17
  from fastapi.responses import JSONResponse
18
  from PIL import Image, UnidentifiedImageError
19
  from pymongo import MongoClient
20
  from pymongo.errors import PyMongoError, ServerSelectionTimeoutError
21
  from starlette.datastructures import UploadFile as StarletteUploadFile
 
22
  from transformers import (
23
  AutoModelForImageTextToText,
24
  AutoModelForSeq2SeqLM,
@@ -37,6 +41,7 @@ MAX_NEW_TOKENS = 120
37
  MAX_IMAGES = 5
38
  MONGO_URI = (os.getenv("MONGO_URI") or os.getenv("MONGODB_URI") or "").strip().strip('"').strip("'")
39
  MONGO_DB_NAME = os.getenv("MONGO_DB_NAME", "image_to_speech")
 
40
 
41
  CAPTION_PROMPT = (
42
  "Act as a professional news reporter delivering a live on-scene report in real time. "
@@ -55,6 +60,11 @@ PROCESSOR_MAX_LENGTH = 8192
55
 
56
  logger = logging.getLogger(__name__)
57
 
 
 
 
 
 
58
 
59
  def ok(message: str, data):
60
  return JSONResponse(
@@ -111,7 +121,7 @@ else:
111
  async def root():
112
  return {
113
  "success": True,
114
- "message": "Use POST /generate-caption with form-data key 'file' or 'files' (up to 5 images).",
115
  "data": None,
116
  }
117
 
@@ -164,6 +174,16 @@ async def validation_error_handler(_, exc: RequestValidationError):
164
  return fail("Invalid request payload.", 422)
165
 
166
 
 
 
 
 
 
 
 
 
 
 
167
  @app.exception_handler(Exception)
168
  async def unhandled_error_handler(_, exc: Exception):
169
  logger.exception("Unhandled server error: %s", exc)
@@ -175,6 +195,37 @@ def _ensure_db_ready():
175
  raise AppError(db_init_error, 503)
176
 
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  def _finalize_caption(raw_text: str, max_sentences: int = CAPTION_MAX_SENTENCES) -> str:
179
  text = " ".join(raw_text.split()).strip()
180
  if not text:
@@ -360,11 +411,12 @@ def insert_record(collection, payload: dict) -> str:
360
  raise AppError("MongoDB insert failed.", 503) from exc
361
 
362
 
363
- async def _parse_images(request: Request) -> list[tuple[str, Image.Image]]:
364
- try:
365
- form = await request.form()
366
- except Exception as exc:
367
- raise AppError("Invalid request payload.", 422) from exc
 
368
 
369
  uploads: list[UploadFile | StarletteUploadFile] = []
370
  for key in ("files", "files[]", "file"):
@@ -410,9 +462,14 @@ async def _parse_images(request: Request) -> list[tuple[str, Image.Image]]:
410
 
411
 
412
  @app.post("/generate-caption")
413
- async def generate_caption(request: Request):
414
  _ensure_db_ready()
415
- images = await _parse_images(request)
 
 
 
 
 
416
 
417
  image_captions = []
418
  for filename, image in images:
 
1
  import io
2
+ import asyncio
3
  import logging
4
  import os
5
  import re
6
  import threading
7
  from datetime import datetime, timezone
8
+ from typing import Optional
9
 
10
  # Avoid invalid OMP setting from runtime environment (e.g. empty/non-numeric).
11
  _omp_threads = os.getenv("OMP_NUM_THREADS", "").strip()
 
13
  os.environ["OMP_NUM_THREADS"] = "8"
14
 
15
  import torch
16
+ import requests
17
  from dotenv import load_dotenv
18
+ from fastapi import FastAPI, Form, HTTPException, Request, UploadFile
19
  from fastapi.exceptions import RequestValidationError
20
  from fastapi.responses import JSONResponse
21
  from PIL import Image, UnidentifiedImageError
22
  from pymongo import MongoClient
23
  from pymongo.errors import PyMongoError, ServerSelectionTimeoutError
24
  from starlette.datastructures import UploadFile as StarletteUploadFile
25
+ from starlette.exceptions import HTTPException as StarletteHTTPException
26
  from transformers import (
27
  AutoModelForImageTextToText,
28
  AutoModelForSeq2SeqLM,
 
41
  MAX_IMAGES = 5
42
  MONGO_URI = (os.getenv("MONGO_URI") or os.getenv("MONGODB_URI") or "").strip().strip('"').strip("'")
43
  MONGO_DB_NAME = os.getenv("MONGO_DB_NAME", "image_to_speech")
44
+ FIREBASE_API_KEY = os.getenv("FIREBASE_API_KEY", "").strip().strip('"').strip("'")
45
 
46
  CAPTION_PROMPT = (
47
  "Act as a professional news reporter delivering a live on-scene report in real time. "
 
60
 
61
  logger = logging.getLogger(__name__)
62
 
63
# Client-facing messages for Firebase token failures, keyed by error code.
ERRORS = dict(
    TOKEN_MISSING="firebase_id_token is missing",
    TOKEN_INVALID="Invalid Firebase token",
)
67
+
68
 
69
  def ok(message: str, data):
70
  return JSONResponse(
 
121
  async def root():
122
  return {
123
  "success": True,
124
+ "message": "Use POST /generate-caption with form-data keys 'firebase_id_token' and 'file' or 'files' (up to 5 images).",
125
  "data": None,
126
  }
127
 
 
174
  return fail("Invalid request payload.", 422)
175
 
176
 
177
@app.exception_handler(HTTPException)
async def fastapi_http_exception_handler(_, exc: HTTPException):
    """Render a FastAPI ``HTTPException`` through the shared ``fail`` helper.

    The exception's ``detail`` is stringified and passed to ``fail`` together
    with the exception's status code.
    """
    message = str(exc.detail)
    status = exc.status_code
    return fail(message, status)
180
+
181
+
182
@app.exception_handler(StarletteHTTPException)
async def starlette_http_exception_handler(_, exc: StarletteHTTPException):
    """Render a Starlette ``HTTPException`` through the shared ``fail`` helper.

    The exception's ``detail`` is stringified and passed to ``fail`` together
    with the exception's status code.
    """
    message = str(exc.detail)
    status = exc.status_code
    return fail(message, status)
185
+
186
+
187
  @app.exception_handler(Exception)
188
  async def unhandled_error_handler(_, exc: Exception):
189
  logger.exception("Unhandled server error: %s", exc)
 
195
  raise AppError(db_init_error, 503)
196
 
197
 
198
def _ensure_firebase_ready():
    """Fail fast when no Firebase API key has been configured.

    Raises:
        AppError: constructed with 503 when ``FIREBASE_API_KEY`` is empty.
    """
    if FIREBASE_API_KEY:
        return
    raise AppError("FIREBASE_API_KEY is not set.", 503)
201
+
202
+
203
async def verify_firebase_token(firebase_id_token: str) -> dict:
    """Verify a Firebase ID token through the Identity Toolkit REST API.

    Posts the token to the ``accounts:lookup`` endpoint and returns the first
    user record from the response.

    Args:
        firebase_id_token: ID token supplied by the client.

    Returns:
        The first entry of the ``users`` list in the lookup response.

    Raises:
        HTTPException: 401 when the token is missing, rejected, or resolves to
            no user; 503 when the verification service cannot be reached,
            answers with a server error, or returns an unreadable body.
        AppError: raised by ``_ensure_firebase_ready`` when
            ``FIREBASE_API_KEY`` is not configured.
    """
    if not firebase_id_token:
        raise HTTPException(status_code=401, detail=ERRORS["TOKEN_MISSING"])

    _ensure_firebase_ready()
    url = "https://identitytoolkit.googleapis.com/v1/accounts:lookup"
    unavailable = "Firebase verification service unavailable"

    # Only the network call can raise RequestException, so keep the try minimal.
    try:
        resp = await asyncio.to_thread(
            requests.post,
            url,
            params={"key": FIREBASE_API_KEY},
            json={"idToken": firebase_id_token},
            timeout=10,
        )
    except requests.RequestException as exc:
        # The exception text can include the request URL, which carries the
        # API key as a query parameter. Never echo it to the client or the
        # log; record only the exception type.
        logger.warning(
            "Firebase verification request failed: %s", type(exc).__name__
        )
        raise HTTPException(status_code=503, detail=unavailable) from exc

    # The body may be non-JSON (proxy error page) or JSON of an unexpected
    # shape; normalise both cases to None instead of crashing.
    try:
        body = resp.json()
    except ValueError:
        body = None
    if not isinstance(body, dict):
        body = None

    if resp.status_code != 200:
        # An upstream server error is not the caller's fault: report it as an
        # outage rather than as an invalid token.
        if resp.status_code >= 500:
            raise HTTPException(status_code=503, detail=unavailable)
        error = body.get("error") if body else None
        message = error.get("message") if isinstance(error, dict) else None
        detail = message or ERRORS["TOKEN_INVALID"]
        raise HTTPException(
            status_code=401,
            detail=f"Firebase token verification failed: {detail}",
        )

    if body is None:
        raise HTTPException(status_code=503, detail=unavailable)

    users = body.get("users")
    if not isinstance(users, list) or not users:
        raise HTTPException(
            status_code=401,
            detail="Firebase token verification failed: no user found",
        )
    return users[0]
227
+
228
+
229
  def _finalize_caption(raw_text: str, max_sentences: int = CAPTION_MAX_SENTENCES) -> str:
230
  text = " ".join(raw_text.split()).strip()
231
  if not text:
 
411
  raise AppError("MongoDB insert failed.", 503) from exc
412
 
413
 
414
+ async def _parse_images(request: Request, form=None) -> list[tuple[str, Image.Image]]:
415
+ if form is None:
416
+ try:
417
+ form = await request.form()
418
+ except Exception as exc:
419
+ raise AppError("Invalid request payload.", 422) from exc
420
 
421
  uploads: list[UploadFile | StarletteUploadFile] = []
422
  for key in ("files", "files[]", "file"):
 
462
 
463
 
464
  @app.post("/generate-caption")
465
+ async def generate_caption(request: Request, firebase_id_token: Optional[str] = Form(None)):
466
  _ensure_db_ready()
467
+
468
+ form = await request.form()
469
+ token = firebase_id_token or form.get("firebase_id_token")
470
+ await verify_firebase_token(token)
471
+
472
+ images = await _parse_images(request, form=form)
473
 
474
  image_captions = []
475
  for filename, image in images: