vidhi0405 committed on
Commit
87cdad5
·
1 Parent(s): ca14807
Files changed (3) hide show
  1. README.md +5 -0
  2. app.py +39 -3
  3. requirements.txt +2 -1
README.md CHANGED
@@ -13,3 +13,8 @@ license: apache-2.0
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
16
+
17
+ ## Required environment variables
18
+
19
+ - `MONGO_URI`
20
+ - `FIREBASE_API_KEY` (required for `/audio/by-id` token verification)
app.py CHANGED
@@ -1,6 +1,6 @@
1
  #!/usr/bin/env python3
2
 
3
- import base64
4
  import io
5
  import os
6
  import time
@@ -11,11 +11,12 @@ from typing import Optional
11
 
12
  import gradio as gr
13
  import pymongo
 
14
  import soundfile as sf
15
  from bson.binary import Binary
16
  from bson.objectid import ObjectId
17
  from dotenv import load_dotenv
18
- from fastapi import Body, FastAPI, Form, Request, Response
19
  from pydantic import BaseModel
20
 
21
  from model import ENGLISH_REPO_ID, get_pretrained_model
@@ -26,6 +27,12 @@ MONGO_URI = os.getenv("MONGO_URI", "").strip()
26
  MONGO_DB_NAME = os.getenv("MONGO_DB_NAME", "image_to_speech").strip()
27
  MONGO_COLLECTION = os.getenv("MONGO_COLLECTION", "audio").strip()
28
  MONGO_CAPTIONS_COLLECTION = os.getenv("MONGO_CAPTIONS_COLLECTION", "captions").strip()
 
 
 
 
 
 
29
 
30
 
31
  def log(msg: str) -> None:
@@ -199,6 +206,7 @@ class AudioByIdRequest(BaseModel):
199
  audio_id: str
200
  sid: Optional[int] = 0
201
  speed: Optional[float] = 1.0
 
202
 
203
 
204
  api = FastAPI(title="Text-to-Speech API")
@@ -217,17 +225,45 @@ def _find_audio_doc(identifier: str):
217
  return None
218
 
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  @api.post("/audio/by-id")
221
- def get_audio_by_id(
222
  request: Request,
223
  payload: Optional[AudioByIdRequest] = Body(default=None),
224
  audio_id: Optional[str] = Form(default=None),
225
  sid: Optional[int] = Form(default=0),
226
  speed: Optional[float] = Form(default=1.0),
 
227
  ):
228
  resolved_audio_id = audio_id or (payload.audio_id if payload else None)
229
  resolved_sid = payload.sid if payload and payload.sid is not None else sid
230
  resolved_speed = payload.speed if payload and payload.speed is not None else speed
 
 
 
231
 
232
  if not resolved_audio_id:
233
  return _api_response(False, "audio_id is required", None)
 
1
  #!/usr/bin/env python3
2
 
3
+ import asyncio
4
  import io
5
  import os
6
  import time
 
11
 
12
  import gradio as gr
13
  import pymongo
14
+ import requests
15
  import soundfile as sf
16
  from bson.binary import Binary
17
  from bson.objectid import ObjectId
18
  from dotenv import load_dotenv
19
+ from fastapi import Body, FastAPI, Form, HTTPException, Request, Response
20
  from pydantic import BaseModel
21
 
22
  from model import ENGLISH_REPO_ID, get_pretrained_model
 
27
  MONGO_DB_NAME = os.getenv("MONGO_DB_NAME", "image_to_speech").strip()
28
  MONGO_COLLECTION = os.getenv("MONGO_COLLECTION", "audio").strip()
29
  MONGO_CAPTIONS_COLLECTION = os.getenv("MONGO_CAPTIONS_COLLECTION", "captions").strip()
30
+ FIREBASE_API_KEY = os.getenv("FIREBASE_API_KEY", "").strip()
31
+
32
+ ERRORS = {
33
+ "TOKEN_MISSING": "firebase_id_token is missing",
34
+ "TOKEN_INVALID": "Invalid Firebase token",
35
+ }
36
 
37
 
38
  def log(msg: str) -> None:
 
206
  audio_id: str
207
  sid: Optional[int] = 0
208
  speed: Optional[float] = 1.0
209
+ firebase_id_token: Optional[str] = None
210
 
211
 
212
  api = FastAPI(title="Text-to-Speech API")
 
225
  return None
226
 
227
 
228
+ async def verify_firebase_token(firebase_id_token: str) -> dict:
229
+ """Verify Firebase ID token using REST API."""
230
+ if not firebase_id_token:
231
+ raise HTTPException(status_code=401, detail=ERRORS["TOKEN_MISSING"])
232
+ if not FIREBASE_API_KEY:
233
+ raise HTTPException(status_code=500, detail="FIREBASE_API_KEY is missing in .env")
234
+
235
+ url = f"https://identitytoolkit.googleapis.com/v1/accounts:lookup?key={FIREBASE_API_KEY}"
236
+ payload = {"idToken": firebase_id_token}
237
+
238
+ try:
239
+ resp = await asyncio.to_thread(requests.post, url, json=payload, timeout=10)
240
+ if resp.status_code != 200:
241
+ detail = resp.json().get("error", {}).get("message", ERRORS["TOKEN_INVALID"])
242
+ raise HTTPException(status_code=401, detail=f"Firebase token verification failed: {detail}")
243
+
244
+ users = resp.json().get("users", [])
245
+ if not users:
246
+ raise HTTPException(status_code=401, detail="Firebase token verification failed: no user found")
247
+ return users[0]
248
+ except requests.RequestException as e:
249
+ raise HTTPException(status_code=503, detail=f"Firebase verification service unavailable: {str(e)}")
250
+
251
+
252
  @api.post("/audio/by-id")
253
+ async def get_audio_by_id(
254
  request: Request,
255
  payload: Optional[AudioByIdRequest] = Body(default=None),
256
  audio_id: Optional[str] = Form(default=None),
257
  sid: Optional[int] = Form(default=0),
258
  speed: Optional[float] = Form(default=1.0),
259
+ firebase_id_token: Optional[str] = Form(default=None),
260
  ):
261
  resolved_audio_id = audio_id or (payload.audio_id if payload else None)
262
  resolved_sid = payload.sid if payload and payload.sid is not None else sid
263
  resolved_speed = payload.speed if payload and payload.speed is not None else speed
264
+ resolved_firebase_token = firebase_id_token or (payload.firebase_id_token if payload else None)
265
+
266
+ await verify_firebase_token(resolved_firebase_token)
267
 
268
  if not resolved_audio_id:
269
  return _api_response(False, "audio_id is required", None)
requirements.txt CHANGED
@@ -6,4 +6,5 @@ soundfile
6
  python-dotenv
7
  pydantic
8
  sherpa-onnx
9
- huggingface_hub
 
 
6
  python-dotenv
7
  pydantic
8
  sherpa-onnx
9
+ huggingface_hub
10
+ requests