Spaces:
Paused
Paused
| from datetime import datetime, timezone | |
| from typing import Any, Dict, List, Optional | |
| from fastapi import APIRouter, File, Form, Request, UploadFile | |
| from groq import Groq, RateLimitError | |
| from src.api.utils import ( | |
| check_user_rate_limit, | |
| get_groq_keys, | |
| track_key_usage, | |
| user_request_tracker, | |
| verify_user_license, | |
| ) | |
| from src.common.logger import logger | |
| from src.common.utils import response_error, response_success | |
| router = APIRouter() | |
def _build_segments_from_response(
    response, *, end_tolerance: float = 0.01
) -> List[Dict[str, Any]]:
    """Build the segment list from a Groq transcription response.

    The response is converted with ``response.model_dump()`` and its
    top-level ``"segments"`` and ``"words"`` lists are merged: every returned
    segment carries the words whose time span lies inside that segment.

    A word is attached to the first segment (in response order) for which
    ``word.start >= segment.start`` and
    ``word.end <= segment.end + end_tolerance``. Each word is attached to at
    most one segment; words that fit no segment are dropped.

    Args:
        response: Object exposing ``model_dump()`` that returns a dict.
        end_tolerance: Slack, in the same unit as the timestamps, allowed
            past the segment end when matching a word.

    Returns:
        One dict per segment with the keys ``id``, ``seek``, ``start``,
        ``end``, ``text``, ``tokens``, ``avg_logprob``, ``compression_ratio``,
        ``no_speech_prob``, ``temperature`` and ``words``. Segment fields
        missing from the response are ``None``.
    """
    payload = response.model_dump()
    # The keys may be present but null, in which case a `.get` default would
    # not apply; `or []` covers both the missing and the null case.
    segments = payload.get("segments") or []
    unassigned = list(payload.get("words") or [])

    result = []
    for segment in segments:
        seg_start = segment.get("start")
        seg_end = segment.get("end")
        segment_has_bounds = seg_start is not None and seg_end is not None

        # Partition the still-unassigned words in one pass instead of popping
        # from the list while scanning it.
        segment_words = []
        leftover = []
        for word in unassigned:
            word_start = word.get("start")
            word_end = word.get("end")
            inside = (
                segment_has_bounds
                and word_start is not None
                and word_end is not None
                and word_start >= seg_start
                and word_end <= seg_end + end_tolerance
            )
            if inside:
                segment_words.append(
                    {
                        "word": word.get("word", ""),
                        "start": word_start,
                        "end": word_end,
                        # Constant placeholder: no per-word probability is
                        # read from the response.
                        "probability": 1,
                    }
                )
            else:
                leftover.append(word)
        unassigned = leftover

        # Build segment with embedded words
        result.append(
            {
                "id": segment.get("id"),
                "seek": segment.get("seek"),
                "start": seg_start,
                "end": seg_end,
                "text": segment.get("text"),
                "tokens": segment.get("tokens"),
                "avg_logprob": segment.get("avg_logprob"),
                "compression_ratio": segment.get("compression_ratio"),
                "no_speech_prob": segment.get("no_speech_prob"),
                "temperature": segment.get("temperature"),
                "words": segment_words,
            }
        )
    return result
async def transcribe_audio(
    email: str = Form(...),
    license_key: str = Form(...),
    audio_file: UploadFile = File(...),
    language: Optional[str] = None,
    request: Request = None,
):
    """Transcribe an uploaded audio file with Groq Whisper.

    Flow:
      1. Verify the license with ``verify_user_license``; on error return a
         ``response_error`` with code 403.
      2. Enforce the per-user limit with ``check_user_rate_limit``; on refusal
         return ``USER_RATE_LIMIT`` (429) with the seconds left until
         ``user_request_tracker[email]["reset_at"]``.
      3. Read the upload once, then try every key from ``get_groq_keys`` in
         order until one transcription succeeds.

    Args:
        email: User e-mail (form field); also the rate-limit tracker key.
        license_key: License key (form field) checked together with ``email``.
        audio_file: Uploaded audio; its filename and bytes are forwarded.
        language: Optional language hint, forwarded only when truthy.
            NOTE(review): declared without ``Form``, so FastAPI presumably
            reads it from the query string rather than the form; confirm
            this is intended.
        request: Unused in this function; kept for interface compatibility.

    Returns:
        ``response_success`` with ``text``, ``language``, ``duration`` and
        ``segments`` on success; otherwise a ``response_error``
        (``NO_API_KEYS`` 503 when no key is configured, ``ASR_FAILED`` 500
        when every key failed).
    """
    # Function-scope stdlib imports keep this block self-contained.
    import asyncio
    from functools import partial

    _, error = verify_user_license(email, license_key)
    if error:
        return response_error(error, f"License verification failed: {error}", 403)

    if not check_user_rate_limit(email):
        remaining = user_request_tracker[email]["reset_at"] - datetime.now(timezone.utc)
        # Clamp at zero: the window may have expired between the check above
        # and this subtraction, which would otherwise report negative seconds.
        wait_seconds = max(0, int(remaining.total_seconds()))
        return response_error(
            "USER_RATE_LIMIT",
            f"Rate limit exceeded. Try again in {wait_seconds} seconds",
            429,
        )

    api_keys = get_groq_keys()
    if not api_keys:
        return response_error("NO_API_KEYS", "No Groq API keys configured", 503)

    audio_content = await audio_file.read()

    # The request parameters do not depend on the key, so build them once.
    params = {
        "file": (audio_file.filename, audio_content),
        "model": "whisper-large-v3",
        "temperature": 0,
        "response_format": "verbose_json",
        "timestamp_granularities": ["word", "segment"],
    }
    if language:
        params["language"] = language

    loop = asyncio.get_running_loop()
    last_error = None
    for api_key in api_keys:
        try:
            client = Groq(api_key=api_key)
            # `Groq` is the synchronous client: run the blocking HTTP call in
            # the default executor so the event loop keeps serving other
            # requests while the transcription is in flight.
            response = await loop.run_in_executor(
                None, partial(client.audio.transcriptions.create, **params)
            )
            track_key_usage(api_key)
            # Build segments with embedded words
            segments = _build_segments_from_response(response)
            return response_success(
                {
                    "text": response.text,
                    "language": getattr(response, "language", language),
                    "duration": getattr(response, "duration", None),
                    "segments": segments,
                }
            )
        except RateLimitError as e:
            logger.error("Key rate-limited: switching...")
            logger.error(str(e))
            last_error = str(e)
            continue
        except Exception as e:
            # Deliberate best-effort: any failure on one key falls through to
            # the next key; only the last error is reported to the caller.
            logger.error(str(e))
            last_error = str(e)
            continue

    return response_error("ASR_FAILED", f"All API keys failed: {last_error}", 500)