Update app.py
Browse files
app.py
CHANGED
|
@@ -2,12 +2,11 @@ import logging
|
|
| 2 |
import math
|
| 3 |
import time
|
| 4 |
import base64
|
| 5 |
-
import io
|
| 6 |
import os
|
| 7 |
from typing import Dict, Any
|
| 8 |
from functools import wraps
|
| 9 |
|
| 10 |
-
from fastapi import FastAPI, Depends, HTTPException, File, UploadFile
|
| 11 |
from fastapi.encoders import jsonable_encoder
|
| 12 |
from pydantic import BaseModel
|
| 13 |
import jax.numpy as jnp
|
|
@@ -38,7 +37,7 @@ chunk_len = round(CHUNK_LENGTH_S * pipeline.feature_extractor.sampling_rate)
|
|
| 38 |
stride_left = stride_right = round(stride_length_s * pipeline.feature_extractor.sampling_rate)
|
| 39 |
step = chunk_len - stride_left - stride_right
|
| 40 |
|
| 41 |
-
#
|
| 42 |
logger.debug("Compiling forward call...")
|
| 43 |
start = time.time()
|
| 44 |
random_inputs = {
|
|
@@ -51,7 +50,7 @@ compile_time = time.time() - start
|
|
| 51 |
logger.debug(f"Compiled in {compile_time}s")
|
| 52 |
|
| 53 |
class TranscribeAudioRequest(BaseModel):
|
| 54 |
-
audio_base64: str
|
| 55 |
task: str = "transcribe"
|
| 56 |
return_timestamps: bool = False
|
| 57 |
|
|
@@ -69,40 +68,55 @@ def timeit(func):
|
|
| 69 |
return result
|
| 70 |
return wrapper
|
| 71 |
|
| 72 |
-
def check_api_key():
|
| 73 |
api_key = os.environ.get("WHISPER_API_KEY")
|
| 74 |
-
if not api_key:
|
| 75 |
-
raise HTTPException(status_code=401, detail="
|
| 76 |
-
return
|
| 77 |
|
| 78 |
-
@app.post("/
|
| 79 |
@timeit
|
| 80 |
-
async def
|
| 81 |
-
|
| 82 |
-
|
|
|
|
| 83 |
api_key: str = Depends(check_api_key)
|
| 84 |
) -> Dict[str, Any]:
|
| 85 |
-
logger.debug("Starting
|
| 86 |
-
logger.debug(f"Received parameters - task: {
|
| 87 |
|
| 88 |
try:
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
file_size_mb = file_size / (1024 * 1024)
|
| 101 |
-
logger.debug(f"
|
| 102 |
except Exception as e:
|
| 103 |
-
logger.error(f"Error
|
| 104 |
-
raise HTTPException(status_code=400, detail=f"Error
|
|
|
|
|
|
|
| 105 |
|
|
|
|
| 106 |
if file_size_mb > FILE_LIMIT_MB:
|
| 107 |
logger.warning(f"Max file size exceeded: {file_size_mb:.2f}MB > {FILE_LIMIT_MB}MB")
|
| 108 |
raise HTTPException(status_code=400, detail=f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB.")
|
|
@@ -118,15 +132,13 @@ async def transcribe_chunked_audio(
|
|
| 118 |
|
| 119 |
logger.debug("Calling tqdm_generate to transcribe audio")
|
| 120 |
try:
|
| 121 |
-
task = request.task if request else "transcribe"
|
| 122 |
-
return_timestamps = request.return_timestamps if request else False
|
| 123 |
text, runtime, timing_info = tqdm_generate(inputs, task=task, return_timestamps=return_timestamps)
|
| 124 |
logger.debug(f"Transcription completed. Runtime: {runtime:.2f}s")
|
| 125 |
except Exception as e:
|
| 126 |
logger.error(f"Error in tqdm_generate: {str(e)}", exc_info=True)
|
| 127 |
raise HTTPException(status_code=500, detail=f"Error transcribing audio: {str(e)}")
|
| 128 |
|
| 129 |
-
logger.debug("
|
| 130 |
return jsonable_encoder({
|
| 131 |
"text": text,
|
| 132 |
"runtime": runtime,
|
|
@@ -211,4 +223,5 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
|
|
| 211 |
return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
|
| 212 |
else:
|
| 213 |
# we have a malformed timestamp so just return it as is
|
| 214 |
-
return seconds
|
|
|
|
|
|
| 2 |
import math
|
| 3 |
import time
|
| 4 |
import base64
|
|
|
|
| 5 |
import os
|
| 6 |
from typing import Dict, Any
|
| 7 |
from functools import wraps
|
| 8 |
|
| 9 |
+
from fastapi import FastAPI, Depends, HTTPException, File, UploadFile, Form, Header
|
| 10 |
from fastapi.encoders import jsonable_encoder
|
| 11 |
from pydantic import BaseModel
|
| 12 |
import jax.numpy as jnp
|
|
|
|
| 37 |
stride_left = stride_right = round(stride_length_s * pipeline.feature_extractor.sampling_rate)
|
| 38 |
step = chunk_len - stride_left - stride_right
|
| 39 |
|
| 40 |
+
# Pre-compile step
|
| 41 |
logger.debug("Compiling forward call...")
|
| 42 |
start = time.time()
|
| 43 |
random_inputs = {
|
|
|
|
| 50 |
logger.debug(f"Compiled in {compile_time}s")
|
| 51 |
|
| 52 |
# JSON request body accepted by the /transcribe_audio_base64 endpoint.
class TranscribeAudioRequest(BaseModel):
    # Audio content as a base64 string; the endpoint decodes it with
    # base64.b64decode before processing.
    audio_base64: str
    # Passed through to tqdm_generate as `task`; defaults to "transcribe".
    task: str = "transcribe"
    # Passed through to tqdm_generate as `return_timestamps`; off by default.
    return_timestamps: bool = False
|
| 56 |
|
|
|
|
| 68 |
return result
|
| 69 |
return wrapper
|
| 70 |
|
| 71 |
+
def check_api_key(x_api_key: str = Header(...)):
    """FastAPI dependency that authenticates a request by its API-key header.

    The client-supplied key is checked against the ``WHISPER_API_KEY``
    environment variable.

    Args:
        x_api_key: Key sent by the client in the header FastAPI binds to this
            parameter (``X-API-Key`` under FastAPI's default underscore
            conversion). Required, because the default is ``Header(...)``.

    Returns:
        The client-supplied key, once verified.

    Raises:
        HTTPException: 401 when ``WHISPER_API_KEY`` is unset or empty, or when
            the supplied key does not match it.
    """
    # Function-scope import so the block does not depend on a file-level one.
    import secrets

    api_key = os.environ.get("WHISPER_API_KEY")

    # Constant-time comparison: a plain `!=` returns as soon as the first
    # differing character is found, which lets an attacker recover the key
    # through response timing. Compare bytes, because compare_digest raises
    # TypeError for str values containing non-ASCII characters.
    if not api_key or not secrets.compare_digest(
        x_api_key.encode("utf-8"), api_key.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Invalid or missing API key")
    return x_api_key
|
| 76 |
|
| 77 |
+
@app.post("/transcribe_audio_file")
@timeit
async def transcribe_audio_file(
    file: UploadFile = File(...),
    task: str = Form("transcribe"),
    return_timestamps: bool = Form(False),
    api_key: str = Depends(check_api_key)
) -> Dict[str, Any]:
    """Transcribe an audio file sent as a multipart upload.

    Reads the whole upload into memory, computes its size in MB and hands the
    bytes to ``process_audio`` together with the form options.

    Args:
        file: Uploaded audio file.
        task: Form field forwarded to ``process_audio``; defaults to
            "transcribe".
        return_timestamps: Form field forwarded to ``process_audio``.
        api_key: Result of the ``check_api_key`` dependency; not used in the
            body, present only to enforce authentication.

    Returns:
        Whatever ``process_audio`` returns.

    Raises:
        HTTPException: 400 when reading the upload fails.
    """
    logger.debug("Starting transcribe_audio_file function")
    logger.debug(f"Received parameters - task: {task}, return_timestamps: {return_timestamps}")

    try:
        payload = await file.read()
        byte_count = len(payload)
        size_in_mb = byte_count / (1024 * 1024)
        logger.debug(f"Audio file size: {byte_count} bytes ({size_in_mb:.2f}MB)")
    except Exception as exc:
        # Same text goes to the log and to the client.
        failure = f"Error reading audio file: {str(exc)}"
        logger.error(failure, exc_info=True)
        raise HTTPException(status_code=400, detail=failure)

    return await process_audio(payload, size_in_mb, task, return_timestamps)
|
| 98 |
+
|
| 99 |
+
@app.post("/transcribe_audio_base64")
@timeit
async def transcribe_audio_base64(
    request: TranscribeAudioRequest,
    api_key: str = Depends(check_api_key)
) -> Dict[str, Any]:
    """Transcribe audio sent as a base64 string in a JSON body.

    Decodes ``request.audio_base64``, computes the decoded size in MB and
    hands the bytes to ``process_audio`` together with the request options.

    Args:
        request: Body carrying the base64 audio, ``task`` and
            ``return_timestamps``.
        api_key: Result of the ``check_api_key`` dependency; not used in the
            body, present only to enforce authentication.

    Returns:
        Whatever ``process_audio`` returns.

    Raises:
        HTTPException: 400 when the payload is not valid base64.
    """
    logger.debug("Starting transcribe_audio_base64 function")
    logger.debug(f"Received parameters - task: {request.task}, return_timestamps: {request.return_timestamps}")

    try:
        # Remove whitespace first so line-wrapped payloads are still accepted,
        # then decode strictly. Without validate=True, b64decode silently
        # drops characters outside the base64 alphabet and returns garbage
        # bytes, which would surface later as an opaque transcription error.
        encoded = "".join(request.audio_base64.split())
        audio_data = base64.b64decode(encoded, validate=True)
    except (ValueError, TypeError) as e:
        # binascii.Error (bad padding / bad characters) is a ValueError.
        logger.error(f"Error decoding base64 audio data: {str(e)}", exc_info=True)
        raise HTTPException(status_code=400, detail=f"Error decoding base64 audio data: {str(e)}") from e

    file_size = len(audio_data)
    file_size_mb = file_size / (1024 * 1024)
    logger.debug(f"Decoded audio data size: {file_size} bytes ({file_size_mb:.2f}MB)")

    return await process_audio(audio_data, file_size_mb, request.task, request.return_timestamps)
|
| 118 |
|
| 119 |
+
async def process_audio(audio_data: bytes, file_size_mb: float, task: str, return_timestamps: bool) -> Dict[str, Any]:
|
| 120 |
if file_size_mb > FILE_LIMIT_MB:
|
| 121 |
logger.warning(f"Max file size exceeded: {file_size_mb:.2f}MB > {FILE_LIMIT_MB}MB")
|
| 122 |
raise HTTPException(status_code=400, detail=f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB.")
|
|
|
|
| 132 |
|
| 133 |
logger.debug("Calling tqdm_generate to transcribe audio")
|
| 134 |
try:
|
|
|
|
|
|
|
| 135 |
text, runtime, timing_info = tqdm_generate(inputs, task=task, return_timestamps=return_timestamps)
|
| 136 |
logger.debug(f"Transcription completed. Runtime: {runtime:.2f}s")
|
| 137 |
except Exception as e:
|
| 138 |
logger.error(f"Error in tqdm_generate: {str(e)}", exc_info=True)
|
| 139 |
raise HTTPException(status_code=500, detail=f"Error transcribing audio: {str(e)}")
|
| 140 |
|
| 141 |
+
logger.debug("Audio processing completed successfully")
|
| 142 |
return jsonable_encoder({
|
| 143 |
"text": text,
|
| 144 |
"runtime": runtime,
|
|
|
|
| 223 |
return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
|
| 224 |
else:
|
| 225 |
# we have a malformed timestamp so just return it as is
|
| 226 |
+
return seconds
|
| 227 |
+
|