|
|
import asyncio
import base64
import os
import tempfile
from typing import Optional

import gradio as gr
import uvicorn
from fastapi import FastAPI
from faster_whisper import WhisperModel
from pydantic import BaseModel
|
|
|
|
|
|
|
|
# Directory used to cache the Whisper weights downloaded from the Hugging Face Hub.
HF_CACHE_DIR = "/tmp/huggingface_cache"

# NOTE: huggingface_hub resolves HF_HOME / HF_HUB_CACHE when it is first
# imported, which has already happened via the faster_whisper import above.
# These assignments therefore do not redirect the download on their own; they
# are kept for anything that reads the environment later in the process.
os.environ["HF_HOME"] = HF_CACHE_DIR
os.environ["HF_HUB_CACHE"] = HF_CACHE_DIR

# Pass the cache directory explicitly so the model really is stored under
# HF_CACHE_DIR regardless of import order.
model = WhisperModel(
    "Systran/faster-whisper-small",
    device="cpu",
    compute_type="int8",
    download_root=HF_CACHE_DIR,
)

# FastAPI application that hosts the JSON endpoint defined below.
app = FastAPI()
|
|
|
|
|
class AudioInput(BaseModel):
    """Request body for ``POST /predict``.

    Only the first element of ``data`` is read by the handler; it is expected
    to be the audio encoded as a base64 data URI.
    """

    # Entries may be null: a payload such as {"data": [null]} must pass
    # validation so the handler can answer with its own "no audio" message
    # instead of a 422 validation error.
    data: list[Optional[str]]
|
|
|
|
|
def transcribe_audio(audio_filepath, language):
    """Transcribe an audio file with the module-level Whisper ``model``.

    Args:
        audio_filepath: Path of the audio file to transcribe. ``None`` or an
            empty string means that no file was supplied.
        language: Language code forwarded to the model, or ``"auto"`` to let
            the model detect the language itself.

    Returns:
        The transcribed segments joined by single spaces, or the string
        ``"Error: No audio file provided."`` when no file was supplied.
    """
    if not audio_filepath:
        return "Error: No audio file provided."

    # The model API uses language=None to request auto-detection.
    lang = None if language == "auto" else language
    segments, _ = model.transcribe(
        audio_filepath, beam_size=5, language=lang, vad_filter=True
    )

    # Segment texts usually carry their own leading space; strip each piece
    # and skip empty ones so the result has no leading blank or doubled spaces.
    pieces = (seg.text.strip() for seg in segments)
    return " ".join(piece for piece in pieces if piece)
|
|
|
|
|
|
|
|
@app.post("/predict")
async def predict(audio_input: AudioInput):
    """Transcribe base64-encoded audio posted as JSON.

    Args:
        audio_input: Request body; ``data[0]`` holds the audio either as a
            ``data:<mime>;base64,<payload>`` URI or as a bare base64 payload.

    Returns:
        ``{"data": [text]}`` where ``text`` is the transcription, or an
        ``"Error: ..."`` message when the audio is missing or undecodable.
    """
    # Guard against an empty list as well as a null/empty first entry.
    base64_data_uri = audio_input.data[0] if audio_input.data else None
    if not base64_data_uri:
        return {"data": ["Error: No audio file provided."]}

    # Keep everything after the first comma; with no comma the whole string
    # is treated as the base64 payload instead of raising on unpacking.
    encoded_data = base64_data_uri.split(",", 1)[-1]
    try:
        audio_data = base64.b64decode(encoded_data)
    except ValueError:  # binascii.Error is a ValueError subclass
        return {"data": ["Error: Invalid base64 audio data."]}
    if not audio_data:
        return {"data": ["Error: No audio file provided."]}

    # delete=False: the file is reopened by path for transcription, so it is
    # closed first and removed explicitly in the finally clause.
    temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    temp_filepath = temp_audio_file.name
    try:
        with temp_audio_file:
            temp_audio_file.write(audio_data)
        # Transcription is blocking, CPU-bound work; run it in a worker thread
        # so the event loop keeps serving other requests meanwhile.
        transcription = await asyncio.to_thread(
            transcribe_audio, temp_filepath, "auto"
        )
    finally:
        os.remove(temp_filepath)

    return {"data": [transcription]}
|
|
|
|
|
|
|
|
# Browser UI: an audio upload plus a language selector, wired to transcribe_audio.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio File"),
        gr.Radio(["en", "bn", "auto"], label="Select Language", value="auto"),
    ],
    outputs="text",
    # These two strings previously held mis-decoded UTF-8 ("β‘" and "β");
    # the intended characters are restored here.
    title="⚡ Zen Speech-to-Text (API Fixed)",
    description="Upload audio → get transcription",
)

# Mount the Gradio UI on the FastAPI app at the site root.
app = gr.mount_gradio_app(app, iface, path="/")
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Listen on all interfaces; the PORT environment variable overrides the
    # default port 7860.
    listen_port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)