import os
import subprocess
import tempfile

import torch
import torchaudio
from fastapi import FastAPI, UploadFile, File
from transformers import pipeline
|
|
app = FastAPI()


# Whisper-base ASR pipeline, loaded once at startup so every request reuses
# the same model instance. Runs on CPU; language is left to be chosen per
# call via generate_kwargs (see /predict).
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device="cpu"
)
|
|
@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with Whisper.

    The upload is saved to disk, converted with ffmpeg to mono 16 kHz WAV
    (Whisper's expected input format), transcribed with auto language
    detection, and the temp files are removed.

    Returns a dict with the stripped transcript ``text``, the detected
    ``language`` (``"auto"`` if the pipeline does not report one), and a
    static ``note``.
    """
    # Unique per-request temp files: the previous fixed /tmp paths would
    # collide (and corrupt each other) under concurrent requests.
    in_fd, input_path = tempfile.mkstemp(suffix=".webm")
    os.close(in_fd)
    wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)

    try:
        with open(input_path, "wb") as f:
            f.write(await file.read())

        # check=True surfaces ffmpeg failures immediately instead of
        # failing later with a confusing torchaudio error on an
        # empty/invalid wav file.
        subprocess.run(
            [
                "ffmpeg", "-y", "-i", input_path,
                "-ac", "1", "-ar", "16000", wav_path,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )

        waveform, sr = torchaudio.load(wav_path)
        waveform = waveform.to(torch.float32)

        # First channel only (ffmpeg already downmixed to mono).
        # language=None lets Whisper auto-detect the spoken language.
        result = asr(
            {"array": waveform[0].numpy(), "sampling_rate": sr},
            generate_kwargs={
                "task": "transcribe",
                "language": None
            }
        )
    finally:
        # Clean up even when conversion or transcription raises;
        # the original code leaked both files on any error.
        for path in (input_path, wav_path):
            if os.path.exists(path):
                os.remove(path)

    return {
        "text": result["text"].strip(),
        "language": result.get("language", "auto"),
        "note": "Auto language detection enabled. Optimized for Hindi + English speech."
    }
|
|