from fastapi import FastAPI, UploadFile, File
from transformers import pipeline
import torchaudio
import torch
import subprocess
import os
import tempfile

app = FastAPI()

# Multilingual Whisper model — better Hindi/English coverage than "tiny".
# Swap to "openai/whisper-small" for higher accuracy if the container allows.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device="cpu",
)


@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file (any ffmpeg-readable format).

    The upload is converted to 16 kHz mono WAV via ffmpeg, then run through
    Whisper with automatic language detection (task="transcribe" — no
    translation). Returns the transcript text plus detected language.

    Raises:
        subprocess.CalledProcessError: if ffmpeg cannot convert the upload.
    """
    input_path = None
    wav_path = None
    try:
        # Unique temp file per request: the original fixed /tmp paths raced
        # when two uploads hit this async endpoint concurrently.
        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp_in:
            tmp_in.write(await file.read())
            input_path = tmp_in.name
        wav_path = input_path + ".wav"

        # Convert to 16 kHz mono WAV for consistent model input.
        # check=True surfaces ffmpeg failures immediately instead of letting
        # torchaudio fail later with a confusing "file not found" error.
        subprocess.run(
            ["ffmpeg", "-y", "-i", input_path, "-ac", "1", "-ar", "16000", wav_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )

        waveform, sr = torchaudio.load(wav_path)
        waveform = waveform.to(torch.float32)

        # task="transcribe" keeps the spoken language (disables translation);
        # language=None lets Whisper auto-detect Hindi vs. English.
        result = asr(
            {"array": waveform[0].numpy(), "sampling_rate": sr},
            generate_kwargs={
                "task": "transcribe",
                "language": None,
            },
        )

        return {
            "text": result["text"].strip(),
            # NOTE(review): the ASR pipeline result may not include a
            # "language" key for all transformers versions — "auto" fallback
            # keeps the response shape stable either way.
            "language": result.get("language", "auto"),
            "note": "Auto language detection enabled. Optimized for Hindi + English speech.",
        }
    finally:
        # Always clean up temp files, even on failure — the original leaked
        # them whenever conversion or transcription raised.
        for path in (input_path, wav_path):
            if path and os.path.exists(path):
                os.remove(path)