import gradio as gr
import numpy as np
from transformers import pipeline
# ==============================
# CONFIG
# ==============================
# Hugging Face Hub repo id of the fine-tuned Whisper checkpoint.
MODEL_PATH = "Sabbir772/BNWCH"
LANGUAGE = "bn" # Bengali
TASK = "transcribe"  # Whisper task: "transcribe" (keep language) vs. "translate"
# ==============================
# LOAD MODEL
# ==============================
print(f"🔍 Loading model from {MODEL_PATH} ...")
# Build the ASR pipeline once at import time; long audio is processed in
# 30-second chunks by the pipeline itself.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_PATH,
    tokenizer=MODEL_PATH,
    chunk_length_s=30, # You can tune this for performance
    device=-1 # Use CPU on Spaces unless GPU is enabled
)
# Pin decoding to Bengali transcription instead of letting Whisper
# auto-detect the language or translate.
# NOTE(review): `forced_decoder_ids` is deprecated in recent transformers
# versions in favor of generate(language=..., task=...) — confirm against
# the installed transformers version.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language=LANGUAGE, task=TASK
)
print("✅ Model loaded successfully!\n")
# ==============================
# DEFINE INFERENCE FUNCTION
# ==============================
def transcribe(audio):
    """Transcribe a Gradio audio input with the Whisper pipeline.

    Args:
        audio: ``(sample_rate, data)`` tuple as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when no audio was given.

    Returns:
        The stripped transcription text, or a placeholder message when no
        audio is provided.
    """
    if audio is None:
        return "No audio provided."

    sr, data = audio

    # Gradio delivers PCM as integers (typically int16) and possibly stereo;
    # the ASR pipeline expects mono float32 samples in [-1.0, 1.0].
    if np.issubdtype(data.dtype, np.integer):
        data = data.astype(np.float32) / np.iinfo(data.dtype).max
    else:
        data = data.astype(np.float32)
    if data.ndim > 1:
        data = data.mean(axis=1)

    # Pass the sample rate explicitly so the pipeline resamples to the
    # model's expected rate (16 kHz for Whisper). Passing the bare array
    # made the feature extractor assume its default rate, which mangles
    # microphone input recorded at 44.1/48 kHz.
    result = pipe({"sampling_rate": sr, "raw": data})["text"]
    return result.strip()
# ==============================
# DEFINE GRADIO INTERFACE
# ==============================
title = "Bangla Whisper ASR (Chittagong Dialect)"

# Fix: the original literal contained a raw line break inside the quotes
# (a SyntaxError); the intended paragraph break is spelled "\n\n".
description = (
    "🎙️ Upload or record audio to transcribe Bangla (Chittagong dialect) speech "
    "using fine-tuned Whisper model.\n\n"
    "Model: **Sabbir772/BNWCH**"
)

# One audio input (microphone or file upload) mapped to one text output.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="numpy", label="🎧 Input Audio"),
    outputs=gr.Textbox(label="📝 Transcription", placeholder="Model output will appear here..."),
    title=title,
    description=description,
    allow_flagging="never",  # flagging UI is not useful for this demo
)
# ==============================
# LAUNCH APP
# ==============================
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()