import gradio as gr
import numpy as np
from transformers import pipeline
# ==============================
# CONFIG
# ==============================
# Hugging Face Hub repo id of the fine-tuned Whisper checkpoint.
MODEL_PATH = "Sabbir772/BNWCH"
LANGUAGE = "bn" # Bengali
TASK = "transcribe"  # Whisper task: "transcribe" (keep language) vs. "translate"
# ==============================
# LOAD MODEL
# ==============================
print(f"🔍 Loading model from {MODEL_PATH} ...")
# Build the ASR pipeline once at import time; long audio is processed in
# 30-second chunks by the pipeline itself.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_PATH,
    tokenizer=MODEL_PATH,
    chunk_length_s=30, # You can tune this for performance
    device=-1 # Use CPU on Spaces unless GPU is enabled
)
# Pin decoding to Bengali transcription instead of letting Whisper
# auto-detect the language or translate.
# NOTE(review): `forced_decoder_ids` is deprecated in recent transformers
# versions in favor of generate(language=..., task=...) — confirm against
# the installed transformers version.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language=LANGUAGE, task=TASK
)
print("✅ Model loaded successfully!\n")
# ==============================
# DEFINE INFERENCE FUNCTION
# ==============================
def transcribe(audio):
    """Transcribe a Gradio audio input with the Whisper pipeline.

    Args:
        audio: ``(sample_rate, data)`` tuple as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when no audio was given.

    Returns:
        The stripped transcription text, or a placeholder message when no
        audio is provided.
    """
    if audio is None:
        return "No audio provided."

    sr, data = audio

    # Gradio delivers PCM as integers (typically int16) and possibly stereo;
    # the ASR pipeline expects mono float32 samples in [-1.0, 1.0].
    if np.issubdtype(data.dtype, np.integer):
        data = data.astype(np.float32) / np.iinfo(data.dtype).max
    else:
        data = data.astype(np.float32)
    if data.ndim > 1:
        data = data.mean(axis=1)

    # Pass the sample rate explicitly so the pipeline resamples to the
    # model's expected rate (16 kHz for Whisper). Passing the bare array
    # made the feature extractor assume its default rate, which mangles
    # microphone input recorded at 44.1/48 kHz.
    result = pipe({"sampling_rate": sr, "raw": data})["text"]
    return result.strip()
# ==============================
# DEFINE GRADIO INTERFACE
# ==============================
title = "Bangla Whisper ASR (Chittagong Dialect)"

# Fix: the original literal contained a raw line break inside the quotes
# (a SyntaxError); the intended paragraph break is spelled "\n\n".
description = (
    "🎙️ Upload or record audio to transcribe Bangla (Chittagong dialect) speech "
    "using fine-tuned Whisper model.\n\n"
    "Model: **Sabbir772/BNWCH**"
)

# One audio input (microphone or file upload) mapped to one text output.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="numpy", label="🎧 Input Audio"),
    outputs=gr.Textbox(label="📝 Transcription", placeholder="Model output will appear here..."),
    title=title,
    description=description,
    allow_flagging="never",  # flagging UI is not useful for this demo
)
# ==============================
# LAUNCH APP
# ==============================
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()