"""Gradio Space: Bangla (Chittagong dialect) speech-to-text with a fine-tuned Whisper model."""

import gradio as gr
import numpy as np
from transformers import pipeline

# ==============================
# CONFIG
# ==============================
MODEL_PATH = "Sabbir772/BNWCH"
LANGUAGE = "bn"  # Bengali
TASK = "transcribe"

# ==============================
# LOAD MODEL
# ==============================
print(f"🔍 Loading model from {MODEL_PATH} ...")
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_PATH,
    tokenizer=MODEL_PATH,
    chunk_length_s=30,  # You can tune this for performance
    device=-1,  # Use CPU on Spaces unless GPU is enabled
)
# Pin decoding to Bengali transcription so Whisper does not
# auto-detect the language or fall back to translation.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language=LANGUAGE, task=TASK
)
print("✅ Model loaded successfully!\n")


# ==============================
# DEFINE INFERENCE FUNCTION
# ==============================
def transcribe(audio):
    """Transcribe a Gradio audio input with the Whisper pipeline.

    Parameters
    ----------
    audio : tuple[int, numpy.ndarray] | None
        ``(sample_rate, samples)`` as produced by ``gr.Audio(type="numpy")``,
        or ``None`` when no audio was provided.

    Returns
    -------
    str
        The stripped transcription, or a placeholder message for empty input.
    """
    if audio is None:
        return "No audio provided."

    # Gradio passes (sample_rate, data)
    sr, data = audio

    # Down-mix stereo to mono — the feature extractor expects one channel.
    if data.ndim > 1:
        data = data.mean(axis=1)

    # Gradio delivers int16 PCM; the pipeline expects float32 in [-1, 1].
    if np.issubdtype(data.dtype, np.integer):
        data = data.astype(np.float32) / np.iinfo(data.dtype).max
    else:
        data = data.astype(np.float32)

    # BUG FIX: pass the actual sampling rate. Previously `data` was handed to
    # the pipeline alone, so it assumed the model's default rate (16 kHz)
    # while browser microphones record at 44.1/48 kHz — garbling the output.
    # With "sampling_rate" supplied, the pipeline resamples correctly.
    result = pipe({"sampling_rate": sr, "raw": data})["text"]
    return result.strip()


# ==============================
# DEFINE GRADIO INTERFACE
# ==============================
title = "Bangla Whisper ASR (Chittagong Dialect)"
# BUG FIX: the original literal was broken across physical lines by raw
# newlines (a syntax error); reconstructed with explicit "\n\n".
description = (
    "🎙️ Upload or record audio to transcribe Bangla (Chittagong dialect) speech "
    "using fine-tuned Whisper model.\n\n"
    "Model: **Sabbir772/BNWCH**"
)

demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="numpy", label="🎧 Input Audio"),
    outputs=gr.Textbox(label="📝 Transcription", placeholder="Model output will appear here..."),
    title=title,
    description=description,
    allow_flagging="never",
)

# ==============================
# LAUNCH APP
# ==============================
if __name__ == "__main__":
    demo.launch()