import gradio as gr
from transformers import pipeline

# --- Configuration --------------------------------------------------------
# Hugging Face repo of the fine-tuned Whisper checkpoint
# (Bangla, Chittagong dialect).
MODEL_PATH = "Sabbir772/BNWCH"
LANGUAGE = "bn"      # Whisper language token for Bangla
TASK = "transcribe"  # transcribe in the source language (not "translate")
| |
|
| | |
| | |
| | |
# --- Model loading (runs once at import / startup) -------------------------
print(f"🔄 Loading model from {MODEL_PATH} ...")
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_PATH,
    tokenizer=MODEL_PATH,
    chunk_length_s=30,  # long-form audio is processed in 30 s chunks
    device=-1,          # -1 = CPU; set to a GPU index to accelerate
)
# Force Whisper to decode Bangla transcription rather than auto-detecting
# the language or translating to English.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language=LANGUAGE, task=TASK
)
print("✅ Model loaded successfully!\n")
| |
|
| | |
| | |
| | |
def transcribe(audio):
    """Transcribe a Gradio audio input with the Whisper pipeline.

    Parameters
    ----------
    audio : tuple[int, numpy.ndarray] | None
        ``(sample_rate, samples)`` as produced by ``gr.Audio(type="numpy")``,
        or ``None`` when the user submitted no audio.

    Returns
    -------
    str
        The stripped transcription, or a placeholder message for empty input.
    """
    if audio is None:
        return "No audio provided."

    sample_rate, samples = audio
    # Pass the sampling rate along with the raw samples. Passing the bare
    # array makes the pipeline assume the model's default rate (16 kHz),
    # which mis-transcribes 44.1/48 kHz microphone recordings.
    text = pipe({"sampling_rate": sample_rate, "raw": samples})["text"]
    return text.strip()
| |
|
| | |
| | |
| | |
# --- Gradio UI -------------------------------------------------------------
title = "Bangla Whisper ASR (Chittagong Dialect)"
description = (
    "🎙️ Upload or record audio to transcribe Bangla (Chittagong dialect) speech "
    "using fine-tuned Whisper model. <br><br>"
    "Model: **Sabbir772/BNWCH**"
)

demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="numpy", label="🎧 Input Audio"),
    outputs=gr.Textbox(label="📝 Transcription", placeholder="Model output will appear here..."),
    title=title,
    description=description,
    allow_flagging="never",  # hide Gradio's flagging button
)
| |
|
| | |
| | |
| | |
# Launch the web UI only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()
| |
|