import gradio as gr
from transformers import Wav2Vec2CTCTokenizer, pipeline
|
|
# Load the CTC tokenizer for the fine-tuned Odia wav2vec2 checkpoint.
# The special tokens ([UNK], [PAD], "|" word delimiter) must match the
# vocabulary the model was fine-tuned with.
# NOTE(review): task="transcribe" is a Whisper-style argument;
# Wav2Vec2CTCTokenizer does not define it, so it is likely ignored —
# confirm against the transformers version in use before removing.
preTrainedTokenizer = Wav2Vec2CTCTokenizer.from_pretrained("sukantan/wav2vec2-large-xls-r-300m-or-colab", unk_token="[UNK]", pad_token="[PAD]", word_delimiter_token="|", task="transcribe")

# Build an automatic-speech-recognition pipeline around the same checkpoint,
# pairing the model weights with the tokenizer loaded above.
pipe = pipeline(model="sukantan/wav2vec2-large-xls-r-300m-or-colab", tokenizer=preTrainedTokenizer)
|
|
def transcribe(audio):
    """Run speech recognition on an audio file and return the cleaned text.

    Parameters
    ----------
    audio : str
        Path to the recorded audio file (Gradio passes a filepath because
        the Audio input is configured with ``type="filepath"``).

    Returns
    -------
    str
        The transcription produced by the wav2vec2 pipeline, with any
        literal ``<s>`` token markers stripped out.
    """
    # The ASR pipeline returns a dict; the transcription lives under "text".
    text = pipe(audio)["text"]
    # The fine-tuned tokenizer sometimes emits the "<s>" special token in
    # its decoded output; remove every occurrence before display.
    text = text.replace("<s>", "")
    return text
|
|
# Wire the transcribe() function into a simple Gradio web UI:
# microphone in, plain text out.
# NOTE(review): `source="microphone"` is the Gradio 3.x parameter name;
# Gradio 4.x renamed it to `sources=["microphone"]` — confirm the pinned
# gradio version before upgrading.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Wav2Vec2 Odia",
    description="Realtime demo for Odia speech recognition using a fine-tuned wav2vec2-large-xls-r-300m model.",
)


# Start the local Gradio server (blocks until interrupted).
iface.launch()
|
|