"""Gradio demo: Bengali speech recognition from microphone input."""

import os

import gradio as gr
import pytube as pt
from transformers import pipeline

# Built once at import time; transcribe() reuses this module-level pipeline
# on every call instead of reloading the model.
pipe = pipeline(model="Redve/BengaliModel")


def transcribe(audio):
    """Transcribe a recorded audio clip to text.

    Args:
        audio: Path to the audio file produced by the ``gr.Audio`` input
            (configured below with ``type="filepath"``). May be ``None``
            when the form is submitted without a recording.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a readable
            message instead of an opaque pipeline failure.
    """
    if audio is None:
        raise gr.Error("No audio was recorded. Please record a clip and try again.")

    text = pipe(audio)["text"]
    print(text)  # Echo the transcription to stdout (server log).
    return text


# --- Disabled: transcription from a YouTube URL ------------------------------
# Kept for reference; the `pytube` and `os` imports above exist for this path.
#
# def video(url):
#     video = pt.YouTube(url)
#     out_path = video.streams.filter(only_audio=True).first().download()
#     name = os.path.splitext(out_path)[0]
#     mp3_path = name + '.mp3'
#     text = pipe(mp3_path)["text"]
#     return text


# Microphone-only interface: the recording is passed to transcribe() as a
# file path and the returned string is shown as plain text.
audio = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Whisper Small Bengali",
    description="Realtime demo for Bengali speech recognition using a fine-tuned Whisper small model.",
)

# video = gr.Interface(
#     fn=video,
#     inputs="text",
#     outputs="text",
#     title="Whisper Small Bengali",
#     description="Realtime demo for Bengali speech recognition using a fine-tuned Whisper small model.",
# )

# Launched at module level (no __main__ guard) to preserve the original
# script's behavior when executed or imported by a hosting runtime.
audio.launch()
# video.launch()