import gradio as gr from transformers import pipeline import numpy as np transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en", return_timestamps=True) def transcribe(stream, new_chunk): if stream is None: return "" sr, y = stream # Convert to mono if stereo if y.ndim > 1: y = y.mean(axis=1) y = y.astype(np.float32) y /= np.max(np.abs(y)) text = transcriber({"sampling_rate": sr, "raw": y})["text"] return text def clear(audio, transcribed): audio = None transcribed = None return audio, transcribed with gr.Blocks() as demo: gr.HTML(value="