# Gradio demos for speech-to-text input handling: one interface that
# processes a complete recording and one that processes a live microphone
# stream chunk by chunk. The transcription itself is a placeholder that
# echoes the first few normalized samples.

import gradio as gr
import numpy as np

# Number of leading samples echoed back by the placeholder "transcription".
_PREVIEW_SAMPLES = 10


def _to_normalized_mono(samples):
    """Return *samples* as a peak-normalized, mono, float32 array.

    Multi-dimensional input is averaged over axis 1 (the original code's
    stereo-to-mono step). Empty or all-zero input is returned unscaled
    instead of dividing by zero, which would fill the array with NaNs.
    """
    samples = np.asarray(samples)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    # astype() copies, so the in-place division below never touches the
    # caller's buffer.
    samples = samples.astype(np.float32)
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        samples /= peak
    return samples


def _preview_text(samples):
    """Format the placeholder transcription for *samples*."""
    return "Transcribed text: " + " ".join(
        str(value) for value in samples[:_PREVIEW_SAMPLES]
    )


def transcribe(audio):
    """Transcribe a complete recording (placeholder implementation).

    Args:
        audio: ``(sample_rate, samples)`` tuple as unpacked by the original
            code, or ``None`` when no audio was provided.

    Returns:
        The placeholder transcription string, or ``""`` when ``audio`` is
        ``None``.
    """
    if audio is None:
        return ""
    _sample_rate, samples = audio
    # Placeholder for the actual transcription logic.
    return _preview_text(_to_normalized_mono(samples))


# Gradio interface for full-context ASR.
demo_full_context = gr.Interface(
    transcribe,
    gr.Audio(sources=["microphone"]),
    "text",
)


def transcribe_stream(stream, new_chunk):
    """Append a streamed chunk to the running audio and transcribe it.

    Args:
        stream: Audio accumulated by previous calls (1-D float32 array), or
            ``None`` on the first call.
        new_chunk: ``(sample_rate, samples)`` tuple for the newest chunk, or
            ``None`` when no chunk was delivered.

    Returns:
        ``(stream, text)``: the updated accumulated audio to carry as state,
        and the placeholder transcription of it (``""`` while no audio has
        been accumulated yet).
    """
    if new_chunk is not None:
        _sample_rate, samples = new_chunk
        # NOTE: each chunk is peak-normalized on its own, as in the original
        # code, so relative loudness between chunks is not preserved.
        chunk = _to_normalized_mono(samples)
        stream = chunk if stream is None else np.concatenate([stream, chunk])

    if stream is None:
        return stream, ""
    # Placeholder for the actual transcription logic.
    return stream, _preview_text(stream)


# Gradio interface for streaming ASR.
demo_streaming = gr.Interface(
    transcribe_stream,
    ["state", gr.Audio(sources=["microphone"], streaming=True)],
    ["state", "text"],
    live=True,
)

# Launch the interfaces.
if __name__ == "__main__":
    # launch() blocks the main thread when run as a script, so the first
    # server is started non-blocking; otherwise the streaming demo would not
    # start until the full-context server had been shut down. The second
    # (blocking) launch keeps the process alive for both servers.
    demo_full_context.launch(show_error=True, prevent_thread_lock=True)
    demo_streaming.launch()