"""Live Gradio demo for openWakeWord.

Streams microphone audio through the pre-trained wake-word models and shows
the smoothed per-model detection scores in a horizontal Plotly bar chart.
"""

import collections
from functools import partial

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import scipy.signal

import openwakeword
from openwakeword.model import Model

# Download the pre-trained model files first (no-op if already cached).
openwakeword.utils.download_models()

# Load all bundled openWakeWord models using the ONNX inference backend.
model = Model(inference_framework="onnx")

# Models emit one score per 80 ms frame; this is 1280 samples at 16 kHz.
FRAME_SIZE = 1280
TARGET_RATE = 16000
# Number of recent frames averaged per model for a smoother display.
SMOOTHING_WINDOW = 10


def _new_state():
    """Fresh per-session score history: model name -> deque of recent scores."""
    return collections.defaultdict(partial(collections.deque, maxlen=SMOOTHING_WINDOW))


def _update_scores(data, state):
    """Run the wake-word models over *data* in 1280-sample frames.

    Appends each model's score for each complete frame to ``state``.
    Stereo input keeps only the first channel; a trailing partial frame
    is dropped.
    """
    for start in range(0, data.shape[0], FRAME_SIZE):
        if len(data.shape) == 2 or data.shape[-1] == 2:
            frame = data[start:start + FRAME_SIZE][:, 0]  # one channel only
        else:
            frame = data[start:start + FRAME_SIZE]
        if frame.shape[0] == FRAME_SIZE:  # skip incomplete trailing frame
            for name, score in model.predict(frame).items():
                state[name].append(score)


def _build_figure(state):
    """Build the horizontal bar chart of smoothed scores, highest first."""
    model_names = []
    scores = []
    for key in state.keys():
        if len(state[key]) > 0:
            model_names.append(key.replace('_', ' ').title())
            # Average the last few frames for a smoother display.
            scores.append(np.mean(list(state[key])))

    # Sort descending by score so the strongest detections are on top.
    if len(scores) > 0:
        sorted_indices = np.argsort(scores)[::-1]
        model_names = [model_names[i] for i in sorted_indices]
        scores = [scores[i] for i in sorted_indices]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=model_names,
        x=scores,
        orientation='h',
        marker=dict(
            color=scores,
            colorscale='Blues',
            cmin=0,
            cmax=1,
            line=dict(color='rgba(58, 71, 80, 0.6)', width=1)
        ),
        text=[f'{score:.3f}' for score in scores],
        textposition='outside',
        # '<br>' is Plotly's hovertemplate line break.
        hovertemplate='%{y}<br>Score: %{x:.3f}'
    ))
    fig.update_layout(
        title={
            'text': 'Real-time Wake Word Detection',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 18, 'color': '#2c3e50'}
        },
        xaxis=dict(
            title='Detection Score',
            range=[0, 1.1],
            gridcolor='rgba(200, 200, 200, 0.3)',
            showgrid=True
        ),
        yaxis=dict(
            title='',
            autorange='reversed'  # keep highest scores at top
        ),
        height=500,
        margin=dict(l=150, r=50, t=80, b=50),
        plot_bgcolor='rgba(240, 242, 245, 0.5)',
        paper_bgcolor='white',
        showlegend=False
    )
    return fig


def process_audio(audio, state=None):
    """Score one streamed audio chunk and return ``(figure, state)``.

    Args:
        audio: ``(sample_rate, samples)`` tuple from ``gr.Audio`` streaming;
            samples may be mono or stereo.
        state: Per-session score history (model name -> deque of recent
            scores), or ``None`` on the first call of a session.

    Returns:
        Tuple of the updated Plotly figure and the (possibly new) state.
    """
    # BUG FIX: the original used a mutable default argument, so every browser
    # session shared a single score history. Create one per session instead.
    if state is None:
        state = _new_state()

    sample_rate, samples = audio
    # Resample to the 16 kHz rate the models expect.
    if sample_rate != TARGET_RATE:
        data = scipy.signal.resample(
            samples, int(float(samples.shape[0]) / sample_rate * TARGET_RATE)
        )
    else:
        data = samples

    _update_scores(data, state)
    return _build_figure(state), state


desc = """This is a demo of the pre-trained models included in the latest release of the [openWakeWord](https://github.com/dscripka/openWakeWord) library. Click on the "record from microphone" button below to start capturing. The real-time scores from each model will be shown in the interactive bar chart (higher bars = stronger detection). Different models will respond to different wake words/phrases (see [the model docs](https://github.com/dscripka/openWakeWord/tree/main/docs/models) for more details). **Try these phrases:** - **alexa** - "alexa" - **hey mycroft** - "hey mycroft" - **hey jarvis** - "hey jarvis" - **hey rhasspy** - "hey rhasspy" - **weather** - "what's the weather", "tell me today's weather" - **timer** - "set a timer for 1 minute", "create 1 hour alarm" """

gr_int = gr.Interface(
    title="openWakeWord Live Demo",
    description=desc,
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
    fn=process_audio,
    inputs=[
        gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False),
        "state"
    ],
    outputs=[
        gr.Plot(show_label=False),
        "state"
    ],
    live=True
)

if __name__ == "__main__":
    gr_int.launch()