Spaces:
Sleeping
Sleeping
File size: 4,191 Bytes
3d4323f de66b6c 24ad263 3d4323f 929234a 3d4323f 8e966ac 3d4323f 37fb9d6 3d4323f 37fb9d6 3d4323f 987011f 37fb9d6 987011f 3d4323f 987011f 3d4323f 37fb9d6 3d4323f 37fb9d6 3d4323f 37fb9d6 24ad263 e224dd8 24ad263 e224dd8 24ad263 e224dd8 24ad263 3d4323f e224dd8 3d4323f 37fb9d6 42fc253 24ad263 42fc253 37fb9d6 42fc253 3d4323f 37fb9d6 3d4323f 37fb9d6 3d4323f e224dd8 3d4323f 37fb9d6 3d4323f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import gradio as gr
import pandas as pd
import collections
import scipy.signal
import numpy as np
import plotly.graph_objects as go
from functools import partial
from openwakeword.model import Model
# Download models first
import openwakeword
# Fetch the pre-trained wake-word model files on first run (no-op if cached).
openwakeword.utils.download_models()
# Load openWakeWord models
# Module-level singleton shared by every process_audio() call.
model = Model(inference_framework="onnx")
# Define function to process audio.
# NOTE: the mutable default argument below is deliberate — legacy Gradio
# Interfaces seed their "state" input from the function's default value,
# so this defaultdict is the initial per-session rolling-score store.
def process_audio(audio, state=collections.defaultdict(partial(collections.deque, maxlen=10))):
    """Score one streamed audio chunk against all wake-word models and plot.

    Parameters
    ----------
    audio : tuple
        ``(sample_rate, samples)`` as produced by ``gr.Audio(type="numpy")``.
    state : defaultdict
        Maps model name -> deque of the last 10 scores; carried across calls
        by Gradio's "state" mechanism.

    Returns
    -------
    tuple
        ``(plotly.graph_objects.Figure, state)`` — horizontal bar chart of
        the smoothed detection scores, plus the updated state.
    """
    sample_rate, samples = audio

    # Resample audio to the 16 kHz rate the models expect, if needed.
    if sample_rate != 16000:
        data = scipy.signal.resample(
            samples, int(float(samples.shape[0]) / sample_rate * 16000)
        )
    else:
        data = samples

    # Down-mix stereo to one channel ONCE, before chunking. The original
    # re-tested every chunk and used `shape[-1] == 2`, which misfires on
    # 1-D audio that happens to be exactly 2 samples long; ndim is the
    # correct dimensionality test.
    if data.ndim == 2:
        data = data[:, 0]

    # Feed the models in 1280-sample (80 ms @ 16 kHz) frames, dropping any
    # trailing partial frame, and append each model's score to its window.
    for start in range(0, data.shape[0] - 1279, 1280):
        prediction = model.predict(data[start:start + 1280])
        for key, score in prediction.items():
            state[key].append(score)

    # Create bar chart with average of recent predictions (smoother display).
    model_names = []
    scores = []
    for key, window in state.items():
        if len(window) > 0:
            model_names.append(key.replace('_', ' ').title())
            scores.append(np.mean(list(window)))

    # Sort descending by score so the strongest detections are listed first.
    if len(scores) > 0:
        sorted_indices = np.argsort(scores)[::-1]
        model_names = [model_names[i] for i in sorted_indices]
        scores = [scores[i] for i in sorted_indices]

    # Create Plotly figure with a horizontal bar chart of the scores.
    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=model_names,
        x=scores,
        orientation='h',
        marker=dict(
            color=scores,
            colorscale='Blues',
            cmin=0,
            cmax=1,
            line=dict(color='rgba(58, 71, 80, 0.6)', width=1)
        ),
        text=[f'{score:.3f}' for score in scores],
        textposition='outside',
        hovertemplate='<b>%{y}</b><br>Score: %{x:.3f}<extra></extra>'
    ))

    # Layout: centered title, fixed 0-1.1 score axis, highest score on top.
    fig.update_layout(
        title={
            'text': 'Real-time Wake Word Detection',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 18, 'color': '#2c3e50'}
        },
        xaxis=dict(
            title='Detection Score',
            range=[0, 1.1],
            gridcolor='rgba(200, 200, 200, 0.3)',
            showgrid=True
        ),
        yaxis=dict(
            title='',
            autorange='reversed'  # Keep highest scores at top
        ),
        height=500,
        margin=dict(l=150, r=50, t=80, b=50),
        plot_bgcolor='rgba(240, 242, 245, 0.5)',
        paper_bgcolor='white',
        showlegend=False
    )
    return fig, state
# Create Gradio interface and launch
# Markdown shown above the demo; Gradio renders `description` as markdown.
desc = """This is a demo of the pre-trained models included in the latest release
of the [openWakeWord](https://github.com/dscripka/openWakeWord) library.
Click on the "record from microphone" button below to start capturing.
The real-time scores from each model will be shown in the interactive bar chart (higher bars = stronger detection).
Different models will respond to different wake words/phrases (see [the model docs](https://github.com/dscripka/openWakeWord/tree/main/docs/models) for more details).
**Try these phrases:**
- **alexa** - "alexa"
- **hey mycroft** - "hey mycroft"
- **hey jarvis** - "hey jarvis"
- **hey rhasspy** - "hey rhasspy"
- **weather** - "what's the weather", "tell me today's weather"
- **timer** - "set a timer for 1 minute", "create 1 hour alarm"
"""
# Wire process_audio into a live (streaming) interface: the "state" sentinel
# strings make Gradio thread the returned state back into the next call.
# NOTE(review): `source=` on gr.Audio is the Gradio 3.x API (4.x renamed it
# to `sources=[...]`) — confirm the pinned gradio version before upgrading.
gr_int = gr.Interface(
    title="openWakeWord Live Demo",
    description=desc,
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
    fn=process_audio,
    inputs=[
        # Streaming microphone capture delivered as (sample_rate, np.ndarray).
        gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False),
        "state"
    ],
    outputs=[
        gr.Plot(show_label=False),
        "state"
    ],
    live=True  # re-invoke fn on every incoming audio chunk
)
gr_int.launch()