File size: 4,191 Bytes
3d4323f
 
 
 
de66b6c
24ad263
3d4323f
 
 
929234a
 
 
 
3d4323f
8e966ac
3d4323f
 
37fb9d6
3d4323f
 
37fb9d6
 
 
3d4323f
 
987011f
 
 
 
 
37fb9d6
987011f
3d4323f
987011f
 
3d4323f
37fb9d6
 
 
 
3d4323f
37fb9d6
 
 
 
3d4323f
37fb9d6
24ad263
 
 
 
e224dd8
24ad263
 
e224dd8
24ad263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e224dd8
24ad263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d4323f
e224dd8
3d4323f
 
37fb9d6
42fc253
 
 
24ad263
42fc253
 
 
37fb9d6
 
 
 
 
 
 
42fc253
 
3d4323f
37fb9d6
 
 
3d4323f
 
37fb9d6
3d4323f
 
 
e224dd8
3d4323f
 
37fb9d6
 
3d4323f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import gradio as gr
import pandas as pd
import collections
import scipy.signal
import numpy as np
import plotly.graph_objects as go
from functools import partial
from openwakeword.model import Model

# Fetch the pre-trained wake word model files (network I/O on the first run;
# files are cached locally afterwards).
import openwakeword
openwakeword.utils.download_models()

# Load every bundled openWakeWord model using the ONNX inference backend.
# NOTE(review): this single module-level Model instance is shared by all
# Gradio sessions — concurrent users would interleave audio into the same
# model buffers; confirm this demo is intended as single-user.
model = Model(inference_framework="onnx")

# Define function to process audio
def process_audio(audio, state=collections.defaultdict(partial(collections.deque, maxlen=10))):
    """Score streaming microphone audio against every loaded wake word model.

    Args:
        audio: Gradio streaming tuple ``(sample_rate, samples)``; ``samples``
            is a 1-D mono or 2-D ``(n_samples, n_channels)`` numpy array.
        state: Rolling history of recent scores — one bounded deque
            (maxlen=10) per model name. The mutable default is deliberate:
            Gradio's ``"state"`` input uses the parameter default as the
            initial session state, and the updated dict is returned and fed
            back in on the next streaming call.

    Returns:
        tuple: (plotly horizontal-bar figure of smoothed scores, state).
    """
    # Defensive: some Gradio versions hand an uninitialized state in as None.
    if state is None:
        state = collections.defaultdict(partial(collections.deque, maxlen=10))

    # Resample to the 16 kHz rate openWakeWord expects.
    if audio[0] != 16000:
        data = scipy.signal.resample(audio[1], int(float(audio[1].shape[0])/audio[0]*16000))
    else:
        data = audio[1]

    # Collapse multi-channel audio to channel 0 once, up front.
    # BUG FIX: the original check (`len(data.shape) == 2 or data.shape[-1] == 2`)
    # applied 2-D indexing to 1-D audio whenever exactly two samples arrived,
    # raising IndexError; `ndim` is the correct stereo test.
    if data.ndim == 2:
        data = data[:, 0]

    # Score each complete 80 ms frame (1280 samples @ 16 kHz); the trailing
    # partial frame, if any, is dropped — same behavior as the original's
    # `chunk.shape[0] == 1280` guard.
    for i in range(0, data.shape[0] - 1279, 1280):
        prediction = model.predict(data[i:i + 1280])
        for key, score in prediction.items():
            state[key].append(score)

    # Average the recent history per model for a smoother display.
    model_names = []
    scores = []
    for key, history in state.items():
        if len(history) > 0:
            model_names.append(key.replace('_', ' ').title())
            scores.append(np.mean(list(history)))

    # Sort descending by score so the strongest detections lead the chart.
    if scores:
        order = np.argsort(scores)[::-1]
        model_names = [model_names[i] for i in order]
        scores = [scores[i] for i in order]

    # Build the horizontal bar chart of current scores.
    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=model_names,
        x=scores,
        orientation='h',
        marker=dict(
            color=scores,
            colorscale='Blues',
            cmin=0,
            cmax=1,
            line=dict(color='rgba(58, 71, 80, 0.6)', width=1)
        ),
        text=[f'{score:.3f}' for score in scores],
        textposition='outside',
        hovertemplate='<b>%{y}</b><br>Score: %{x:.3f}<extra></extra>'
    ))

    fig.update_layout(
        title={
            'text': 'Real-time Wake Word Detection',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 18, 'color': '#2c3e50'}
        },
        xaxis=dict(
            title='Detection Score',
            range=[0, 1.1],
            gridcolor='rgba(200, 200, 200, 0.3)',
            showgrid=True
        ),
        yaxis=dict(
            title='',
            autorange='reversed'  # keep highest scores at the top
        ),
        height=500,
        margin=dict(l=150, r=50, t=80, b=50),
        plot_bgcolor='rgba(240, 242, 245, 0.5)',
        paper_bgcolor='white',
        showlegend=False
    )

    return fig, state

# Markdown description rendered at the top of the Gradio page: project link,
# usage instructions, and example phrases for each bundled model.
desc = """This is a demo of the pre-trained models included in the latest release
of the [openWakeWord](https://github.com/dscripka/openWakeWord) library.

Click on the "record from microphone" button below to start capturing.
The real-time scores from each model will be shown in the interactive bar chart (higher bars = stronger detection).

Different models will respond to different wake words/phrases (see [the model docs](https://github.com/dscripka/openWakeWord/tree/main/docs/models) for more details).

**Try these phrases:**
- **alexa** - "alexa"
- **hey mycroft** - "hey mycroft"
- **hey jarvis** - "hey jarvis"
- **hey rhasspy** - "hey rhasspy"
- **weather** - "what's the weather", "tell me today's weather"
- **timer** - "set a timer for 1 minute", "create 1 hour alarm"
"""

# Wire up the streaming interface: live microphone audio in, a continuously
# refreshed score plot out. The "state" input/output pair round-trips the
# per-session score history through process_audio on every chunk.
# NOTE(review): `gr.Audio(source=...)` is the Gradio 3.x API; Gradio 4+
# renamed it to `sources=[...]` — confirm against the pinned gradio version.
gr_int = gr.Interface(
    title="openWakeWord Live Demo",
    description=desc,
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
    fn=process_audio,
    inputs=[
        gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False),
        "state"
    ],
    outputs=[
        gr.Plot(show_label=False),
        "state"
    ],
    live=True  # re-run process_audio automatically as audio chunks arrive
)

# Start the web server (blocks until the process is stopped).
gr_int.launch()