# openWakeWord / app.py
# mirohristov's picture
# Added [FIXED] keyword ;)
# afec200 verified
import gradio as gr
import collections
import scipy.signal
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
from openwakeword.utils import download_models
from openwakeword.model import Model
# Fetch the ONNX + TFLite model files up front; this runs once at import time
# and must precede the Model() construction below.
download_models()

# Module-level wake-word model using the ONNX inference framework.
model = Model(inference_framework="onnx")

# Initial value handed to gr.State: looking up a model name that has not been
# seen yet creates an empty deque capped at 60 entries, so each model's score
# history behaves as a fixed-length rolling buffer.
_new_score_buffer = partial(collections.deque, maxlen=60)
initial_state = collections.defaultdict(_new_score_buffer)
def process_audio(audio, state):
    """Score one streamed audio chunk and redraw the score-history plot.

    Args:
        audio: ``(sample_rate, samples)`` pair as produced by the
            ``gr.Audio(type="numpy")`` input, or ``None`` when the callback
            is invoked without any audio.
        state: Mapping of model name -> deque of recent scores. It is
            mutated in place and also returned so the caller can store it.

    Returns:
        A ``(figure, state, detected_msg)`` tuple: the matplotlib figure with
        one line per model, the updated state, and a message naming the first
        model whose score exceeded the threshold in this call (empty string
        if none did).
    """
    target_sr = 16000      # sample rate the audio is converted to before scoring
    frame_len = 1280       # samples handed to model.predict per call
    history_len = 60       # number of zero scores used to prime a new history
    threshold = 0.8        # score above which a detection is reported

    detected_msg = ""  # Will hold our "Detected X!" text

    # The callback can be invoked with no audio; unpacking None would raise
    # TypeError, so in that case skip scoring and just redraw the history.
    if audio is not None:
        sr, samples = audio
        samples = np.asarray(samples)

        # Multi-channel -> mono: keep the first channel. Done once up front
        # (instead of per frame) so resampling only processes one channel.
        if samples.ndim == 2:
            samples = samples[:, 0]

        # Resample if not 16 kHz; guard against a zero-length request.
        if sr != target_sr and len(samples):
            target_len = int(len(samples) / sr * target_sr)
            if target_len:
                samples = scipy.signal.resample(samples, target_len)
            else:
                samples = samples[:0]

        # Walk the audio in full frames only.
        # NOTE(review): a trailing partial frame is dropped, as in the
        # original code, rather than carried over to the next call.
        for start in range(0, len(samples) - frame_len + 1, frame_len):
            frame = samples[start:start + frame_len]
            preds = model.predict(frame)
            for name, score in preds.items():
                history = state[name]
                # Prime with zeros the first time so the plot starts full width.
                if not history:
                    history.extend(np.zeros(history_len))
                history.append(score)
                # Report only the first model that crosses the threshold.
                if score > threshold and not detected_msg:
                    detected_msg = f"🗣 Detected **{name}**!"

    # Build the plot
    fig, ax = plt.subplots()
    for name, history in state.items():
        ax.plot(np.arange(len(history)), list(history), label=name)
    ax.set_ylim(0, 1)
    ax.set_xlabel("Time (frames)")
    ax.set_ylabel("Model Score")
    # Only add a legend if at least one line has a label
    if state:
        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")
    # Lay out this specific figure rather than pyplot's implicit current one.
    fig.tight_layout()

    # Unregister the figure from pyplot so figures do not pile up across
    # calls; the Figure object itself is still returned for rendering.
    plt.close(fig)
    return fig, state, detected_msg
# Markdown text passed to gr.Interface as the app description: a one-line
# usage hint followed by a table mapping each model name to its spoken phrase.
description = """
Speak one of the wake-words into your mic and watch its score spike!
| Model Name | Phrase |
|----------------|------------------------------------|
| alexa | "alexa" |
| hey_mycroft | "hey mycroft" |
| hey_jarvis | "hey jarvis" |
| hey_rhasspy | "hey rhasspy" |
| weather | "what's the weather" |
| x_minute_timer | "set a timer for 1 minute" |
"""
# Wire the streaming microphone input to process_audio. Inputs and outputs are
# positional: (audio, state) in, (plot, state, detection text) out.
iface = gr.Interface(
    title="openWakeWord Live Demo [FIXED]",
    description=description,
    fn=process_audio,
    live=True,
    inputs=[
        # Microphone stream, delivered to the callback as numpy data.
        gr.Audio(
            sources=["microphone"],
            type="numpy",
            streaming=True,
            show_label=False,
        ),
        # Score history, seeded from initial_state.
        gr.State(initial_state),
    ],
    outputs=[
        gr.Plot(label="Model Scores"),
        # Receives the updated score history returned by the callback.
        gr.State(),
        gr.Textbox(label="Detection", interactive=False),
    ],
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()