mirohristov committed on
Commit
fb4f4cb
·
verified ·
1 Parent(s): 92ae278

Fixed gr.Plot and sources to work with gradio>=3.16.0

Browse files
Files changed (1) hide show
  1. app.py +78 -89
app.py CHANGED
@@ -1,110 +1,99 @@
1
  import gradio as gr
2
- import json
3
- import pandas as pd
4
  import collections
5
  import scipy.signal
6
  import numpy as np
 
7
  from functools import partial
8
- from openwakeword.model import Model
9
-
10
- #################################################
11
 
12
  from openwakeword.utils import download_models
13
-
14
- # this will pull down all of the ONNX + TFLite wake-word models into
15
- # openwakeword/resources/models/
16
- download_models()
17
-
18
  from openwakeword.model import Model
19
- model = Model(inference_framework="onnx")
20
 
21
- ###############################################
 
22
 
23
- # Load openWakeWord models
24
  model = Model(inference_framework="onnx")
25
 
26
- # Define function to process audio
27
- def process_audio(audio, state=collections.defaultdict(partial(collections.deque, maxlen=60))):
28
- # Resample audio to 16khz if needed
29
- if audio[0] != 16000:
30
- data = scipy.signal.resample(audio[1], int(float(audio[1].shape[0])/audio[0]*16000))
31
-
32
- # Get predictions
33
- for i in range(0, data.shape[0], 1280):
34
- if len(data.shape) == 2 or data.shape[-1] == 2:
35
- chunk = data[i:i+1280][:, 0] # just get one channel of audio
36
- else:
37
- chunk = data[i:i+1280]
38
-
39
- if chunk.shape[0] == 1280:
40
- prediction = model.predict(chunk)
41
- for key in prediction:
42
- #Fill deque with zeros if it's empty
43
- if len(state[key]) == 0:
44
- state[key].extend(np.zeros(60))
45
-
46
- # Add prediction
47
- state[key].append(prediction[key])
48
-
49
- # Make line plot
50
- dfs = []
51
- for key in state.keys():
52
- df = pd.DataFrame({"x": np.arange(len(state[key])), "y": state[key], "Model": key})
53
- dfs.append(df)
54
-
55
- df = pd.concat(dfs)
56
- plot = gr.LinePlot().update(value = df, x='x', y='y', color="Model", y_lim = (0,1), tooltip="Model",
57
- width=600, height=300, x_title="Time (frames)", y_title="Model Score", color_legend_position="bottom")
58
-
59
- # Manually adjust how the legend is displayed
60
- tmp = json.loads(plot["value"]["plot"])
61
- tmp["layer"][0]['encoding']['color']['legend']["direction"] = "vertical"
62
- tmp["layer"][0]['encoding']['color']['legend']["columns"] = 4
63
- tmp["layer"][0]['encoding']['color']['legend']["labelFontSize"] = 12
64
- tmp["layer"][0]['encoding']['color']['legend']["titleFontSize"] = 14
65
-
66
- plot["value"]['plot'] = json.dumps(tmp)
67
-
68
- return plot, state
69
-
70
- # Create Gradio interface and launch
71
-
72
- desc = """
73
- This is a demo of the pre-trained models included in the latest release
74
- of the [openWakeWord](https://github.com/dscripka/openWakeWord) library.
75
-
76
- Click on the "record from microphone" button below to start capturing.
77
- The real-time scores from each model will be shown in the line plot. Hover over
78
- each line to see the name of the corresponding model.
79
-
80
- Different models will respond to different wake words/phrases (see [the model docs](https://github.com/dscripka/openWakeWord/tree/main/docs/models) for more details).
81
- If everything is working properly,
82
- you should see a spike in the score for a given model after speaking a related word/phrase. Below are some suggested phrases to try!
83
-
84
- | Model Name | Word/Phrase |
85
- | --- | --- |
86
- | alexa | "alexa" |
87
- | hey_mycroft | "hey mycroft"|
88
- | hey_jarvis | "hey jarvis"|
89
- | hey_rhasspy | "hey rhasspy"|
90
- | weather | "what's the weather", "tell me today's weather" |
91
- | x_minute_timer | "set a timer for 1 minute", "create 1 hour alarm" |
92
-
93
  """
94
 
95
- gr_int = gr.Interface(
96
- title = "openWakeWord Live Demo",
97
- description = desc,
98
- css = ".flex {flex-direction: column} .gr-panel {width: 100%}",
99
  fn=process_audio,
 
 
100
  inputs=[
101
  gr.Audio(sources=["microphone"], type="numpy", streaming=True, show_label=False),
102
- "state"
103
  ],
104
  outputs=[
105
- gr.LinePlot(show_label=False),
106
- "state"
 
107
  ],
108
- live=True)
 
 
109
 
110
- gr_int.launch()
 
 
import gradio as gr
import collections
import scipy.signal
import numpy as np
import matplotlib.pyplot as plt
from functools import partial

from openwakeword.utils import download_models
from openwakeword.model import Model

# Download all ONNX + TFLite models once
download_models()

# Initialize the ONNX-based wake-word model
model = Model(inference_framework="onnx")

# Factory for per-model rolling buffers: each model name maps to a deque
# holding the last 60 frame scores (older scores fall off automatically).
# NOTE(review): passed to gr.State below — presumably Gradio deep-copies it
# per session so users don't share buffers; confirm for the pinned version.
initial_state = collections.defaultdict(partial(collections.deque, maxlen=60))
19
+
20
def process_audio(audio, state):
    """Score one streamed microphone chunk against every wake-word model.

    Parameters
    ----------
    audio : tuple[int, np.ndarray] | None
        ``(sample_rate, samples)`` from the streaming ``gr.Audio`` input.
        ``samples`` may be 1-D mono or 2-D ``(n, channels)``. May be None
        when the stream delivers an empty event.
    state : collections.defaultdict[str, collections.deque]
        Per-session rolling buffers of the last 60 scores per model.

    Returns
    -------
    tuple
        ``(matplotlib figure, updated state, detection message)`` —
        matching the Interface outputs (Plot, State, Textbox).
    """
    detected_msg = ""  # Will hold our "Detected X!" text

    # Guard against an empty streaming event; still redraw the plot below.
    if audio is not None:
        sr, samples = audio
        # Resample if not 16 kHz (openwakeword models expect 16 kHz input).
        # NOTE(review): resample returns float64 even for int16 input —
        # dtype then differs between the two paths; confirm model accepts both.
        if sr != 16000:
            samples = scipy.signal.resample(samples, int(len(samples) / sr * 16000))

        # Slide over the audio in 1280-sample (80 ms) windows.
        for i in range(0, len(samples), 1280):
            chunk = samples[i : i + 1280]
            # Collapse ANY 2-D chunk to channel 0. The previous check
            # (`ndim == 2 and shape[1] == 2`) missed (n, 1)-shaped mono
            # arrays, which would reach model.predict() still 2-D.
            if chunk.ndim == 2:
                chunk = chunk[:, 0]
            # Only score full windows; a trailing partial chunk is dropped.
            if len(chunk) == 1280:
                preds = model.predict(chunk)
                for name, score in preds.items():
                    # Prime with zeros the first time so the plot starts flat.
                    if len(state[name]) == 0:
                        state[name].extend(np.zeros(60))
                    state[name].append(score)

                    # Simple threshold trigger; report only the first hit.
                    if score > 0.8 and not detected_msg:
                        detected_msg = f"🗣 Detected **{name}**!"

    # Build the score plot from the rolling buffers.
    fig, ax = plt.subplots()
    for name, dq in state.items():
        ax.plot(np.arange(len(dq)), list(dq), label=name)

    ax.set_ylim(0, 1)
    ax.set_xlabel("Time (frames)")
    ax.set_ylabel("Model Score")

    # Only add a legend if at least one line has a label.
    if state:
        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")

    plt.tight_layout()

    # Close to release pyplot's reference and avoid a per-call memory leak;
    # Gradio can still render the (closed) figure object we return.
    plt.close(fig)

    return fig, state, detected_msg
67
+
68
# Markdown shown above the interface: usage hint plus the wake-word table.
# Runtime string — rendered verbatim in the UI; do not edit casually.
description = """
Speak one of the wake-words into your mic and watch its score spike!

| Model Name | Phrase |
|----------------|------------------------------------|
| alexa | "alexa" |
| hey_mycroft | "hey mycroft" |
| hey_jarvis | "hey jarvis" |
| hey_rhasspy | "hey rhasspy" |
| weather | "what's the weather" |
| x_minute_timer | "set a timer for 1 minute" |
"""
80
 
81
# Assemble the Gradio app: streaming mic input feeds process_audio live;
# outputs are the rolling score plot, the session state, and a detection box.
_mic = gr.Audio(sources=["microphone"], type="numpy", streaming=True, show_label=False)
_plot = gr.Plot(label="Model Scores")
_detection = gr.Textbox(label="Detection", interactive=False)

iface = gr.Interface(
    fn=process_audio,
    inputs=[_mic, gr.State(initial_state)],
    outputs=[_plot, gr.State(), _detection],
    title="openWakeWord Live Demo",
    description=description,
    live=True,
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
)

# Launch only when run as a script (not when imported).
if __name__ == "__main__":
    iface.launch()