sound-broken / app.py
mitvho09's picture
Upload Space app
edb671a verified
Raw
History Blame Contribute Delete
16.2 kB
"""Does It Sound Broken? — thin Gradio client.
Limited resources: this Space does NO heavy compute. It records/uploads audio,
ships the bytes to the Modal backend (modal_backend.Diagnoser), and renders the
returned diagnosis. All librosa/torch/transformers work happens on Modal.
Env:
SOUNDBROKEN_MOCK=1 -> render canned output locally without calling Modal
MODAL_APP_NAME -> override Modal app name (default "sound-broken")
SOUNDBROKEN_PORT -> port used when this file is run as a script (default 7882)
"""
from __future__ import annotations
import html
import os
import time
import gradio as gr
# Name of the deployed Modal app; MODAL_APP_NAME overrides the default.
APP_NAME = os.environ.get("MODAL_APP_NAME", "sound-broken")

# Mock mode is enabled only when SOUNDBROKEN_MOCK is exactly "1".
MOCK = os.environ.get("SOUNDBROKEN_MOCK", "0") == "1"

# Choices offered by the appliance dropdown.
APPLIANCES = [
    "Washing machine",
    "Tumble dryer",
    "Refrigerator/Freezer",
    "Electric fan",
    "Air conditioner",
    "Vacuum cleaner",
    "Dishwasher",
    "Microwave",
    "Electric motor (generic)",
    "Car engine",
    "Bicycle (chain/gears)",
    "Power drill",
]

# Accent colour of the verdict card, keyed by upper-case urgency level.
URGENCY_COLOR = {
    "CRITICAL": "#E53935",
    "HIGH": "#FB8C00",
    "MEDIUM": "#FDD835",
    "LOW": "#43A047",
    "UNKNOWN": "#9E9E9E",
}

# Short text marker shown in front of the urgency level.
URGENCY_ICON = {
    "CRITICAL": "!!",
    "HIGH": "!",
    "MEDIUM": "~",
    "LOW": "ok",
    "UNKNOWN": "?",
}

# Fallback value for each feature key when it is missing or None.
FEATURE_DEFAULTS = {
    "duration_s": 0.0,
    "rms_db": -120.0,
    "peak_db": -120.0,
    "spectral_centroid_hz": 0.0,
    "dominant_frequency_hz": 0.0,
    "harmonic_ratio": 0.0,
    "zero_crossing_rate": 0.0,
    "onset_rate_per_sec": 0.0,
    "has_regular_pattern": False,
    "pattern_interval_ms": 0.0,
    "anomaly_score": 0.0,
    "signal_present": False,
}
# --- Modal client -----------------------------------------------------------
# Cached handle to the remote class; filled in by the first _diagnoser() call.
_DIAGNOSER = None


def _diagnoser():
    """Return the cached ``Diagnoser`` handle, looking it up on first use.

    ``modal`` is imported inside the function, so the package is only
    needed once a lookup is actually performed.
    """
    global _DIAGNOSER
    if _DIAGNOSER is not None:
        return _DIAGNOSER

    import modal

    _DIAGNOSER = modal.Cls.from_name(APP_NAME, "Diagnoser")
    return _DIAGNOSER
def _mock_response(appliance: str) -> dict:
"""Canned, deterministic response for local UI work (no Modal, no librosa)."""
return {
"ok": True, "error": "",
"features": {
"duration_s": 8.0, "rms_db": -18.0, "peak_db": -1.2,
"spectral_centroid_hz": 2450.0, "dominant_frequency_hz": 1800.0,
"harmonic_ratio": 0.62, "zero_crossing_rate": 0.11,
"onset_rate_per_sec": 4.0, "has_regular_pattern": True,
"pattern_interval_ms": 250.0, "anomaly_score": 0.47,
"signal_present": True,
},
"candidates": [{
"name": "Worn drum bearing", "urgency": "HIGH", "weight": 0.9,
"evidence": "Regular 250 ms clicks with a bright spectrum — "
"classic bearing-race signature.",
}],
"result": {
"fault": "Worn drum bearing", "urgency": "HIGH",
"checks": ["Inspect the bearing housing for play or heat.",
"Spin the drum by hand — roughness confirms wear.",
"Replace the bearing if grease does not quiet it."],
"safety": "Disconnect power before inspecting.",
"confidence": 88, "grounded": True,
},
}
def _call_backend(audio_path: str, appliance: str) -> dict:
    """Send the recording to the backend and return its response dict.

    In mock mode the canned response is returned without touching the
    file or the network. Otherwise the audio file is read and its bytes,
    file suffix (``.wav`` when the path has none) and the appliance name
    are passed to the remote ``run`` method.

    This function does not raise for expected failures. Every failure is
    reported as a dict with ``ok`` set to False, a user-facing ``error``
    message and empty ``features`` / ``candidates`` / ``result``.
    """
    if MOCK:
        return _mock_response(appliance)

    def _failure(message: str) -> dict:
        # Same shape as a successful reply so callers can use .get() freely.
        return {"ok": False, "error": message,
                "features": {}, "candidates": [], "result": {}}

    # Read the file separately from the remote call so a local I/O problem
    # is not reported as a backend/deployment problem.
    try:
        with open(audio_path, "rb") as fh:
            data = fh.read()
    except (OSError, TypeError, ValueError) as exc:
        return _failure(
            f"Could not read the audio file ({type(exc).__name__}). "
            f"Please record or upload the sound again.")

    try:
        suffix = os.path.splitext(audio_path)[1] or ".wav"
        resp = _diagnoser()().run.remote(data, suffix, appliance)
    except Exception as exc:
        return _failure(
            f"Could not reach the Modal backend ({type(exc).__name__}). "
            f"Is it deployed (`modal deploy modal_backend.py`) and are "
            f"MODAL_TOKEN_ID / MODAL_TOKEN_SECRET set?")

    # Callers treat the reply as a dict; reject anything else up front.
    if not isinstance(resp, dict):
        return _failure(
            f"The Modal backend returned an unexpected response "
            f"({type(resp).__name__}).")
    return resp
# --- Rendering (all model-derived text is HTML-escaped) ---------------------
def _err_card(msg: str) -> str:
return (f"<div class='verdict' style='border-left:8px solid #E53935'>"
f"<div class='fault'>⚠ {html.escape(str(msg))}</div></div>")
def _verdict_html(result: dict, elapsed_ms: float) -> str:
    """Render the verdict card for one diagnosis as an HTML fragment.

    Args:
        result: Diagnosis dict. The keys read are ``urgency``, ``fault``,
            ``confidence``, ``checks``, ``safety`` and ``grounded``; all
            are optional.
        elapsed_ms: Round-trip time shown next to the confidence.

    Returns:
        The HTML for the card. Every value taken from ``result`` is
        HTML-escaped before it is inserted, including the urgency label.
    """
    urgency_key = str(result.get("urgency") or "UNKNOWN").upper()
    color = URGENCY_COLOR.get(urgency_key, URGENCY_COLOR["UNKNOWN"])
    icon = URGENCY_ICON.get(urgency_key, "?")
    # The label comes from the backend reply and may not be one of the
    # known levels, so escape it like every other piece of response text.
    urgency = html.escape(urgency_key)
    fault = html.escape(str(result.get("fault") or "Inconclusive"))

    # A non-numeric confidence (e.g. "high") should not take the card down.
    try:
        confidence = int(result.get("confidence", 0) or 0)
    except (TypeError, ValueError, OverflowError):
        confidence = 0

    checks = result.get("checks") or []
    # A lone string/scalar would otherwise be iterated character by
    # character (or fail to iterate); treat it as a single check.
    if not isinstance(checks, (list, tuple)):
        checks = [checks]
    checks_html = "".join(f"<li>{html.escape(str(c))}</li>" for c in checks)

    safety = html.escape(str(result.get("safety", "None")))
    badge = "" if result.get("grounded", True) else (
        "<span style='font-size:13px;opacity:.7'> (ungrounded)</span>")
    return f"""
<div class="verdict" style="border-left:8px solid {color}">
<div class="urgency" style="color:{color}">[{icon}] {urgency}
<span class="conf">{confidence}% confidence | {elapsed_ms:.0f}ms</span></div>
<div class="fault">{fault}{badge}</div>
<div class="label">What to check first:</div>
<ol class="checks">{checks_html}</ol>
<div class="label">Safety:</div>
<div class="safety">{safety}</div>
</div>"""
def _g(d: dict, key: str):
    """Look up ``key`` in ``d``, substituting the feature default.

    The default from ``FEATURE_DEFAULTS`` (or 0.0 for an unlisted key) is
    used both when the key is absent and when its value is ``None``.
    """
    fallback = FEATURE_DEFAULTS.get(key, 0.0)
    value = d.get(key, fallback)
    if value is None:
        return fallback
    return value
def _features_md(f: dict) -> str:
if not f or not f.get("signal_present", False):
return ("_Recording too quiet, too short, or unreadable — no reliable "
"features. Record 5–10 s closer to the appliance._")
pat = (f"Yes ({round(_g(f,'pattern_interval_ms'))} ms)"
if f.get("has_regular_pattern") else "No")
return (
f"| Metric | Value |\n|---|---|\n"
f"| Duration | {_g(f,'duration_s'):.1f} s |\n"
f"| Loudness | {_g(f,'rms_db'):.1f} dB (peak {_g(f,'peak_db'):.1f}) |\n"
f"| Spectral centroid | {_g(f,'spectral_centroid_hz'):.0f} Hz |\n"
f"| Dominant freq | {_g(f,'dominant_frequency_hz'):.0f} Hz |\n"
f"| Harmonic ratio | {_g(f,'harmonic_ratio'):.2f} |\n"
f"| Harshness (ZCR) | {_g(f,'zero_crossing_rate'):.3f} |\n"
f"| Clicks/sec | {_g(f,'onset_rate_per_sec'):.1f} |\n"
f"| Regular pattern | {pat} |\n"
f"| Anomaly score | {_g(f,'anomaly_score'):.2f} / 1.0 |\n"
)
def _detector_md(detection: dict | None, model_card: dict | None) -> str:
if not detection:
return ""
pct = float(detection.get("p_anomaly", 0.0) or 0.0) * 100
abnormal = bool(detection.get("is_anomaly"))
verdict = "⚠ ABNORMAL" if abnormal else "✓ NORMAL"
lines = [f"**Trained anomaly detector:** {verdict} "
f"({pct:.0f}% probability abnormal)"]
if model_card and model_card.get("accuracy") and model_card.get("roc_auc"):
lines.append(
f"_Real ML model — {model_card['accuracy']*100:.0f}% accuracy, "
f"{model_card['roc_auc']:.2f} ROC-AUC on {model_card.get('n_test','?')} "
f"held-out real machine recordings (DCASE 2025)._"
)
return "\n\n".join(lines)
def _candidates_md(candidates: list) -> str:
if not candidates:
return "No rules fired."
lines = ["**Rules that fired:**\n"]
for i, c in enumerate(candidates):
weight = float(c.get("weight", 0.0) or 0.0)
bar_len = max(0, min(10, int(weight * 10)))
bar = "#" * bar_len + "." * (10 - bar_len)
lines.append(
f"{i+1}. **{html.escape(str(c.get('name','?')))}** "
f"({html.escape(str(c.get('urgency','?')))}) `[{bar}]` {weight:.0%}\n"
f" _{html.escape(str(c.get('evidence','')))}_\n"
)
return "\n".join(lines)
def _history_md(history: list) -> str:
if not history:
return "No diagnoses yet."
rows = ["| # | Urgency | Fault | Appliance | Conf | Time |",
"|---|---|---|---|---|---|"]
for i, h in enumerate(reversed(history[-10:])):
rows.append(
f"| {len(history)-i} | {html.escape(str(h.get('urgency','')))} | "
f"**{html.escape(str(h.get('fault','')))}** | "
f"{html.escape(str(h.get('appliance','')))} | "
f"{int(h.get('confidence',0) or 0)}% | {html.escape(str(h.get('time','')))} |"
)
return "\n".join(rows)
# --- Handlers (never raise) -------------------------------------------------
def diagnose(audio_path, appliance, state):
    """Run one diagnosis and produce the outputs for the Diagnose tab.

    Args:
        audio_path: Path of the recorded/uploaded audio file, or a falsy
            value when nothing was provided.
        appliance: Selected appliance name.
        state: Session dict (may be None). It is copied, never mutated.

    Returns:
        A 4-tuple ``(verdict_html, features_md, analysis_md, state)``.
        On any failure the first element is an error card, the two
        Markdown outputs are empty, and the returned state is the
        unmodified copy. The session (``last_features``,
        ``last_appliance``, ``history`` capped at 50 entries) is only
        updated once all outputs have rendered.
    """
    state = dict(state or {})
    try:
        if not appliance:
            return _err_card("Please select the appliance type."), "", "", state
        if not audio_path:
            return _err_card("Please record or upload a sound first."), "", "", state

        # perf_counter is monotonic, so the elapsed time cannot be skewed
        # by wall-clock adjustments during the call.
        started = time.perf_counter()
        resp = _call_backend(audio_path, appliance)
        elapsed_ms = (time.perf_counter() - started) * 1000
        if not resp.get("ok"):
            return (_err_card(resp.get("error") or "Unknown backend error."),
                    "", "", state)

        # `or` also covers keys that are present but explicitly None.
        features = resp.get("features") or {}
        result = resp.get("result") or {}
        candidates = resp.get("candidates") or []

        # Render everything before touching the session: if a renderer
        # fails, the user sees an error and no entry is recorded.
        verdict = _verdict_html(result, elapsed_ms)
        features_md = _features_md(features)
        candidates_md = _candidates_md(candidates)
        det_md = _detector_md(resp.get("detection"), resp.get("model_card"))
        analysis_md = (det_md + "\n\n---\n\n" + candidates_md
                       if det_md else candidates_md)

        history = list(state.get("history", []))
        history.append({
            "fault": result.get("fault", "Inconclusive"),
            "urgency": result.get("urgency", "UNKNOWN"),
            "confidence": result.get("confidence", 0),
            "appliance": appliance, "time": time.strftime("%H:%M:%S"),
        })
        new_state = dict(state)
        new_state["last_features"] = features
        new_state["last_appliance"] = appliance
        new_state["history"] = history[-50:]
        return verdict, features_md, analysis_md, new_state
    except Exception as exc:
        return _err_card(f"Unexpected error: {type(exc).__name__}"), "", "", state
def compare(audio_path, appliance, state):
    """Compare a new recording against the last diagnosed one.

    The baseline is ``state["last_features"]``. The new recording is
    sent through the backend, using the stored appliance when none is
    passed. The result is a Markdown report with a one-line verdict on
    the anomaly score and a before/after table, or a short message when
    a precondition is not met. Exceptions become a warning string.
    """
    try:
        session = state or {}
        baseline = session.get("last_features")
        if not (baseline and baseline.get("signal_present")):
            return "Run a diagnosis first (with a usable recording), then record again here."
        if not audio_path:
            return "Record the appliance again (after your fix) to compare."

        reply = _call_backend(audio_path, appliance or session.get("last_appliance", ""))
        if not reply.get("ok"):
            return f"⚠ {reply.get('error', 'Backend error.')}"

        current = reply.get("features", {})
        if not current.get("signal_present"):
            return "The second recording was too quiet/short to compare. Try again."

        def number(source, key):
            # Missing, None and zero all read as 0.0.
            return float(source.get(key, 0.0) or 0.0)

        def row(label, key, unit=""):
            old = number(baseline, key)
            new = number(current, key)
            delta = new - old
            if delta < 0:
                arrow = "DOWN"
            elif delta > 0:
                arrow = "UP"
            else:
                arrow = "="
            return f"| {label} | {old:.2f}{unit} | {new:.2f}{unit} | {arrow} {delta:+.2f} |"

        anomaly_before = number(baseline, "anomaly_score")
        anomaly_after = number(current, "anomaly_score")
        # The floor on the divisor avoids dividing by a zero baseline.
        pct = ((anomaly_before - anomaly_after) / max(anomaly_before, 0.001)) * 100
        if anomaly_after < anomaly_before:
            verdict = f"**Sound improved** — anomaly score dropped **{pct:.0f}%**."
        else:
            verdict = "**No improvement yet** — the issue likely persists."

        metrics = (
            ("Anomaly score", "anomaly_score", ""),
            ("Loudness", "rms_db", " dB"),
            ("Spectral centroid", "spectral_centroid_hz", " Hz"),
            ("Harshness", "zero_crossing_rate", ""),
            ("Clicks/sec", "onset_rate_per_sec", ""),
        )
        table = "".join(row(label, key, unit) + "\n" for label, key, unit in metrics)
        heading = (
            f"### Before / After\n\n{verdict}\n\n"
            f"| Metric | Before | After | Change |\n|---|---|---|---|\n"
        )
        return heading + table
    except Exception as exc:
        return f"⚠ Unexpected error: {type(exc).__name__}"
def show_history(state):
    """Return the history table for the session, or the empty-history text."""
    entries = (state or {}).get("history", [])
    table_md = _history_md(entries)
    return table_md
def clear_history(state):
    """Empty the diagnosis history, keeping the rest of the session.

    Returns the confirmation text and a new state dict; the dict passed
    in is left untouched.
    """
    cleared = dict(state or {}, history=[])
    return "History cleared.", cleared
# --- CSS / UI ---------------------------------------------------------------
def _css() -> str:
path = os.path.join(os.path.dirname(__file__), "assets", "custom.css")
try:
with open(path, "r", encoding="utf-8") as fh:
return fh.read()
except Exception:
return ""
# Bundled sample recordings as (audio path, appliance name) pairs. The paths
# are relative; each pair becomes one "try this example" button that fills
# the audio input and the appliance dropdown.
EXAMPLE_DATA = [
    ("assets/sample_washer_bearing.wav", "Washing machine"),
    ("assets/sample_fan_imbalanced.wav", "Electric fan"),
    ("assets/sample_motor_squeal.wav", "Electric motor (generic)"),
    ("assets/sample_washer_good.wav", "Washing machine"),
]
# Build the page. Components are created inside nested layout contexts, so
# both statement order and nesting decide where each one appears.
# NOTE(review): nesting below was reconstructed from a copy of this file that
# had lost its indentation — confirm the layout against the running app.
with gr.Blocks(css=_css(), title="Does It Sound Broken?") as demo:
    # State dict handed to and returned by the handlers; they keep
    # "last_features", "last_appliance" and "history" in it.
    state = gr.State({})
    gr.Markdown(
        "# Does It Sound Broken?\n"
        "*Record your appliance. Get a diagnosis grounded in measured acoustics. "
        "All analysis runs on Modal — this page stays light.*"
    )
    with gr.Tabs():
        with gr.Tab("Diagnose"):
            with gr.Row():
                # Left column: inputs and example shortcuts.
                with gr.Column(scale=1):
                    audio_in = gr.Audio(
                        sources=["microphone", "upload"], type="filepath",
                        label="Record 5-10s of the appliance sound",
                    )
                    appliance = gr.Dropdown(
                        choices=APPLIANCES, value="Washing machine",
                        label="Appliance type (required)",
                    )
                    diagnose_btn = gr.Button("Diagnose", variant="primary", size="lg")
                    gr.Markdown("**Try these examples:**")
                    for ex_path, ex_appliance in EXAMPLE_DATA:
                        # Button caption is the file name without the
                        # "sample_" prefix and ".wav" suffix.
                        short = os.path.basename(ex_path).replace("sample_", "").replace(".wav", "")
                        b = gr.Button(f" {short} ({ex_appliance})", size="sm")
                        # Default arguments bind this iteration's values, so
                        # each button fills in its own sample and appliance.
                        b.click(fn=lambda p=ex_path, a=ex_appliance: (p, a),
                                outputs=[audio_in, appliance])
                # Right column: verdict card plus collapsible evidence.
                with gr.Column(scale=1):
                    verdict_out = gr.HTML()
                    with gr.Accordion("Evidence", open=False):
                        features_out = gr.Markdown()
                        candidates_out = gr.Markdown()
            # Output order matches the 4-tuple returned by diagnose().
            diagnose_btn.click(diagnose, [audio_in, appliance, state],
                               [verdict_out, features_out, candidates_out, state])
        with gr.Tab("Compare"):
            gr.Markdown("Record again after a fix to prove it worked.")
            audio_after = gr.Audio(sources=["microphone", "upload"], type="filepath",
                                   label="Record again (after fix)")
            compare_btn = gr.Button("Compare", variant="primary")
            compare_out = gr.Markdown()
            # Reuses the appliance dropdown defined on the Diagnose tab.
            compare_btn.click(compare, [audio_after, appliance, state], compare_out)
        with gr.Tab("History"):
            history_out = gr.Markdown()
            with gr.Row():
                refresh_btn = gr.Button("Refresh")
                clear_btn = gr.Button("Clear history")
            # The table is only redrawn when Refresh is clicked.
            refresh_btn.click(show_history, [state], history_out)
            # clear_history returns (message, new state), in that order.
            clear_btn.click(clear_history, [state], [history_out, state])
        with gr.Tab("How it works"):
            gr.Markdown("""
## Pipeline (all on Modal)
```
Audio -> Modal GPU container:
librosa features -> rule engine -> Nemotron-4B -> validated JSON
-> thin Gradio Space renders the result
```
## Key design
- The model NEVER hears raw audio — only measured features + rule candidates
- 12 appliance types, 30+ dedicated fault rules
- Ungrounded model output is snapped back to the top deterministic candidate
- Robust to silence, clipping, NaN, corrupt files, and runaway model output
- Heavy deps live only in the Modal image, so the Space stays tiny
""")
    gr.Markdown(
        "<div class='footer'>The model never hears raw audio. No audio is stored. "
        "Powered by NVIDIA Nemotron-3-Nano-4B on Modal.</div>"
    )
if __name__ == "__main__":
    # Script entry point: serve on all interfaces, on the port taken from
    # SOUNDBROKEN_PORT (7882 when unset).
    listen_port = int(os.environ.get("SOUNDBROKEN_PORT", "7882"))
    demo.launch(
        server_port=listen_port,
        server_name="0.0.0.0",
        show_error=True,
    )