Spaces:
Runtime error
Runtime error
"""Does It Sound Broken? — thin Gradio client.

Limited resources: this Space does NO heavy compute. It records/uploads audio,
ships the bytes to the Modal backend (modal_backend.Diagnoser), and renders the
returned diagnosis. All librosa/torch/transformers work happens on Modal.

Env:
    SOUNDBROKEN_MOCK=1  -> render canned output locally without calling Modal
    MODAL_APP_NAME      -> override Modal app name (default "sound-broken")
    SOUNDBROKEN_PORT    -> port used when run as a script (default 7882)
"""
| from __future__ import annotations | |
| import html | |
| import os | |
| import time | |
| import gradio as gr | |
# Name of the deployed Modal app that hosts the Diagnoser class.
APP_NAME = os.environ.get("MODAL_APP_NAME", "sound-broken")

# Mock mode is on only when the variable is exactly the string "1".
MOCK = os.environ.get("SOUNDBROKEN_MOCK", "0") == "1"

# Choices offered by the appliance dropdown, in display order.
APPLIANCES = [
    "Washing machine",
    "Tumble dryer",
    "Refrigerator/Freezer",
    "Electric fan",
    "Air conditioner",
    "Vacuum cleaner",
    "Dishwasher",
    "Microwave",
    "Electric motor (generic)",
    "Car engine",
    "Bicycle (chain/gears)",
    "Power drill",
]

# Accent colour of the verdict card, keyed by upper-case urgency label.
URGENCY_COLOR = {
    "CRITICAL": "#E53935",
    "HIGH": "#FB8C00",
    "MEDIUM": "#FDD835",
    "LOW": "#43A047",
    "UNKNOWN": "#9E9E9E",
}

# Short text marker shown in brackets before the urgency label.
URGENCY_ICON = {
    "CRITICAL": "!!",
    "HIGH": "!",
    "MEDIUM": "~",
    "LOW": "ok",
    "UNKNOWN": "?",
}

# Fallback value for each feature key when the backend omits it or sends None.
FEATURE_DEFAULTS = {
    "duration_s": 0.0,
    "rms_db": -120.0,
    "peak_db": -120.0,
    "spectral_centroid_hz": 0.0,
    "dominant_frequency_hz": 0.0,
    "harmonic_ratio": 0.0,
    "zero_crossing_rate": 0.0,
    "onset_rate_per_sec": 0.0,
    "has_regular_pattern": False,
    "pattern_interval_ms": 0.0,
    "anomaly_score": 0.0,
    "signal_present": False,
}
| # --- Modal client ----------------------------------------------------------- | |
# Module-wide cache for the Modal class handle; filled on first use.
_DIAGNOSER = None


def _diagnoser():
    """Return the cached handle to the remote ``Diagnoser`` class.

    ``modal`` is imported on the first call rather than at module import, so
    importing this file does not require ``modal`` unless a backend call is
    actually made. The handle is looked up once via ``modal.Cls.from_name``
    and reused afterwards.
    """
    global _DIAGNOSER
    if _DIAGNOSER is not None:
        return _DIAGNOSER

    import modal

    _DIAGNOSER = modal.Cls.from_name(APP_NAME, "Diagnoser")
    return _DIAGNOSER
def _mock_response(appliance: str) -> dict:
    """Build the fixed response used in mock mode (no Modal, no librosa).

    Args:
        appliance: Accepted for signature parity with the real backend call;
            the canned payload does not depend on it.

    Returns:
        A freshly built dict with the keys ``ok``, ``error``, ``features``,
        ``candidates`` and ``result``.
    """
    features = {
        "duration_s": 8.0,
        "rms_db": -18.0,
        "peak_db": -1.2,
        "spectral_centroid_hz": 2450.0,
        "dominant_frequency_hz": 1800.0,
        "harmonic_ratio": 0.62,
        "zero_crossing_rate": 0.11,
        "onset_rate_per_sec": 4.0,
        "has_regular_pattern": True,
        "pattern_interval_ms": 250.0,
        "anomaly_score": 0.47,
        "signal_present": True,
    }
    top_candidate = {
        "name": "Worn drum bearing",
        "urgency": "HIGH",
        "weight": 0.9,
        "evidence": "Regular 250 ms clicks with a bright spectrum — "
                    "classic bearing-race signature.",
    }
    verdict = {
        "fault": "Worn drum bearing",
        "urgency": "HIGH",
        "checks": [
            "Inspect the bearing housing for play or heat.",
            "Spin the drum by hand — roughness confirms wear.",
            "Replace the bearing if grease does not quiet it.",
        ],
        "safety": "Disconnect power before inspecting.",
        "confidence": 88,
        "grounded": True,
    }
    return {
        "ok": True,
        "error": "",
        "features": features,
        "candidates": [top_candidate],
        "result": verdict,
    }
def _call_backend(audio_path: str, appliance: str) -> dict:
    """Send a recording to the Modal backend and return its response.

    In mock mode (``MOCK``) the canned response is returned and neither the
    file nor Modal is touched. Otherwise the file is read into memory and
    passed, with its extension (``.wav`` when it has none) and the appliance
    name, to ``Diagnoser.run`` on Modal.

    Args:
        audio_path: Path of the recorded or uploaded audio file.
        appliance: Appliance label forwarded to the backend.

    Returns:
        The backend's response dict, or an error dict of the same shape
        (``ok`` False, ``error`` message, empty ``features`` / ``candidates``
        / ``result``). This function does not raise for file or backend
        failures.
    """
    if MOCK:
        return _mock_response(appliance)

    def failure(message: str) -> dict:
        # Same shape as a real response so callers can use .get() uniformly.
        return {"ok": False, "error": message,
                "features": {}, "candidates": [], "result": {}}

    # Reading the local file is kept apart from the remote call so a missing
    # or unreadable recording is not reported as a Modal connectivity problem.
    try:
        with open(audio_path, "rb") as fh:
            data = fh.read()
    except (OSError, TypeError, ValueError) as exc:
        return failure(f"Could not read the recording ({type(exc).__name__}). "
                       f"Please record or upload it again.")

    suffix = os.path.splitext(audio_path)[1] or ".wav"
    try:
        resp = _diagnoser()().run.remote(data, suffix, appliance)
    except Exception as exc:  # boundary: any backend failure becomes an error dict
        return failure(f"Could not reach the Modal backend ({type(exc).__name__}). "
                       f"Is it deployed (`modal deploy modal_backend.py`) and are "
                       f"MODAL_TOKEN_ID / MODAL_TOKEN_SECRET set?")

    # Callers immediately call .get() on the result; guard against a reply
    # that is not a mapping.
    if not isinstance(resp, dict):
        return failure("The Modal backend returned an unexpected response "
                       f"({type(resp).__name__}).")
    return resp
| # --- Rendering (all model-derived text is HTML-escaped) --------------------- | |
def _err_card(msg: str) -> str:
    """Return a red-bordered verdict card showing ``msg`` as an error.

    ``msg`` is converted with ``str`` and HTML-escaped before being embedded.
    """
    safe_msg = html.escape(str(msg))
    opening = "<div class='verdict' style='border-left:8px solid #E53935'>"
    body = f"<div class='fault'>⚠ {safe_msg}</div></div>"
    return opening + body
def _verdict_html(result: dict, elapsed_ms: float) -> str:
    """Render the diagnosis verdict card as an HTML fragment.

    Args:
        result: The backend's ``result`` mapping. Keys read here are
            ``urgency``, ``fault``, ``confidence``, ``checks``, ``safety``
            and ``grounded``; each has a fallback when missing.
        elapsed_ms: Duration of the backend call in milliseconds, shown next
            to the confidence.

    Returns:
        HTML for the card. Every value taken from ``result`` is HTML-escaped
        (or reduced to an int) before interpolation.
    """
    urgency_key = str(result.get("urgency", "UNKNOWN")).upper()
    color = URGENCY_COLOR.get(urgency_key, URGENCY_COLOR["UNKNOWN"])
    icon = URGENCY_ICON.get(urgency_key, "?")
    # The label is backend-supplied text like the fault, so it is escaped too;
    # the raw value is only used above as a lookup key.
    urgency = html.escape(urgency_key)
    fault = html.escape(str(result.get("fault", "Inconclusive")))

    # A non-numeric or NaN/inf confidence must not break rendering; show 0.
    try:
        confidence = int(result.get("confidence", 0) or 0)
    except (TypeError, ValueError, OverflowError):
        confidence = 0
    confidence = max(0, min(100, confidence))  # displayed as a percentage

    checks = result.get("checks") or []
    if isinstance(checks, str):
        # A lone string would otherwise be split into one <li> per character.
        checks = [checks]
    checks_html = "".join(f"<li>{html.escape(str(c))}</li>" for c in checks)
    safety = html.escape(str(result.get("safety", "None")))
    badge = "" if result.get("grounded", True) else (
        "<span style='font-size:13px;opacity:.7'> (ungrounded)</span>")
    return f"""
<div class="verdict" style="border-left:8px solid {color}">
<div class="urgency" style="color:{color}">[{icon}] {urgency}
<span class="conf">{confidence}% confidence | {elapsed_ms:.0f}ms</span></div>
<div class="fault">{fault}{badge}</div>
<div class="label">What to check first:</div>
<ol class="checks">{checks_html}</ol>
<div class="label">Safety:</div>
<div class="safety">{safety}</div>
</div>"""
def _g(d: dict, key: str):
    """Look up feature ``key`` in ``d``, falling back to its default.

    The fallback is ``FEATURE_DEFAULTS[key]`` (or ``0.0`` for an unlisted
    key) and is used both when the key is absent and when its value is None.
    """
    fallback = FEATURE_DEFAULTS.get(key, 0.0)
    value = d.get(key, fallback)
    if value is None:
        return fallback
    return value
def _features_md(f: dict) -> str:
    """Format the measured features as a two-column Markdown table.

    When ``f`` is empty or its ``signal_present`` flag is falsy, a short
    italic hint is returned instead of a table.
    """
    usable = bool(f) and f.get("signal_present", False)
    if not usable:
        return ("_Recording too quiet, too short, or unreadable — no reliable "
                "features. Record 5–10 s closer to the appliance._")

    if f.get("has_regular_pattern"):
        pattern = f"Yes ({round(_g(f, 'pattern_interval_ms'))} ms)"
    else:
        pattern = "No"

    table = [
        "| Metric | Value |",
        "|---|---|",
        f"| Duration | {_g(f, 'duration_s'):.1f} s |",
        f"| Loudness | {_g(f, 'rms_db'):.1f} dB (peak {_g(f, 'peak_db'):.1f}) |",
        f"| Spectral centroid | {_g(f, 'spectral_centroid_hz'):.0f} Hz |",
        f"| Dominant freq | {_g(f, 'dominant_frequency_hz'):.0f} Hz |",
        f"| Harmonic ratio | {_g(f, 'harmonic_ratio'):.2f} |",
        f"| Harshness (ZCR) | {_g(f, 'zero_crossing_rate'):.3f} |",
        f"| Clicks/sec | {_g(f, 'onset_rate_per_sec'):.1f} |",
        f"| Regular pattern | {pattern} |",
        f"| Anomaly score | {_g(f, 'anomaly_score'):.2f} / 1.0 |",
    ]
    # Every row, including the last, is newline-terminated.
    return "\n".join(table) + "\n"
def _detector_md(detection: dict | None, model_card: dict | None) -> str:
    """Summarise the trained anomaly detector's output as Markdown.

    Args:
        detection: Mapping with ``p_anomaly`` and ``is_anomaly``; when falsy
            an empty string is returned.
        model_card: Optional mapping with ``accuracy``, ``roc_auc`` and
            ``n_test``. The metrics line is added only when both ``accuracy``
            and ``roc_auc`` are truthy.

    Returns:
        One or two Markdown paragraphs separated by a blank line.
    """
    if not detection:
        return ""

    pct = float(detection.get("p_anomaly", 0.0) or 0.0) * 100
    if bool(detection.get("is_anomaly")):
        verdict = "⚠ ABNORMAL"
    else:
        verdict = "✓ NORMAL"

    paragraphs = [
        f"**Trained anomaly detector:** {verdict} ({pct:.0f}% probability abnormal)"
    ]

    has_metrics = (model_card and model_card.get("accuracy")
                   and model_card.get("roc_auc"))
    if has_metrics:
        accuracy_pct = model_card['accuracy'] * 100
        n_test = model_card.get('n_test', '?')
        paragraphs.append(
            f"_Real ML model — {accuracy_pct:.0f}% accuracy, "
            f"{model_card['roc_auc']:.2f} ROC-AUC on {n_test} "
            f"held-out real machine recordings (DCASE 2025)._"
        )
    return "\n\n".join(paragraphs)
def _candidates_md(candidates: list) -> str:
    """List the rule candidates as a numbered Markdown list.

    Each entry shows the escaped name and urgency, a ten-character bar
    (``#`` filled, ``.`` empty) derived from ``weight``, the weight as a
    percentage, and the escaped evidence in italics on the next line.
    Returns ``"No rules fired."`` for an empty list.
    """
    if not candidates:
        return "No rules fired."

    out = ["**Rules that fired:**\n"]
    for number, cand in enumerate(candidates, start=1):
        weight = float(cand.get("weight", 0.0) or 0.0)
        # Clamp to the 0..10 range so the bar always has exactly ten cells.
        filled = min(10, int(weight * 10))
        if filled < 0:
            filled = 0
        bar = "#" * filled + "." * (10 - filled)

        name = html.escape(str(cand.get('name', '?')))
        urgency = html.escape(str(cand.get('urgency', '?')))
        evidence = html.escape(str(cand.get('evidence', '')))
        out.append(
            f"{number}. **{name}** ({urgency}) `[{bar}]` {weight:.0%}\n"
            f" _{evidence}_\n"
        )
    return "\n".join(out)
def _history_md(history: list) -> str:
    """Render the ten most recent diagnoses as a Markdown table, newest first.

    Args:
        history: List of dicts with the keys ``urgency``, ``fault``,
            ``appliance``, ``confidence`` and ``time``; missing keys render
            as empty cells (``0%`` for confidence).

    Returns:
        The Markdown table, or ``"No diagnoses yet."`` when ``history`` is
        empty. Row numbers count from the start of ``history``, so the newest
        entry carries the highest number.
    """
    if not history:
        return "No diagnoses yet."

    def cell(value) -> str:
        # html.escape leaves '|' and newlines alone, but either one would
        # split or end a Markdown table row, so neutralise them as well.
        text = html.escape(str(value))
        return text.replace("|", "\\|").replace("\r", " ").replace("\n", " ")

    def percent(value) -> int:
        # A non-numeric confidence must not raise while rendering; show 0.
        try:
            return int(value or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    total = len(history)
    rows = ["| # | Urgency | Fault | Appliance | Conf | Time |",
            "|---|---|---|---|---|---|"]
    for offset, entry in enumerate(reversed(history[-10:])):
        rows.append(
            f"| {total - offset} | {cell(entry.get('urgency', ''))} | "
            f"**{cell(entry.get('fault', ''))}** | "
            f"{cell(entry.get('appliance', ''))} | "
            f"{percent(entry.get('confidence', 0))}% | {cell(entry.get('time', ''))} |"
        )
    return "\n".join(rows)
| # --- Handlers (never raise) ------------------------------------------------- | |
def diagnose(audio_path, appliance, state):
    """Handle the Diagnose button: call the backend and render its answer.

    Args:
        audio_path: File path of the recording, or a falsy value when none
            was provided.
        appliance: Selected appliance label.
        state: Per-session dict (may be None); it is copied, never mutated.

    Returns:
        A 4-tuple ``(verdict_html, features_md, analysis_md, new_state)``.
        On any failure the first item is an error card and the two Markdown
        items are empty strings. Exceptions raised inside the ``try`` are
        converted into an error card.
    """
    state = dict(state or {})
    try:
        # Input validation first: both fields are required.
        if not appliance:
            return _err_card("Please select the appliance type."), "", "", state
        if not audio_path:
            return _err_card("Please record or upload a sound first."), "", "", state

        started = time.time()
        resp = _call_backend(audio_path, appliance)
        elapsed_ms = (time.time() - started) * 1000

        if not resp.get("ok"):
            backend_error = resp.get("error", "Unknown backend error.")
            return _err_card(backend_error), "", "", state

        features = resp.get("features", {})
        result = resp.get("result", {})
        candidates = resp.get("candidates", [])

        # Remembered so the Compare tab has a "before" measurement.
        state["last_features"] = features
        state["last_appliance"] = appliance

        past = list(state.get("history", []))
        past.append({
            "fault": result.get("fault", "Inconclusive"),
            "urgency": result.get("urgency", "UNKNOWN"),
            "confidence": result.get("confidence", 0),
            "appliance": appliance,
            "time": time.strftime("%H:%M:%S"),
        })
        state["history"] = past[-50:]  # keep only the 50 newest entries

        det_md = _detector_md(resp.get("detection"), resp.get("model_card"))
        rules_md = _candidates_md(candidates)
        if det_md:
            analysis_md = det_md + "\n\n---\n\n" + rules_md
        else:
            analysis_md = rules_md

        verdict = _verdict_html(result, elapsed_ms)
        return verdict, _features_md(features), analysis_md, state
    except Exception as exc:
        return _err_card(f"Unexpected error: {type(exc).__name__}"), "", "", state
def compare(audio_path, appliance, state):
    """Handle the Compare button: contrast a new recording with the last one.

    Args:
        audio_path: File path of the follow-up recording (falsy when absent).
        appliance: Selected appliance; when falsy, the appliance stored in
            ``state`` by the last diagnosis is used.
        state: Per-session dict holding ``last_features`` / ``last_appliance``.

    Returns:
        A Markdown string: a before/after table with a verdict line, or a
        short message explaining why no comparison could be made. Exceptions
        are converted into a warning string.
    """
    try:
        state = state or {}
        before = state.get("last_features")
        if not (before and before.get("signal_present")):
            return "Run a diagnosis first (with a usable recording), then record again here."
        if not audio_path:
            return "Record the appliance again (after your fix) to compare."

        resp = _call_backend(audio_path, appliance or state.get("last_appliance", ""))
        if not resp.get("ok"):
            return f"⚠ {resp.get('error', 'Backend error.')}"

        after = resp.get("features", {})
        if not after.get("signal_present"):
            return "The second recording was too quiet/short to compare. Try again."

        def metric(source, key):
            # Missing, None and zero values all count as 0.0.
            return float(source.get(key, 0.0) or 0.0)

        old_score = metric(before, "anomaly_score")
        new_score = metric(after, "anomaly_score")
        # Relative drop; the 0.001 floor avoids dividing by zero.
        pct = ((old_score - new_score) / max(old_score, 0.001)) * 100
        if new_score < old_score:
            verdict = f"**Sound improved** — anomaly score dropped **{pct:.0f}%**."
        else:
            verdict = "**No improvement yet** — the issue likely persists."

        specs = (
            ("Anomaly score", "anomaly_score", ""),
            ("Loudness", "rms_db", " dB"),
            ("Spectral centroid", "spectral_centroid_hz", " Hz"),
            ("Harshness", "zero_crossing_rate", ""),
            ("Clicks/sec", "onset_rate_per_sec", ""),
        )
        table = []
        for label, key, unit in specs:
            b = metric(before, key)
            a = metric(after, key)
            delta = a - b
            if delta < 0:
                arrow = "DOWN"
            elif delta > 0:
                arrow = "UP"
            else:
                arrow = "="
            table.append(
                f"| {label} | {b:.2f}{unit} | {a:.2f}{unit} | {arrow} {delta:+.2f} |\n"
            )

        header = (f"### Before / After\n\n{verdict}\n\n"
                  f"| Metric | Before | After | Change |\n|---|---|---|---|\n")
        return header + "".join(table)
    except Exception as exc:
        return f"⚠ Unexpected error: {type(exc).__name__}"
def show_history(state):
    """Return the history table for the session ``state`` (which may be None)."""
    entries = state.get("history", []) if state else []
    return _history_md(entries)
def clear_history(state):
    """Return a confirmation message and a copy of ``state`` with no history.

    The incoming ``state`` is not mutated; every other key is carried over.
    """
    fresh = dict(state) if state else {}
    fresh["history"] = []
    message = "History cleared."
    return message, fresh
| # --- CSS / UI --------------------------------------------------------------- | |
def _css() -> str:
    """Read ``assets/custom.css`` next to this file; return "" if that fails."""
    here = os.path.dirname(__file__)
    stylesheet = os.path.join(here, "assets", "custom.css")
    try:
        with open(stylesheet, "r", encoding="utf-8") as handle:
            contents = handle.read()
    except Exception:
        # Best effort: without the stylesheet the UI uses default styling.
        return ""
    return contents
# (audio file path, appliance label) pairs behind the "Try these examples"
# buttons; the label is one of the dropdown choices.
EXAMPLE_DATA = [
    (
        "assets/sample_washer_bearing.wav",
        "Washing machine",
    ),
    (
        "assets/sample_fan_imbalanced.wav",
        "Electric fan",
    ),
    (
        "assets/sample_motor_squeal.wav",
        "Electric motor (generic)",
    ),
    (
        "assets/sample_washer_good.wav",
        "Washing machine",
    ),
]
# Build the Gradio UI: a title, four tabs (Diagnose / Compare / History /
# How it works) and a footer, all sharing one per-session ``state`` dict.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# in which indentation had been lost — confirm it against the intended layout.
with gr.Blocks(css=_css(), title="Does It Sound Broken?") as demo:
    # Session state; the handlers store "last_features", "last_appliance"
    # and "history" in it.
    state = gr.State({})
    gr.Markdown(
        "# Does It Sound Broken?\n"
        "*Record your appliance. Get a diagnosis grounded in measured acoustics. "
        "All analysis runs on Modal — this page stays light.*"
    )
    with gr.Tabs():
        with gr.Tab("Diagnose"):
            with gr.Row():
                # Left column: inputs and example shortcuts.
                with gr.Column(scale=1):
                    audio_in = gr.Audio(
                        sources=["microphone", "upload"], type="filepath",
                        label="Record 5-10s of the appliance sound",
                    )
                    appliance = gr.Dropdown(
                        choices=APPLIANCES, value="Washing machine",
                        label="Appliance type (required)",
                    )
                    diagnose_btn = gr.Button("Diagnose", variant="primary", size="lg")
                    gr.Markdown("**Try these examples:**")
                    for ex_path, ex_appliance in EXAMPLE_DATA:
                        # Button caption: file name without "sample_" / ".wav".
                        short = os.path.basename(ex_path).replace("sample_", "").replace(".wav", "")
                        b = gr.Button(f" {short} ({ex_appliance})", size="sm")
                        # Default arguments bind this iteration's values, so
                        # each button fills in its own example.
                        b.click(fn=lambda p=ex_path, a=ex_appliance: (p, a),
                                outputs=[audio_in, appliance])
                # Right column: verdict card plus collapsible evidence.
                with gr.Column(scale=1):
                    verdict_out = gr.HTML()
                    with gr.Accordion("Evidence", open=False):
                        features_out = gr.Markdown()
                        candidates_out = gr.Markdown()
            # diagnose returns (verdict html, features md, analysis md, state).
            diagnose_btn.click(diagnose, [audio_in, appliance, state],
                               [verdict_out, features_out, candidates_out, state])
        with gr.Tab("Compare"):
            gr.Markdown("Record again after a fix to prove it worked.")
            audio_after = gr.Audio(sources=["microphone", "upload"], type="filepath",
                                   label="Record again (after fix)")
            compare_btn = gr.Button("Compare", variant="primary")
            compare_out = gr.Markdown()
            # Reuses the appliance dropdown from the Diagnose tab as an input.
            compare_btn.click(compare, [audio_after, appliance, state], compare_out)
        with gr.Tab("History"):
            history_out = gr.Markdown()
            with gr.Row():
                refresh_btn = gr.Button("Refresh")
                clear_btn = gr.Button("Clear history")
            # The table is only rendered when Refresh is clicked.
            refresh_btn.click(show_history, [state], history_out)
            # clear_history returns (message, new state); the message is
            # shown in place of the table.
            clear_btn.click(clear_history, [state], [history_out, state])
        with gr.Tab("How it works"):
            gr.Markdown("""
## Pipeline (all on Modal)
```
Audio -> Modal GPU container:
librosa features -> rule engine -> Nemotron-4B -> validated JSON
-> thin Gradio Space renders the result
```
## Key design
- The model NEVER hears raw audio — only measured features + rule candidates
- 12 appliance types, 30+ dedicated fault rules
- Ungrounded model output is snapped back to the top deterministic candidate
- Robust to silence, clipping, NaN, corrupt files, and runaway model output
- Heavy deps live only in the Modal image, so the Space stays tiny
""")
    # Footer shown below the tabs.
    gr.Markdown(
        "<div class='footer'>The model never hears raw audio. No audio is stored. "
        "Powered by NVIDIA Nemotron-3-Nano-4B on Modal.</div>"
    )
if __name__ == "__main__":
    # Script entry point: serve on all interfaces; the port comes from
    # SOUNDBROKEN_PORT and defaults to 7882.
    demo.launch(
        server_port=int(os.environ.get("SOUNDBROKEN_PORT", "7882")),
        server_name="0.0.0.0",
        show_error=True,
    )