"""Does It Sound Broken? — thin Gradio client. Limited resources: this Space does NO heavy compute. It records/uploads audio, ships the bytes to the Modal backend (modal_backend.Diagnoser), and renders the returned diagnosis. All librosa/torch/transformers work happens on Modal. Env: SOUNDBROKEN_MOCK=1 -> render canned output locally without calling Modal MODAL_APP_NAME -> override Modal app name (default "sound-broken") """ from __future__ import annotations import html import os import time import gradio as gr APP_NAME = os.environ.get("MODAL_APP_NAME", "sound-broken") MOCK = os.environ.get("SOUNDBROKEN_MOCK", "0") == "1" APPLIANCES = [ "Washing machine", "Tumble dryer", "Refrigerator/Freezer", "Electric fan", "Air conditioner", "Vacuum cleaner", "Dishwasher", "Microwave", "Electric motor (generic)", "Car engine", "Bicycle (chain/gears)", "Power drill", ] URGENCY_COLOR = { "CRITICAL": "#E53935", "HIGH": "#FB8C00", "MEDIUM": "#FDD835", "LOW": "#43A047", "UNKNOWN": "#9E9E9E", } URGENCY_ICON = { "CRITICAL": "!!", "HIGH": "!", "MEDIUM": "~", "LOW": "ok", "UNKNOWN": "?", } FEATURE_DEFAULTS = { "duration_s": 0.0, "rms_db": -120.0, "peak_db": -120.0, "spectral_centroid_hz": 0.0, "dominant_frequency_hz": 0.0, "harmonic_ratio": 0.0, "zero_crossing_rate": 0.0, "onset_rate_per_sec": 0.0, "has_regular_pattern": False, "pattern_interval_ms": 0.0, "anomaly_score": 0.0, "signal_present": False, } # --- Modal client ----------------------------------------------------------- _DIAGNOSER = None def _diagnoser(): global _DIAGNOSER if _DIAGNOSER is None: import modal _DIAGNOSER = modal.Cls.from_name(APP_NAME, "Diagnoser") return _DIAGNOSER def _mock_response(appliance: str) -> dict: """Canned, deterministic response for local UI work (no Modal, no librosa).""" return { "ok": True, "error": "", "features": { "duration_s": 8.0, "rms_db": -18.0, "peak_db": -1.2, "spectral_centroid_hz": 2450.0, "dominant_frequency_hz": 1800.0, "harmonic_ratio": 0.62, "zero_crossing_rate": 0.11, "onset_rate_per_sec": 4.0, "has_regular_pattern": True, "pattern_interval_ms": 250.0, "anomaly_score": 0.47, "signal_present": True, }, "candidates": [{ "name": "Worn drum bearing", "urgency": "HIGH", "weight": 0.9, "evidence": "Regular 250 ms clicks with a bright spectrum — " "classic bearing-race signature.", }], "result": { "fault": "Worn drum bearing", "urgency": "HIGH", "checks": ["Inspect the bearing housing for play or heat.", "Spin the drum by hand — roughness confirms wear.", "Replace the bearing if grease does not quiet it."], "safety": "Disconnect power before inspecting.", "confidence": 88, "grounded": True, }, } def _call_backend(audio_path: str, appliance: str) -> dict: """Send audio to Modal; return the response dict or an error dict.""" if MOCK: return _mock_response(appliance) try: with open(audio_path, "rb") as fh: data = fh.read() suffix = os.path.splitext(audio_path)[1] or ".wav" return _diagnoser()().run.remote(data, suffix, appliance) except Exception as exc: return {"ok": False, "error": f"Could not reach the Modal backend ({type(exc).__name__}). " f"Is it deployed (`modal deploy modal_backend.py`) and are " f"MODAL_TOKEN_ID / MODAL_TOKEN_SECRET set?", "features": {}, "candidates": [], "result": {}} # --- Rendering (all model-derived text is HTML-escaped) --------------------- def _err_card(msg: str) -> str: return (f"
" f"
⚠ {html.escape(str(msg))}
") def _verdict_html(result: dict, elapsed_ms: float) -> str: urgency = str(result.get("urgency", "UNKNOWN")).upper() color = URGENCY_COLOR.get(urgency, URGENCY_COLOR["UNKNOWN"]) icon = URGENCY_ICON.get(urgency, "?") fault = html.escape(str(result.get("fault", "Inconclusive"))) confidence = int(result.get("confidence", 0) or 0) checks = result.get("checks") or [] checks_html = "".join(f"
  • {html.escape(str(c))}
  • " for c in checks) safety = html.escape(str(result.get("safety", "None"))) badge = "" if result.get("grounded", True) else ( " (ungrounded)") return f"""
    [{icon}] {urgency} {confidence}% confidence | {elapsed_ms:.0f}ms
    {fault}{badge}
    What to check first:
      {checks_html}
    Safety:
    {safety}
    """ def _g(d: dict, key: str): v = d.get(key, FEATURE_DEFAULTS.get(key, 0.0)) return FEATURE_DEFAULTS.get(key, 0.0) if v is None else v def _features_md(f: dict) -> str: if not f or not f.get("signal_present", False): return ("_Recording too quiet, too short, or unreadable — no reliable " "features. Record 5–10 s closer to the appliance._") pat = (f"Yes ({round(_g(f,'pattern_interval_ms'))} ms)" if f.get("has_regular_pattern") else "No") return ( f"| Metric | Value |\n|---|---|\n" f"| Duration | {_g(f,'duration_s'):.1f} s |\n" f"| Loudness | {_g(f,'rms_db'):.1f} dB (peak {_g(f,'peak_db'):.1f}) |\n" f"| Spectral centroid | {_g(f,'spectral_centroid_hz'):.0f} Hz |\n" f"| Dominant freq | {_g(f,'dominant_frequency_hz'):.0f} Hz |\n" f"| Harmonic ratio | {_g(f,'harmonic_ratio'):.2f} |\n" f"| Harshness (ZCR) | {_g(f,'zero_crossing_rate'):.3f} |\n" f"| Clicks/sec | {_g(f,'onset_rate_per_sec'):.1f} |\n" f"| Regular pattern | {pat} |\n" f"| Anomaly score | {_g(f,'anomaly_score'):.2f} / 1.0 |\n" ) def _detector_md(detection: dict | None, model_card: dict | None) -> str: if not detection: return "" pct = float(detection.get("p_anomaly", 0.0) or 0.0) * 100 abnormal = bool(detection.get("is_anomaly")) verdict = "⚠ ABNORMAL" if abnormal else "✓ NORMAL" lines = [f"**Trained anomaly detector:** {verdict} " f"({pct:.0f}% probability abnormal)"] if model_card and model_card.get("accuracy") and model_card.get("roc_auc"): lines.append( f"_Real ML model — {model_card['accuracy']*100:.0f}% accuracy, " f"{model_card['roc_auc']:.2f} ROC-AUC on {model_card.get('n_test','?')} " f"held-out real machine recordings (DCASE 2025)._" ) return "\n\n".join(lines) def _candidates_md(candidates: list) -> str: if not candidates: return "No rules fired." lines = ["**Rules that fired:**\n"] for i, c in enumerate(candidates): weight = float(c.get("weight", 0.0) or 0.0) bar_len = max(0, min(10, int(weight * 10))) bar = "#" * bar_len + "." * (10 - bar_len) lines.append( f"{i+1}. **{html.escape(str(c.get('name','?')))}** " f"({html.escape(str(c.get('urgency','?')))}) `[{bar}]` {weight:.0%}\n" f" _{html.escape(str(c.get('evidence','')))}_\n" ) return "\n".join(lines) def _history_md(history: list) -> str: if not history: return "No diagnoses yet." rows = ["| # | Urgency | Fault | Appliance | Conf | Time |", "|---|---|---|---|---|---|"] for i, h in enumerate(reversed(history[-10:])): rows.append( f"| {len(history)-i} | {html.escape(str(h.get('urgency','')))} | " f"**{html.escape(str(h.get('fault','')))}** | " f"{html.escape(str(h.get('appliance','')))} | " f"{int(h.get('confidence',0) or 0)}% | {html.escape(str(h.get('time','')))} |" ) return "\n".join(rows) # --- Handlers (never raise) ------------------------------------------------- def diagnose(audio_path, appliance, state): state = dict(state or {}) try: if not appliance: return _err_card("Please select the appliance type."), "", "", state if not audio_path: return _err_card("Please record or upload a sound first."), "", "", state t0 = time.time() resp = _call_backend(audio_path, appliance) elapsed_ms = (time.time() - t0) * 1000 if not resp.get("ok"): return _err_card(resp.get("error", "Unknown backend error.")), "", "", state features = resp.get("features", {}) result = resp.get("result", {}) candidates = resp.get("candidates", []) state["last_features"] = features state["last_appliance"] = appliance history = list(state.get("history", [])) history.append({ "fault": result.get("fault", "Inconclusive"), "urgency": result.get("urgency", "UNKNOWN"), "confidence": result.get("confidence", 0), "appliance": appliance, "time": time.strftime("%H:%M:%S"), }) state["history"] = history[-50:] det_md = _detector_md(resp.get("detection"), resp.get("model_card")) analysis_md = (det_md + "\n\n---\n\n" + _candidates_md(candidates) if det_md else _candidates_md(candidates)) return (_verdict_html(result, elapsed_ms), _features_md(features), analysis_md, state) except Exception as exc: return _err_card(f"Unexpected error: {type(exc).__name__}"), "", "", state def compare(audio_path, appliance, state): try: state = state or {} before = state.get("last_features") if not before or not before.get("signal_present"): return "Run a diagnosis first (with a usable recording), then record again here." if not audio_path: return "Record the appliance again (after your fix) to compare." resp = _call_backend(audio_path, appliance or state.get("last_appliance", "")) if not resp.get("ok"): return f"⚠ {resp.get('error', 'Backend error.')}" after = resp.get("features", {}) if not after.get("signal_present"): return "The second recording was too quiet/short to compare. Try again." def row(label, key, unit=""): b = float(before.get(key, 0.0) or 0.0) a = float(after.get(key, 0.0) or 0.0) delta = a - b arrow = "DOWN" if delta < 0 else ("UP" if delta > 0 else "=") return f"| {label} | {b:.2f}{unit} | {a:.2f}{unit} | {arrow} {delta:+.2f} |" b_anom = float(before.get("anomaly_score", 0.0) or 0.0) a_anom = float(after.get("anomaly_score", 0.0) or 0.0) pct = ((b_anom - a_anom) / max(b_anom, 0.001)) * 100 verdict = (f"**Sound improved** — anomaly score dropped **{pct:.0f}%**." if a_anom < b_anom else "**No improvement yet** — the issue likely persists.") return ( f"### Before / After\n\n{verdict}\n\n" f"| Metric | Before | After | Change |\n|---|---|---|---|\n" f"{row('Anomaly score', 'anomaly_score')}\n" f"{row('Loudness', 'rms_db', ' dB')}\n" f"{row('Spectral centroid', 'spectral_centroid_hz', ' Hz')}\n" f"{row('Harshness', 'zero_crossing_rate')}\n" f"{row('Clicks/sec', 'onset_rate_per_sec')}\n" ) except Exception as exc: return f"⚠ Unexpected error: {type(exc).__name__}" def show_history(state): return _history_md((state or {}).get("history", [])) def clear_history(state): state = dict(state or {}) state["history"] = [] return "History cleared.", state # --- CSS / UI --------------------------------------------------------------- def _css() -> str: path = os.path.join(os.path.dirname(__file__), "assets", "custom.css") try: with open(path, "r", encoding="utf-8") as fh: return fh.read() except Exception: return "" EXAMPLE_DATA = [ ("assets/sample_washer_bearing.wav", "Washing machine"), ("assets/sample_fan_imbalanced.wav", "Electric fan"), ("assets/sample_motor_squeal.wav", "Electric motor (generic)"), ("assets/sample_washer_good.wav", "Washing machine"), ] with gr.Blocks(css=_css(), title="Does It Sound Broken?") as demo: state = gr.State({}) gr.Markdown( "# Does It Sound Broken?\n" "*Record your appliance. Get a diagnosis grounded in measured acoustics. " "All analysis runs on Modal — this page stays light.*" ) with gr.Tabs(): with gr.Tab("Diagnose"): with gr.Row(): with gr.Column(scale=1): audio_in = gr.Audio( sources=["microphone", "upload"], type="filepath", label="Record 5-10s of the appliance sound", ) appliance = gr.Dropdown( choices=APPLIANCES, value="Washing machine", label="Appliance type (required)", ) diagnose_btn = gr.Button("Diagnose", variant="primary", size="lg") gr.Markdown("**Try these examples:**") for ex_path, ex_appliance in EXAMPLE_DATA: short = os.path.basename(ex_path).replace("sample_", "").replace(".wav", "") b = gr.Button(f" {short} ({ex_appliance})", size="sm") b.click(fn=lambda p=ex_path, a=ex_appliance: (p, a), outputs=[audio_in, appliance]) with gr.Column(scale=1): verdict_out = gr.HTML() with gr.Accordion("Evidence", open=False): features_out = gr.Markdown() candidates_out = gr.Markdown() diagnose_btn.click(diagnose, [audio_in, appliance, state], [verdict_out, features_out, candidates_out, state]) with gr.Tab("Compare"): gr.Markdown("Record again after a fix to prove it worked.") audio_after = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record again (after fix)") compare_btn = gr.Button("Compare", variant="primary") compare_out = gr.Markdown() compare_btn.click(compare, [audio_after, appliance, state], compare_out) with gr.Tab("History"): history_out = gr.Markdown() with gr.Row(): refresh_btn = gr.Button("Refresh") clear_btn = gr.Button("Clear history") refresh_btn.click(show_history, [state], history_out) clear_btn.click(clear_history, [state], [history_out, state]) with gr.Tab("How it works"): gr.Markdown(""" ## Pipeline (all on Modal) ``` Audio -> Modal GPU container: librosa features -> rule engine -> Nemotron-4B -> validated JSON -> thin Gradio Space renders the result ``` ## Key design - The model NEVER hears raw audio — only measured features + rule candidates - 12 appliance types, 30+ dedicated fault rules - Ungrounded model output is snapped back to the top deterministic candidate - Robust to silence, clipping, NaN, corrupt files, and runaway model output - Heavy deps live only in the Modal image, so the Space stays tiny """) gr.Markdown( "" ) if __name__ == "__main__": port = int(os.environ.get("SOUNDBROKEN_PORT", "7882")) demo.launch(server_port=port, server_name="0.0.0.0", show_error=True)