Spaces:

build-small-hackathon
/

sound-broken

Runtime error

App Files Files Community

sound-broken / app.py

mitvho09

Upload Space app

edb671a verified 19 days ago

Raw

History Blame Contribute Delete

16.2 kB

	"""Does It Sound Broken? — thin Gradio client.

	Limited resources: this Space does NO heavy compute. It records/uploads audio,
	ships the bytes to the Modal backend (modal_backend.Diagnoser), and renders the
	returned diagnosis. All librosa/torch/transformers work happens on Modal.

	Env:
	SOUNDBROKEN_MOCK=1 -> render canned output locally without calling Modal
	MODAL_APP_NAME -> override Modal app name (default "sound-broken")
	"""
	from __future__ import annotations

	import html
	import os
	import time

	import gradio as gr

	# Modal app to look up; MODAL_APP_NAME overrides the default name.
	APP_NAME = os.getenv("MODAL_APP_NAME", "sound-broken")
	# Mock mode is enabled only when SOUNDBROKEN_MOCK is exactly "1".
	MOCK = os.getenv("SOUNDBROKEN_MOCK", "0") == "1"

	# Choices offered by the appliance dropdown, in display order.
	APPLIANCES = [
	    "Washing machine",
	    "Tumble dryer",
	    "Refrigerator/Freezer",
	    "Electric fan",
	    "Air conditioner",
	    "Vacuum cleaner",
	    "Dishwasher",
	    "Microwave",
	    "Electric motor (generic)",
	    "Car engine",
	    "Bicycle (chain/gears)",
	    "Power drill",
	]

	# Accent colour of the verdict card, keyed by upper-case urgency level.
	URGENCY_COLOR = {
	    "CRITICAL": "#E53935",
	    "HIGH": "#FB8C00",
	    "MEDIUM": "#FDD835",
	    "LOW": "#43A047",
	    "UNKNOWN": "#9E9E9E",
	}
	# Short text marker shown in brackets before the urgency level.
	URGENCY_ICON = {
	    "CRITICAL": "!!",
	    "HIGH": "!",
	    "MEDIUM": "~",
	    "LOW": "ok",
	    "UNKNOWN": "?",
	}

	# Fallback value for each feature when the response omits it or sends None.
	FEATURE_DEFAULTS = {
	    "duration_s": 0.0,
	    "rms_db": -120.0,
	    "peak_db": -120.0,
	    "spectral_centroid_hz": 0.0,
	    "dominant_frequency_hz": 0.0,
	    "harmonic_ratio": 0.0,
	    "zero_crossing_rate": 0.0,
	    "onset_rate_per_sec": 0.0,
	    "has_regular_pattern": False,
	    "pattern_interval_ms": 0.0,
	    "anomaly_score": 0.0,
	    "signal_present": False,
	}


	# --- Modal client -----------------------------------------------------------
	# Cached handle to the remote Diagnoser class; filled in on first use.
	_DIAGNOSER = None


	def _diagnoser():
	    """Return the Modal ``Diagnoser`` class handle, looking it up only once.

	    ``modal`` is imported inside the function, so the package is only
	    needed when a lookup actually takes place.
	    """
	    global _DIAGNOSER
	    if _DIAGNOSER is not None:
	        return _DIAGNOSER

	    import modal

	    _DIAGNOSER = modal.Cls.from_name(APP_NAME, "Diagnoser")
	    return _DIAGNOSER


	def _mock_response(appliance: str) -> dict:
	    """Return a fixed, canned backend response for local UI work.

	    The ``appliance`` argument is accepted but not used: the same payload
	    is returned for every call.
	    """
	    features = {
	        "duration_s": 8.0,
	        "rms_db": -18.0,
	        "peak_db": -1.2,
	        "spectral_centroid_hz": 2450.0,
	        "dominant_frequency_hz": 1800.0,
	        "harmonic_ratio": 0.62,
	        "zero_crossing_rate": 0.11,
	        "onset_rate_per_sec": 4.0,
	        "has_regular_pattern": True,
	        "pattern_interval_ms": 250.0,
	        "anomaly_score": 0.47,
	        "signal_present": True,
	    }
	    top_candidate = {
	        "name": "Worn drum bearing",
	        "urgency": "HIGH",
	        "weight": 0.9,
	        "evidence": ("Regular 250 ms clicks with a bright spectrum — "
	                     "classic bearing-race signature."),
	    }
	    verdict = {
	        "fault": "Worn drum bearing",
	        "urgency": "HIGH",
	        "checks": [
	            "Inspect the bearing housing for play or heat.",
	            "Spin the drum by hand — roughness confirms wear.",
	            "Replace the bearing if grease does not quiet it.",
	        ],
	        "safety": "Disconnect power before inspecting.",
	        "confidence": 88,
	        "grounded": True,
	    }
	    return {
	        "ok": True,
	        "error": "",
	        "features": features,
	        "candidates": [top_candidate],
	        "result": verdict,
	    }


	def _call_backend(audio_path: str, appliance: str) -> dict:
	    """Send audio to Modal; return the response dict or an error dict.

	    In mock mode the canned response is returned and nothing is read or
	    sent. Otherwise the file at ``audio_path`` is read and its bytes, its
	    extension (``".wav"`` when it has none) and ``appliance`` are passed
	    to the remote ``run`` method.

	    Returns:
	        The backend's response, or on failure a dict with ``ok`` False, a
	        user-facing ``error`` message and empty ``features``,
	        ``candidates`` and ``result`` entries. This function does not
	        raise for read or backend failures.
	    """
	    if MOCK:
	        return _mock_response(appliance)

	    def failure(message: str) -> dict:
	        # Same error shape for every failure path.
	        return {"ok": False, "error": message,
	                "features": {}, "candidates": [], "result": {}}

	    # Read the file on its own so that a local I/O problem is not reported
	    # as a Modal deployment or credentials problem.
	    try:
	        with open(audio_path, "rb") as fh:
	            data = fh.read()
	    except (OSError, TypeError, ValueError) as exc:
	        return failure(
	            f"Could not read the audio file ({type(exc).__name__}). "
	            f"Please record or upload it again.")

	    suffix = os.path.splitext(audio_path)[1] or ".wav"
	    try:
	        return _diagnoser()().run.remote(data, suffix, appliance)
	    except Exception as exc:
	        return failure(
	            f"Could not reach the Modal backend ({type(exc).__name__}). "
	            f"Is it deployed (`modal deploy modal_backend.py`) and are "
	            f"MODAL_TOKEN_ID / MODAL_TOKEN_SECRET set?")


	# --- Rendering (all model-derived text is HTML-escaped) ---------------------
	def _err_card(msg: str) -> str:
	    """Wrap ``msg`` (stringified and HTML-escaped) in a red verdict card."""
	    safe_text = html.escape(str(msg))
	    opening = "<div class='verdict' style='border-left:8px solid #E53935'>"
	    return opening + f"<div class='fault'>⚠ {safe_text}</div></div>"


	def _verdict_html(result: dict, elapsed_ms: float) -> str:
	    """Render the diagnosis verdict as an HTML card.

	    Args:
	        result: Diagnosis fields. The keys read are ``urgency``, ``fault``,
	            ``confidence``, ``checks``, ``safety`` and ``grounded``; each
	            has a fallback when missing.
	        elapsed_ms: Duration shown next to the confidence, in milliseconds.

	    Returns:
	        An HTML fragment. Every value taken from ``result`` is escaped
	        before it is interpolated.
	    """
	    level = str(result.get("urgency", "UNKNOWN")).upper()
	    color = URGENCY_COLOR.get(level, URGENCY_COLOR["UNKNOWN"])
	    icon = URGENCY_ICON.get(level, "?")
	    # The urgency label comes from the result, so it must be escaped too;
	    # only the colour and icon come from the module's own lookup tables.
	    urgency = html.escape(level)
	    fault = html.escape(str(result.get("fault", "Inconclusive")))

	    # A non-numeric confidence degrades to 0 instead of aborting the whole
	    # card; the value is shown as a percentage, so keep it within 0-100.
	    try:
	        confidence = int(result.get("confidence", 0) or 0)
	    except (TypeError, ValueError, OverflowError):
	        confidence = 0
	    confidence = max(0, min(100, confidence))

	    checks = result.get("checks") or []
	    if not isinstance(checks, (list, tuple)):
	        # A bare string would otherwise be split into one item per character.
	        checks = [checks]
	    checks_html = "".join(f"<li>{html.escape(str(c))}</li>" for c in checks)

	    safety = html.escape(str(result.get("safety", "None")))
	    badge = "" if result.get("grounded", True) else (
	        "<span style='font-size:13px;opacity:.7'> (ungrounded)</span>")

	    return "\n".join((
	        "",
	        f'<div class="verdict" style="border-left:8px solid {color}">',
	        f'<div class="urgency" style="color:{color}">[{icon}] {urgency}',
	        f'<span class="conf">{confidence}% confidence | '
	        f'{elapsed_ms:.0f}ms</span></div>',
	        f'<div class="fault">{fault}{badge}</div>',
	        '<div class="label">What to check first:</div>',
	        f'<ol class="checks">{checks_html}</ol>',
	        '<div class="label">Safety:</div>',
	        f'<div class="safety">{safety}</div>',
	        "</div>",
	    ))


	def _g(d: dict, key: str):
	    """Return ``d[key]``, or the fallback when the key is missing or None.

	    The fallback is the ``FEATURE_DEFAULTS`` entry for ``key``, or ``0.0``
	    when that table has no such entry.
	    """
	    fallback = FEATURE_DEFAULTS.get(key, 0.0)
	    value = d.get(key, fallback)
	    return fallback if value is None else value


	def _features_md(f: dict) -> str:
	    """Render the measured features as a two-column Markdown table.

	    Returns an italic hint instead of a table when ``f`` is empty or its
	    ``signal_present`` flag is falsy.
	    """
	    if not f or not f.get("signal_present", False):
	        return ("_Recording too quiet, too short, or unreadable — no reliable "
	                "features. Record 5–10 s closer to the appliance._")

	    pat = (f"Yes ({round(_g(f, 'pattern_interval_ms'))} ms)"
	           if f.get("has_regular_pattern") else "No")
	    cells = [
	        ("Duration", f"{_g(f, 'duration_s'):.1f} s"),
	        ("Loudness",
	         f"{_g(f, 'rms_db'):.1f} dB (peak {_g(f, 'peak_db'):.1f})"),
	        ("Spectral centroid", f"{_g(f, 'spectral_centroid_hz'):.0f} Hz"),
	        ("Dominant freq", f"{_g(f, 'dominant_frequency_hz'):.0f} Hz"),
	        ("Harmonic ratio", f"{_g(f, 'harmonic_ratio'):.2f}"),
	        ("Harshness (ZCR)", f"{_g(f, 'zero_crossing_rate'):.3f}"),
	        ("Clicks/sec", f"{_g(f, 'onset_rate_per_sec'):.1f}"),
	        ("Regular pattern", pat),
	        ("Anomaly score", f"{_g(f, 'anomaly_score'):.2f} / 1.0"),
	    ]
	    # Column separators must be plain "|": a backslash-escaped pipe is a
	    # literal character in Markdown and would prevent the table rendering.
	    lines = ["| Metric | Value |", "|---|---|"]
	    lines.extend(f"| {label} | {value} |" for label, value in cells)
	    return "\n".join(lines) + "\n"


	def _detector_md(detection: dict | None, model_card: dict | None) -> str:
	    """Summarise the trained anomaly detector's output as Markdown.

	    Args:
	        detection: Detector output; ``p_anomaly`` and ``is_anomaly`` are
	            read. A falsy value yields an empty string.
	        model_card: Optional metrics; the extra line is added only when
	            both ``accuracy`` and ``roc_auc`` are present and truthy.

	    Returns:
	        One or two paragraphs separated by a blank line, or ``""``.
	    """
	    if not detection:
	        return ""

	    pct = float(detection.get("p_anomaly", 0.0) or 0.0) * 100
	    verdict = "⚠ ABNORMAL" if detection.get("is_anomaly") else "✓ NORMAL"
	    paragraphs = [f"Trained anomaly detector: {verdict} "
	                  f"({pct:.0f}% probability abnormal)"]

	    if model_card and model_card.get("accuracy") and model_card.get("roc_auc"):
	        paragraphs.append(
	            f"_Real ML model — {model_card['accuracy']*100:.0f}% accuracy, "
	            f"{model_card['roc_auc']:.2f} ROC-AUC on {model_card.get('n_test','?')} "
	            f"held-out real machine recordings (DCASE 2025)._"
	        )
	    return "\n\n".join(paragraphs)


	def _candidates_md(candidates: list) -> str:
	    """List the fired rules as numbered Markdown items with a weight bar.

	    Each item shows the escaped name and urgency, a ten-character bar
	    (``#`` for filled, ``.`` for empty), the weight as a percentage, and
	    the escaped evidence in italics on the following line.
	    """
	    if not candidates:
	        return "No rules fired."

	    out = ["Rules that fired:\n"]
	    for number, cand in enumerate(candidates, start=1):
	        weight = float(cand.get("weight", 0.0) or 0.0)

	        # Number of filled bar segments, kept within 0..10.
	        filled = int(weight * 10)
	        if filled < 0:
	            filled = 0
	        elif filled > 10:
	            filled = 10
	        bar = "#" * filled + "." * (10 - filled)

	        name = html.escape(str(cand.get("name", "?")))
	        urgency = html.escape(str(cand.get("urgency", "?")))
	        evidence = html.escape(str(cand.get("evidence", "")))
	        out.append(
	            f"{number}. {name} ({urgency}) `[{bar}]` {weight:.0%}\n"
	            f" _{evidence}_\n"
	        )
	    return "\n".join(out)


	def _history_md(history: list) -> str:
	    """Render the ten most recent history entries as a Markdown table.

	    Rows are listed newest first and numbered by their position in the
	    full ``history`` list. Returns a short placeholder when the list is
	    empty.
	    """
	    if not history:
	        return "No diagnoses yet."

	    def cell(value) -> str:
	        # html.escape leaves "|" and newlines untouched, and either one
	        # inside a cell would break the table row.
	        escaped = html.escape(str(value))
	        return escaped.replace("|", "\\|").replace("\n", " ")

	    rows = ["| # | Urgency | Fault | Appliance | Conf | Time |",
	            "|---|---|---|---|---|---|"]
	    total = len(history)
	    for offset, entry in enumerate(reversed(history[-10:])):
	        rows.append(
	            f"| {total - offset} | {cell(entry.get('urgency', ''))} | "
	            f"{cell(entry.get('fault', ''))} | "
	            f"{cell(entry.get('appliance', ''))} | "
	            f"{int(entry.get('confidence', 0) or 0)}% | "
	            f"{cell(entry.get('time', ''))} |"
	        )
	    return "\n".join(rows)


	# --- Handlers (never raise) -------------------------------------------------
	def diagnose(audio_path, appliance, state):
	    """Run one diagnosis and build the four outputs of the Diagnose tab.

	    Returns a 4-tuple ``(verdict_html, features_md, analysis_md, state)``.
	    On any failure the first slot is an error card and the two Markdown
	    slots are empty strings. On success the returned state also carries
	    the latest features, the appliance, and a history capped at 50 entries.
	    """
	    state = dict(state or {})  # work on a copy of the incoming state

	    def bail(message):
	        # Shared shape of every failure return.
	        return _err_card(message), "", "", state

	    try:
	        if not appliance:
	            return bail("Please select the appliance type.")
	        if not audio_path:
	            return bail("Please record or upload a sound first.")

	        # Time only the backend round trip.
	        started = time.time()
	        resp = _call_backend(audio_path, appliance)
	        elapsed_ms = (time.time() - started) * 1000

	        if not resp.get("ok"):
	            return bail(resp.get("error", "Unknown backend error."))

	        features = resp.get("features", {})
	        result = resp.get("result", {})
	        candidates = resp.get("candidates", [])

	        # Remember this run for the Compare and History tabs.
	        state["last_features"] = features
	        state["last_appliance"] = appliance
	        past = list(state.get("history", []))
	        past.append({
	            "fault": result.get("fault", "Inconclusive"),
	            "urgency": result.get("urgency", "UNKNOWN"),
	            "confidence": result.get("confidence", 0),
	            "appliance": appliance,
	            "time": time.strftime("%H:%M:%S"),
	        })
	        state["history"] = past[-50:]

	        # Detector summary (when present) goes above the rule list,
	        # separated by a horizontal rule.
	        det_md = _detector_md(resp.get("detection"), resp.get("model_card"))
	        rules_md = _candidates_md(candidates)
	        if det_md:
	            analysis_md = "\n\n---\n\n".join((det_md, rules_md))
	        else:
	            analysis_md = rules_md

	        return (_verdict_html(result, elapsed_ms), _features_md(features),
	                analysis_md, state)
	    except Exception as exc:
	        return bail(f"Unexpected error: {type(exc).__name__}")


	def compare(audio_path, appliance, state):
	    """Compare a new recording against the last diagnosed one.

	    Reads ``last_features`` (and, when ``appliance`` is empty,
	    ``last_appliance``) from ``state``, sends the new recording to the
	    backend, and returns Markdown with a one-line verdict and a
	    before/after table. Every failure path returns a plain message string
	    instead of raising.
	    """
	    try:
	        state = state or {}
	        before = state.get("last_features")
	        if not before or not before.get("signal_present"):
	            return "Run a diagnosis first (with a usable recording), then record again here."
	        if not audio_path:
	            return "Record the appliance again (after your fix) to compare."

	        resp = _call_backend(audio_path, appliance or state.get("last_appliance", ""))
	        if not resp.get("ok"):
	            return f"⚠ {resp.get('error', 'Backend error.')}"
	        after = resp.get("features", {})
	        if not after.get("signal_present"):
	            return "The second recording was too quiet/short to compare. Try again."

	        def row(label, key, unit=""):
	            # One table row: before, after, and the signed change.
	            b = float(before.get(key, 0.0) or 0.0)
	            a = float(after.get(key, 0.0) or 0.0)
	            delta = a - b
	            arrow = "DOWN" if delta < 0 else ("UP" if delta > 0 else "=")
	            return f"| {label} | {b:.2f}{unit} | {a:.2f}{unit} | {arrow} {delta:+.2f} |"

	        b_anom = float(before.get("anomaly_score", 0.0) or 0.0)
	        a_anom = float(after.get("anomaly_score", 0.0) or 0.0)
	        # Floor the denominator so a zero "before" score cannot divide by zero.
	        pct = ((b_anom - a_anom) / max(b_anom, 0.001)) * 100
	        verdict = (f"Sound improved — anomaly score dropped {pct:.0f}%."
	                   if a_anom < b_anom else
	                   "No improvement yet — the issue likely persists.")

	        # Separators are plain "|" so the Markdown table actually renders.
	        return (
	            f"### Before / After\n\n{verdict}\n\n"
	            f"| Metric | Before | After | Change |\n|---|---|---|---|\n"
	            f"{row('Anomaly score', 'anomaly_score')}\n"
	            f"{row('Loudness', 'rms_db', ' dB')}\n"
	            f"{row('Spectral centroid', 'spectral_centroid_hz', ' Hz')}\n"
	            f"{row('Harshness', 'zero_crossing_rate')}\n"
	            f"{row('Clicks/sec', 'onset_rate_per_sec')}\n"
	        )
	    except Exception as exc:
	        return f"⚠ Unexpected error: {type(exc).__name__}"


	def show_history(state):
	    """Render the history kept in ``state`` as Markdown.

	    A falsy ``state`` or a missing ``history`` key is treated as an empty
	    history.
	    """
	    session = state if state else {}
	    entries = session.get("history", [])
	    return _history_md(entries)


	def clear_history(state):
	    """Return a confirmation message and a copy of ``state`` with no history.

	    The incoming ``state`` object is left untouched; every key other than
	    ``history`` is carried over to the copy.
	    """
	    fresh = dict(state or {})
	    fresh.update(history=[])
	    return "History cleared.", fresh


	# --- CSS / UI ---------------------------------------------------------------
	def _css() -> str:
	    """Return the text of ``assets/custom.css`` located beside this module.

	    Any error while opening or reading the file yields an empty string.
	    """
	    module_dir = os.path.dirname(__file__)
	    css_file = os.path.join(module_dir, "assets", "custom.css")
	    try:
	        with open(css_file, "r", encoding="utf-8") as handle:
	            stylesheet = handle.read()
	    except Exception:
	        # Best effort: fall back to no custom CSS when the file is unusable.
	        stylesheet = ""
	    return stylesheet


	# (audio file path, appliance choice) pairs behind the example buttons.
	# The paths are relative, so they resolve against the working directory.
	EXAMPLE_DATA = [
	    ("assets/sample_washer_bearing.wav", "Washing machine"),
	    ("assets/sample_fan_imbalanced.wav", "Electric fan"),
	    ("assets/sample_motor_squeal.wav", "Electric motor (generic)"),
	    ("assets/sample_washer_good.wav", "Washing machine"),
	]

	# Build the UI. `demo` is the Blocks app launched by the __main__ guard.
	# NOTE(review): the nesting of the `with` blocks below was reconstructed
	# from statement order and blank lines — confirm against the deployed file.
	with gr.Blocks(css=_css(), title="Does It Sound Broken?") as demo:
	    # Session state dict; the handlers store "last_features",
	    # "last_appliance" and "history" in it.
	    state = gr.State({})
	    gr.Markdown(
	        "# Does It Sound Broken?\n"
	        "*Record your appliance. Get a diagnosis grounded in measured acoustics. "
	        "All analysis runs on Modal — this page stays light.*"
	    )

	    with gr.Tabs():
	        with gr.Tab("Diagnose"):
	            with gr.Row():
	                # Left column: inputs and example shortcuts.
	                with gr.Column(scale=1):
	                    audio_in = gr.Audio(
	                        sources=["microphone", "upload"], type="filepath",
	                        label="Record 5-10s of the appliance sound",
	                    )
	                    appliance = gr.Dropdown(
	                        choices=APPLIANCES, value="Washing machine",
	                        label="Appliance type (required)",
	                    )
	                    diagnose_btn = gr.Button("Diagnose", variant="primary", size="lg")
	                    gr.Markdown("Try these examples:")
	                    for ex_path, ex_appliance in EXAMPLE_DATA:
	                        # Button label: file name without "sample_" and ".wav".
	                        short = os.path.basename(ex_path).replace("sample_", "").replace(".wav", "")
	                        b = gr.Button(f" {short} ({ex_appliance})", size="sm")
	                        # Default arguments bind this iteration's values; a plain
	                        # closure would see only the last pair of the loop.
	                        b.click(fn=lambda p=ex_path, a=ex_appliance: (p, a),
	                                outputs=[audio_in, appliance])
	                # Right column: verdict card plus collapsible evidence.
	                with gr.Column(scale=1):
	                    verdict_out = gr.HTML()
	                    with gr.Accordion("Evidence", open=False):
	                        features_out = gr.Markdown()
	                        candidates_out = gr.Markdown()
	            # diagnose() returns (verdict, features, analysis, state) in this order.
	            diagnose_btn.click(diagnose, [audio_in, appliance, state],
	                               [verdict_out, features_out, candidates_out, state])

	        with gr.Tab("Compare"):
	            gr.Markdown("Record again after a fix to prove it worked.")
	            audio_after = gr.Audio(sources=["microphone", "upload"], type="filepath",
	                                   label="Record again (after fix)")
	            compare_btn = gr.Button("Compare", variant="primary")
	            compare_out = gr.Markdown()
	            # Reuses the Diagnose tab's appliance dropdown and the shared state.
	            compare_btn.click(compare, [audio_after, appliance, state], compare_out)

	        with gr.Tab("History"):
	            history_out = gr.Markdown()
	            with gr.Row():
	                refresh_btn = gr.Button("Refresh")
	                clear_btn = gr.Button("Clear history")
	            # The table is only re-rendered when Refresh is clicked.
	            refresh_btn.click(show_history, [state], history_out)
	            # clear_history() returns (message, new_state).
	            clear_btn.click(clear_history, [state], [history_out, state])

	        with gr.Tab("How it works"):
	            gr.Markdown("""
	## Pipeline (all on Modal)
	```
	Audio -> Modal GPU container:
	librosa features -> rule engine -> Nemotron-4B -> validated JSON
	-> thin Gradio Space renders the result
	```

	## Key design
	- The model NEVER hears raw audio — only measured features + rule candidates
	- 12 appliance types, 30+ dedicated fault rules
	- Ungrounded model output is snapped back to the top deterministic candidate
	- Robust to silence, clipping, NaN, corrupt files, and runaway model output
	- Heavy deps live only in the Modal image, so the Space stays tiny
	""")

	    # Footer shown below the tabs.
	    gr.Markdown(
	        "<div class='footer'>The model never hears raw audio. No audio is stored. "
	        "Powered by NVIDIA Nemotron-3-Nano-4B on Modal.</div>"
	    )


	if __name__ == "__main__":
	    # Port precedence: SOUNDBROKEN_PORT, then Gradio's own GRADIO_SERVER_PORT
	    # (an explicit server_port argument would otherwise override it), then
	    # 7882. Using `or` also lets an empty variable fall through instead of
	    # failing in int("").
	    port = int(
	        os.environ.get("SOUNDBROKEN_PORT")
	        or os.environ.get("GRADIO_SERVER_PORT")
	        or "7882"
	    )
	    # Bind on all interfaces and surface handler errors in the UI.
	    demo.launch(server_port=port, server_name="0.0.0.0", show_error=True)