Spaces:

build-small-hackathon
/

coda

Running on Zero

App Files Files Community

coda / test_engine_score.py

blackboxanalytics

Harden best-of-N selection: reject dropouts and squashed takes

1cca94c 18 days ago

Raw

History Blame Contribute Delete

3.79 kB

	"""Unit tests for engine._tail_artifact_score — the best-of-N selection metric.

	The score is "lower is better"; a draw at or below EARLY_ACCEPT_SCORE is taken
	immediately, anything above forces another draw. These tests pin the four
	failure modes the metric must catch (loud burst, whole-tail silence, mid-tail
	dropout, dynamics/transient collapse) and confirm a clean, dynamic take passes.
	Two guard cases are also covered: a natural fade at the end of a take must not
	be read as a dropout, and a tail too short to score must return infinity.

	Synthetic signals only — no model, no GPU. All are peak-normalized to ~1.0, the
	state the score actually sees (engine peak-normalizes each draw before scoring).
	"""
	import numpy as np
	import types
	import sys

	# Register an empty placeholder module named "stable_audio_tools" unless one is
	# already present in sys.modules, and only then import engine.
	# NOTE(review): presumably engine imports stable_audio_tools at import time and
	# the placeholder lets these tests run without that package — confirm.
	sys.modules.setdefault("stable_audio_tools", types.ModuleType("stable_audio_tools"))
	import engine # noqa: E402

	# Reuse the engine's SR so signals are built with the same value that is passed
	# to the scorer (presumably the sample rate in Hz — confirm in engine).
	SR = engine.SR
	# Every synthetic signal has int(DUR * SR) samples (10 s if SR is samples/second).
	DUR = 10.0


	def _norm(x):
	    """Scale ``x`` so its largest absolute value becomes 1.0.

	    When the peak is not positive (e.g. an all-zero signal) ``x`` itself is
	    returned unchanged, avoiding a division by zero.
	    """
	    magnitude = np.abs(x)
	    peak = float(magnitude.max())
	    if peak > 0:
	        return x / peak
	    return x


	def _stereo(x):
	    """Stack two copies of ``x`` along a new leading axis and cast to float32."""
	    channels = [x, x]
	    stacked = np.stack(channels)
	    return stacked.astype(np.float32)


	def _t():
	    """Return the time axis: sample indices 0 .. int(DUR * SR) - 1, each divided by SR."""
	    n_samples = int(DUR * SR)
	    sample_index = np.arange(n_samples)
	    return sample_index / SR


	def _clean_dynamic():
	    """Build the reference "good" take — the one the metric should ACCEPT.

	    A continuous three-partial tonal bed (220/330/440) with a short 660 burst
	    added every 0.5 s starting at 0.4 s. The transients set the peak while the
	    body level stays steady (no holes), so crest is high (~real music) and
	    spikiness stays moderate. The result is peak-normalized via ``_norm``.
	    """
	    t = _t()
	    two_pi = 2 * np.pi

	    # Tonal bed: three sine partials summed, then scaled down to 0.12.
	    low = np.sin(two_pi * 220 * t)
	    mid = np.sin(two_pi * 330 * t)
	    high = np.sin(two_pi * 440 * t)
	    bed = 0.12 * (low + 0.6 * mid + 0.4 * high)

	    sig = bed.copy()
	    width = int(0.005 * SR)  # 5 ms transients every 0.5 s
	    first = int(0.4 * SR)
	    step = int(0.5 * SR)
	    # First half of a Hann window; it is the same for every transient, so it is
	    # built once up front.
	    env = np.hanning(2 * width)[:width]
	    for start in range(first, len(sig), step):
	        stop = start + width
	        sig[start:stop] += 0.7 * env * np.sin(two_pi * 660 * t[start:stop])
	    return _norm(sig)


	def test_clean_dynamic_take_is_accepted():
	    """The reference clean take must score at or below EARLY_ACCEPT_SCORE."""
	    take = _stereo(_clean_dynamic())
	    score = engine._tail_artifact_score(take, SR)
	    # The score is passed as the assert message so a failure shows its value.
	    assert score <= engine.EARLY_ACCEPT_SCORE, score


	def test_loud_burst_is_rejected():
	    """A clean take with one span boosted 20x, then re-normalized, must score
	    above EARLY_ACCEPT_SCORE."""
	    sig = _clean_dynamic()
	    burst_start = int(5 * SR)
	    burst_stop = burst_start + int(0.05 * SR)
	    sig[burst_start:burst_stop] *= 20  # a single loud spike
	    # Re-normalize so the scorer sees a peak of ~1.0, like every other take.
	    take = _stereo(_norm(sig))
	    score = engine._tail_artifact_score(take, SR)
	    assert score > engine.EARLY_ACCEPT_SCORE, score


	def test_whole_tail_silence_is_rejected():
	    """A take scaled down to near-silence (and not re-normalized) must score
	    above EARLY_ACCEPT_SCORE."""
	    quiet = 0.004 * _clean_dynamic()  # below the 0.02 RMS floor
	    take = _stereo(quiet)
	    score = engine._tail_artifact_score(take, SR)
	    assert score > engine.EARLY_ACCEPT_SCORE, score


	def test_mid_tail_dropout_is_rejected():
	    """A clean take with a 0.5 s near-silent hole in the middle must be rejected.

	    Overall RMS stays healthy and spikiness stays low, so only the dropout
	    term can catch it.
	    """
	    sig = _clean_dynamic()
	    hole = slice(int(5.0 * SR), int(5.5 * SR))
	    sig[hole] *= 0.01  # attenuate the hole to 1% of its level
	    take = _stereo(sig)
	    score = engine._tail_artifact_score(take, SR)
	    assert score > engine.EARLY_ACCEPT_SCORE, score


	def test_squashed_transientless_take_is_rejected():
	    """A dense, near-constant-amplitude take must be rejected.

	    The signal is tonal and steady (crest collapses), so every other term
	    reads clean — only the crest term flags the mush.
	    """
	    t = _t()
	    two_pi = 2 * np.pi
	    dense = (np.sin(two_pi * 220 * t)
	             + 0.8 * np.sin(two_pi * 331 * t)
	             + 0.7 * np.sin(two_pi * 440 * t))
	    # tanh of the 5x-boosted sum saturates the waveform.
	    sig = np.tanh(5 * dense)
	    take = _stereo(_norm(sig))
	    score = engine._tail_artifact_score(take, SR)
	    assert score > engine.EARLY_ACCEPT_SCORE, score


	def test_natural_ending_taper_is_not_a_dropout():
	    """A clean take that fades linearly to zero over its final ~0.6 s must still
	    be accepted: it must NOT be read as a dropout (stitch fades the end anyway);
	    the back-guard protects it."""
	    sig = _clean_dynamic()
	    fade_len = int(0.6 * SR)
	    ending = sig[-fade_len:]
	    ramp = np.linspace(1.0, 0.0, ending.size)  # gain from 1.0 down to 0.0
	    sig[-fade_len:] = ending * ramp
	    take = _stereo(sig)
	    score = engine._tail_artifact_score(take, SR)
	    assert score <= engine.EARLY_ACCEPT_SCORE, score


	def test_too_short_tail_is_avoided():
	    """An all-zero input of only int(0.01 * SR) samples must score infinity."""
	    n_samples = int(0.01 * SR)
	    silent = np.zeros(n_samples)
	    take = _stereo(silent)
	    score = engine._tail_artifact_score(take, SR)
	    assert score == float("inf")