"""Unit tests for engine._tail_artifact_score — the best-of-N selection metric.

The score is "lower is better"; a draw at or below EARLY_ACCEPT_SCORE is taken
immediately, anything above forces another draw. These tests pin the four
failure modes the metric must catch (loud burst, whole-tail silence, mid-tail
dropout, dynamics/transient collapse) and confirm a clean, dynamic take passes.
Two further cases check that a natural end-of-take fade is not read as a
dropout and that a too-short input scores +inf.

Synthetic signals only — no model, no GPU. All are peak-normalized to ~1.0,
the state the score actually sees (engine peak-normalizes each draw before
scoring).
"""
import sys
import types

import numpy as np

# Register an empty stand-in for stable_audio_tools (only if nothing is
# registered under that name yet) BEFORE importing engine. Presumably engine
# imports it at module load — verify against engine.py.
sys.modules.setdefault("stable_audio_tools",
                       types.ModuleType("stable_audio_tools"))

import engine  # noqa: E402

SR = engine.SR
DUR = 10.0  # length in seconds of every synthetic take built by _t()


def _norm(x):
    """Scale *x* so its absolute peak is 1.0; return it unchanged if all-zero."""
    p = float(np.abs(x).max())
    return x / p if p > 0 else x


def _stereo(x):
    """Stack *x* with itself along a new first axis and cast to float32."""
    return np.stack([x, x]).astype(np.float32)


def _t():
    """Return the sample times, in seconds, for DUR seconds at SR."""
    return np.arange(int(DUR * SR)) / SR


def _clean_dynamic():
    """A continuous tonal bed plus short transients: peak set by the
    transients, body level steady (no holes), so crest is high (~real music)
    and spikiness stays moderate — the take the metric should ACCEPT."""
    t = _t()
    bed = 0.12 * (np.sin(2 * np.pi * 220 * t)
                  + 0.6 * np.sin(2 * np.pi * 330 * t)
                  + 0.4 * np.sin(2 * np.pi * 440 * t))
    sig = bed.copy()
    w = int(0.005 * SR)  # 5 ms transients every 0.5 s
    # Rising half of a Hann window; identical for every transient, so it is
    # built once rather than on each loop iteration.
    env = np.hanning(2 * w)[:w]
    for c in range(int(0.4 * SR), len(sig), int(0.5 * SR)):
        # Clip at the end of the buffer: the slices of sig and t shorten
        # silently there, so the envelope must be shortened to match or the
        # in-place add fails to broadcast. With DUR = 10.0 every transient
        # fits, so stop == c + w and the result is unchanged.
        stop = min(c + w, len(sig))
        sig[c:stop] += (0.7 * env[:stop - c]
                        * np.sin(2 * np.pi * 660 * t[c:stop]))
    return _norm(sig)


def test_clean_dynamic_take_is_accepted():
    """The clean reference take must score at or below EARLY_ACCEPT_SCORE."""
    score = engine._tail_artifact_score(_stereo(_clean_dynamic()), SR)
    assert score <= engine.EARLY_ACCEPT_SCORE, score


def test_loud_burst_is_rejected():
    """A 50 ms span at 5 s boosted 20x (then re-normalized, so the rest of the
    take is pushed down relative to the burst) must score above the
    threshold."""
    sig = _clean_dynamic()
    c = int(5 * SR)
    sig[c:c + int(0.05 * SR)] *= 20  # a single loud spike
    score = engine._tail_artifact_score(_stereo(_norm(sig)), SR)
    assert score > engine.EARLY_ACCEPT_SCORE, score


def test_whole_tail_silence_is_rejected():
    """The clean take scaled down to a 0.004 peak (deliberately NOT
    re-normalized) must score above the threshold."""
    sig = _clean_dynamic() * 0.004  # below the 0.02 RMS floor
    score = engine._tail_artifact_score(_stereo(sig), SR)
    assert score > engine.EARLY_ACCEPT_SCORE, score


def test_mid_tail_dropout_is_rejected():
    """A clean take with a 0.5 s near-silent hole in the middle — healthy
    overall RMS and low spikiness, so only the dropout term can catch it."""
    sig = _clean_dynamic()
    sig[int(5.0 * SR):int(5.5 * SR)] *= 0.01
    score = engine._tail_artifact_score(_stereo(sig), SR)
    assert score > engine.EARLY_ACCEPT_SCORE, score


def test_squashed_transientless_take_is_rejected():
    """Dense, near-constant amplitude (crest collapses): tonal and steady, so
    every other term reads clean — only the crest term flags the mush."""
    t = _t()
    sig = np.tanh(5 * (np.sin(2 * np.pi * 220 * t)
                       + 0.8 * np.sin(2 * np.pi * 331 * t)
                       + 0.7 * np.sin(2 * np.pi * 440 * t)))
    score = engine._tail_artifact_score(_stereo(_norm(sig)), SR)
    assert score > engine.EARLY_ACCEPT_SCORE, score


def test_natural_ending_taper_is_not_a_dropout():
    """A clean take that simply fades over its final ~0.6 s must NOT be read
    as a dropout (stitch fades the end anyway); the back-guard protects it."""
    sig = _clean_dynamic()
    tail = sig[-int(0.6 * SR):]
    sig[-int(0.6 * SR):] = tail * np.linspace(1.0, 0.0, len(tail))
    score = engine._tail_artifact_score(_stereo(sig), SR)
    assert score <= engine.EARLY_ACCEPT_SCORE, score


def test_too_short_tail_is_avoided():
    """A 10 ms all-zero input must score exactly +inf."""
    score = engine._tail_artifact_score(_stereo(np.zeros(int(0.01 * SR))), SR)
    assert score == float("inf")