Spaces:
Running on Zero
Running on Zero
"""Unit tests for engine._tail_artifact_score — the best-of-N selection metric.

The score is "lower is better"; a draw at or below EARLY_ACCEPT_SCORE is taken
immediately, anything above forces another draw. These tests pin the four
failure modes the metric must catch (loud burst, whole-tail silence, mid-tail
dropout, dynamics/transient collapse) and confirm a clean, dynamic take passes.

Synthetic signals only — no model, no GPU. All are peak-normalized to ~1.0, the
state the score actually sees (engine peak-normalizes each draw before scoring).
"""
import numpy as np
import types
import sys

# Register an empty placeholder module named "stable_audio_tools" unless one
# is already loaded. This must run before `import engine` below.
# NOTE(review): presumably `engine` imports stable_audio_tools at import time
# and the stub lets these tests run without the real package — confirm
# against engine.py.
sys.modules.setdefault("stable_audio_tools", types.ModuleType("stable_audio_tools"))
import engine  # noqa: E402

# Sample rate shared with the engine module (presumably in Hz — confirm).
SR = engine.SR
# Length of every synthetic signal; _t() builds int(DUR * SR) samples, so this
# is in seconds if SR is samples per second.
DUR = 10.0
def _norm(x):
    """Peak-normalize ``x`` so that its largest absolute sample becomes 1.0.

    Args:
        x: NumPy array of samples.

    Returns:
        ``x`` divided by its absolute peak. ``x`` itself is returned unchanged
        when it is empty or when its peak is not strictly positive (for
        example an all-zero signal), so no division by zero takes place.
    """
    if np.size(x) == 0:
        # A max-reduction over a zero-size array raises ValueError; there is
        # nothing to scale, so hand the input back as-is.
        return x
    peak = float(np.abs(x).max())
    return x / peak if peak > 0 else x
def _stereo(x):
    """Duplicate a mono signal into two identical channels.

    Returns a float32 array whose leading axis has length 2, with ``x``
    stacked on top of itself.
    """
    both_channels = np.stack((x, x), axis=0)
    return both_channels.astype(np.float32)
def _t():
    """Return the time axis for one synthetic signal.

    Builds ``int(DUR * SR)`` sample indices and divides them by ``SR``, so the
    values start at 0 and advance in steps of ``1 / SR``.
    """
    n_samples = int(DUR * SR)
    sample_index = np.arange(n_samples)
    return sample_index / SR
def _clean_dynamic():
    """Build the reference "good" take: a continuous tonal bed plus short transients.

    The peak is set by the transients while the body level stays steady (no
    holes), so crest is high (~real music) and spikiness stays moderate — the
    take the metric should ACCEPT.

    Returns:
        A mono signal spanning the time axis from ``_t()``, passed through
        ``_norm`` before being returned.
    """
    t = _t()
    bed = 0.12 * (np.sin(2 * np.pi * 220 * t)
                  + 0.6 * np.sin(2 * np.pi * 330 * t)
                  + 0.4 * np.sin(2 * np.pi * 440 * t))
    sig = bed.copy()
    w = int(0.005 * SR)  # 5 ms transients every 0.5 s
    # First half of a Hann window, used as the gain ramp for each transient.
    # It does not depend on the loop variable, so build it once.
    env = np.hanning(2 * w)[:w]
    for c in range(int(0.4 * SR), len(sig), int(0.5 * SR)):
        seg_t = t[c:c + w]
        # A transient that starts within `w` samples of the end yields a
        # shorter slice; clip the envelope to match instead of failing to
        # broadcast.
        sig[c:c + w] += 0.7 * env[:len(seg_t)] * np.sin(2 * np.pi * 660 * seg_t)
    return _norm(sig)
def test_clean_dynamic_take_is_accepted():
    """The clean, dynamic reference take must score at or below the early-accept threshold."""
    take = _stereo(_clean_dynamic())
    result = engine._tail_artifact_score(take, SR)
    assert result <= engine.EARLY_ACCEPT_SCORE, result
def test_loud_burst_is_rejected():
    """A take containing one short, heavily boosted burst must score above the threshold."""
    take = _clean_dynamic()
    burst_start = int(5 * SR)
    burst_len = int(0.05 * SR)
    # A single loud spike: boost a 50 ms window by 20x, then re-normalize so
    # the take is rescaled to its new peak.
    take[burst_start:burst_start + burst_len] *= 20
    normalized = _stereo(_norm(take))
    result = engine._tail_artifact_score(normalized, SR)
    assert result > engine.EARLY_ACCEPT_SCORE, result
def test_whole_tail_silence_is_rejected():
    """A take that is near-silent from start to end must score above the threshold.

    Unlike the other cases, this signal is deliberately NOT re-normalized
    after being scaled down.
    """
    quiet_gain = 0.004  # below the 0.02 RMS floor
    quiet_take = _clean_dynamic() * quiet_gain
    result = engine._tail_artifact_score(_stereo(quiet_take), SR)
    assert result > engine.EARLY_ACCEPT_SCORE, result
def test_mid_tail_dropout_is_rejected():
    """A clean take with a 0.5 s near-silent hole in the middle — healthy overall
    RMS and low spikiness, so only the dropout term can catch it."""
    take = _clean_dynamic()
    # Attenuate the window from 5.0 s to 5.5 s to 1% of its level.
    hole = slice(int(5.0 * SR), int(5.5 * SR))
    take[hole] *= 0.01
    result = engine._tail_artifact_score(_stereo(take), SR)
    assert result > engine.EARLY_ACCEPT_SCORE, result
def test_squashed_transientless_take_is_rejected():
    """Dense, near-constant amplitude (crest collapses): tonal and steady, so
    every other term reads clean — only the crest term flags the mush."""
    t = _t()
    low = np.sin(2 * np.pi * 220 * t)
    mid = 0.8 * np.sin(2 * np.pi * 331 * t)
    high = 0.7 * np.sin(2 * np.pi * 440 * t)
    # Drive the three-tone sum hard into tanh so it saturates.
    squashed = np.tanh(5 * (low + mid + high))
    take = _stereo(_norm(squashed))
    result = engine._tail_artifact_score(take, SR)
    assert result > engine.EARLY_ACCEPT_SCORE, result
def test_natural_ending_taper_is_not_a_dropout():
    """A clean take that simply fades over its final ~0.6 s must NOT be read as a
    dropout (stitch fades the end anyway); the back-guard protects it."""
    take = _clean_dynamic()
    fade_len = int(0.6 * SR)
    # Linear ramp from 1.0 down to 0.0 across the last `fade_len` samples.
    ramp = np.linspace(1.0, 0.0, len(take[-fade_len:]))
    take[-fade_len:] *= ramp
    result = engine._tail_artifact_score(_stereo(take), SR)
    assert result <= engine.EARLY_ACCEPT_SCORE, result
def test_too_short_tail_is_avoided():
    """A 10 ms all-zero input must score exactly +infinity."""
    n_samples = int(0.01 * SR)
    tiny = _stereo(np.zeros(n_samples))
    result = engine._tail_artifact_score(tiny, SR)
    assert result == float("inf")