coda / test_engine_score.py
blackboxanalytics's picture
Harden best-of-N selection: reject dropouts and squashed takes
1cca94c
Raw
History Blame Contribute Delete
3.79 kB
"""Unit tests for engine._tail_artifact_score — the best-of-N selection metric.
The score is "lower is better"; a draw at or below EARLY_ACCEPT_SCORE is taken
immediately, anything above forces another draw. These tests pin the four
failure modes the metric must catch (loud burst, whole-tail silence, mid-tail
dropout, dynamics/transient collapse) and confirm a clean, dynamic take passes.
Synthetic signals only — no model, no GPU. All are peak-normalized to ~1.0, the
state the score actually sees (engine peak-normalizes each draw before scoring).
"""
import numpy as np
import types
import sys
# Register an empty placeholder module under the name "stable_audio_tools"
# unless one is already loaded. This must run before `import engine` below.
# NOTE(review): presumably engine imports stable_audio_tools at module level
# and the stub lets these tests run without the real package -- confirm.
sys.modules.setdefault("stable_audio_tools", types.ModuleType("stable_audio_tools"))
import engine # noqa: E402
# Sample rate shared with the engine; used below as samples per second.
SR = engine.SR
# Length of every synthetic test signal, in seconds.
DUR = 10.0
def _norm(x):
    """Scale *x* so its largest absolute sample is 1.0.

    If the peak is not strictly positive (e.g. an all-zero signal), *x* is
    returned unchanged rather than divided.
    """
    peak = float(np.abs(x).max())
    if peak > 0:
        return x / peak
    return x
def _stereo(x):
    """Duplicate *x* into two identical stacked rows and cast to float32."""
    pair = np.stack([x, x])
    return pair.astype(np.float32)
def _t():
    """Return the time axis in seconds: int(DUR * SR) samples spaced 1/SR apart."""
    n_samples = int(DUR * SR)
    return np.arange(n_samples) / SR
def _clean_dynamic():
    """Build the reference take the metric should ACCEPT.

    A continuous tonal bed (220/330/440 Hz sines) plus short 660 Hz transients:
    the peak is set by the transients while the body level stays steady (no
    holes), so crest is high (~real music) and spikiness stays moderate.

    Returns:
        A 1-D array of int(DUR * SR) samples, peak-normalized via ``_norm``.
    """
    t = _t()
    bed = 0.12 * (np.sin(2 * np.pi * 220 * t)
                  + 0.6 * np.sin(2 * np.pi * 330 * t)
                  + 0.4 * np.sin(2 * np.pi * 440 * t))
    sig = bed.copy()
    w = int(0.005 * SR)  # 5 ms transients every 0.5 s
    # Rising half of a Hann window; it is the same for every transient, so it
    # is built once here instead of on each loop iteration.
    env = np.hanning(2 * w)[:w]
    for c in range(int(0.4 * SR), len(sig), int(0.5 * SR)):
        sig[c:c + w] += 0.7 * env * np.sin(2 * np.pi * 660 * t[c:c + w])
    return _norm(sig)
def test_clean_dynamic_take_is_accepted():
    """The clean reference take must score at or below EARLY_ACCEPT_SCORE."""
    take = _stereo(_clean_dynamic())
    score = engine._tail_artifact_score(take, SR)
    assert score <= engine.EARLY_ACCEPT_SCORE, score
def test_loud_burst_is_rejected():
    """A clean take with one 50 ms stretch boosted 20x must score above the bar."""
    sig = _clean_dynamic()
    start = int(5 * SR)
    stop = start + int(0.05 * SR)
    sig[start:stop] *= 20  # a single loud spike
    # Re-normalize so the burst, not the transients, defines the peak.
    take = _stereo(_norm(sig))
    score = engine._tail_artifact_score(take, SR)
    assert score > engine.EARLY_ACCEPT_SCORE, score
def test_whole_tail_silence_is_rejected():
    """A take scaled down to a 0.004 peak (not re-normalized) must be rejected."""
    quiet = 0.004 * _clean_dynamic()  # below the 0.02 RMS floor
    take = _stereo(quiet)
    score = engine._tail_artifact_score(take, SR)
    assert score > engine.EARLY_ACCEPT_SCORE, score
def test_mid_tail_dropout_is_rejected():
    """A clean take with a 0.5 s near-silent hole in the middle — healthy overall
    RMS and low spikiness, so only the dropout term can catch it."""
    sig = _clean_dynamic()
    hole = slice(int(5.0 * SR), int(5.5 * SR))
    sig[hole] *= 0.01  # attenuate 5.0 s .. 5.5 s to 1% of its level
    take = _stereo(sig)
    score = engine._tail_artifact_score(take, SR)
    assert score > engine.EARLY_ACCEPT_SCORE, score
def test_squashed_transientless_take_is_rejected():
    """Dense, near-constant amplitude (crest collapses): tonal and steady, so
    every other term reads clean; only the crest term flags the mush."""
    t = _t()
    partials = (np.sin(2 * np.pi * 220 * t)
                + 0.8 * np.sin(2 * np.pi * 331 * t)
                + 0.7 * np.sin(2 * np.pi * 440 * t))
    # Hard tanh saturation flattens the envelope toward a constant level.
    sig = np.tanh(5 * partials)
    take = _stereo(_norm(sig))
    score = engine._tail_artifact_score(take, SR)
    assert score > engine.EARLY_ACCEPT_SCORE, score
def test_natural_ending_taper_is_not_a_dropout():
    """A clean take that simply fades over its final ~0.6 s must NOT be read as a
    dropout (stitch fades the end anyway); the back-guard protects it."""
    sig = _clean_dynamic()
    n_fade = int(0.6 * SR)
    # Linear ramp from 1.0 down to 0.0 across the final stretch of the take.
    ramp = np.linspace(1.0, 0.0, len(sig[-n_fade:]))
    sig[-n_fade:] *= ramp
    take = _stereo(sig)
    score = engine._tail_artifact_score(take, SR)
    assert score <= engine.EARLY_ACCEPT_SCORE, score
def test_too_short_tail_is_avoided():
    """A 10 ms all-zero clip must score exactly +inf."""
    n_samples = int(0.01 * SR)
    stub = _stereo(np.zeros(n_samples))
    score = engine._tail_artifact_score(stub, SR)
    assert score == float("inf")