"""verify.py — mathematical audio QA for CODA continuations (dev/test tool).

Not imported by the app at runtime. Given the user's original clip, the
finished track, and the splice boundary, it measures whether the generated
region is real, continuous music — the things an ear-check can miss between
sessions:

    duration · silence-collapse · clipping · loudness continuity · seam click
    · tempo continuity · key continuity · spectral rolloff · stereo width

Each metric returns a value, a pass/fail against a threshold, and a short
reason. `verify(...)` returns a structured report; `plot_report(...)` writes a
diagnostic PNG (waveform + mel-spectrogram + a pass/fail table).

CLI:
    python verify.py <original> <finished> <source_seconds> [out.png]
"""
import sys

import librosa
import numpy as np

from analyze import _key_from_audio, _scalar_tempo

SR = 44100

# thresholds (plan §4)
SILENCE_WINDOW_S = 2.0
SILENCE_MIN_RATIO = 0.30          # every 2s window > 0.3x source RMS
CLIP_MAX_FRACTION = 1e-4          # < 0.01% of samples at full scale
LOUDNESS_TOL_DB = 6.0             # seam RMS step within +/- 6 dB
SEAM_JUMP_FACTOR = 6.0            # seam jump < 6x the 99.9th-pct local delta
TEMPO_TOL = 0.08                  # generated tempo within +/-8% of source
ROLLOFF_MIN_RATIO = 0.70          # new rolloff >= 0.7x source
STEREO_CORR_RANGE = (0.10, 0.98)  # not mono-collapsed, not decorrelated noise
MONO_CORR_EPS = 1e-6              # |1 - corr| below this counts as exact mono

_USAGE = ("usage: python verify.py <original> <finished> <source_seconds> "
          "[out.png]")


def _mono(y):
    """Average a 2-D (channels, samples) array over axis 0; pass 1-D through."""
    return y.mean(axis=0) if y.ndim == 2 else y


def _rms(x):
    """Return the RMS of `x` as a builtin float, floored at 1e-12.

    The floor keeps later divisions and log10 calls finite. Empty input
    returns the floor itself instead of NaN (np.mean of an empty array).
    """
    x = np.asarray(x, dtype=np.float64)
    if x.size == 0:
        return 1e-12
    return float(np.sqrt(np.mean(x ** 2)) + 1e-12)


def _db(ratio):
    """Convert an amplitude ratio to decibels (20*log10), as a builtin float.

    The ratio is clamped to 1e-12 so zero/negative input cannot produce
    -inf/NaN. Returning `float` (not np.float64) keeps comparisons made on the
    result plain Python bools, so the report stays JSON-serialisable.
    """
    return float(20.0 * np.log10(max(ratio, 1e-12)))


def _relative_keys(key):
    """key string + its relative major/minor, for continuity matching.

    `key` is expected as "<root> <mode>" with a sharp-spelled root (see
    `names`). Anything that does not split into exactly two tokens, or whose
    root is not in the table, is returned as the singleton `{key}` so an
    unrecognised key can only match itself instead of raising.
    """
    names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    try:
        root, mode = key.split()
        i = names.index(root)
    except ValueError:
        # wrong token count, or a root outside the sharp-spelled table
        return {key}
    out = {key}
    if mode == 'major':
        out.add(f'{names[(i + 9) % 12]} minor')   # relative minor
    else:
        out.add(f'{names[(i + 3) % 12]} major')   # relative major
    return out


def verify(original_path, finished_path, source_seconds, fade_seconds=4.0):
    """Return a report dict: {metrics: {name: {value, pass, reason}},
    passed:bool, and arrays for plotting}.

    Parameters:
        original_path: accepted for the CLI/caller contract but not read here;
            the source region is taken from the first `source_seconds` of the
            finished file.
        finished_path: path of the finished track, loaded at SR.
        source_seconds: splice boundary, in seconds from the start.
        fade_seconds: the stitch closing fade, excluded (plus a 1 s margin)
            from the silence-collapse scan so the intentional ending isn't
            flagged.

    Returns:
        dict with keys 'metrics', 'passed', 'finished' (the loaded 2-channel
        array), 'boundary' (sample index), 'source_seconds', 'total_seconds'.
    """
    fin, _ = librosa.load(finished_path, sr=SR, mono=False)
    if fin.ndim == 1:
        # mono file: duplicate so the rest of the code can assume 2 channels
        fin = np.stack([fin, fin])
    fin_m = _mono(fin)
    total_s = fin.shape[-1] / SR
    boundary = int(round(source_seconds * SR))
    boundary = max(0, min(boundary, fin.shape[-1] - 1))

    src_region = fin_m[:boundary]
    new_region = fin_m[boundary:]
    src_rms = _rms(src_region) if len(src_region) else _rms(fin_m)

    metrics = {}

    # 1. duration sane
    metrics['duration'] = {
        'value': round(total_s, 2),
        'pass': total_s > source_seconds + 1.0,
        'reason': f'{total_s:.1f}s total, source {source_seconds:.1f}s',
    }

    # 2. silence collapse — every window in the new region carries energy.
    # exclude the intentional closing fade (last fade_seconds + 1s margin) so
    # a real ending isn't read as a collapse.
    win = int(SILENCE_WINDOW_S * SR)
    body = new_region[:max(0, len(new_region) - int((fade_seconds + 1.0) * SR))]
    worst = 1.0   # too-short body: nothing to scan, passes vacuously
    if len(body) >= win:
        ratios = [_rms(body[i:i + win]) / src_rms
                  for i in range(0, len(body) - win + 1, win)]
        worst = min(ratios) if ratios else 1.0
    metrics['no_silence_collapse'] = {
        'value': round(worst, 3),
        'pass': worst > SILENCE_MIN_RATIO,
        'reason': f'quietest 2s window = {worst:.2f}x source RMS '
                  f'(need > {SILENCE_MIN_RATIO})',
    }

    # 3. clipping
    clip_frac = float(np.mean(np.abs(fin) >= 0.999))
    metrics['no_clipping'] = {
        'value': clip_frac,
        'pass': clip_frac < CLIP_MAX_FRACTION,
        'reason': f'{clip_frac*100:.4f}% samples at full scale',
    }

    # 4. loudness continuity across the seam
    w = int(3.0 * SR)
    pre = fin_m[max(0, boundary - w):boundary]
    post = fin_m[boundary:boundary + w]
    step_db = _db(_rms(post) / _rms(pre)) if len(pre) and len(post) else 0.0
    metrics['loudness_continuity'] = {
        'value': round(step_db, 2),
        'pass': abs(step_db) < LOUDNESS_TOL_DB,
        'reason': f'seam RMS step {step_db:+.1f} dB '
                  f'(tol +/-{LOUDNESS_TOL_DB:.0f})',
    }

    # 5. seam discontinuity — no audible click at the splice point. reference
    # the seam's largest sample step against a HIGH QUANTILE of the surrounding
    # |delta| distribution, not its std: on percussive/bright music the biggest
    # single delta is heavy-tailed and dwarfs the std with no click present, so
    # an std-based gate false-positives on drums/electronic continuations.
    g = int(0.05 * SR)
    seg = fin_m[max(0, boundary - g):boundary + g]
    if len(seg) > 4:
        diffs = np.abs(np.diff(seg))
        local = np.abs(np.diff(fin_m[max(0, boundary - SR):boundary + SR]))
        local_ref = float(np.quantile(local, 0.999) + 1e-9) if len(local) else 1.0
        jump = float(diffs.max()) / local_ref
    else:
        jump = 0.0
    metrics['no_seam_click'] = {
        'value': round(jump, 2),
        'pass': jump < SEAM_JUMP_FACTOR,
        'reason': f'max seam jump {jump:.1f}x the 99.9th-pct local delta '
                  f'(need < {SEAM_JUMP_FACTOR})',
    }

    # 6/7. tempo + key continuity (source region vs new region)
    # a source shorter than 1s is analysed on the whole track instead; a new
    # region shorter than 1s inherits the source values (passes vacuously)
    src_for_analysis = src_region if len(src_region) > SR else fin_m
    new_is_long_enough = len(new_region) > SR
    src_tempo = _scalar_tempo(
        librosa.beat.beat_track(y=src_for_analysis, sr=SR)[0])
    if new_is_long_enough:
        new_tempo = _scalar_tempo(
            librosa.beat.beat_track(y=new_region, sr=SR)[0])
    else:
        new_tempo = src_tempo
    tempo_dev = abs(new_tempo - src_tempo) / max(src_tempo, 1e-6)
    # allow half/double-time relationship (common + musically valid)
    half_double = min(
        tempo_dev,
        abs(new_tempo - 2 * src_tempo) / max(2 * src_tempo, 1e-6),
        abs(new_tempo - 0.5 * src_tempo) / max(0.5 * src_tempo, 1e-6))
    metrics['tempo_continuity'] = {
        'value': f'{src_tempo:.0f}->{new_tempo:.0f} bpm',
        'pass': half_double < TEMPO_TOL,
        'reason': f'{half_double*100:.1f}% deviation (tol {TEMPO_TOL*100:.0f}%, '
                  f'half/double allowed)',
    }

    src_key = _key_from_audio(src_for_analysis, SR)
    new_key = _key_from_audio(new_region, SR) if new_is_long_enough else src_key
    metrics['key_continuity'] = {
        'value': f'{src_key} -> {new_key}',
        'pass': new_key in _relative_keys(src_key),
        'reason': f'generated key {new_key} vs source {src_key} '
                  f'(relative maj/min ok)',
    }

    # 8. spectral rolloff — confirm the tail isn't band-limited
    src_roll = float(np.mean(librosa.feature.spectral_rolloff(
        y=src_for_analysis, sr=SR))) + 1e-6
    if new_is_long_enough:
        new_roll = float(np.mean(librosa.feature.spectral_rolloff(
            y=new_region, sr=SR)))
    else:
        new_roll = src_roll
    metrics['spectral_rolloff'] = {
        'value': f'{src_roll/1000:.1f}->{new_roll/1000:.1f} kHz',
        'pass': new_roll >= ROLLOFF_MIN_RATIO * src_roll,
        'reason': f'new {new_roll/1000:.1f}kHz vs source {src_roll/1000:.1f}kHz '
                  f'(need >= {ROLLOFF_MIN_RATIO:.0%})',
    }

    # 9. stereo width in the new region
    if fin.shape[0] == 2 and fin.shape[-1] - boundary > SR:
        L, R = fin[0, boundary:], fin[1, boundary:]
        if np.std(L) > 1e-6 and np.std(R) > 1e-6:
            corr = float(np.corrcoef(L, R)[0, 1])
        else:
            corr = 1.0
    else:
        corr = 1.0
    lo, hi = STEREO_CORR_RANGE
    # corrcoef normalises by sqrt(var) twice, so bit-identical channels (e.g.
    # the mono file duplicated above) can land a few ULPs under 1.0; an exact
    # `== 1.0` test would then fail genuine mono against the 0.98 ceiling.
    is_mono = corr >= 1.0 - MONO_CORR_EPS
    metrics['stereo_width'] = {
        'value': round(corr, 3),
        'pass': lo <= corr <= hi or is_mono,
        'reason': f'L/R correlation {corr:.2f} (target {lo}-{hi}; '
                  f'1.0 = intentional mono)',
    }

    passed = all(m['pass'] for m in metrics.values())
    return {
        'metrics': metrics,
        'passed': passed,
        'finished': fin,
        'boundary': boundary,
        'source_seconds': source_seconds,
        'total_seconds': total_s,
    }


def print_report(report):
    """Print one [PASS]/[FAIL] line per metric, then the overall verdict."""
    print("\n==== CODA VERIFY ====")
    # default=0 keeps an empty metrics dict from raising in max()
    width = max((len(k) for k in report['metrics']), default=0)
    for name, m in report['metrics'].items():
        tag = 'PASS' if m['pass'] else 'FAIL'
        print(f"  [{tag}] {name:<{width}}  {m['reason']}")
    print(f"  ----\n  OVERALL: {'PASS' if report['passed'] else 'FAIL'}\n")


def plot_report(report, out_png):
    """Write a diagnostic PNG for `report` (as returned by `verify`) to
    `out_png`: waveform with the seam marked, mel-spectrogram with the seam
    marked, and a colour-coded pass/fail table."""
    import matplotlib
    matplotlib.use("Agg")   # headless backend; must be set before pyplot
    import matplotlib.pyplot as plt
    import librosa.display  # submodule isn't pulled in by `import librosa`

    fin = report['finished']
    fin_m = _mono(fin)
    b = report['boundary']
    sr = SR
    t = np.arange(fin_m.shape[-1]) / sr

    fig, axes = plt.subplots(3, 1, figsize=(12, 9), facecolor="#0b0d10")
    for ax in axes:
        ax.set_facecolor("#14171c")
        ax.tick_params(colors="#9aa4b2")
        for s in ax.spines.values():
            s.set_color("#2a2f37")

    axes[0].plot(t, fin_m, color="#39d0d8", lw=0.4)
    axes[0].axvline(b / sr, color="#ffb347", lw=1.5, label="seam")
    axes[0].set_title("Finished waveform (seam marked)", color="#e6e9ef")
    axes[0].legend(facecolor="#14171c", labelcolor="#e6e9ef")

    S = librosa.feature.melspectrogram(y=fin_m, sr=sr, n_mels=128)
    Sdb = librosa.power_to_db(S, ref=np.max)
    librosa.display.specshow(Sdb, sr=sr, x_axis="time", y_axis="mel", ax=axes[1])
    axes[1].axvline(b / sr, color="#ffb347", lw=1.5)
    axes[1].set_title("Mel-spectrogram", color="#e6e9ef")

    rows = [[k, ("PASS" if m['pass'] else "FAIL"), str(m['value'])]
            for k, m in report['metrics'].items()]
    axes[2].axis("off")
    tbl = axes[2].table(cellText=rows, colLabels=["metric", "result", "value"],
                        loc="center", cellLoc="left")
    tbl.auto_set_font_size(False)
    tbl.set_fontsize(9)
    for (r, c), cell in tbl.get_celld().items():
        cell.set_edgecolor("#2a2f37")
        if r == 0:
            # header row
            cell.set_facecolor("#22272e")
            cell.set_text_props(color="#e6e9ef")
        else:
            # table row r maps to rows[r - 1]; colour by that metric's verdict
            ok = report['metrics'][rows[r - 1][0]]['pass']
            cell.set_facecolor("#14171c")
            cell.set_text_props(color="#5fd38d" if ok else "#ff6b6b")

    fig.suptitle(f"CODA verify — {'PASS' if report['passed'] else 'FAIL'}",
                 color="#e6e9ef", fontsize=14)
    fig.tight_layout()
    fig.savefig(out_png, dpi=110, facecolor="#0b0d10")
    plt.close(fig)
    print(f"  wrote {out_png}")


if __name__ == "__main__":
    # exit codes: 0 = all metrics pass, 1 = bad invocation, 2 = a metric failed
    if len(sys.argv) < 4:
        print(_USAGE)
        sys.exit(1)
    try:
        seconds = float(sys.argv[3])
    except ValueError:
        print(_USAGE)
        sys.exit(1)
    rep = verify(sys.argv[1], sys.argv[2], seconds)
    print_report(rep)
    if len(sys.argv) > 4:
        plot_report(rep, sys.argv[4])
    sys.exit(0 if rep['passed'] else 2)