Spaces:

rikhoffbauer2
/

drum-sample-extractor

Sleeping

App Files Files Community

rikhoffbauer2 commited on Apr 30

Commit

d34b37f

verified ·

1 Parent(s): 26de08e

Add optimizer.py

Browse files

Files changed (1) hide show

optimizer.py +385 -0

optimizer.py ADDED Viewed

	@@ -0,0 +1,385 @@

+"""
+Autonomous parameter optimizer for the drum extraction pipeline.
+Runs a loop:
+  1. Generate synthetic songs with known ground truth
+  2. Run the extraction pipeline with current params
+  3. Evaluate extraction quality against ground truth
+  4. Use results to tune parameters for next iteration
+Uses Bayesian-ish optimization: maintain a history of (params → score),
+then perturb the best-so-far params toward improving weak metrics.
+"""
+import json
+import time
+import traceback
+import numpy as np
+from copy import deepcopy
+from dataclasses import dataclass, field
+from pathlib import Path
+from synth_generator import generate_test_song, SyntheticSong
+from evaluation import evaluate_extraction, report_to_dict, EvalReport
+from quality_metrics import drum_sample_score
+@dataclass
+class PipelineParams:
+    """All tunable parameters of the extraction pipeline."""
+    # Onset detection
+    pre_pad: float = 0.005
+    min_hit_dur: float = 0.03
+    max_hit_dur: float = 0.8
+    min_gap: float = 0.02
+    energy_threshold_db: float = -40.0
+    # Overlap separation
+    separate_overlaps: bool = True
+    overlap_energy_threshold: float = 0.15  # band energy ratio to count as significant
+    # Clustering
+    use_clap: bool = False
+    # Selection weights (must sum to 1.0)
+    w_completeness: float = 0.30
+    w_cleanness: float = 0.40
+    w_onset: float = 0.20
+    w_representativeness: float = 0.10
+    # Synthesis
+    synthesize: bool = True
+    synth_best_weight: float = 2.0  # weight multiplier for best sample in cluster
+    def to_dict(self) -> dict:
+        return self.__dict__.copy()
+    @classmethod
+    def from_dict(cls, d: dict) -> 'PipelineParams':
+        valid_keys = cls.__dataclass_fields__.keys()
+        return cls(**{k: v for k, v in d.items() if k in valid_keys})
+@dataclass
+class IterationResult:
+    """Result of one optimization iteration."""
+    iteration: int
+    params: dict
+    eval_report: dict
+    overall_score: float
+    duration_seconds: float
+    test_config: dict        # which synthetic song was used
+    timestamp: str
+@dataclass
+class OptimizerState:
+    """Persistent state of the optimizer."""
+    history: list = field(default_factory=list)  # [IterationResult]
+    best_params: dict = field(default_factory=dict)
+    best_score: float = 0.0
+    iteration: int = 0
+# ─────────────────────────────────────────────────────────────────────────────
+# Parameter perturbation strategies
+# ─────────────────────────────────────────────────────────────────────────────
+def diagnose_and_perturb(params: PipelineParams, report: EvalReport,
+                          rng: np.random.RandomState) -> PipelineParams:
+    """Analyze evaluation report and intelligently perturb parameters.
+    Instead of random search, we diagnose specific failure modes from the
+    evaluation metrics and adjust the relevant parameters.
+    """
+    new_params = deepcopy(params)
+    changes = []
+    # ── Diagnosis 1: Poor onset precision (>20ms mean error) ──
+    if report.mean_onset_error_ms > 20:
+        # Reduce pre_pad to tighten onset capture
+        new_params.pre_pad = max(0.001, params.pre_pad * rng.uniform(0.5, 0.9))
+        # Reduce min_gap to catch faster sequences
+        new_params.min_gap = max(0.01, params.min_gap * rng.uniform(0.6, 0.9))
+        changes.append(f"onset_error={report.mean_onset_error_ms:.1f}ms → tightened pre_pad/min_gap")
+    # ── Diagnosis 2: Missing hits (low hit count accuracy) ──
+    if report.hit_count_accuracy < 0.7:
+        # Lower energy threshold to catch quieter hits
+        new_params.energy_threshold_db = max(-60, params.energy_threshold_db - rng.uniform(2, 8))
+        # Reduce min_hit_dur to catch shorter sounds
+        new_params.min_hit_dur = max(0.01, params.min_hit_dur * rng.uniform(0.5, 0.8))
+        changes.append(f"hit_acc={report.hit_count_accuracy:.2f} → lowered threshold/min_dur")
+    # ── Diagnosis 3: Too many false hits (extracted >> GT) ──
+    total_ext = sum(m.n_hits_extracted for m in report.matches) if report.matches else 0
+    total_gt = sum(m.n_hits_gt for m in report.matches) if report.matches else 1
+    if total_ext > total_gt * 1.5:
+        # Raise energy threshold
+        new_params.energy_threshold_db = min(-20, params.energy_threshold_db + rng.uniform(2, 5))
+        new_params.min_hit_dur = min(0.08, params.min_hit_dur * rng.uniform(1.1, 1.5))
+        changes.append(f"over-extraction ({total_ext} vs {total_gt} GT) → raised threshold")
+    # ── Diagnosis 4: Low SI-SDR (poor sample quality) ──
+    if report.mean_si_sdr < 5:
+        # The extracted samples don't match GT well
+        # Try adjusting overlap separation threshold
+        new_params.overlap_energy_threshold = params.overlap_energy_threshold + rng.uniform(-0.05, 0.05)
+        new_params.overlap_energy_threshold = np.clip(new_params.overlap_energy_threshold, 0.05, 0.4)
+        changes.append(f"SI-SDR={report.mean_si_sdr:.1f}dB → adjusted overlap threshold")
+    # ── Diagnosis 5: Low sample scores (poor completeness/cleanness) ──
+    if report.mean_sample_score < 50:
+        # Adjust selection weights
+        # More weight on cleanness if we're getting bleed-heavy samples
+        new_params.w_cleanness = min(0.6, params.w_cleanness + rng.uniform(0, 0.1))
+        new_params.w_completeness = max(0.15, params.w_completeness + rng.uniform(-0.05, 0.05))
+        # Renormalize
+        total_w = new_params.w_cleanness + new_params.w_completeness + new_params.w_onset + new_params.w_representativeness
+        new_params.w_cleanness /= total_w
+        new_params.w_completeness /= total_w
+        new_params.w_onset /= total_w
+        new_params.w_representativeness /= total_w
+        changes.append(f"sample_score={report.mean_sample_score:.1f} → adjusted selection weights")
+    # ── Diagnosis 6: Low envelope correlation (transient mismatch) ──
+    if report.mean_env_corr < 0.7:
+        new_params.max_hit_dur = min(1.5, params.max_hit_dur * rng.uniform(1.1, 1.3))
+        changes.append(f"env_corr={report.mean_env_corr:.2f} → increased max_hit_dur")
+    # ── Diagnosis 7: Unmatched GT samples (some drums never found) ──
+    if len(report.unmatched_gt) > 0:
+        new_params.energy_threshold_db = max(-60, params.energy_threshold_db - rng.uniform(3, 6))
+        changes.append(f"missed {report.unmatched_gt} → lowered energy threshold")
+    # If no specific diagnosis triggered, apply small random perturbation
+    if not changes:
+        # Explore nearby parameter space
+        new_params.energy_threshold_db += rng.uniform(-3, 3)
+        new_params.pre_pad += rng.uniform(-0.002, 0.002)
+        new_params.pre_pad = max(0.001, new_params.pre_pad)
+        new_params.min_hit_dur += rng.uniform(-0.01, 0.01)
+        new_params.min_hit_dur = max(0.01, new_params.min_hit_dur)
+        changes.append("no specific issue → random exploration")
+    return new_params, changes
+# ─────────────────────────────────────────────────────────────────────────────
+# Main optimization loop
+# ─────────────────────────────────────────────────────────────────────────────
+def run_extraction_with_params(song: SyntheticSong, params: PipelineParams) -> tuple:
+    """Run the extraction pipeline with given params on a song.
+    Returns (clusters, all_hits) or raises on failure."""
+    from drum_extractor import (
+        detect_onsets, classify_and_separate_hits,
+        compute_librosa_embeddings, cluster_hits,
+        select_best_representatives, synthesize_from_cluster,
+    )
+    # Stage 2: Onset detection
+    hits = detect_onsets(
+        song.drums_only, song.sr,
+        pre_pad=params.pre_pad,
+        min_hit_dur=params.min_hit_dur,
+        max_hit_dur=params.max_hit_dur,
+        min_gap=params.min_gap,
+        energy_threshold_db=params.energy_threshold_db,
+    )
+    if len(hits) == 0:
+        return [], []
+    # Stage 3: Classify & separate
+    hits = classify_and_separate_hits(hits, separate_overlaps=params.separate_overlaps)
+    # Stage 4: Embed & cluster
+    embeddings = compute_librosa_embeddings(hits)
+    clusters = cluster_hits(hits, embeddings)
+    # Stage 5: Select best (using our improved scoring)
+    for cluster in clusters:
+        if cluster.count == 1:
+            cluster.best_hit_idx = 0
+            continue
+        scores = []
+        base_label = cluster.label.rsplit('_', 1)[0]
+        # Compute cluster radius for representativeness scoring
+        hit_features = []
+        for hit in cluster.hits:
+            import librosa
+            feat = np.concatenate([
+                librosa.feature.mfcc(y=hit.audio, sr=hit.sr, n_mfcc=13).mean(axis=1),
+                [hit.rms_energy, hit.spectral_centroid, hit.duration]
+            ])
+            hit_features.append(feat)
+        hit_features = np.array(hit_features)
+        mean_f = hit_features.mean(axis=0)
+        std_f = hit_features.std(axis=0) + 1e-8
+        hit_features_norm = (hit_features - mean_f) / std_f
+        centroid = hit_features_norm.mean(axis=0)
+        dists = np.linalg.norm(hit_features_norm - centroid, axis=1)
+        radius = dists.max() + 1e-8
+        for i, hit in enumerate(cluster.hits):
+            score = drum_sample_score(
+                hit.audio, hit.sr, base_label,
+                centroid_dist=dists[i],
+                cluster_radius=radius,
+            )
+            scores.append(score['total'])
+        cluster.best_hit_idx = int(np.argmax(scores))
+    # Stage 6: Synthesis
+    if params.synthesize:
+        for cluster in clusters:
+            if cluster.count >= 2:
+                cluster.synthesized = synthesize_from_cluster(cluster)
+    return clusters, hits
+def run_optimization_loop(
+    n_iterations: int = 10,
+    patterns: list = None,
+    initial_params: PipelineParams = None,
+    seed: int = 42,
+    log_callback=None,
+) -> OptimizerState:
+    """Run the full autonomous optimization loop.
+    Args:
+        n_iterations: number of optimization iterations
+        patterns: list of pattern names to test with (cycles through them)
+        initial_params: starting pipeline parameters
+        seed: random seed
+        log_callback: function(str) called with log messages
+    """
+    if patterns is None:
+        patterns = ['rock', 'funk', 'halftime']
+    if initial_params is None:
+        initial_params = PipelineParams()
+    rng = np.random.RandomState(seed)
+    state = OptimizerState(best_params=initial_params.to_dict())
+    current_params = deepcopy(initial_params)
+    def log(msg):
+        if log_callback:
+            log_callback(msg)
+        print(msg)
+    log(f"Starting optimization loop: {n_iterations} iterations")
+    log(f"Patterns: {patterns}")
+    for i in range(n_iterations):
+        t0 = time.time()
+        pattern_name = patterns[i % len(patterns)]
+        song_seed = seed + i * 17  # different song each iteration
+        log(f"\n{'='*60}")
+        log(f"ITERATION {i+1}/{n_iterations} — pattern={pattern_name}, seed={song_seed}")
+        log(f"{'='*60}")
+        try:
+            # 1. Generate synthetic song
+            log("  Generating synthetic song...")
+            song = generate_test_song(
+                pattern_name=pattern_name,
+                bars=4,
+                bpm=100 + rng.randint(0, 40) * 2,  # vary BPM
+                variation='medium',
+                seed=song_seed,
+            )
+            log(f"    → {song.duration:.1f}s, {song.bpm}BPM, "
+                f"{len(song.hits)} hits, {len(song.samples)} sample types")
+            # 2. Run extraction
+            log(f"  Running extraction with params: threshold={current_params.energy_threshold_db:.1f}dB, "
+                f"pre_pad={current_params.pre_pad:.3f}, min_dur={current_params.min_hit_dur:.3f}")
+            clusters, all_hits = run_extraction_with_params(song, current_params)
+            log(f"    → {len(clusters)} clusters, {len(all_hits)} total hits")
+            # 3. Evaluate
+            log("  Evaluating against ground truth...")
+            gt_samples = {name: s.audio for name, s in song.samples.items()}
+            gt_hit_map = [
+                {'sample': h.sample_name, 'onset': h.onset_time, 'velocity': h.velocity}
+                for h in song.hits
+            ]
+            report = evaluate_extraction(
+                extracted_clusters=clusters,
+                gt_samples=gt_samples,
+                gt_hit_map=gt_hit_map,
+                sr=song.sr,
+                all_hits=all_hits,
+                pipeline_params=current_params.to_dict(),
+            )
+            duration = time.time() - t0
+            log(f"  RESULTS:")
+            log(f"    Overall Score: {report.overall_score:.1f}/100")
+            log(f"    SI-SDR:        {report.mean_si_sdr:.1f} dB")
+            log(f"    Sample Score:  {report.mean_sample_score:.1f}/100")
+            log(f"    Env Corr:      {report.mean_env_corr:.3f}")
+            log(f"    Onset Error:   {report.mean_onset_error_ms:.1f} ms")
+            log(f"    Hit Count Acc: {report.hit_count_accuracy:.2f}")
+            log(f"    Matched:       {len(report.matches)}/{len(song.samples)}")
+            if report.unmatched_gt:
+                log(f"    ⚠ Unmatched GT: {report.unmatched_gt}")
+            # Record iteration
+            result = IterationResult(
+                iteration=i,
+                params=current_params.to_dict(),
+                eval_report=report_to_dict(report),
+                overall_score=report.overall_score,
+                duration_seconds=duration,
+                test_config={'pattern': pattern_name, 'bpm': song.bpm, 'seed': song_seed},
+                timestamp=time.strftime('%Y-%m-%d %H:%M:%S'),
+            )
+            state.history.append(result)
+            # Update best
+            if report.overall_score > state.best_score:
+                state.best_score = report.overall_score
+                state.best_params = current_params.to_dict()
+                log(f"  ★ NEW BEST SCORE: {report.overall_score:.1f}")
+            # 4. Tune parameters for next iteration
+            new_params, changes = diagnose_and_perturb(current_params, report, rng)
+            log(f"  Parameter adjustments:")
+            for change in changes:
+                log(f"    → {change}")
+            current_params = new_params
+        except Exception as e:
+            log(f"  ✗ ERROR: {e}")
+            log(traceback.format_exc())
+            # On error, try random perturbation
+            current_params.energy_threshold_db += rng.uniform(-5, 5)
+            state.history.append(IterationResult(
+                iteration=i,
+                params=current_params.to_dict(),
+                eval_report={'error': str(e)},
+                overall_score=0.0,
+                duration_seconds=time.time() - t0,
+                test_config={'pattern': pattern_name},
+                timestamp=time.strftime('%Y-%m-%d %H:%M:%S'),
+            ))
+        state.iteration = i + 1
+    log(f"\n{'='*60}")
+    log(f"OPTIMIZATION COMPLETE")
+    log(f"{'='*60}")
+    log(f"  Best score: {state.best_score:.1f}/100")
+    log(f"  Best params: {json.dumps(state.best_params, indent=2)}")
+    return state