Spaces:

rikhoffbauer2
/

drum-sample-extractor

Sleeping

App Files Community

rikhoffbauer2 committed on May 1

Commit

63565aa

verified ·

1 Parent(s): d1fa59c

v6: Update app defaults for real music — delta=0.12, energy=-35, min_gap=0.03, NCC compare=0 (auto)

Browse files

Files changed (1) hide show

app.py +65 -94

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
-Gradio UI — Sample Extractor v4.
-NCC clustering, full parameter control, Demucs model selection.
 """
 import gradio as gr
@@ -105,7 +105,6 @@ def run_extraction(audio_in, stem_choice, demucs_model, demucs_shifts, demucs_ov
         zip_path = build_archive(clusters, bpm, stem_sr,
                                   midi_path=midi_path, rendered_audio=rendered)
-        # Metrics
         rows = []
         for c in sorted(clusters, key=lambda x: x.count, reverse=True):
             best = c.best_hit
@@ -120,7 +119,11 @@ def run_extraction(audio_in, stem_choice, demucs_model, demucs_shifts, demucs_ov
             })
         summary = f"**Detected BPM: {bpm}** · **{len(clusters)} unique samples** from {len(hits)} hits\n\n"
-        summary += f"Model: `{demucs_model}` · NCC threshold: `{ncc_threshold}` · Onset delta: `{onset_delta}`\n\n"
         summary += "| Sample | Hits | MIDI Note |\n|---|---|---|\n"
         for c in sorted(clusters, key=lambda x: x.count, reverse=True):
             summary += f"| {c.label} | {c.count} | {c.midi_note} |\n"
@@ -135,11 +138,10 @@ def run_extraction(audio_in, stem_choice, demucs_model, demucs_shifts, demucs_ov
 # ─── Tab 2: Evaluate ─────────────────────────────────────────────────────────
-def run_eval(pattern, bpm, bars, ncc_threshold, progress=gr.Progress()):
     progress(0.0, desc="Generating synthetic song...")
     song = generate_test_song(pattern_name=pattern, bars=int(bars),
                                bpm=float(bpm), variation='medium', seed=42)
     detected_bpm = detect_bpm(song.drums_only, song.sr)
     progress(0.2, desc="Extracting...")
@@ -147,7 +149,8 @@ def run_eval(pattern, bpm, bars, ncc_threshold, progress=gr.Progress()):
     if not hits: return None, None, None, None, "", ""
     hits = classify_hits(hits)
-    clusters = cluster_hits(hits, ncc_threshold=float(ncc_threshold))
     select_best(clusters)
     for c in clusters:
         if c.count >= 2: c.synthesized = synthesize_from_cluster(c)
@@ -161,9 +164,6 @@ def run_eval(pattern, bpm, bars, ncc_threshold, progress=gr.Progress()):
                for h in song.hits]
     report = evaluate_extraction(clusters, gt, gt_hits, song.sr, hits)
-    mix_out = audio_tuple(song.mix, song.sr)
-    rendered_out = audio_tuple(rendered, song.sr)
     summary = [
         {'Metric': 'Detected BPM', 'Value': f"{detected_bpm}", 'Target': f"{song.bpm}"},
         {'Metric': 'Clusters', 'Value': str(len(clusters)), 'Target': str(len(gt))},
@@ -173,59 +173,47 @@ def run_eval(pattern, bpm, bars, ncc_threshold, progress=gr.Progress()):
     ]
     if report.unmatched_gt:
         summary.append({'Metric': '⚠ Unmatched', 'Value': ', '.join(report.unmatched_gt), 'Target': 'None'})
     matches = [{'Cluster': m.cluster_label, 'GT': m.gt_name, 'SI-SDR': f"{m.si_sdr:.1f}",
                 'Score': f"{m.sample_score:.1f}"} for m in report.matches]
     progress(1.0)
-    return (mix_out, rendered_out, pd.DataFrame(summary),
-            pd.DataFrame(matches) if matches else None, "", "")
-# ─── Tab 3: Optimize ─────────────────────────────────────────────────────────
 def run_optimize(n_iters, config_name, author, save_hub, progress=gr.Progress()):
     logs = []
-    progress(0.0, desc="Starting optimization...")
-    state = run_optimization(n_iterations=int(n_iters),
-                              config_name=config_name or "optimized",
-                              author=author or "anonymous",
-                              save_to_hub=bool(save_hub), log_fn=lambda m: logs.append(m))
     progress(1.0)
-    hist = [{'Iter': r.iteration, 'Score': f"{r.avg_score:.1f}",
-             'Time': f"{r.duration_s:.1f}s"} for r in state.history]
     if state.history:
-        fig, ax = plt.subplots(figsize=(10, 4))
         ax.plot([r.iteration for r in state.history], [r.avg_score for r in state.history], 'b-o')
         ax.set_xlabel('Iteration'); ax.set_ylabel('Score'); ax.grid(True, alpha=0.3); plt.tight_layout()
-    else:
-        fig, ax = plt.subplots(); ax.text(0.5, 0.5, "No data")
     return '\n'.join(logs), pd.DataFrame(hist), fig, json.dumps(state.best_config, indent=2)
-# ─── Tab 4: Leaderboard ──────────────────────────────────────────────────────
 def refresh_leaderboard():
     try:
         lb = get_leaderboard()
         return pd.DataFrame(lb) if lb else pd.DataFrame(), ""
-    except Exception as e:
-        return pd.DataFrame(), str(e)
 # ─── Build App ────────────────────────────────────────────────────────────────
-def get_stems_for_model(model_name):
-    stems = DEMUCS_STEMS.get(model_name, ["drums", "bass", "other", "vocals"])
-    return gr.update(choices=stems + ["all"], value=stems[0])
 def build_app():
     with gr.Blocks(title="🎵 Sample Extractor", theme=gr.themes.Soft(),
                    css=".gradio-container{max-width:1300px!important}") as app:
-        gr.Markdown("# 🎵 Sample Extractor v4\n"
-                    "Extract distinct sounds from audio using **NCC waveform matching** — "
-                    "correctly groups identical samples regardless of velocity.\n"
-                    "Full control over Demucs model, onset detection, and clustering parameters.")
         with gr.Tabs():
             # ── Extract ──
@@ -234,51 +222,36 @@ def build_app():
                 with gr.Accordion("🔧 Stem Separation", open=False):
                     with gr.Row():
-                        demucs_model = gr.Dropdown(DEMUCS_MODELS, value="htdemucs_ft",
-                                                    label="Demucs Model")
-                        stem_dd = gr.Dropdown(['drums','bass','other','vocals','all'],
-                                              value='drums', label='Stem')
-                        demucs_shifts = gr.Slider(0, 5, value=1, step=1,
-                                                   label='Shifts (TTA, 0=fastest)')
-                        demucs_overlap = gr.Slider(0.0, 0.5, value=0.25, step=0.05,
-                                                    label='Overlap')
                 with gr.Accordion("🎯 Onset Detection", open=False):
                     with gr.Row():
-                        onset_mode = gr.Dropdown(['auto','percussive','harmonic','broadband'],
-                                                  value='auto', label='Mode')
-                        onset_delta = gr.Slider(0.01, 0.5, value=0.07, step=0.01,
-                                                 label='Delta (sensitivity)')
-                        energy_db = gr.Slider(-70, -10, value=-45, step=1,
-                                               label='Energy threshold (dB)')
                     with gr.Row():
-                        pre_pad = gr.Slider(0.0, 0.05, value=0.005, step=0.001,
-                                             label='Pre-pad (s)')
-                        min_dur = gr.Slider(0.005, 0.2, value=0.02, step=0.005,
-                                             label='Min duration (s)')
-                        max_dur = gr.Slider(0.1, 5.0, value=1.5, step=0.1,
-                                             label='Max duration (s)')
-                        min_gap = gr.Slider(0.005, 0.2, value=0.015, step=0.005,
-                                             label='Min gap (s)')
-                with gr.Accordion("🔗 Clustering", open=False):
                     with gr.Row():
-                        ncc_thresh = gr.Slider(0.3, 0.99, value=0.80, step=0.01,
-                                                label='NCC threshold (higher = stricter)')
-                        ncc_ms = gr.Slider(50, 1000, value=200, step=50,
-                                            label='Compare window (ms)')
-                        linkage_dd = gr.Dropdown(['average', 'complete', 'single'],
-                                                  value='average', label='Linkage')
                     with gr.Row():
-                        target_min = gr.Number(value=0, label='Target min clusters (0 = use threshold)',
-                                               precision=0)
-                        target_max = gr.Number(value=0, label='Target max clusters (0 = use threshold)',
-                                               precision=0)
-                    gr.Markdown("*Set both target min/max > 0 to auto-search for the right threshold. "
-                                "Leave at 0 to use the NCC threshold directly.*")
                 with gr.Accordion("⚙️ Post-processing", open=False):
-                    do_synth = gr.Checkbox(value=True, label='Synthesize optimal samples from clusters')
                 extract_btn = gr.Button("🔬 Extract Samples", variant="primary", size="lg")
@@ -286,23 +259,18 @@ def build_app():
                 with gr.Row():
                     stem_out = gr.Audio(type='numpy', label='Stem', interactive=False)
                     rendered_out = gr.Audio(type='numpy', label='🔊 Reconstruction', interactive=False)
                 gr.Markdown("### Downloads")
                 with gr.Row():
                     archive_file = gr.File(label="📦 ZIP Archive", interactive=False)
                     midi_file = gr.File(label="🎹 MIDI", interactive=False)
-                sample_files = gr.File(label="Individual WAV samples", file_count="multiple",
-                                       interactive=False)
                 metrics_tbl = gr.Dataframe(label="Extracted Samples")
                 status_txt = gr.Textbox(visible=False)
-                # Update available stems when model changes
                 demucs_model.change(
-                    fn=lambda m: gr.update(choices=DEMUCS_STEMS.get(m, ["drums","bass","other","vocals"]) + ["all"]),
                     inputs=[demucs_model], outputs=[stem_dd])
-                extract_btn.click(
-                    run_extraction,
                     [audio_in, stem_dd, demucs_model, demucs_shifts, demucs_overlap,
                      onset_mode, onset_delta, energy_db, pre_pad, min_dur, max_dur, min_gap,
                      ncc_thresh, ncc_ms, linkage_dd, target_min, target_max, do_synth],
@@ -316,7 +284,10 @@ def build_app():
                     ev_pat = gr.Dropdown(['rock','funk','halftime'], value='rock', label='Pattern')
                     ev_bpm = gr.Slider(80, 200, value=120, step=2, label='BPM')
                     ev_bars = gr.Slider(2, 8, value=4, step=1, label='Bars')
-                    ev_ncc = gr.Slider(0.5, 0.99, value=0.80, step=0.01, label='NCC threshold')
                 ev_btn = gr.Button("🧪 Evaluate", variant="primary", size="lg")
                 with gr.Row():
                     ev_mix = gr.Audio(type='numpy', label='Original', interactive=False)
@@ -324,31 +295,31 @@ def build_app():
                 ev_summary = gr.Dataframe(label="Summary")
                 ev_matches = gr.Dataframe(label="Matches")
                 ev_s1 = gr.Textbox(visible=False); ev_s2 = gr.Textbox(visible=False)
-                ev_btn.click(run_eval, [ev_pat, ev_bpm, ev_bars, ev_ncc],
                              [ev_mix, ev_rendered, ev_summary, ev_matches, ev_s1, ev_s2])
             # ── Optimize ──
             with gr.Tab("🔄 Optimize"):
-                gr.Markdown("### Autonomous Optimization\nTests across 6 diverse songs, saves best config to Hub.")
                 with gr.Row():
-                    opt_n = gr.Slider(2, 30, value=5, step=1, label='Iterations')
-                    opt_name = gr.Textbox(value="optimized", label='Config name')
-                    opt_author = gr.Textbox(value="", label='Author')
-                    opt_save = gr.Checkbox(value=True, label='Save to Hub')
                 opt_btn = gr.Button("🚀 Optimize", variant="primary", size="lg")
-                opt_log = gr.Textbox(label="Log", lines=20, max_lines=40)
                 opt_hist = gr.Dataframe(label="History")
                 opt_plot = gr.Plot(label="Progress")
-                opt_params = gr.Code(label="Best Config", language="json")
-                opt_btn.click(run_optimize, [opt_n, opt_name, opt_author, opt_save],
-                              [opt_log, opt_hist, opt_plot, opt_params])
             # ── Leaderboard ──
             with gr.Tab("🏆 Leaderboard"):
                 gr.Markdown("### Config Leaderboard")
                 lb_btn = gr.Button("🔄 Refresh"); lb_tbl = gr.Dataframe()
                 lb_s = gr.Textbox(visible=False)
-                lb_btn.click(refresh_leaderboard, [], [lb_tbl, lb_s])
     return app

 """
+Gradio UI — Sample Extractor v6.
+NCC clustering with target range, auto-scale compare window, better defaults.
 """
 import gradio as gr
         zip_path = build_archive(clusters, bpm, stem_sr,
                                   midi_path=midi_path, rendered_audio=rendered)
         rows = []
         for c in sorted(clusters, key=lambda x: x.count, reverse=True):
             best = c.best_hit
             })
         summary = f"**Detected BPM: {bpm}** · **{len(clusters)} unique samples** from {len(hits)} hits\n\n"
+        summary += f"Model: `{demucs_model}` · Onset delta: `{onset_delta}` · Energy: `{energy_db}dB`\n\n"
+        if int(target_min) > 0 and int(target_max) > 0:
+            summary += f"Target clusters: `{int(target_min)}–{int(target_max)}`\n\n"
+        else:
+            summary += f"NCC threshold: `{ncc_threshold}`\n\n"
         summary += "| Sample | Hits | MIDI Note |\n|---|---|---|\n"
         for c in sorted(clusters, key=lambda x: x.count, reverse=True):
             summary += f"| {c.label} | {c.count} | {c.midi_note} |\n"
 # ─── Tab 2: Evaluate ─────────────────────────────────────────────────────────
+def run_eval(pattern, bpm, bars, ncc_threshold, target_min, target_max, progress=gr.Progress()):
     progress(0.0, desc="Generating synthetic song...")
     song = generate_test_song(pattern_name=pattern, bars=int(bars),
                                bpm=float(bpm), variation='medium', seed=42)
     detected_bpm = detect_bpm(song.drums_only, song.sr)
     progress(0.2, desc="Extracting...")
     if not hits: return None, None, None, None, "", ""
     hits = classify_hits(hits)
+    clusters = cluster_hits(hits, ncc_threshold=float(ncc_threshold),
+                             target_min=int(target_min), target_max=int(target_max))
     select_best(clusters)
     for c in clusters:
         if c.count >= 2: c.synthesized = synthesize_from_cluster(c)
                for h in song.hits]
     report = evaluate_extraction(clusters, gt, gt_hits, song.sr, hits)
     summary = [
         {'Metric': 'Detected BPM', 'Value': f"{detected_bpm}", 'Target': f"{song.bpm}"},
         {'Metric': 'Clusters', 'Value': str(len(clusters)), 'Target': str(len(gt))},
     ]
     if report.unmatched_gt:
         summary.append({'Metric': '⚠ Unmatched', 'Value': ', '.join(report.unmatched_gt), 'Target': 'None'})
     matches = [{'Cluster': m.cluster_label, 'GT': m.gt_name, 'SI-SDR': f"{m.si_sdr:.1f}",
                 'Score': f"{m.sample_score:.1f}"} for m in report.matches]
     progress(1.0)
+    return (audio_tuple(song.mix, song.sr), audio_tuple(rendered, song.sr),
+            pd.DataFrame(summary), pd.DataFrame(matches) if matches else None, "", "")
+# ─── Tab 3 & 4: Optimize + Leaderboard ───────────────────────────────────────
 def run_optimize(n_iters, config_name, author, save_hub, progress=gr.Progress()):
     logs = []
+    progress(0.0)
+    state = run_optimization(n_iterations=int(n_iters), config_name=config_name or "optimized",
+                              author=author or "anon", save_to_hub=bool(save_hub),
+                              log_fn=lambda m: logs.append(m))
     progress(1.0)
+    hist = [{'Iter': r.iteration, 'Score': f"{r.avg_score:.1f}", 'Time': f"{r.duration_s:.1f}s"}
+            for r in state.history]
     if state.history:
+        fig, ax = plt.subplots(figsize=(10,4))
         ax.plot([r.iteration for r in state.history], [r.avg_score for r in state.history], 'b-o')
         ax.set_xlabel('Iteration'); ax.set_ylabel('Score'); ax.grid(True, alpha=0.3); plt.tight_layout()
+    else: fig, ax = plt.subplots(); ax.text(0.5,0.5,"No data")
     return '\n'.join(logs), pd.DataFrame(hist), fig, json.dumps(state.best_config, indent=2)
 def refresh_leaderboard():
     try:
         lb = get_leaderboard()
         return pd.DataFrame(lb) if lb else pd.DataFrame(), ""
+    except Exception as e: return pd.DataFrame(), str(e)
 # ─── Build App ────────────────────────────────────────────────────────────────
 def build_app():
     with gr.Blocks(title="🎵 Sample Extractor", theme=gr.themes.Soft(),
                    css=".gradio-container{max-width:1300px!important}") as app:
+        gr.Markdown("# 🎵 Sample Extractor v6\n"
+                    "Extract distinct sounds from audio using **NCC waveform matching**. "
+                    "Set a **target cluster range** to control how many unique samples to extract.")
         with gr.Tabs():
             # ── Extract ──
                 with gr.Accordion("🔧 Stem Separation", open=False):
                     with gr.Row():
+                        demucs_model = gr.Dropdown(DEMUCS_MODELS, value="htdemucs_ft", label="Demucs Model")
+                        stem_dd = gr.Dropdown(['drums','bass','other','vocals','all'], value='drums', label='Stem')
+                        demucs_shifts = gr.Slider(0, 5, value=1, step=1, label='Shifts (0=fastest)')
+                        demucs_overlap = gr.Slider(0.0, 0.5, value=0.25, step=0.05, label='Overlap')
                 with gr.Accordion("🎯 Onset Detection", open=False):
                     with gr.Row():
+                        onset_mode = gr.Dropdown(['auto','percussive','harmonic','broadband'], value='auto', label='Mode')
+                        onset_delta = gr.Slider(0.01, 0.5, value=0.12, step=0.01, label='Delta (lower=more onsets)')
+                        energy_db = gr.Slider(-70, -10, value=-35, step=1, label='Energy threshold (dB)')
                     with gr.Row():
+                        pre_pad = gr.Slider(0.0, 0.05, value=0.005, step=0.001, label='Pre-pad (s)')
+                        min_dur = gr.Slider(0.005, 0.2, value=0.02, step=0.005, label='Min duration (s)')
+                        max_dur = gr.Slider(0.1, 5.0, value=1.5, step=0.1, label='Max duration (s)')
+                        min_gap = gr.Slider(0.005, 0.2, value=0.03, step=0.005, label='Min gap (s)')
+                with gr.Accordion("🔗 Clustering", open=True):
+                    gr.Markdown("**Target cluster range** — set both > 0 to auto-find the right threshold:")
                     with gr.Row():
+                        target_min = gr.Number(value=5, label='Target min clusters', precision=0)
+                        target_max = gr.Number(value=20, label='Target max clusters', precision=0)
+                    gr.Markdown("Or set both to 0 and use manual threshold:")
                     with gr.Row():
+                        ncc_thresh = gr.Slider(0.3, 0.99, value=0.80, step=0.01, label='NCC threshold')
+                        ncc_ms = gr.Slider(0, 1000, value=0, step=50,
+                                            label='Compare window ms (0=auto)')
+                        linkage_dd = gr.Dropdown(['average','complete','single'], value='average', label='Linkage')
                 with gr.Accordion("⚙️ Post-processing", open=False):
+                    do_synth = gr.Checkbox(value=True, label='Synthesize optimal samples')
                 extract_btn = gr.Button("🔬 Extract Samples", variant="primary", size="lg")
                 with gr.Row():
                     stem_out = gr.Audio(type='numpy', label='Stem', interactive=False)
                     rendered_out = gr.Audio(type='numpy', label='🔊 Reconstruction', interactive=False)
                 gr.Markdown("### Downloads")
                 with gr.Row():
                     archive_file = gr.File(label="📦 ZIP Archive", interactive=False)
                     midi_file = gr.File(label="🎹 MIDI", interactive=False)
+                sample_files = gr.File(label="Individual WAV samples", file_count="multiple", interactive=False)
                 metrics_tbl = gr.Dataframe(label="Extracted Samples")
                 status_txt = gr.Textbox(visible=False)
                 demucs_model.change(
+                    fn=lambda m: gr.update(choices=DEMUCS_STEMS.get(m,["drums","bass","other","vocals"])+["all"]),
                     inputs=[demucs_model], outputs=[stem_dd])
+                extract_btn.click(run_extraction,
                     [audio_in, stem_dd, demucs_model, demucs_shifts, demucs_overlap,
                      onset_mode, onset_delta, energy_db, pre_pad, min_dur, max_dur, min_gap,
                      ncc_thresh, ncc_ms, linkage_dd, target_min, target_max, do_synth],
                     ev_pat = gr.Dropdown(['rock','funk','halftime'], value='rock', label='Pattern')
                     ev_bpm = gr.Slider(80, 200, value=120, step=2, label='BPM')
                     ev_bars = gr.Slider(2, 8, value=4, step=1, label='Bars')
+                with gr.Row():
+                    ev_ncc = gr.Slider(0.3, 0.99, value=0.80, step=0.01, label='NCC threshold')
+                    ev_tmin = gr.Number(value=0, label='Target min', precision=0)
+                    ev_tmax = gr.Number(value=0, label='Target max', precision=0)
                 ev_btn = gr.Button("🧪 Evaluate", variant="primary", size="lg")
                 with gr.Row():
                     ev_mix = gr.Audio(type='numpy', label='Original', interactive=False)
                 ev_summary = gr.Dataframe(label="Summary")
                 ev_matches = gr.Dataframe(label="Matches")
                 ev_s1 = gr.Textbox(visible=False); ev_s2 = gr.Textbox(visible=False)
+                ev_btn.click(run_eval, [ev_pat, ev_bpm, ev_bars, ev_ncc, ev_tmin, ev_tmax],
                              [ev_mix, ev_rendered, ev_summary, ev_matches, ev_s1, ev_s2])
             # ── Optimize ──
             with gr.Tab("🔄 Optimize"):
+                gr.Markdown("### Autonomous Optimization\nTests across 6 diverse songs.")
                 with gr.Row():
+                    opt_n = gr.Slider(2,30,value=5,step=1,label='Iterations')
+                    opt_name = gr.Textbox(value="optimized",label='Config name')
+                    opt_author = gr.Textbox(value="",label='Author')
+                    opt_save = gr.Checkbox(value=True,label='Save to Hub')
                 opt_btn = gr.Button("🚀 Optimize", variant="primary", size="lg")
+                opt_log = gr.Textbox(label="Log",lines=20,max_lines=40)
                 opt_hist = gr.Dataframe(label="History")
                 opt_plot = gr.Plot(label="Progress")
+                opt_params = gr.Code(label="Best Config",language="json")
+                opt_btn.click(run_optimize,[opt_n,opt_name,opt_author,opt_save],
+                              [opt_log,opt_hist,opt_plot,opt_params])
             # ── Leaderboard ──
             with gr.Tab("🏆 Leaderboard"):
                 gr.Markdown("### Config Leaderboard")
                 lb_btn = gr.Button("🔄 Refresh"); lb_tbl = gr.Dataframe()
                 lb_s = gr.Textbox(visible=False)
+                lb_btn.click(refresh_leaderboard,[],[lb_tbl,lb_s])
     return app