Spaces:

rikhoffbauer2
/

drum-sample-extractor

Sleeping

App Files Files Community

rikhoffbauer2 committed about 1 month ago

Commit

0a6fa16

verified ·

1 Parent(s): 33c4b3e

v3: sample_extractor.py

Browse files

Files changed (1) hide show

sample_extractor.py +131 -0

sample_extractor.py CHANGED Viewed

@@ -475,6 +475,137 @@ def build_sample_map(clusters: list) -> dict:
     }
 # ─── Main pipeline ───────────────────────────────────────────────────────────
 def run_pipeline(

     }
+# ─── BPM Detection ───────────────────────────────────────────────────────────
def detect_bpm(y: np.ndarray, sr: int) -> float:
    """Estimate the tempo of an audio signal in beats per minute.

    Two estimates are derived from the same onset-strength envelope: the
    result of ``librosa.feature.tempo`` and the tempo implied by the median
    inter-beat interval reported by ``librosa.beat.beat_track``. The first
    estimate that lies inside [70, 200] BPM is returned. If neither does,
    the primary estimate is doubled or halved until it falls inside that
    range, which resolves the usual half-time/double-time ambiguity.

    Args:
        y: Audio samples, passed to ``librosa.onset.onset_strength``.
        sr: Sample rate of ``y`` in Hz.

    Returns:
        The tempo estimate rounded to one decimal place. A non-positive or
        NaN primary estimate is returned as-is (rounded), since it cannot
        be folded into range.
    """
    min_bpm, max_bpm = 70.0, 200.0

    onset_env = librosa.onset.onset_strength(y=y, sr=sr, aggregate=np.median)

    # Primary estimate.
    tempo_arr = librosa.feature.tempo(onset_envelope=onset_env, sr=sr)
    primary = float(tempo_arr.item())
    candidates = [primary]

    # Secondary estimate from the median inter-beat interval.
    _, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, units='time')
    if len(beats) > 2:
        median_ibi = float(np.median(np.diff(beats)))
        # A zero interval would divide by zero; ignore it.
        if median_ibi > 0:
            candidates.append(60.0 / median_ibi)

    # Prefer whichever estimate already sits in the plausible range,
    # checking the primary one first.
    for candidate in candidates:
        if min_bpm <= candidate <= max_bpm:
            return round(candidate, 1)

    # Otherwise fold the primary estimate by octaves. The range spans more
    # than a factor of two, so both loops terminate inside it.
    bpm = primary
    while 0 < bpm < min_bpm:
        bpm *= 2
    while bpm > max_bpm:
        bpm /= 2
    return round(bpm, 1)
+# ─── MIDI Rendering with extracted samples ───────────────────────────────────
def render_midi_with_samples(clusters: list, sr: int = 44100) -> np.ndarray:
    """Render the reconstruction to audio using the extracted samples.

    For every cluster, the audio of its ``best_hit`` is mixed into the
    output at the onset of each hit in ``cluster.hits``, with a gain derived
    from that hit's RMS energy relative to the best hit's RMS energy.

    Args:
        clusters: Objects exposing ``hits`` and ``best_hit``. Each hit must
            provide ``onset_time`` and ``duration`` (seconds, as they are
            multiplied by ``sr``), ``rms_energy`` and, for ``best_hit``,
            ``audio``. Assumes ``audio`` is a 1-D array — TODO confirm.
        sr: Sample rate in Hz used to convert onset times to sample indices.

    Returns:
        A ``float32`` array, peak-normalised to 0.9 unless it is silent.
        Its length covers the last hit end plus a one-second tail, or the
        end of the last placed sample, whichever is longer.
    """
    # Size the buffer once. It must hold the nominal length (last hit end
    # plus a 1 s tail) and every placed sample, which may be longer than the
    # hit it replaces.
    max_end = 0.0
    last_sample_end = 0
    for c in clusters:
        sample_len = len(c.best_hit.audio)
        for h in c.hits:
            max_end = max(max_end, h.onset_time + h.duration)
            last_sample_end = max(last_sample_end, int(h.onset_time * sr) + sample_len)
    total_samples = max(int((max_end + 1.0) * sr), last_sample_end)
    buf = np.zeros(total_samples, dtype=np.float64)

    # Mix each cluster's representative sample at every onset of that cluster.
    for c in clusters:
        sample = c.best_hit.audio.astype(np.float64)
        # Reference energy for velocity scaling; fall back to 0.1 when the
        # best hit reports no energy.
        ref_energy = c.best_hit.rms_energy if c.best_hit.rms_energy > 0 else 0.1
        for h in c.hits:
            # Energy ratio relative to the best hit, limited to [0, 2].
            # The lower clamp keeps the square root real.
            ratio = min(2.0, max(0.0, h.rms_energy / (ref_energy + 1e-8)))
            vel_scale = ratio ** 0.5  # perceptual square-root scaling
            start_idx = int(h.onset_time * sr)
            buf[start_idx:start_idx + len(sample)] += sample * vel_scale

    # Peak-normalise, skipping silent output to avoid dividing by ~0.
    pk = np.abs(buf).max()
    if pk > 1e-8:
        buf = buf / pk * 0.9
    return buf.astype(np.float32)
+# ─── ZIP Archive Export ──────────────────────────────────────────────────────
def build_archive(clusters: list, bpm: float, sr: int,
                  midi_path: str = None, rendered_audio: np.ndarray = None) -> str:
    """Build a ZIP archive of the extracted samples and their metadata.

    The archive contains:
      * ``samples/<label>.wav`` — each cluster's ``best_hit`` audio (24-bit PCM),
      * ``samples/<label>__synthesized.wav`` — when ``cluster.synthesized`` is set,
      * ``index.json`` — BPM, sample rate, totals and per-sample metadata,
      * ``reconstruction.mid`` — when ``midi_path`` names an existing file,
      * ``rendered_reconstruction.wav`` — when ``rendered_audio`` is given (16-bit PCM).

    Args:
        clusters: Objects exposing ``label``, ``count``, ``midi_note``,
            ``hits``, ``best_hit`` and ``synthesized``.
        bpm: Tempo stored in the index, rounded to one decimal place.
        sr: Sample rate in Hz used for every WAV written.
        midi_path: Optional path of a MIDI file to include.
        rendered_audio: Optional audio array to include as a WAV.

    Returns:
        Path of the ZIP file, created in the system temporary directory.
        The caller is responsible for deleting it.
    """
    import io
    import tempfile
    import zipfile

    def _wav_bytes(audio, subtype):
        # Encode audio as an in-memory WAV so it can go straight into the ZIP.
        wav_buf = io.BytesIO()
        sf.write(wav_buf, audio, sr, format='WAV', subtype=subtype)
        return wav_buf.getvalue()

    # mkstemp creates the file atomically; tempfile.mktemp only returns a
    # name, which is deprecated and open to a race with other processes.
    fd, zip_path = tempfile.mkstemp(suffix='.zip')
    os.close(fd)

    index = {
        'bpm': round(bpm, 1),
        'sample_rate': sr,
        'total_clusters': len(clusters),
        'total_hits': sum(c.count for c in clusters),
        'samples': {},
    }
    try:
        with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_STORED) as zf:
            for c in clusters:
                best = c.best_hit
                fname = f"samples/{c.label}.wav"
                zf.writestr(fname, _wav_bytes(best.audio, 'PCM_24'))

                entry = {
                    'file': fname,
                    # Label with its last '_'-separated component removed.
                    'classification': c.label.rsplit('_', 1)[0],
                    'midi_note': c.midi_note,
                    'occurrences': c.count,
                    'onset_times_sec': [round(t, 4) for t in sorted(h.onset_time for h in c.hits)],
                    'duration_sec': round(best.duration, 4),
                    'rms_energy': round(best.rms_energy, 6),
                    'spectral_centroid_hz': round(best.spectral_centroid, 1),
                }
                # Also include the synthesized version if available.
                if c.synthesized is not None:
                    syn_fname = f"samples/{c.label}__synthesized.wav"
                    zf.writestr(syn_fname, _wav_bytes(c.synthesized, 'PCM_24'))
                    entry['synthesized_file'] = syn_fname
                index['samples'][c.label] = entry

            zf.writestr('index.json', json.dumps(index, indent=2))

            if midi_path and os.path.exists(midi_path):
                zf.write(midi_path, 'reconstruction.mid')

            if rendered_audio is not None:
                zf.writestr('rendered_reconstruction.wav', _wav_bytes(rendered_audio, 'PCM_16'))
    except BaseException:
        # Do not leave a half-written archive behind; cleanup is best-effort
        # and must not mask the original error.
        try:
            os.remove(zip_path)
        except OSError:
            pass
        raise
    return zip_path
 # ─── Main pipeline ───────────────────────────────────────────────────────────
 def run_pipeline(