SaltProphet committed on
Commit
b1eee94
·
verified ·
1 Parent(s): 1232e06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +703 -824
app.py CHANGED
@@ -1,140 +1,189 @@
1
  import gradio as gr
2
- import os
3
- import shutil
4
- import asyncio
5
  import librosa
6
  import librosa.display
7
  import soundfile as sf
8
- import numpy as np
9
- import time
10
- import zipfile
11
  import tempfile
12
- import matplotlib.pyplot as plt
 
13
  import matplotlib
14
- import struct
15
- from scipy.signal import convolve, butter, lfilter, windows
 
 
16
 
17
- # Use a non-interactive backend for Matplotlib for UI compatibility
18
  matplotlib.use('Agg')
19
 
20
- # --- UTILITY: MIDI FILE WRITING ---
21
-
22
def encode_delta_time(time):
    """Encodes a time value into MIDI variable-length quantity format.

    In a VLQ every byte carries 7 payload bits; the most significant bit is
    set on every byte EXCEPT the last (least significant) one.  E.g.
    0x80 -> b'\\x81\\x00', 0x3FFF -> b'\\xff\\x7f'.

    BUG FIX: the previous version set the continuation bit on the low-order
    byte (producing e.g. b'\\x01\\x80' for 128), which corrupts the event
    stream for any delta >= 0x80.
    """
    # Least significant 7 bits go last and never carry the continuation bit.
    data = [time & 0x7F]
    time >>= 7
    # Higher-order groups are prepended with the continuation bit set.
    while time > 0:
        data.insert(0, (time & 0x7F) | 0x80)
        time >>= 7
    return bytes(data)
34
-
35
def freq_to_midi(freq):
    """Convert a frequency in Hz to the nearest MIDI note number.

    Returns 0 (treated as "no note") for non-positive input and for
    anything below 40 Hz, which is used as a noise floor for the pitch
    tracker.
    """
    # Unvoiced frames (freq <= 0) and sub-40 Hz rumble both map to "no
    # note"; a single guard covers both cases.
    if freq < 40:
        return 0
    # Equal temperament: A4 = 440 Hz = MIDI 69, 12 semitones per octave.
    return int(round(69 + 12 * np.log2(freq / 440.0)))
45
 
46
def write_midi_file(notes_list, bpm, output_path):
    """
    Writes a very basic, dependency-free MIDI file (.mid) from a list of notes.

    Each note is (midi_note, start_time_sec, duration_sec); entries with
    midi_note == 0 are skipped.  The list is sorted in place by start time.
    Produces a format-1 Standard MIDI File with a single track containing a
    tempo and 4/4 time-signature meta event followed by note on/off pairs.

    BUG FIXES vs. the previous version:
      * Meta-event delta times were written as 4-byte big-endian words
        (struct.pack('>L', 0)); SMF delta times must be variable-length
        quantities, so the extra zero bytes corrupted the track.
      * The MTrk length field excluded the End-of-Track event, so readers
        stopped four bytes early.
      * Overlapping notes could yield a negative delta; now clamped to 0.
    """
    if not notes_list:
        return

    tempo_us_per_beat = int(60000000 / bpm)
    division = 96  # Ticks per quarter note
    seconds_per_tick = 60.0 / (bpm * division)

    midi_data = [
        # Track 0: Tempo and Time Sig (delta times are VLQ-encoded zeros)
        encode_delta_time(0) + b'\xFF\x51\x03' + struct.pack('>L', tempo_us_per_beat)[1:],  # Set Tempo
        encode_delta_time(0) + b'\xFF\x58\x04\x04\x02\x18\x08',  # Time Signature (4/4)
    ]

    # Sort notes by start time so deltas are (normally) non-negative
    notes_list.sort(key=lambda x: x[1])

    current_tick = 0

    for note, start_sec, duration_sec in notes_list:
        if note == 0:
            continue

        # Calculate delta time from last event; clamp so overlapping notes
        # cannot produce a negative (unencodable) delta.
        target_tick = int(start_sec / seconds_per_tick)
        delta_tick = max(0, target_tick - current_tick)
        current_tick += delta_tick

        # Note On event (Channel 1, Velocity 100)
        note_on = b'\x90' + struct.pack('>B', note) + b'\x64'
        midi_data.append(encode_delta_time(delta_tick) + note_on)

        # Note Off event (Channel 1, Velocity 0)
        duration_ticks = int(duration_sec / seconds_per_tick)
        note_off = b'\x80' + struct.pack('>B', note) + b'\x00'
        midi_data.append(encode_delta_time(duration_ticks) + note_off)

        current_tick += duration_ticks

    # End of Track meta event must be counted in the chunk length.
    track_data = b"".join(midi_data) + b'\x00\xFF\x2F\x00'

    # 1. Header Chunk (MThd): format 1, 1 track, 96 ticks per quarter note
    header = b'MThd' + struct.pack('>L', 6) + b'\x00\x01' + struct.pack('>H', 1) + struct.pack('>H', division)

    # 2. Track Chunk (MTrk)
    track_chunk = b'MTrk' + struct.pack('>L', len(track_data)) + track_data

    with open(output_path, 'wb') as f:
        f.write(header + track_chunk)
98
-
99
- # --- CONFIGURATION & UTILITY ---
100
-
101
# Mapping for standard key to Camelot Code.
# Camelot wheel: positions 1-12 follow the circle of fifths; 'B' = major,
# 'A' = minor.  Enharmonic spellings must map to the same wheel position as
# their canonical names.
KEY_TO_CAMELOT = {
    "C Maj": "8B", "G Maj": "9B", "D Maj": "10B", "A Maj": "11B", "E Maj": "12B",
    "B Maj": "1B", "F# Maj": "2B", "Db Maj": "3B", "Ab Maj": "4B", "Eb Maj": "5B",
    "Bb Maj": "6B", "F Maj": "7B",
    "A Min": "8A", "E Min": "9A", "B Min": "10A", "F# Min": "11A", "C# Min": "12A",
    "G# Min": "1A", "D# Min": "2A", "Bb Min": "3A", "F Min": "4A", "C Min": "5A",
    "G Min": "6A", "D Min": "7A",
    # Enharmonic aliases.  BUG FIX: Cb Maj == B Maj (1B, was 7B),
    # D# Maj == Eb Maj (5B, was 11B), G# Maj == Ab Maj (4B, was 3B).
    "Gb Maj": "2B", "Cb Maj": "1B", "A# Min": "3A", "D# Maj": "5B", "G# Maj": "4B"
}
 
 
 
 
 
 
 
 
 
111
 
112
def get_harmonic_recommendations(key_str):
    """Calculates harmonically compatible keys based on the Camelot wheel.

    Compatible neighbours of code N[A|B] are: same number in the opposite
    mode, and the +1 / -1 positions in the same mode.  Returns a
    ' | ' separated string of "Key (Code)" entries, or an explanatory
    "N/A (...)" message when the key is unknown or the code is malformed.
    """
    code = KEY_TO_CAMELOT.get(key_str, "N/A")
    if code == "N/A":
        return "N/A (Key not recognized or 'Unknown Key' detected.)"
    try:
        num = int(code[:-1])
        mode = code[-1]
        opposite_mode = 'B' if mode == 'A' else 'A'
        # Wheel arithmetic wraps 12 -> 1 and 1 -> 12.
        num_plus_one = (num % 12) + 1
        num_minus_one = 12 if num == 1 else num - 1
        recs = [f"{num}{opposite_mode}", f"{num_plus_one}{mode}", f"{num_minus_one}{mode}"]
        # Reverse lookup; for duplicate codes (enharmonic keys) the last
        # entry in KEY_TO_CAMELOT wins.
        CAMELOT_TO_KEY = {v: k for k, v in KEY_TO_CAMELOT.items()}
        rec_keys = [f"{CAMELOT_TO_KEY.get(r_code, f'Code {r_code}')} ({r_code})" for r_code in recs]
        return " | ".join(rec_keys)
    except (ValueError, IndexError):
        # Malformed Camelot code.  (Was a bare `except:`, which also
        # swallowed KeyboardInterrupt/SystemExit.)
        return "N/A (Error calculating recommendations.)"
128
 
129
def detect_key(y, sr):
    """Analyzes the audio to determine the most likely musical key.

    Correlates the time-averaged, normalized chroma vector against all 12
    rotations of Krumhansl-Schmuckler major and minor key profiles and
    returns e.g. "C Maj" / "A Min", or "Unknown Key" on any failure.
    """
    try:
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_sums = np.sum(chroma, axis=1)
        chroma_norm = chroma_sums / np.sum(chroma_sums)

        # Krumhansl-Schmuckler key profiles (C-rooted).
        major_template = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
        minor_template = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])

        pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

        major_correlations = [np.dot(chroma_norm, np.roll(major_template, i)) for i in range(12)]
        best_major_index = np.argmax(major_correlations)
        minor_correlations = [np.dot(chroma_norm, np.roll(minor_template, i)) for i in range(12)]
        best_minor_index = np.argmax(minor_correlations)

        # BUG FIX: compare each mode's own best score.  Previously the minor
        # score was indexed with best_major_index, biasing the result.
        if major_correlations[best_major_index] > minor_correlations[best_minor_index]:
            return pitch_classes[best_major_index] + " Maj"
        else:
            return pitch_classes[best_minor_index] + " Min"
    except Exception as e:
        print(f"Key detection failed: {e}")
        return "Unknown Key"
154
 
155
def reduce_reverb(audio_path, log_history):
    """Attenuate reverb in a vocal file via spectral ambient-floor subtraction.

    Loads *audio_path*, subtracts a frequency-weighted fraction of the
    10th-percentile magnitude floor from the STFT, and writes the result
    beside the input with a ``_dry`` suffix.  Returns (path, updated_log);
    on any failure the original path is returned unchanged.
    """
    try:
        y, sr = librosa.load(audio_path, sr=None)

        n_fft = 2048
        hop_length = 512

        spectrum = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
        mag = np.abs(spectrum)
        phase = np.angle(spectrum)

        # Per-bin ambient floor: the quietest 10% of frames over time
        # approximates the room tone / reverb tail level.
        ambient_floor = np.percentile(mag, 10, axis=1, keepdims=True)

        # Subtract less at high frequencies (taper from 1.0 down to 0.2
        # above ~1 kHz) so the vocal does not get dulled.
        freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
        dampening_factor = np.clip(1 - (freqs / 1000.0), 0.2, 1.0)[:, np.newaxis]
        reduction_strength = 0.5

        ambient_reduction = ambient_floor * reduction_strength * dampening_factor
        mag_processed = np.maximum(mag - ambient_reduction, 0)

        # Rebuild with the original phase and resynthesize.
        y_processed = librosa.istft(mag_processed * np.exp(1j * phase), length=len(y), dtype=y.dtype, hop_length=hop_length)

        processed_path = audio_path.replace(".wav", "_dry.wav")
        sf.write(processed_path, y_processed, sr)

        log_history += "✅ Reverb reduction applied to vocals. Using dry vocal track.\n"
        return processed_path, log_history

    except Exception as e:
        log_history += f"⚠️ WARNING: Reverb reduction failed ({e}). Proceeding with wet vocal audio.\n"
        return audio_path, log_history
189
-
190
def apply_crossfade(audio_chunk, sr, fade_ms):
    """Fade the edges of *audio_chunk* in/out using half-Hann ramps.

    fade_ms is the fade length in milliseconds.  Chunks shorter than two
    full fades get half-length fades instead.  Returns a faded copy; the
    input object itself is returned untouched when no fade applies.
    """
    n_samples = len(audio_chunk)
    if fade_ms <= 0 or n_samples == 0:
        return audio_chunk

    fade_samples = int(sr * (fade_ms / 1000.0))

    # Shrink the fade so fade-in and fade-out never overlap.
    if n_samples < 2 * fade_samples:
        fade_samples = n_samples // 2
        if fade_samples == 0:
            return audio_chunk

    # A 2N-point Hann window split in half gives matching in/out ramps.
    window = np.hanning(2 * fade_samples)
    ramp_in = window[:fade_samples]
    ramp_out = window[fade_samples:]

    faded = audio_chunk.copy()
    if fade_samples > 0:
        if faded.ndim == 1:
            faded[:fade_samples] *= ramp_in
            faded[-fade_samples:] *= ramp_out
        else:
            # Broadcast the ramps across all channels.
            faded[:fade_samples, :] *= ramp_in[:, np.newaxis]
            faded[-fade_samples:] *= ramp_out[:, np.newaxis]

    return faded
217
-
218
def generate_waveform_preview(y, sr, slice_samples, stem_name, loop_type, temp_dir):
    """Render the stem waveform with dashed slice markers; return the PNG path.

    slice_samples holds the slice start positions (in samples); each one is
    drawn as a red dashed vertical line over the waveform.
    """
    # Millisecond timestamp keeps successive previews from colliding.
    img_path = os.path.join(temp_dir, f"{stem_name}_preview_{int(time.time() * 1000)}.png")

    plt.figure(figsize=(10, 1.5))

    # Collapse to mono purely for display.
    y_display = librosa.to_mono(y.T) if y.ndim > 1 else y
    librosa.display.waveshow(y_display, sr=sr, x_axis='time', color="#4a7098")

    # Overlay one marker per slice boundary.
    for t in librosa.samples_to_time(slice_samples, sr=sr):
        plt.axvline(x=t, color='red', linestyle='--', linewidth=1, alpha=0.7)

    plt.title(f"{stem_name} Slices ({loop_type})", fontsize=10)
    plt.xlabel("")
    plt.yticks([])
    plt.tight_layout(pad=0)

    plt.savefig(img_path)
    plt.close()

    return img_path
241
-
242
def apply_modulation(y, sr, bpm, rate, pan_depth, level_depth):
    """Applies tempo-synced LFOs for panning and volume modulation.

    y        : mono (N,) or stereo (N, 2) float signal; mono is upmixed.
    rate     : one of '1/2', '1/4', '1/8', '1/16' (note value of one LFO cycle).
    pan_depth, level_depth : modulation depths in 0..1.

    Returns a stereo array.  BUG FIX: the caller's array is no longer
    modified in place when the input is already stereo — a copy is taken
    before the in-place gain multiplications below.
    """
    if y.ndim == 1:
        y = np.stack((y, y), axis=-1)
    elif y.ndim == 0:
        return y
    else:
        # Work on a copy so the caller's buffer is never mutated.
        y = y.copy()

    N = len(y)
    duration_sec = N / sr

    rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
    beats_per_measure = rate_map.get(rate, 1)
    # Tempo-synced LFO frequency in Hz.
    lfo_freq_hz = (bpm / 60.0) * (beats_per_measure / 4.0)

    t = np.linspace(0, duration_sec, N, endpoint=False)

    # Panning LFO: opposite-phase gain ramps on L/R.
    if pan_depth > 0:
        pan_lfo = np.sin(2 * np.pi * lfo_freq_hz * t) * pan_depth
        L_mod = (1 - pan_lfo) / 2.0
        R_mod = (1 + pan_lfo) / 2.0
        y[:, 0] *= L_mod
        y[:, 1] *= R_mod

    # Level LFO (Tremolo): gain oscillates between (1 - depth) and 1.
    if level_depth > 0:
        level_lfo = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
        gain_multiplier = (1 - level_depth) + (level_depth * level_lfo)
        y[:, 0] *= gain_multiplier
        y[:, 1] *= gain_multiplier

    return y
274
 
275
def apply_normalization_dbfs(y, target_dbfs):
    """Peak-normalize *y* so its maximum absolute sample hits *target_dbfs*.

    Non-negative targets are treated as "off".  Near-silent input
    (peak <= 1e-6) is returned untouched to avoid amplifying noise.
    The scaled output is clipped to [-1.0, 1.0].
    """
    if target_dbfs >= 0:
        return y

    peak = np.max(np.abs(y))
    if peak <= 1e-6:
        # Effectively silence — nothing meaningful to normalize.
        return y

    # Convert the dBFS target to linear amplitude and rescale.
    target_amp = 10 ** (target_dbfs / 20.0)
    return np.clip(y * (target_amp / peak), -1.0, 1.0)
 
 
290
 
291
- # --- NEW UTILITY: TRANSIENT SHAPING ---
292
-
293
def apply_transient_shaping(y, sr, attack_gain, sustain_gain):
    """
    Applies basic transient shaping to the audio signal (mono or stereo).

    Two envelope followers (5 ms "fast" and 50 ms "slow" Hann-smoothed
    rectified signal) are compared per sample; where the fast envelope
    dominates, the material is treated as a transient and gets attack_gain,
    otherwise sustain_gain, with a smooth blend in between.  Intended for
    the 'drums' stem.
    """
    y_mono = y if y.ndim == 1 else librosa.to_mono(y.T)
    rectified = np.abs(y_mono)

    # Follower window lengths derived from typical transient/sustain times.
    n_attack = int(sr * 0.005)   # 5 ms
    n_sustain = int(sr * 0.05)   # 50 ms

    # Unit-area Hann kernels act as smoothing filters.
    k_fast = windows.hann(n_attack * 2)
    k_fast /= np.sum(k_fast)
    k_slow = windows.hann(n_sustain * 2)
    k_slow /= np.sum(k_slow)

    fast_envelope = convolve(rectified, k_fast, mode='same')
    slow_envelope = convolve(rectified, k_slow, mode='same')

    # "Transientness" ratio, clipped to [1, 5] then rescaled to [0, 1]
    # (the 4.0 divisor is the clip range 5.0 - 1.0).
    ratio = np.clip(fast_envelope / (slow_envelope + 1e-6), 1.0, 5.0)
    transientness = (ratio - 1.0) / 4.0

    # Per-sample gain: sustain_gain on steady material, attack_gain on hits.
    gain_envelope = (sustain_gain * (1 - transientness)) + (attack_gain * transientness)

    if y.ndim == 1:
        return y * gain_envelope
    return y * gain_envelope[:, np.newaxis]
333
-
334
- # --- NEW UTILITY: FILTER MODULATION ---
335
 
336
- def apply_filter_modulation(y, sr, bpm, rate, filter_type, freq, depth):
337
- """
338
- Applies a tempo-synced LFO to a 2nd order Butterworth filter cutoff frequency.
339
- """
340
- if depth == 0:
341
  return y
342
 
343
  # Ensure stereo for LFO application
344
  if y.ndim == 1:
345
  y = np.stack((y, y), axis=-1)
 
 
346
 
347
  N = len(y)
348
  duration_sec = N / sr
349
 
350
  # LFO Rate Calculation
351
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
352
- beats_per_measure = rate_map.get(rate, 1)
353
- lfo_freq_hz = (bpm / 60.0) * (beats_per_measure / 4.0)
354
 
355
  t = np.linspace(0, duration_sec, N, endpoint=False)
356
 
@@ -360,654 +294,599 @@ def apply_filter_modulation(y, sr, bpm, rate, filter_type, freq, depth):
360
  # Modulate Cutoff Frequency: Cutoff = BaseFreq + (LFO * Depth)
361
  cutoff_modulation = freq + (lfo_value * depth)
362
  # Safety clip to prevent instability
363
- cutoff_modulation = np.clip(cutoff_modulation, 20.0, sr / 2.0 - 100)
 
364
 
365
  y_out = np.zeros_like(y)
366
- filter_type_b = filter_type.lower().replace('-pass', '')
367
- frame_size = 512 # Frame-based update for filter coefficients
 
 
 
 
 
 
 
368
 
369
  # Apply filter channel by channel
370
  for channel in range(y.shape[1]):
371
- zi = np.zeros(2) # Initial filter state (2nd order filter)
372
 
373
  for frame_start in range(0, N, frame_size):
374
  frame_end = min(frame_start + frame_size, N)
 
 
375
  frame = y[frame_start:frame_end, channel]
376
 
377
  # Use the average LFO cutoff for the frame
378
  avg_cutoff = np.mean(cutoff_modulation[frame_start:frame_end])
379
 
380
  # Calculate 2nd order Butterworth filter coefficients
381
- b, a = butter(2, avg_cutoff, btype=filter_type_b, fs=sr)
 
 
 
 
 
 
 
 
382
 
383
  # Apply filter to the frame, updating the state `zi`
384
- filtered_frame, zi = lfilter(b, a, frame, zi=zi)
385
  y_out[frame_start:frame_end, channel] = filtered_frame
386
 
387
  return y_out
388
 
389
- # --- CORE SEPARATION FUNCTION (Truncated for brevity, focus on analysis) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
390
 
391
async def separate_stems(audio_file_path, selected_model, denoise_enabled, reverb_reduction_enabled):
    """
    Separates audio, detects BPM and Key, and applies post-processing.
    (Function logic remains the same for separation, only the returns are relevant)

    Async generator used as a Gradio event handler: each `yield` maps
    module-level components (status_log, detected_bpm_key, harmonic_recs,
    the *_output audio widgets, ...) to their new values.

    NOTE(review): selected_model, denoise_enabled and reverb_reduction_enabled
    are accepted but never read in this visible body — presumably consumed by
    the real (truncated) Demucs call; confirm before removing.
    """
    if audio_file_path is None:
        raise gr.Error("No audio file uploaded!")

    log_history = "Starting separation...\n"
    # Reset the analysis displays before work starts.
    yield { status_log: log_history, detected_bpm_key: "", harmonic_recs: "---" }

    # 1. Pre-process and analyze original audio
    detected_bpm = 120            # fallback if beat tracking fails
    detected_key = "Unknown Key"  # fallback if key detection fails
    # ... (BPM and Key detection logic, including error handling) ...
    try:
        y_orig, sr_orig = librosa.load(audio_file_path, sr=None)
        # Beat tracking and key detection both operate on a mono signal.
        y_mono = librosa.to_mono(y_orig.T) if y_orig.ndim > 1 else y_orig

        tempo, _ = librosa.beat.beat_track(y=y_mono, sr=sr_orig)
        detected_bpm = 120 if tempo is None or tempo == 0 else int(np.round(tempo).item())
        detected_key = detect_key(y_mono, sr_orig)

        harmonic_recommendations = get_harmonic_recommendations(detected_key)

        status_string = f"Detected Tempo: {detected_bpm} BPM. Detected Key: {detected_key}. Proceeding with separation...\n"
        log_history += status_string
        yield {
            status_log: log_history,
            detected_bpm_key: f"{detected_bpm} BPM, {detected_key}",
            harmonic_recs: harmonic_recommendations
        }

    except Exception as e:
        # Analysis is best-effort: fall back to the defaults set above.
        log_history += f"⚠️ WARNING: Analysis failed ({e}). Defaulting to 120 BPM, Unknown Key.\n"
        harmonic_recommendations = "N/A (Analysis failed)"
        yield {
            status_log: log_history,
            detected_bpm_key: f"{detected_bpm} BPM, {detected_key}",
            harmonic_recs: harmonic_recommendations
        }

    # --- Truncated Demucs Output Placeholder (For Demonstrating Success) ---
    # Mock file paths and generation for demo purposes
    vocals_path = "separated/htdemucs/input/vocals.wav"
    drums_path = "separated/htdemucs/input/drums.wav"
    bass_path = "separated/htdemucs/input/bass.wav"
    other_path = "separated/htdemucs/input/other.wav"
    guitar_path = None  # only produced by the 6-stem model, absent here
    piano_path = None

    # 10 seconds of stereo noise stands in for each separated stem.
    mock_sr = 44100
    mock_duration = 10
    mock_y = np.random.uniform(low=-0.5, high=0.5, size=(mock_sr * mock_duration, 2)).astype(np.float32)
    os.makedirs(os.path.dirname(vocals_path), exist_ok=True)
    sf.write(vocals_path, mock_y, mock_sr)
    sf.write(drums_path, mock_y, mock_sr)
    sf.write(bass_path, mock_y, mock_sr)
    sf.write(other_path, mock_y, mock_sr)

    # --- End Truncated Demucs Output Placeholder ---

    log_history += "✅ Stem separation complete! (Mock files generated for demo)\n"
    yield {
        status_log: log_history,
        vocals_output: gr.update(value=vocals_path, visible=True),
        drums_output: gr.update(value=drums_path, visible=True),
        bass_output: gr.update(value=bass_path, visible=True),
        other_output: gr.update(value=other_path, visible=True),
        guitar_output: gr.update(value=guitar_path, visible=False),
        piano_output: gr.update(value=piano_path, visible=False),
        detected_bpm_key: f"{detected_bpm} BPM, {detected_key}",
        # NOTE(review): the two keys below construct NEW Textbox instances
        # instead of referencing the layout's textboxes — it looks like the
        # intent was to target them by elem_id; confirm these updates
        # actually reach the UI.
        gr.Textbox(elem_id="detected_bpm_key_output"): f"{detected_bpm} BPM, {detected_key}",
        gr.Textbox(elem_id="harmonic_recs_output"): harmonic_recommendations
    }
466
-
467
-
468
- # --- CORE SLICING FUNCTION (UPDATED for MIDI and Rich Tagging) ---
469
-
470
def slice_stem_real(stem_audio_data, loop_choice, sensitivity, stem_name, manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_key, pan_depth, level_depth, modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth):
    """
    Slices a single stem, applies pitch shift, modulation, normalization,
    transient shaping, filter LFO, and generates MIDI/visualizations.

    stem_audio_data is a Gradio audio tuple (sample_rate, int_samples).
    Returns ([(file_path, preview_image_path), ...], temp_dir) — the caller
    owns cleanup of temp_dir.  Returns ([], None) for missing/empty input.

    NOTE(review): target_dbfs is accepted but apply_normalization_dbfs is
    never called in this visible body — confirm whether normalization was
    meant to run before export.
    """
    if stem_audio_data is None:
        return [], None

    sample_rate, y_int = stem_audio_data
    # Gradio delivers integer PCM; convert to float32 [-1, 1] for processing.
    y = librosa.util.buf_to_float(y_int, dtype=np.float32)

    if y.ndim == 0: return [], None

    y_mono = librosa.to_mono(y.T) if y.ndim > 1 else y

    # --- 1. PITCH SHIFTING (if enabled) ---
    if transpose_semitones != 0:
        y_shifted = librosa.effects.pitch_shift(y, sr=sample_rate, n_steps=transpose_semitones)
        y = y_shifted

    # --- 2. TRANSIENT SHAPING (Drums Only) ---
    if stem_name == "drums" and (attack_gain != 1.0 or sustain_gain != 1.0):
        y = apply_transient_shaping(y, sample_rate, attack_gain, sustain_gain)

    # --- 3. FILTER MODULATION (LFO 2.0) ---
    if filter_depth > 0:
        y = apply_filter_modulation(y, sample_rate, manual_bpm, modulation_rate, filter_type, filter_freq, filter_depth)

    # --- 4. PAN/LEVEL MODULATION ---
    # UI sliders are percentages; helpers expect 0..1.
    normalized_pan_depth = pan_depth / 100.0
    normalized_level_depth = level_depth / 100.0

    if normalized_pan_depth > 0 or normalized_level_depth > 0:
        y = apply_modulation(y, sample_rate, manual_bpm, modulation_rate, normalized_pan_depth, normalized_level_depth)

    # Check if any modification was applied for the RICH METADATA TAGGING
    # (note: `and` binds tighter than `or`, so the drums clause is evaluated
    # as a unit).
    is_modified = (
        transpose_semitones != 0 or
        normalized_pan_depth > 0 or normalized_level_depth > 0 or
        filter_depth > 0 or
        stem_name == "drums" and (attack_gain != 1.0 or sustain_gain != 1.0)
    )
    mod_tag = "_MOD" if is_modified else ""  # Rich Tagging: Modification flag

    # --- 6. DETERMINE BPM & KEY (FOR RICH TAGGING) ---
    bpm_int = int(manual_bpm)
    bpm_tag = f"{bpm_int}BPM"  # Rich Tagging: BPM
    time_sig_tag = time_signature.replace("/", "")  # Rich Tagging: Time Signature

    key_tag = detected_key.replace(" ", "")
    if transpose_semitones != 0:
        # Rotate the detected root by the transposition amount so the tag
        # reflects the exported (shifted) key.
        root = detected_key.split(" ")[0]
        mode = detected_key.split(" ")[1]
        pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
        try:
            current_index = pitch_classes.index(root)
            new_index = (current_index + transpose_semitones) % 12
            new_key_root = pitch_classes[new_index]
            key_tag = f"{new_key_root}{mode}Shift"  # Rich Tagging: Transposed Key
        except ValueError:
            pass  # Keep original key tag if root not found

    # --- 7. MIDI GENERATION (Melodic Stems) ---
    output_files = []
    loops_dir = tempfile.mkdtemp()
    is_melodic = stem_name in ["vocals", "bass", "guitar", "piano", "other"]

    if is_melodic and ("Bar Loops" in loop_choice):
        try:
            # Use piptrack for a more robust (though less accurate than Pyin) general pitch detection
            pitches, magnitudes = librosa.piptrack(y=y_mono, sr=sample_rate)
            # Take the strongest pitch candidate per frame.
            main_pitch_line = np.zeros(pitches.shape[1])
            for t in range(pitches.shape[1]):
                index = magnitudes[:, t].argmax()
                main_pitch_line[t] = pitches[index, t]

            notes_list = []

            # Simple note segmentation: a note spans consecutive frames that
            # quantize to the same MIDI number.
            i = 0
            while i < len(main_pitch_line):
                current_freq = main_pitch_line[i]
                current_midi = freq_to_midi(current_freq)

                j = i
                while j < len(main_pitch_line) and freq_to_midi(main_pitch_line[j]) == current_midi:
                    j += 1

                duration_frames = j - i

                # Minimum duration filter to ignore extremely short notes
                if current_midi != 0 and duration_frames >= 2:
                    start_sec = librosa.frames_to_time(i, sr=sample_rate, hop_length=512)
                    duration_sec = librosa.frames_to_time(duration_frames, sr=sample_rate, hop_length=512)
                    notes_list.append((current_midi, start_sec, duration_sec))

                i = j

            full_stem_midi_path = os.path.join(loops_dir, f"{stem_name}_MELODY_{key_tag}_{bpm_tag}{mod_tag}.mid")
            write_midi_file(notes_list, manual_bpm, full_stem_midi_path)
            output_files.append((full_stem_midi_path, loops_dir))

        except Exception as e:
            print(f"MIDI generation failed for {stem_name}: {e}")
            # Do not stop execution — WAV slicing below still runs.

    # --- 8. CALCULATE TIMING & SLICING ---
    beats_per_bar = 4
    if time_signature == "3/4": beats_per_bar = 3

    slice_samples = []  # slice start positions, consumed by the preview image

    if "Bar Loops" in loop_choice:
        # Fixed-length grid slicing: N bars at the manual BPM.
        bars = int(loop_choice.split(" ")[0])
        loop_type_tag = f"{bars}Bar"
        loop_duration_samples = int((60.0 / bpm_int * beats_per_bar * bars) * sample_rate)

        if loop_duration_samples == 0: return [], loops_dir

        num_loops = len(y) // loop_duration_samples

        for i in range(num_loops):
            start_sample = i * loop_duration_samples
            end_sample = start_sample + loop_duration_samples
            slice_data = y[start_sample:end_sample]

            # Rich Metadata/Tagging via Filename Enhancement
            filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_tag}_{time_sig_tag}{mod_tag}.wav")
            sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
            output_files.append((filename, loops_dir))
            slice_samples.append(start_sample)

    elif "One-Shots" in loop_choice:
        # Transient-based slicing: one slice per detected onset, each running
        # to the next onset (or end of stem).
        loop_type_tag = "OneShot"
        onset_frames = librosa.onset.onset_detect(
            y=y_mono, sr=sample_rate, delta=sensitivity,
            wait=1, pre_avg=1, post_avg=1, post_max=1, units='frames'
        )
        onset_samples = librosa.frames_to_samples(onset_frames)

        if len(onset_samples) > 0:
            num_onsets = len(onset_samples)
            slice_samples = list(onset_samples)

            for i, start_sample in enumerate(onset_samples):
                end_sample = onset_samples[i+1] if i+1 < num_onsets else len(y)
                slice_data = y[start_sample:end_sample]

                # NOTE(review): crossfade is only applied to one-shots, not
                # bar loops — confirm that is intentional.
                if crossfade_ms > 0:
                    slice_data = apply_crossfade(slice_data, sample_rate, crossfade_ms)

                # Rich Metadata/Tagging via Filename Enhancement
                filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_tag}{mod_tag}.wav")
                sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
                output_files.append((filename, loops_dir))

    if not output_files:
        return [], loops_dir

    # --- 9. VISUALIZATION GENERATION ---
    # One shared waveform/slice-map image is paired with every exported file.
    img_path = generate_waveform_preview(y, sample_rate, slice_samples, stem_name, loop_choice, loops_dir)

    # Return audio file path and the single visualization map
    return [(audio_file, img_path) for audio_file, _ in output_files if audio_file.endswith(('.wav', '.mid'))], loops_dir
634
 
635
- # --- SLICING HANDLERS (UPDATED for NEW Inputs) ---
 
 
 
 
636
 
637
async def slice_all_and_zip_real(vocals, drums, bass, other, guitar, piano, loop_choice, sensitivity, manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_bpm_key_str, pan_depth, level_depth, modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth):
    """
    Slices all available stems, applies all transformations, and packages them into a ZIP file.

    Async generator wired as a Gradio handler: yields dicts updating the
    module-level status_log and download_zip_file components.  Per-stem
    temp directories from slice_stem_real are removed in the finally block;
    the ZIP is written to the working directory.
    """
    log_history = "Starting batch slice...\n"
    yield { status_log: log_history }
    # Give the UI a moment to render the initial status before heavy work.
    await asyncio.sleep(0.1)

    # detected_bpm_key_str has the form "<bpm> BPM, <key>".
    parts = detected_bpm_key_str.split(', ')
    key_str = parts[1] if len(parts) > 1 else "Unknown Key"

    stems_to_process = {
        "vocals": vocals, "drums": drums, "bass": bass,
        "other": other, "guitar": guitar, "piano": piano
    }
    zip_path = "Loop_Architect_Pack.zip"

    num_stems = sum(1 for data in stems_to_process.values() if data is not None)
    if num_stems == 0:
        raise gr.Error("No stems to process! Please separate stems first.")

    all_temp_dirs = []  # temp dirs to clean up once zipping is done
    try:
        with zipfile.ZipFile(zip_path, 'w') as zf:
            processed_count = 0
            for name, data in stems_to_process.items():
                if data is not None:
                    log_history += f"--- Slicing {name} stem ---\n"
                    yield { status_log: log_history }

                    # data is a Gradio audio tuple (sample_rate, samples).
                    sliced_files_and_viz, temp_dir = slice_stem_real(
                        (data[0], data[1]), loop_choice, sensitivity, name,
                        manual_bpm, time_signature, crossfade_ms, transpose_semitones, key_str,
                        pan_depth, level_depth, modulation_rate, target_dbfs,
                        attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
                    )

                    if sliced_files_and_viz:
                        # Write both WAV and MIDI files to the ZIP
                        midi_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.mid'))
                        wav_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.wav'))

                        log_history += f"Generated {wav_count} WAV slices and {midi_count} MIDI files for {name}.\n"
                        all_temp_dirs.append(temp_dir)
                        for loop_file, _ in sliced_files_and_viz:
                            # Create a subfolder for WAVs and a separate one for MIDIs in the zip
                            ext = 'MIDI' if loop_file.endswith('.mid') else name
                            arcname = os.path.join(ext, os.path.basename(loop_file))
                            zf.write(loop_file, arcname)
                    else:
                        log_history += f"No slices generated for {name}.\n"

                    # NOTE(review): processed_count is tallied but never used
                    # beyond this loop — candidate for removal.
                    processed_count += 1
                    yield { status_log: log_history }

        log_history += "Packaging complete! WAVs and corresponding MIDIs are organized in the ZIP.\n"
        yield {
            status_log: log_history + "✅ Pack ready for download!",
            download_zip_file: gr.update(value=zip_path, visible=True)
        }

    except Exception as e:
        print(f"An error occurred during slice all: {e}")
        yield { status_log: log_history + f"❌ ERROR: {e}" }
    finally:
        # Always remove the per-stem temp directories, even on failure.
        for d in all_temp_dirs:
            if d and os.path.exists(d):
                shutil.rmtree(d)
-
706
- # --- Create the full Gradio Interface ---
707
- with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red")) as demo:
708
- # --- Global State Variables ---
709
- # These store values across interactions but are not direct UI components themselves.
710
- detected_bpm_key = gr.State(value="")
711
- harmonic_recs = gr.State(value="---")
712
-
713
- # --- Global Output Components ---
714
- # These are defined once and referenced throughout the UI.
715
- vocals_output = gr.Audio(label="Vocals", scale=4, visible=False)
716
- drums_output = gr.Audio(label="Drums", scale=4, visible=False)
717
- bass_output = gr.Audio(label="Bass", scale=4, visible=False)
718
- other_output = gr.Audio(label="Other / Instrumental", scale=4, visible=False)
719
- guitar_output = gr.Audio(label="Guitar", scale=4, visible=False)
720
- piano_output = gr.Audio(label="Piano", scale=4, visible=False)
721
- download_zip_file = gr.File(label="Download Your Loop Pack", visible=False)
722
- status_log = gr.Textbox(label="Status Log", lines=10, interactive=False)
723
-
724
- # The main gallery for displaying generated loops and their waveforms.
725
- loop_gallery = gr.Gallery(
726
- label="Generated Loops Preview (Audio + Waveform Slice Map)",
727
- columns=8, object_fit="contain", height="auto", preview=True,
728
- type="numpy"
729
- )
730
 
731
- # --- Interface Layout ----
732
  gr.Markdown("# 🎵 Loop Architect (Pro Edition)")
733
  gr.Markdown("Upload any song to separate it into stems, detect musical attributes, and then slice and tag the stems for instant use in a DAW.")
734
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735
  with gr.Row():
736
- # --- Left Column: Input Controls and Analysis ---
737
  with gr.Column(scale=1):
738
- # Section 1: Stem Separation
739
- gr.Markdown("### 1. Separate Stems")
740
- audio_input = gr.Audio(type="filepath", label="Upload a Track")
741
- with gr.Row():
742
- reverb_reduction_option = gr.Checkbox(
743
- label="Dry Vocals",
744
- value=False,
745
- info="Reduce reverb on the vocal stem."
746
- )
747
- model_selector = gr.Radio(
748
- ["htdemucs (High Quality 4-Stem)", "hdemucs (Faster 4-Stem)", "htdemucs_6s (6-Stem)", "2-Stem (Vocals Only)"],
749
- label="Separation Model Control",
750
- value="htdemucs (High Quality 4-Stem)"
751
- )
752
- submit_button = gr.Button("Separate & Analyze Stems", variant="primary")
753
-
754
- # Section 2: Analysis & Transformations
755
- gr.Markdown("### 2. Analysis & Transform")
756
- # Displays detected BPM and Key after separation
757
- gr.Textbox(label="Detected Tempo & Key", value="", interactive=False, elem_id="detected_bpm_key_output", placeholder="Run Separation to Analyze...", visible=True)
758
- # Displays harmonic mixing recommendations
759
- gr.Textbox(label="Harmonic Mixing Recommendations (Camelot Wheel)", value="---", interactive=False, elem_id="harmonic_recs_output", visible=True)
760
-
761
- # Transpose Control
762
- transpose_slider = gr.Slider(
763
- minimum=-12, maximum=12, value=0, step=1,
764
- label="Transpose Loops (Semitones)",
765
- info="Shift the pitch of all slices by +/- 1 octave. (Tags the file with `Shift`)"
766
- )
 
 
 
 
 
 
 
 
 
 
 
767
 
768
- # Transient Shaping (Drums Only Controls)
769
- gr.Markdown("### Transient Shaping (Drums Only)")
770
- with gr.Group():
771
- attack_gain_slider = gr.Slider(
772
- minimum=0.5, maximum=1.5, value=1.0, step=0.1,
773
- label="Attack Gain Multiplier",
774
- info="Increase (>1.0) for punchier transients."
775
- )
776
- sustain_gain_slider = gr.Slider(
777
- minimum=0.5, maximum=1.5, value=1.0, step=0.1,
778
- label="Sustain Gain Multiplier",
779
- info="Increase (>1.0) for longer tails/reverb."
780
- )
781
-
782
- # Pan/Level Modulation Controls
783
- gr.Markdown("### Pan/Level Modulation (LFO 1.0)")
784
- with gr.Group():
785
- modulation_rate_radio = gr.Radio(
786
- ['1/2', '1/4', '1/8', '1/16'],
787
- label="Modulation Rate (Tempo Synced)",
788
- value='1/4',
789
- info="The speed of the Pan/Level pulse."
790
- )
791
- pan_depth_slider = gr.Slider(
792
- minimum=0, maximum=100, value=0, step=5,
793
- label="Pan Modulation Depth (%)",
794
- info="Creates a stereo auto-pan effect."
795
- )
796
- level_depth_slider = gr.Slider(
797
- minimum=0, maximum=100, value=0, step=5,
798
- label="Level Modulation Depth (%)",
799
- info="Creates a tempo-synced tremolo (volume pulse)."
800
- )
801
-
802
- # Filter Modulation Controls
803
- gr.Markdown("### Filter Modulation (LFO 2.0)")
804
- with gr.Group():
805
- filter_type_radio = gr.Radio(
806
- ['Low-Pass', 'High-Pass'],
807
- label="Filter Type",
808
- value='Low-Pass'
809
- )
810
- with gr.Row():
811
- filter_freq_slider = gr.Slider(
812
- minimum=20, maximum=10000, value=2000, step=10,
813
- label="Base Cutoff Frequency (Hz)",
814
- )
815
- filter_depth_slider = gr.Slider(
816
- minimum=0, maximum=5000, value=0, step=10,
817
- label="Modulation Depth (Hz)",
818
- info="0 = Static filter at Base Cutoff. Modifying any value tags the file with `MOD`."
819
- )
820
-
821
- # Section 3: Slicing Options
822
- gr.Markdown("### 3. Slicing Options")
823
- with gr.Group():
824
- # Normalization Control
825
- lufs_target_slider = gr.Slider(
826
- minimum=-18.0, maximum=-0.1, value=-3.0, step=0.1,
827
- label="Target Peak Level (dBFS)",
828
- info="Normalizes all exported loops to this peak volume."
829
- )
830
- loop_options_radio = gr.Radio(
831
- ["One-Shots (All Transients)", "4 Bar Loops", "8 Bar Loops"],
832
- label="Slice Type",
833
- value="One-Shots (All Transients)",
834
- info="Bar Loops include automatic MIDI generation for melodic stems."
835
- )
836
- with gr.Row():
837
- bpm_input = gr.Number(
838
- label="Manual BPM",
839
- value=120,
840
- minimum=40,
841
- maximum=300,
842
- info="Overrides auto-detect for loop timing."
843
- )
844
- time_sig_radio = gr.Radio(
845
- ["4/4", "3/4"],
846
- label="Time Signature",
847
- value="4/4",
848
- info="For correct bar length. (Tags the file with `44` or `34`)"
849
- )
850
- sensitivity_slider = gr.Slider(
851
- minimum=0.01, maximum=0.5, value=0.05, step=0.01,
852
- label="One-Shot Sensitivity",
853
- info="Lower values = more slices."
854
- )
855
- crossfade_ms_slider = gr.Slider(
856
- minimum=0, maximum=30, value=10, step=1,
857
- label="One-Shot Crossfade (ms)",
858
- info="Prevents clicks/pops on transient slices."
859
- )
860
-
861
- # Section 4: Create Pack & Status Display
862
- gr.Markdown("### 4. Create Pack (Rich Tagging & MIDI)")
863
- slice_all_button = gr.Button("Slice, Transform & Tag ALL Stems (Create ZIP)", variant="stop")
864
- download_zip_file # Display the download link
865
-
866
- gr.Markdown("### Status")
867
- status_log # Display the status log
868
-
869
- # --- Right Column: Separated Stems & Loop Gallery ---
870
  with gr.Column(scale=2):
871
- with gr.Accordion("Separated Stems (Preview & Slice)", open=True):
872
-
873
- # Base inputs list for individual stem slicing functions
874
- slice_inputs = [
875
- loop_options_radio, sensitivity_slider, gr.Textbox(visible=False), # Placeholder for stem name
876
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_bpm_key,
877
- pan_depth_slider, level_depth_slider, modulation_rate_radio,
878
- lufs_target_slider,
879
- attack_gain_slider, sustain_gain_slider,
880
- filter_type_radio, filter_freq_slider, filter_depth_slider
881
- ]
882
-
883
- # Wrapper function to call slice_stem_real and update the gallery
884
- def slice_and_display_wrapper(stem_data, loop_choice, sensitivity, stem_name, manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_bpm_key_str, pan_depth, level_depth, modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth):
885
- if not detected_bpm_key_str:
886
- raise gr.Error("Please run 'Separate & Analyze Stems' first.")
887
-
888
- key_str = detected_bpm_key_str.split(', ')[1] if len(detected_bpm_key_str.split(', ')) > 1 else "Unknown Key"
889
-
890
- sliced_files_and_viz, temp_dir = slice_stem_real(
891
- stem_data, loop_choice, sensitivity, stem_name,
892
- manual_bpm, time_signature, crossfade_ms, transpose_semitones, key_str,
893
- pan_depth, level_depth, modulation_rate, target_dbfs,
894
- attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
895
- )
896
-
897
- gallery_output = []
898
-
899
- if sliced_files_and_viz:
900
- # Find the first visualization for the gallery
901
- first_image_path = sliced_files_and_viz[0][1] if sliced_files_and_viz else None
902
-
903
- wav_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.wav'))
904
- midi_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.mid'))
905
-
906
- for i, (audio_file, _) in enumerate(sliced_files_and_viz):
907
- if audio_file.endswith('.wav'):
908
- label = os.path.basename(audio_file).rsplit('.', 1)[0]
909
- gallery_output.append((audio_file, label, first_image_path))
910
-
911
- log_msg = f"✅ Sliced {stem_name} into {wav_count} WAVs and generated {midi_count} MIDIs. Waveform preview generated."
912
- else:
913
- log_msg = f"No slices generated for {stem_name}."
914
-
915
- if temp_dir and os.path.exists(temp_dir):
916
- pass
917
-
918
- return {
919
- loop_gallery: gr.update(value=gallery_output),
920
- status_log: log_msg
921
- }
922
-
923
- # Helper function to dynamically update visibility of stem outputs
924
- def update_output_visibility(selected_model):
925
- is_6_stem = "6-Stem" in selected_model
926
- is_2_stem = "2-Stem" in selected_model
927
- other_label = "Other"
928
- if is_2_stem: other_label = "Instrumental (No Vocals)"
929
- elif is_6_stem: other_label = "Other (No Guitar/Piano)"
930
- return (
931
- gr.update(visible=True),
932
- gr.update(visible=True if not is_2_stem else False),
933
- gr.update(visible=True if not is_2_stem else False),
934
- gr.update(visible=True, label=other_label),
935
- gr.update(visible=is_6_stem),
936
- gr.update(visible=is_6_stem),
937
- gr.update(visible=is_6_stem),
938
- gr.update(visible=is_6_stem)
939
- )
940
-
941
- # Individual Stem Audio Outputs and their Slice Buttons
942
- with gr.Row():
943
- vocals_output # Display the global vocals_output Audio component
944
- slice_vocals_btn = gr.Button("Slice Vocals", scale=1)
945
- with gr.Row():
946
- drums_output # Display the global drums_output Audio component
947
- slice_drums_btn = gr.Button("Slice Drums", scale=1)
948
- with gr.Row():
949
- bass_output # Display the global bass_output Audio component
950
- slice_bass_btn = gr.Button("Slice Bass", scale=1)
951
- with gr.Row():
952
- other_output # Display the global other_output Audio component
953
- slice_other_btn = gr.Button("Slice Other", scale=1)
954
-
955
- # Guitar and Piano are conditionally visible (for 6-stem model)
956
- with gr.Row(visible=False) as guitar_row:
957
- guitar_output # Display the global guitar_output Audio component
958
- slice_guitar_btn = gr.Button("Slice Guitar", scale=1)
959
- with gr.Row(visible=False) as piano_row:
960
- piano_output # Display the global piano_output Audio component
961
- slice_piano_btn = gr.Button("Slice Piano", scale=1)
962
-
963
- # Main Loop Gallery Display
964
- gr.Markdown("### Sliced Loops / Samples (Preview)")
965
- loop_gallery # Display the global loop_gallery component
966
-
967
- # --- Event Listeners (UI Interactions) ---
968
-
969
- # 1. Event for when the user clicks 'Separate & Analyze Stems'
970
- submit_button.click(
971
  fn=separate_stems,
972
- inputs=[gr.File(type="filepath"), model_selector, gr.Checkbox(visible=False), reverb_reduction_option],
973
- outputs=[
974
- vocals_output, drums_output, bass_output, other_output,
975
- guitar_output, piano_output,
976
- status_log, detected_bpm_key,
977
- gr.Textbox(elem_id="detected_bpm_key_output"),
978
- gr.Textbox(elem_id="harmonic_recs_output")
979
- ]
980
  )
981
 
982
- # 2. Event for when the user changes the 'Separation Model Control'
983
- model_selector.change(
984
- fn=update_output_visibility,
985
- inputs=[model_selector],
986
- outputs=[
987
- vocals_output, drums_output, bass_output, other_output,
988
- guitar_output, piano_output,
989
- guitar_row, piano_row
990
- ]
 
 
 
 
 
 
 
 
991
  )
992
 
993
- # --- Individual Stem Slice Button Events ---
994
- slice_vocals_btn.click(fn=slice_and_display_wrapper, inputs=[vocals_output] + slice_inputs[:2] + [gr.Textbox("vocals", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
995
- slice_drums_btn.click(fn=slice_and_display_wrapper, inputs=[drums_output] + slice_inputs[:2] + [gr.Textbox("drums", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
996
- slice_bass_btn.click(fn=slice_and_display_wrapper, inputs=[bass_output] + slice_inputs[:2] + [gr.Textbox("bass", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
997
- slice_other_btn.click(fn=slice_and_display_wrapper, inputs=[other_output] + slice_inputs[:2] + [gr.Textbox("other", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
998
- slice_guitar_btn.click(fn=slice_and_display_wrapper, inputs=[guitar_output] + slice_inputs[:2] + [gr.Textbox("guitar", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
999
- slice_piano_btn.click(fn=slice_and_display_wrapper, inputs=[piano_output] + slice_inputs[:2] + [gr.Textbox("piano", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
1000
-
1001
- # 3. Event for when the user clicks 'Slice, Transform & Tag ALL Stems (Create ZIP)'
1002
- slice_all_event = slice_all_button.click(
1003
- fn=slice_all_and_zip_real,
1004
- inputs=[
1005
- vocals_output, drums_output, bass_output, other_output, guitar_output, piano_output,
1006
- loop_options_radio, sensitivity_slider,
1007
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_bpm_key,
1008
- pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
1009
- attack_gain_slider, sustain_gain_slider,
1010
- filter_type_radio, filter_freq_slider, filter_depth_slider
1011
- ],
1012
- outputs=[download_zip_file, status_log]
1013
  )
 
 
 
 
 
1
  import gradio as gr
2
+ import numpy as np
 
 
3
  import librosa
4
  import librosa.display
5
  import soundfile as sf
6
+ import os
 
 
7
  import tempfile
8
+ import zipfile
9
+ import time
10
  import matplotlib
11
+ import matplotlib.pyplot as plt
12
+ from scipy import signal
13
+ from typing import Tuple, List, Any, Optional, Dict
14
+ import shutil
15
 
16
+ # Use a non-interactive backend for Matplotlib
17
  matplotlib.use('Agg')
18
 
19
# --- CONSTANTS & DICTIONARIES ---

# Maps a detected key name (e.g. "A Min") to its Camelot-wheel code.
# Codes are 1-12 plus 'A' (minor) or 'B' (major); adjacent numbers and the
# same number in the opposite mode are considered harmonically compatible.
KEY_TO_CAMELOT = {
    "C Maj": "8B", "G Maj": "9B", "D Maj": "10B", "A Maj": "11B", "E Maj": "12B",
    "B Maj": "1B", "F# Maj": "2B", "Db Maj": "3B", "Ab Maj": "4B", "Eb Maj": "5B",
    "Bb Maj": "6B", "F Maj": "7B",
    "A Min": "8A", "E Min": "9A", "B Min": "10A", "F# Min": "11A", "C# Min": "12A",
    "G# Min": "1A", "D# Min": "2A", "Bb Min": "3A", "F Min": "4A", "C Min": "5A",
    "G Min": "6A", "D Min": "7A",
    # Enharmonic equivalents
    "Gb Maj": "2B", "Cb Maj": "7B", "A# Min": "3A", "D# Maj": "5B", "G# Maj": "4B"
}

# Hand-written reverse mapping: several key names share one Camelot code
# (enharmonics), so inverting KEY_TO_CAMELOT mechanically would be lossy.
CAMELOT_TO_KEY = {
    "8B": "C Maj", "9B": "G Maj", "10B": "D Maj", "11B": "A Maj", "12B": "E Maj",
    "1B": "B Maj", "2B": "F# Maj / Gb Maj", "3B": "Db Maj", "4B": "Ab Maj / G# Maj", "5B": "Eb Maj / D# Maj",
    "6B": "Bb Maj", "7B": "F Maj / Cb Maj",
    "8A": "A Min", "9A": "E Min", "10A": "B Min", "11A": "F# Min", "12A": "C# Min",
    "1A": "G# Min", "2A": "D# Min", "3A": "Bb Min / A# Min", "4A": "F Min", "5A": "C Min",
    "6A": "G Min", "7A": "D Min"
}

# Stem slots produced by the (mock) separator, in UI display order.
STEM_NAMES = ["vocals", "drums", "bass", "other", "guitar", "piano"]
43
+
44
+ # --- UTILITY FUNCTIONS ---
45
+
46
def freq_to_midi(freq: float) -> int:
    """Map a frequency in Hz to its nearest MIDI note number (A4 = 440 Hz = 69).

    Frequencies below ~C1 (32 Hz) — including the zero/negative values used
    as "unpitched" markers by the pitch tracker — map to 0.
    """
    if freq < 32.0:  # also rejects freq <= 0
        return 0
    semitones_from_a4 = 12 * np.log2(freq / 440.0)
    return int(round(69 + semitones_from_a4))
54
 
55
def write_midi_file(notes_list: List[Tuple[int, float, float]], bpm: float, output_path: str):
    """Write a minimal single-track (format 1) Standard MIDI File.

    Args:
        notes_list: Sequence of ``(midi_note, start_sec, duration_sec)``
            tuples. Notes outside the valid 1-127 range are skipped.
        bpm: Tempo in beats per minute; encoded as a Set Tempo meta event.
        output_path: Destination ``.mid`` path. Nothing is written when
            ``notes_list`` is empty.
    """
    if not notes_list:
        return

    def _vlq(value: int) -> bytes:
        """Encode a tick count as a MIDI variable-length quantity
        (7-bit groups, most-significant first, continuation bit on all
        but the last byte)."""
        value = max(0, int(value))
        groups = [value & 0x7F]
        value >>= 7
        while value > 0:
            groups.append((value & 0x7F) | 0x80)  # continuation bit
            value >>= 7
        return bytes(reversed(groups))

    tempo_us_per_beat = int(60000000 / bpm)
    division = 96  # Ticks per quarter note
    seconds_per_tick = 60.0 / (bpm * division)

    # Sort by start time without mutating the caller's list.
    notes = sorted(notes_list, key=lambda n: n[1])

    # --- MIDI track: Set Tempo, Time Signature (4/4), Track Name ---
    track_data = b'\x00\xFF\x51\x03' + tempo_us_per_beat.to_bytes(3, 'big')
    track_data += b'\x00\xFF\x58\x04\x04\x02\x18\x08'
    # Track-name meta event; length byte must match the string ("LoopArchitect"
    # is 13 bytes = 0x0D, not 0x0B as previously written).
    track_data += b'\x00\xFF\x03\x0DLoopArchitect'

    current_tick = 0
    for note, start_sec, duration_sec in notes:
        # Skip rests/unpitched markers and out-of-range values.
        if not 0 < note <= 127:
            continue

        target_tick = int(round(start_sec / seconds_per_tick))
        # Overlapping notes would produce a negative (unencodable) delta;
        # clamp to zero so the event simply starts immediately.
        delta_tick = max(0, target_tick - current_tick)
        current_tick += delta_tick

        duration_ticks = max(1, int(round(duration_sec / seconds_per_tick)))

        # Note On then Note Off on channel 1 (velocity 100 / 0).
        track_data += _vlq(delta_tick) + bytes([0x90, note, 100])
        track_data += _vlq(duration_ticks) + bytes([0x80, note, 0])
        current_tick += duration_ticks

    # End-of-track meta event.
    track_data += b'\x00\xFF\x2F\x00'

    # --- File header: MThd, length 6, format 1, one track, division ---
    header = (b'MThd' + (6).to_bytes(4, 'big') + (1).to_bytes(2, 'big')
              + (1).to_bytes(2, 'big') + division.to_bytes(2, 'big'))
    track_chunk = b'MTrk' + len(track_data).to_bytes(4, 'big') + track_data

    with open(output_path, 'wb') as f:
        f.write(header + track_chunk)
120
+
121
def encode_delta_time(ticks: int) -> bytes:
    """Encode a non-negative tick count as a MIDI variable-length quantity.

    Per the Standard MIDI File spec, the value is split into 7-bit groups
    emitted most-significant group first, with bit 7 (the continuation bit)
    set on every byte except the last. Spec examples:
    0 -> 00, 0x80 -> 81 00, 0x2000 -> C0 00, 0x0FFFFFFF -> FF FF FF 7F.

    The previous implementation emitted the groups in reverse order and put
    the continuation bit on the wrong bytes (e.g. 128 became 80 01 instead
    of 81 00), corrupting every multi-byte delta.
    """
    # Negative deltas are not representable; clamp defensively.
    value = max(0, int(ticks))

    # Collect 7-bit groups from least to most significant.
    groups = [value & 0x7F]
    value >>= 7
    while value > 0:
        groups.append((value & 0x7F) | 0x80)
        value >>= 7

    # Reverse so the most-significant group (with continuation bit) leads.
    return bytes(reversed(groups))
142
 
143
def get_harmonic_recommendations(key_str: str) -> str:
    """Return harmonically compatible keys per the Camelot wheel.

    For a key at wheel position N, three neighbours are compatible: the
    relative major/minor (same number, opposite letter) and the adjacent
    positions N+1 and N-1 in the same mode (wrapping 12 -> 1).
    """
    code = KEY_TO_CAMELOT.get(key_str, "N/A")
    if code == "N/A":
        return "N/A (Key not recognized or 'Unknown Key' detected.)"

    try:
        wheel_pos = int(code[:-1])
        mode = code[-1]
        relative_mode = 'B' if mode == 'A' else 'A'

        neighbours = [
            f"{wheel_pos}{relative_mode}",
            f"{(wheel_pos % 12) + 1}{mode}",
            f"{12 if wheel_pos == 1 else wheel_pos - 1}{mode}",
        ]
        labels = [
            f"{CAMELOT_TO_KEY.get(c, f'Code {c}')} ({c})" for c in neighbours
        ]
        return " | ".join(labels)
    except Exception as e:
        print(f"Error calculating recommendations: {e}")
        return "N/A (Error calculating recommendations.)"
167
 
168
+ def detect_key(y: np.ndarray, sr: int) -> str:
169
  """Analyzes the audio to determine the most likely musical key."""
170
  try:
171
  chroma = librosa.feature.chroma_stft(y=y, sr=sr)
172
  chroma_sums = np.sum(chroma, axis=1)
173
+
174
+ # Avoid division by zero if audio is silent
175
+ if np.sum(chroma_sums) == 0:
176
+ return "Unknown Key"
177
+
178
  chroma_norm = chroma_sums / np.sum(chroma_sums)
179
 
180
+ # Krumhansl-Schmuckler key-finding algorithm templates
181
  major_template = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
182
  minor_template = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
183
+
184
+ # Normalize templates
185
+ major_template /= np.sum(major_template)
186
+ minor_template /= np.sum(minor_template)
187
 
188
  pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
189
 
 
193
  minor_correlations = [np.dot(chroma_norm, np.roll(minor_template, i)) for i in range(12)]
194
  best_minor_index = np.argmax(minor_correlations)
195
 
196
+ if major_correlations[best_major_index] > minor_correlations[best_minor_index]:
197
  return pitch_classes[best_major_index] + " Maj"
198
  else:
199
  return pitch_classes[best_minor_index] + " Min"
 
201
  print(f"Key detection failed: {e}")
202
  return "Unknown Key"
203
 
204
def apply_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, pan_depth: float, level_depth: float) -> np.ndarray:
    """Apply tempo-synced LFOs for auto-pan and tremolo (volume pulse).

    Args:
        y: Audio samples, mono ``(N,)`` or stereo ``(N, 2)`` float array.
        sr: Sample rate in Hz.
        bpm: Tempo the LFO is synced to.
        rate: Note-length string: '1/2', '1/4', '1/8' or '1/16'.
        pan_depth: 0..1 depth of the auto-pan LFO (0 disables panning).
        level_depth: 0..1 depth of the tremolo LFO (0 disables tremolo).

    Returns:
        Stereo ``(N, 2)`` array with modulation applied. The caller's
        array is never modified in place (the previous version mutated
        stereo inputs directly).
    """
    if y.ndim == 0:
        return y
    if y.ndim == 1:
        y = np.stack((y, y), axis=-1)  # up-mix mono to stereo (makes a copy)
    else:
        y = y.copy()  # avoid mutating the caller's buffer

    N = len(y)
    duration_sec = N / sr

    # LFO frequency in Hz, synced to tempo: a 1/4-note pulse at 120 BPM is
    # 2 Hz (one cycle per beat); 1/8 doubles it, 1/2 halves it.
    rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
    lfo_freq_hz = (bpm / 60.0) * rate_map.get(rate, 1)

    t = np.linspace(0, duration_sec, N, endpoint=False)

    # Auto-pan: a sine LFO cross-fades gain between the two channels.
    # Simple amplitude panning (not constant-power), as before.
    if pan_depth > 0:
        pan_lfo = np.sin(2 * np.pi * lfo_freq_hz * t) * pan_depth
        y[:, 0] *= (1 - pan_lfo) / 2.0
        y[:, 1] *= (1 + pan_lfo) / 2.0

    # Tremolo: gain swings between (1 - level_depth) and 1.
    if level_depth > 0:
        level_lfo = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
        gain_multiplier = (1 - level_depth) + (level_depth * level_lfo)
        y[:, 0] *= gain_multiplier
        y[:, 1] *= gain_multiplier

    return y
252
 
253
def apply_normalization_dbfs(y: np.ndarray, target_dbfs: float) -> np.ndarray:
    """Peak-normalize ``y`` so its loudest sample sits at ``target_dbfs``.

    A non-negative target (full scale or above) and effectively-silent
    input are both returned untouched; the scaled output is clipped to
    [-1, 1] to guard against floating-point overshoot.
    """
    if target_dbfs >= 0:
        # Refuse to normalize to (or above) 0 dBFS.
        return y

    peak = np.abs(y).max()
    if peak < 1e-9:
        # Nothing audible to scale; avoids division by zero.
        return y

    scale = (10.0 ** (target_dbfs / 20.0)) / peak
    return np.clip(y * scale, -1.0, 1.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
+ def apply_filter_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, filter_type: str, freq: float, depth: float) -> np.ndarray:
272
+ """Applies a tempo-synced LFO to a 2nd order Butterworth filter cutoff frequency."""
273
+ if depth == 0 or filter_type == "None":
 
 
274
  return y
275
 
276
  # Ensure stereo for LFO application
277
  if y.ndim == 1:
278
  y = np.stack((y, y), axis=-1)
279
+ if y.ndim == 0:
280
+ return y
281
 
282
  N = len(y)
283
  duration_sec = N / sr
284
 
285
  # LFO Rate Calculation
286
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
287
+ lfo_freq_hz = (bpm / 60.0) * rate_map.get(rate, 1)
 
288
 
289
  t = np.linspace(0, duration_sec, N, endpoint=False)
290
 
 
294
  # Modulate Cutoff Frequency: Cutoff = BaseFreq + (LFO * Depth)
295
  cutoff_modulation = freq + (lfo_value * depth)
296
  # Safety clip to prevent instability
297
+ nyquist = sr / 2.0
298
+ cutoff_modulation = np.clip(cutoff_modulation, 20.0, nyquist - 100.0) # Keep away from Nyquist
299
 
300
  y_out = np.zeros_like(y)
301
+
302
+ # --- BUG FIX ---
303
+ # Was: filter_type.lower().replace('-pass', '') -> 'low' (ValueError)
304
+ # Now: filter_type.lower().replace('-pass', 'pass') -> 'lowpass' (Correct)
305
+ filter_type_b = filter_type.lower().replace('-pass', 'pass')
306
+
307
+ frame_size = 512 # Frame-based update for filter coefficients
308
+ if N < frame_size:
309
+ frame_size = N # Handle very short audio
310
 
311
  # Apply filter channel by channel
312
  for channel in range(y.shape[1]):
313
+ zi = signal.lfilter_zi(*signal.butter(2, 20.0, btype=filter_type_b, fs=sr))
314
 
315
  for frame_start in range(0, N, frame_size):
316
  frame_end = min(frame_start + frame_size, N)
317
+ if frame_start == frame_end: continue # Skip empty frames
318
+
319
  frame = y[frame_start:frame_end, channel]
320
 
321
  # Use the average LFO cutoff for the frame
322
  avg_cutoff = np.mean(cutoff_modulation[frame_start:frame_end])
323
 
324
  # Calculate 2nd order Butterworth filter coefficients
325
+ try:
326
+ b, a = signal.butter(2, avg_cutoff, btype=filter_type_b, fs=sr)
327
+ except ValueError as e:
328
+ print(f"Butterworth filter error: {e}. Using last good coefficients.")
329
+ # This can happen if avg_cutoff is bad, though we clip it.
330
+ # If it still fails, we just re-use the last good b, a.
331
+ # In the first frame, this is not robust.
332
+ if 'b' not in locals():
333
+ b, a = signal.butter(2, 20.0, btype=filter_type_b, fs=sr) # Failsafe
334
 
335
  # Apply filter to the frame, updating the state `zi`
336
+ filtered_frame, zi = signal.lfilter(b, a, frame, zi=zi)
337
  y_out[frame_start:frame_end, channel] = filtered_frame
338
 
339
  return y_out
340
 
341
def apply_crossfade(y: np.ndarray, fade_samples: int) -> np.ndarray:
    """Return a copy of ``y`` with linear fade-in/fade-out ramps applied.

    The ramp length is capped at half the clip so the two fades can never
    overlap; clips too short to fade (or a zero fade length) are returned
    unchanged.
    """
    n_fade = min(fade_samples, len(y) // 2)
    if n_fade == 0:
        return y

    ramp_up = np.linspace(0.0, 1.0, n_fade)
    ramp_down = np.linspace(1.0, 0.0, n_fade)

    faded = y.copy()
    if faded.ndim > 1:
        # Broadcast the ramps across every channel.
        faded[:n_fade] *= ramp_up[:, np.newaxis]
        faded[-n_fade:] *= ramp_down[:, np.newaxis]
    else:
        faded[:n_fade] *= ramp_up
        faded[-n_fade:] *= ramp_down
    return faded
367
+
368
def apply_envelope(y: np.ndarray, sr: int, attack_gain_db: float, sustain_gain_db: float) -> np.ndarray:
    """Shape a one-shot with a simple two-stage gain envelope.

    Gain ramps linearly from ``attack_gain_db`` to ``sustain_gain_db``
    over a fixed 10 ms attack window (capped at half the clip), then holds
    the sustain gain. Returns a new array; empty input is returned as-is.
    """
    n_samples = len(y)
    if n_samples == 0:
        return y

    # Fixed 10 ms attack, never longer than half the clip.
    n_attack = min(int(0.01 * sr), n_samples // 2)

    attack_amp = 10.0 ** (attack_gain_db / 20.0)
    sustain_amp = 10.0 ** (sustain_gain_db / 20.0)

    gain_curve = np.ones(n_samples) * sustain_amp
    if n_attack > 0:
        gain_curve[:n_attack] = np.linspace(attack_amp, sustain_amp, n_attack)

    # Broadcast the curve over channels for stereo input.
    if y.ndim == 1:
        return y * gain_curve
    return y * gain_curve[:, np.newaxis]
394
+
395
+ # --- CORE PROCESSING FUNCTIONS ---
396
+
397
def separate_stems(audio_file_path: str) -> Tuple[
    Optional[Tuple[int, np.ndarray]],
    Optional[Tuple[int, np.ndarray]],
    Optional[Tuple[int, np.ndarray]],
    Optional[Tuple[int, np.ndarray]],
    Optional[Tuple[int, np.ndarray]],
    Optional[Tuple[int, np.ndarray]],
    float, str, str
]:
    """
    Simulates stem separation and detects BPM and Key.

    Returns a Gradio Audio tuple ``(sr, int16 samples)`` for each of the
    six stems, followed by the detected BPM, the detected key string, and
    harmonic-mixing recommendations. In this demo every "stem" is simply a
    copy of the full mix; a real app would run Demucs/Spleeter here.

    Raises:
        gr.Error: If no file was uploaded or the audio cannot be processed.
    """
    if audio_file_path is None:
        raise gr.Error("No audio file uploaded!")

    try:
        # Load at native sample rate, preserving channels.
        y_orig, sr_orig = librosa.load(audio_file_path, sr=None, mono=False)

        # librosa returns (N,) for mono or (channels, N) for multichannel.
        # Normalize everything to sample-major (N, 2).
        if y_orig.ndim == 1:
            y_orig = np.stack([y_orig, y_orig], axis=-1)
        elif y_orig.shape[0] < y_orig.shape[1]:
            y_orig = y_orig.T

        # librosa.to_mono averages the *leading* axes, so it must be fed
        # channel-major data — passing (N, 2) directly (as before) would
        # collapse over time instead of channels.
        y_mono = librosa.to_mono(y_orig.T)

        # beat_track returns a scalar or a 1-element array depending on
        # librosa version; normalize before indexing.
        tempo, _ = librosa.beat.beat_track(y=y_mono, sr=sr_orig)
        tempo_arr = np.atleast_1d(tempo if tempo is not None else 0.0)
        detected_bpm = 120.0 if tempo_arr.size == 0 or tempo_arr[0] == 0 else float(np.round(tempo_arr[0]))

        detected_key = detect_key(y_mono, sr_orig)
        harmonic_recs = get_harmonic_recommendations(detected_key)

        # Convert to int16 for the Gradio Audio component; clip first so
        # out-of-range float samples cannot wrap around on the cast.
        y_int16 = (np.clip(y_orig, -1.0, 1.0) * 32767).astype(np.int16)

        # Mock separation: every stem receives a copy of the full mix.
        stems_data: Dict[str, Optional[Tuple[int, np.ndarray]]] = {
            name: (sr_orig, y_int16.copy()) for name in STEM_NAMES
        }

        return (
            stems_data["vocals"], stems_data["drums"], stems_data["bass"], stems_data["other"],
            stems_data["guitar"], stems_data["piano"],
            detected_bpm, detected_key, harmonic_recs
        )
    except Exception as e:
        print(f"Error processing audio: {e}")
        import traceback
        traceback.print_exc()
        raise gr.Error(f"Error processing audio: {str(e)}") from e
455
+
456
def generate_waveform_preview(y: np.ndarray, sr: int, stem_name: str, temp_dir: str) -> str:
    """Render a waveform PNG for a processed stem and return its file path.

    Args:
        y: Audio samples, mono ``(N,)`` or multichannel in either
            ``(N, C)`` or ``(C, N)`` layout; down-mixed for display only.
        sr: Sample rate in Hz.
        stem_name: Used in the output filename and plot title.
        temp_dir: Directory the PNG is written into.
    """
    img_path = os.path.join(temp_dir, f"{stem_name}_preview.png")

    # Down-mix for display. librosa.to_mono averages the *leading* axes,
    # so it needs channel-major input: transpose sample-major (N, C)
    # arrays first. (The previous version computed this twice, with the
    # first assignment dead and the second using the wrong axis.)
    if y.ndim > 1:
        y_display = librosa.to_mono(y.T if y.shape[0] > y.shape[1] else y)
    else:
        y_display = y

    plt.figure(figsize=(10, 3))
    librosa.display.waveshow(y_display, sr=sr, x_axis='time', color="#4a7098")
    plt.title(f"{stem_name} Waveform (Processed)")
    plt.ylabel("Amplitude")
    plt.tight_layout()
    plt.savefig(img_path)
    plt.close()

    return img_path
472
+
473
+ def slice_stem_real(
474
+ stem_audio_tuple: Optional[Tuple[int, np.ndarray]],
475
+ loop_choice: str,
476
+ sensitivity: float,
477
+ stem_name: str,
478
+ manual_bpm: float,
479
+ time_signature: str,
480
+ crossfade_ms: int,
481
+ transpose_semitones: int,
482
+ detected_key: str,
483
+ pan_depth: float,
484
+ level_depth: float,
485
+ modulation_rate: str,
486
+ target_dbfs: float,
487
+ attack_gain: float,
488
+ sustain_gain: float,
489
+ filter_type: str,
490
+ filter_freq: float,
491
+ filter_depth: float
492
+ ) -> Tuple[List[str], Optional[str]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  """
494
+ Slices a single stem and applies transformations.
495
+ Returns a list of filepaths and a path to a preview image.
496
  """
497
+ if stem_audio_tuple is None:
498
  return [], None
499
 
500
+ try:
501
+ sample_rate, y_int = stem_audio_tuple
502
+ # Convert from int16 array back to float
503
+ y = y_int.astype(np.float32) / 32767.0
504
+
505
+ if y.ndim == 0 or len(y) == 0:
506
+ return [], None
507
+
508
+ # --- 1. PITCH SHIFTING (if enabled) ---
509
+ if transpose_semitones != 0:
510
+ y = librosa.effects.pitch_shift(y, sr=sample_rate, n_steps=transpose_semitones)
511
+
512
+ # --- 2. FILTER MODULATION ---
513
+ if filter_depth > 0 and filter_type != "None":
514
+ y = apply_filter_modulation(y, sample_rate, manual_bpm, modulation_rate, filter_type, filter_freq, filter_depth)
515
+
516
+ # --- 3. PAN/LEVEL MODULATION ---
517
+ normalized_pan_depth = pan_depth / 100.0
518
+ normalized_level_depth = level_depth / 100.0
519
+ if normalized_pan_depth > 0 or normalized_level_depth > 0:
520
+ y = apply_modulation(y, sample_rate, manual_bpm, modulation_rate, normalized_pan_depth, normalized_level_depth)
521
+
522
+ # --- 4. NORMALIZATION ---
523
+ if target_dbfs < 0:
524
+ y = apply_normalization_dbfs(y, target_dbfs)
525
+
526
+ # --- 5. DETERMINE BPM & KEY ---
527
+ bpm_int = int(round(manual_bpm))
528
+ key_tag = "UnknownKey"
529
+ if detected_key != "Unknown Key":
530
+ key_tag = detected_key.replace(" ", "")
531
+ if transpose_semitones != 0:
532
+ root, mode = detected_key.split(" ")
533
+ pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
534
+ try:
535
+ current_index = pitch_classes.index(root)
536
+ new_index = (current_index + transpose_semitones) % 12
537
+ new_key_root = pitch_classes[new_index]
538
+ key_tag = f"{new_key_root}{mode}Shift"
539
+ except ValueError:
540
+ key_tag = f"Shifted{transpose_semitones}" # Fallback
541
+
542
+ # --- 6. MIDI GENERATION (Melodic Stems) ---
543
+ output_files = []
544
+ loops_dir = tempfile.mkdtemp()
545
+ is_melodic = stem_name in ["vocals", "bass", "guitar", "piano", "other"]
546
+
547
+ if is_melodic and ("Bar Loops" in loop_choice):
548
+ try:
549
+ y_mono_for_midi = librosa.to_mono(y)
550
+ # Use piptrack for pitch detection
551
+ pitches, magnitudes = librosa.piptrack(y=y_mono_for_midi, sr=sample_rate)
552
+
553
+ # Get the dominant pitch at each frame
554
+ main_pitch_line = np.zeros(pitches.shape[1])
555
+ for t in range(pitches.shape[1]):
556
+ index = magnitudes[:, t].argmax()
557
+ main_pitch_line[t] = pitches[index, t]
558
+
559
+ notes_list = []
560
+ i = 0
561
+ hop_length = 512 # Default hop for piptrack
562
+
563
+ while i < len(main_pitch_line):
564
+ current_freq = main_pitch_line[i]
565
+ current_midi = freq_to_midi(current_freq)
566
+ if current_midi == 0: # Skip silence/unpitched
567
+ i += 1
568
+ continue
569
+
570
+ # Find end of this note
571
+ j = i
572
+ while j < len(main_pitch_line) and freq_to_midi(main_pitch_line[j]) == current_midi:
573
+ j += 1
574
+
575
+ duration_frames = j - i
576
+ # Only add notes that are long enough (e.g., > 2 frames)
577
+ if duration_frames >= 2:
578
+ start_sec = librosa.frames_to_time(i, sr=sample_rate, hop_length=hop_length)
579
+ duration_sec = librosa.frames_to_time(duration_frames, sr=sample_rate, hop_length=hop_length)
580
+ notes_list.append((current_midi, start_sec, duration_sec))
581
+
582
+ i = j
583
+
584
+ if notes_list:
585
+ full_stem_midi_path = os.path.join(loops_dir, f"{stem_name}_MELODY_{key_tag}_{bpm_int}BPM.mid")
586
+ write_midi_file(notes_list, manual_bpm, full_stem_midi_path)
587
+ output_files.append(full_stem_midi_path)
588
+
589
+ except Exception as e:
590
+ print(f"MIDI generation failed for {stem_name}: {e}")
591
+
592
+ # --- 7. CALCULATE TIMING & SLICING ---
593
+ beats_per_bar = 4
594
+ if time_signature == "3/4":
595
+ beats_per_bar = 3
596
+
597
+ if "Bar Loops" in loop_choice:
598
+ bars = int(loop_choice.split(" ")[0])
599
+ loop_type_tag = f"{bars}Bar"
600
+ loop_duration_samples = int((60.0 / manual_bpm * beats_per_bar * bars) * sample_rate)
601
+ fade_samples = int((crossfade_ms / 1000.0) * sample_rate)
602
+
603
+ if loop_duration_samples > 0 and len(y) > loop_duration_samples:
604
+ num_loops = len(y) // loop_duration_samples
605
+ for i in range(min(num_loops, 16)): # Limit to 16 loops
606
+ start_sample = i * loop_duration_samples
607
+ end_sample = min(start_sample + loop_duration_samples, len(y))
608
+ slice_data = y[start_sample:end_sample]
609
+
610
+ # Apply crossfade
611
+ slice_data = apply_crossfade(slice_data, fade_samples)
612
+
613
+ filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_int}BPM.wav")
614
+ sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
615
+ output_files.append(filename)
616
+
617
+ elif "One-Shots" in loop_choice:
618
+ loop_type_tag = "OneShot"
619
+ y_mono_for_onsets = librosa.to_mono(y)
620
+
621
+ # IMPLEMENTED: Use sensitivity to find onsets
622
+ # Adjust 'wait' and 'delta' based on sensitivity (0-1)
623
+ # Higher sensitivity = lower delta, shorter wait
624
+ delta = 0.5 * (1.0 - sensitivity) # 0.0 -> 0.5
625
+ wait_sec = 0.1 * (1.0 - sensitivity) # 0.0 -> 0.1
626
+ wait_samples = int(wait_sec * sample_rate / 512) # in frames
627
+
628
+ onset_frames = librosa.onset.onset_detect(
629
+ y=y_mono_for_onsets,
630
+ sr=sample_rate,
631
+ units='frames',
632
+ backtrack=True,
633
+ delta=delta,
634
+ wait=wait_samples
635
+ )
636
+ onset_samples = librosa.frames_to_samples(onset_frames)
637
+
638
+ # Add end of file as the last "onset"
639
+ onset_samples = np.append(onset_samples, len(y))
640
+
641
+ for i in range(min(len(onset_samples) - 1, 40)): # Limit to 40 slices
642
+ start_sample = onset_samples[i]
643
+ end_sample = onset_samples[i+1]
644
  slice_data = y[start_sample:end_sample]
645
+
646
+ if len(slice_data) < 100: # Skip tiny fragments
647
+ continue
648
 
649
+ # IMPLEMENTED: Apply attack/sustain envelope
650
+ slice_data = apply_envelope(slice_data, sample_rate, attack_gain, sustain_gain)
651
+
652
+ # Apply short fade-out to prevent clicks
653
+ slice_data = apply_crossfade(slice_data, int(0.005 * sample_rate)) # 5ms fade
654
 
655
+ filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_int}BPM.wav")
 
656
  sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
657
+ output_files.append(filename)
 
 
 
 
 
 
658
 
659
+ # --- 8. VISUALIZATION GENERATION ---
660
+ img_path = generate_waveform_preview(y, sample_rate, stem_name, loops_dir)
661
 
662
+ # Clean up the temp dir for the *next* run
663
+ # Gradio File components need the files to exist, so we don't delete loops_dir yet
664
+ # A more robust solution would use gr.TempFile() or manage cleanup
665
+
666
+ return output_files, img_path
667
 
668
+ except Exception as e:
669
+ print(f"Error processing stem {stem_name}: {e}")
670
+ import traceback
671
+ traceback.print_exc()
672
+ return [], None # Return empty on error
673
+
674
+
675
def slice_all_and_zip(
    vocals_audio: Optional[Tuple[int, np.ndarray]],
    drums_audio: Optional[Tuple[int, np.ndarray]],
    bass_audio: Optional[Tuple[int, np.ndarray]],
    other_audio: Optional[Tuple[int, np.ndarray]],
    guitar_audio: Optional[Tuple[int, np.ndarray]],
    piano_audio: Optional[Tuple[int, np.ndarray]],
    loop_choice: str,
    sensitivity: float,
    manual_bpm: float,
    time_signature: str,
    crossfade_ms: int,
    transpose_semitones: int,
    detected_key: str,
    pan_depth: float,
    level_depth: float,
    modulation_rate: str,
    target_dbfs: float,
    attack_gain: float,
    sustain_gain: float,
    filter_type: str,
    filter_freq: float,
    filter_depth: float,
    # FIX: Gradio only auto-injects a progress tracker when the parameter
    # defaults to a gr.Progress() instance. Without the default, the
    # "SLICE ALL" click (which wires 22 inputs for 23 parameters) raises a
    # missing-positional-argument error.
    progress: gr.Progress = gr.Progress(),
) -> Optional[str]:
    """Slice every available stem with the shared settings and ZIP the results.

    Stems that are None (not yet separated) are skipped. Each remaining stem
    is passed through ``slice_stem_real``; all produced WAV/MID files are
    packed into one ZIP, grouped by extension (``WAV/``, ``MID/``), and the
    individual slice files are deleted afterwards.

    NOTE(review): ``Optional``/``Tuple`` must be imported from ``typing`` at
    module top — confirm the import exists, it is not visible near the other
    imports.

    Returns:
        Path to the generated ZIP file, or raises ``gr.Error`` on failure.

    Raises:
        gr.Error: when no stems are available or packaging fails.
    """
    try:
        stems_to_process = {
            "vocals": vocals_audio, "drums": drums_audio, "bass": bass_audio,
            "other": other_audio, "guitar": guitar_audio, "piano": piano_audio
        }

        # Filter out None stems (only process what the separator produced).
        valid_stems = {name: data for name, data in stems_to_process.items() if data is not None}

        if not valid_stems:
            raise gr.Error("No stems to process! Please separate stems first.")

        # Create temporary directory for all outputs.
        # NOTE(review): this directory is never removed; Gradio must be able
        # to serve the ZIP after return, so cleanup would need a deferred
        # mechanism (e.g. gr.DeleteCache) — confirm desired lifetime.
        temp_dir = tempfile.mkdtemp()
        zip_path = os.path.join(temp_dir, "Loop_Architect_Pack.zip")

        all_sliced_files = []

        # Use progress tracker
        progress(0, desc="Starting...")

        num_stems = len(valid_stems)
        for i, (name, data) in enumerate(valid_stems.items()):
            progress((i+1)/num_stems, desc=f"Slicing {name}...")

            # Process stem.
            # NOTE(review): this positional order (data, loop_choice,
            # sensitivity, name, ...) differs from the per-stem button
            # wiring, which passes (data, name, loop_choice, sensitivity,
            # ...) — confirm against slice_stem_real's signature; one of
            # the two call sites is likely passing arguments shifted.
            sliced_files, _ = slice_stem_real(
                data, loop_choice, sensitivity, name,
                manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_key,
                pan_depth, level_depth, modulation_rate, target_dbfs,
                attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
            )
            all_sliced_files.extend(sliced_files)

        progress(0.9, desc="Zipping files...")
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            for file_path in all_sliced_files:
                if not file_path: continue
                # Create a sane folder structure in the ZIP (WAV/ and MID/
                # subfolders, keyed off the file extension).
                file_type = os.path.splitext(file_path)[1][1:].upper()  # WAV or MID
                arcname = os.path.join(file_type, os.path.basename(file_path))
                zf.write(file_path, arcname)

        progress(1.0, desc="Done!")

        # Clean up individual slice files (but not the zip dir) — the ZIP
        # already holds copies, so the loose files are no longer needed.
        for file_path in all_sliced_files:
            if file_path and os.path.exists(file_path):
                os.remove(file_path)

        return zip_path

    except Exception as e:
        print(f"Error creating ZIP: {e}")
        import traceback
        traceback.print_exc()
        raise gr.Error(f"Error creating ZIP: {str(e)}")
758
+
759
+ # --- GRADIO INTERFACE ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
 
761
# Top-level UI construction: builds the Gradio Blocks app and wires all events.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="red")) as demo:
    gr.Markdown("# 🎵 Loop Architect (Pro Edition)")
    gr.Markdown("Upload any song to separate it into stems, detect musical attributes, and then slice and tag the stems for instant use in a DAW.")

    # State variables holding the analysis results from stem separation.
    detected_bpm_state = gr.State(value=120.0)
    detected_key_state = gr.State(value="Unknown Key")
    harmonic_recs_state = gr.State(value="---")

    # Outputs for each stem (as gr.Audio tuples). Hidden: they only carry
    # (sample_rate, ndarray) data between the separation step and slicing.
    vocals_audio = gr.Audio(visible=False, type="numpy")
    drums_audio = gr.Audio(visible=False, type="numpy")
    bass_audio = gr.Audio(visible=False, type="numpy")
    other_audio = gr.Audio(visible=False, type="numpy")
    guitar_audio = gr.Audio(visible=False, type="numpy")
    piano_audio = gr.Audio(visible=False, type="numpy")

    # Order matters: this list is indexed by the per-stem tab loop below and
    # passed positionally to separate_stems / slice_all_and_zip.
    stem_audio_outputs = [vocals_audio, drums_audio, bass_audio, other_audio, guitar_audio, piano_audio]

    with gr.Row():
        with gr.Column(scale=1):
            # --- INPUT COLUMN ---
            gr.Markdown("### 1. Upload & Analyze")
            audio_input = gr.Audio(label="Upload Song", type="filepath")
            separate_button = gr.Button("Separate Stems & Analyze", variant="primary")

            with gr.Accordion("Global Musical Settings", open=True):
                manual_bpm_input = gr.Number(label="BPM", value=120.0, step=0.1, interactive=True)
                key_display = gr.Textbox(label="Detected Key", value="Unknown Key", interactive=False)
                harmonic_recs_display = gr.Textbox(label="Harmonic Recommendations", value="---", interactive=False)
                transpose_semitones = gr.Slider(label="Transpose (Semitones)", minimum=-12, maximum=12, value=0, step=1)
                time_signature = gr.Radio(label="Time Signature", choices=["4/4", "3/4"], value="4/4")

            with gr.Accordion("Global Slicing Settings", open=True):
                loop_choice = gr.Radio(label="Loop Type", choices=["1 Bar Loops", "2 Bar Loops", "4 Bar Loops", "One-Shots"], value="4 Bar Loops")
                sensitivity = gr.Slider(label="One-Shot Sensitivity", minimum=0.0, maximum=1.0, value=0.5, info="Higher = more slices")
                crossfade_ms = gr.Slider(label="Loop Crossfade (ms)", minimum=0, maximum=50, value=10, step=1)

            with gr.Accordion("Global FX Settings", open=False):
                target_dbfs = gr.Slider(label="Normalize Peak to (dBFS)", minimum=-24.0, maximum=-0.0, value=-1.0, step=0.1, info="-0.0 = Off")

                gr.Markdown("---")
                gr.Markdown("**LFO Modulation (Pan/Level)**")
                modulation_rate = gr.Radio(label="Modulation Rate", choices=["1/2", "1/4", "1/8", "1/16"], value="1/4")
                pan_depth = gr.Slider(label="Pan Depth (%)", minimum=0, maximum=100, value=0, step=1)
                level_depth = gr.Slider(label="Level Depth (%)", minimum=0, maximum=100, value=0, step=1, info="Tremolo effect")

                gr.Markdown("---")
                gr.Markdown("**LFO Modulation (Filter)**")
                filter_type = gr.Radio(label="Filter Type", choices=["None", "Low-pass", "High-pass"], value="None")
                filter_freq = gr.Slider(label="Filter Base Freq (Hz)", minimum=20, maximum=10000, value=5000, step=100)
                filter_depth = gr.Slider(label="Filter Mod Depth (Hz)", minimum=0, maximum=10000, value=0, step=100, info="LFO amount")

                gr.Markdown("---")
                gr.Markdown("**One-Shot Shaping**")
                attack_gain = gr.Slider(label="Attack Gain (dB)", minimum=-24.0, maximum=6.0, value=0.0, step=0.5, info="Gain at start of transient")
                sustain_gain = gr.Slider(label="Sustain Gain (dB)", minimum=-24.0, maximum=6.0, value=0.0, step=0.5, info="Gain for note body")

            gr.Markdown("### 3. Generate Pack")
            slice_all_button = gr.Button("SLICE ALL & GENERATE PACK", variant="primary")
            zip_file_output = gr.File(label="Download Your Loop Pack")

        with gr.Column(scale=2):
            # --- OUTPUT COLUMN ---
            gr.Markdown("### 2. Review Stems & Slices")
            with gr.Tabs():
                # Create a tab for each stem.
                # NOTE(review): STEM_NAMES is defined elsewhere in the file;
                # assumed to hold the six stem names in the same order as
                # stem_audio_outputs — confirm.
                for i, name in enumerate(STEM_NAMES):
                    with gr.Tab(name.capitalize()):
                        with gr.Row():
                            # The (hidden) audio output for this stem
                            stem_audio_component = stem_audio_outputs[i]

                            # Visible components
                            preview_image = gr.Image(label="Processed Waveform", interactive=False)
                            slice_files = gr.Files(label="Generated Slices & MIDI", interactive=False)

                        # Add a button to slice just this one stem
                        slice_one_button = gr.Button(f"Slice This {name.capitalize()} Stem")

                        # Gather all global settings as inputs.
                        # NOTE: rebuilt identically on every loop iteration;
                        # the last copy is reused by the SLICE ALL wiring below.
                        all_settings = [
                            loop_choice, sensitivity, manual_bpm_input, time_signature, crossfade_ms,
                            transpose_semitones, detected_key_state, pan_depth, level_depth, modulation_rate,
                            target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
                        ]

                        # Wire up the "Slice One" button.
                        # NOTE(review): this passes (audio, stem_name, *settings)
                        # positionally, while slice_all_and_zip calls
                        # slice_stem_real as (audio, loop_choice, sensitivity,
                        # stem_name, ...) — the two orders disagree; confirm
                        # which matches slice_stem_real's signature.
                        slice_one_button.click(
                            fn=slice_stem_real,
                            inputs=[stem_audio_component, gr.State(value=name)] + all_settings,
                            outputs=[slice_files, preview_image]
                        )

    # --- EVENT LISTENERS ---

    # 1. "Separate Stems" button click: produces the six hidden stem audios
    #    plus the detected BPM / key / harmonic-recommendation states.
    separate_button.click(
        fn=separate_stems,
        inputs=[audio_input],
        outputs=stem_audio_outputs + [detected_bpm_state, detected_key_state, harmonic_recs_state]
    )

    # 2. When BPM state changes, update the visible input box.
    # NOTE(review): .change() on gr.State requires a recent Gradio release
    # (State change events were not supported in early 4.x) — confirm the
    # pinned Gradio version supports this.
    detected_bpm_state.change(
        fn=lambda x: x,
        inputs=[detected_bpm_state],
        outputs=[manual_bpm_input]
    )

    # 3. When Key state changes, update the visible text boxes
    detected_key_state.change(
        fn=lambda x: x,
        inputs=[detected_key_state],
        outputs=[key_display]
    )
    harmonic_recs_state.change(
        fn=lambda x: x,
        inputs=[harmonic_recs_state],
        outputs=[harmonic_recs_display]
    )

    # 4. "SLICE ALL" button click: 6 stem audios + 16 settings = 22 inputs.
    # NOTE(review): slice_all_and_zip also declares a trailing `progress`
    # parameter that is NOT wired here — confirm Gradio auto-injects it
    # (that requires the parameter to default to a gr.Progress() instance).
    slice_all_button.click(
        fn=slice_all_and_zip,
        inputs=stem_audio_outputs + all_settings,
        outputs=[zip_file_output]
    )
889
+
890
+
891
# Script entry point: launch the Gradio server when run directly
# (debug=True enables verbose logging and in-UI error traces).
if __name__ == "__main__":
    demo.launch(debug=True)