SaltProphet committed on
Commit
1232e06
·
verified ·
1 Parent(s): 4c35f75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +220 -221
app.py CHANGED
@@ -40,7 +40,7 @@ def freq_to_midi(freq):
40
  # Note: Using a simple threshold for frequency detection to minimize noise
41
  if freq < 40: # Ignore frequencies below 40 Hz (well under C2, approx 65 Hz)
42
  return 0
43
-
44
  return int(round(69 + 12 * np.log2(freq / 440.0)))
45
 
46
  def write_midi_file(notes_list, bpm, output_path):
@@ -54,18 +54,18 @@ def write_midi_file(notes_list, bpm, output_path):
54
  tempo_us_per_beat = int(60000000 / bpm)
55
  division = 96 # Ticks per quarter note
56
  seconds_per_tick = 60.0 / (bpm * division)
57
-
58
  midi_data = [
59
  # Track 0: Tempo and Time Sig
60
  struct.pack('>L', 0) + b'\xFF\x51\x03' + struct.pack('>L', tempo_us_per_beat)[1:], # Set Tempo
61
  struct.pack('>L', 0) + b'\xFF\x58\x04\x04\x02\x18\x08', # Time Signature (4/4)
62
  ]
63
-
64
  # Sort notes by start time
65
  notes_list.sort(key=lambda x: x[1])
66
 
67
  current_tick = 0
68
-
69
  for note, start_sec, duration_sec in notes_list:
70
  if note == 0: continue
71
 
@@ -73,18 +73,18 @@ def write_midi_file(notes_list, bpm, output_path):
73
  target_tick = int(start_sec / seconds_per_tick)
74
  delta_tick = target_tick - current_tick
75
  current_tick = target_tick
76
-
77
  # Note On event (Channel 1, Velocity 100)
78
- note_on = b'\x90' + struct.pack('>B', note) + b'\x64'
79
  midi_data.append(encode_delta_time(delta_tick) + note_on)
80
-
81
  # Note Off event (Channel 1, Velocity 0)
82
  duration_ticks = int(duration_sec / seconds_per_tick)
83
- note_off = b'\x80' + struct.pack('>B', note) + b'\x00'
84
-
85
  midi_data.append(encode_delta_time(duration_ticks) + note_off)
86
  current_tick += duration_ticks
87
-
88
  track_data = b"".join(midi_data)
89
 
90
  # 1. Header Chunk (MThd)
@@ -100,13 +100,13 @@ def write_midi_file(notes_list, bpm, output_path):
100
 
101
  # Mapping for standard key to Camelot Code
102
  KEY_TO_CAMELOT = {
103
- "C Maj": "8B", "G Maj": "9B", "D Maj": "10B", "A Maj": "11B", "E Maj": "12B",
104
- "B Maj": "1B", "F# Maj": "2B", "Db Maj": "3B", "Ab Maj": "4B", "Eb Maj": "5B",
105
- "Bb Maj": "6B", "F Maj": "7B",
106
- "A Min": "8A", "E Min": "9A", "B Min": "10A", "F# Min": "11A", "C# Min": "12A",
107
- "G# Min": "1A", "D# Min": "2A", "Bb Min": "3A", "F Min": "4A", "C Min": "5A",
108
  "G Min": "6A", "D Min": "7A",
109
- "Gb Maj": "2B", "Cb Maj": "7B", "A# Min": "3A", "D# Maj": "11B", "G# Maj": "3B"
110
  }
111
 
112
  def get_harmonic_recommendations(key_str):
@@ -115,7 +115,7 @@ def get_harmonic_recommendations(key_str):
115
  if code == "N/A": return "N/A (Key not recognized or 'Unknown Key' detected.)"
116
  try:
117
  num = int(code[:-1])
118
- mode = code[-1]
119
  opposite_mode = 'B' if mode == 'A' else 'A'
120
  num_plus_one = (num % 12) + 1
121
  num_minus_one = 12 if num == 1 else num - 1
@@ -132,19 +132,19 @@ def detect_key(y, sr):
132
  chroma = librosa.feature.chroma_stft(y=y, sr=sr)
133
  chroma_sums = np.sum(chroma, axis=1)
134
  chroma_norm = chroma_sums / np.sum(chroma_sums)
135
-
136
  major_template = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
137
  minor_template = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
138
-
139
  pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
140
 
141
  major_correlations = [np.dot(chroma_norm, np.roll(major_template, i)) for i in range(12)]
142
  best_major_index = np.argmax(major_correlations)
143
-
144
  minor_correlations = [np.dot(chroma_norm, np.roll(minor_template, i)) for i in range(12)]
145
  best_minor_index = np.argmax(minor_correlations)
146
-
147
- if major_correlations[best_major_index] > minor_correlations[best_minor_index]:
148
  return pitch_classes[best_major_index] + " Maj"
149
  else:
150
  return pitch_classes[best_minor_index] + " Min"
@@ -156,24 +156,24 @@ def reduce_reverb(audio_path, log_history):
156
  # Reverb reduction logic... (unchanged)
157
  try:
158
  y, sr = librosa.load(audio_path, sr=None)
159
-
160
  n_fft = 2048
161
  hop_length = 512
162
-
163
  D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
164
  mag = np.abs(D)
165
  phase = np.angle(D)
166
-
167
  ambient_floor = np.percentile(mag, 10, axis=1, keepdims=True)
168
-
169
  freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
170
- dampening_factor = np.clip(1 - (freqs / 1000.0), 0.2, 1.0)[:, np.newaxis]
171
- reduction_strength = 0.5
172
-
173
  ambient_reduction = ambient_floor * reduction_strength * dampening_factor
174
-
175
  mag_processed = np.maximum(mag - ambient_reduction, 0)
176
-
177
  D_processed = mag_processed * np.exp(1j * phase)
178
  y_processed = librosa.istft(D_processed, length=len(y), dtype=y.dtype, hop_length=hop_length)
179
 
@@ -191,10 +191,10 @@ def apply_crossfade(audio_chunk, sr, fade_ms):
191
  """Applies a simple Hanning crossfade (fade-in/fade-out) to an audio chunk. (unchanged)"""
192
  if fade_ms <= 0 or len(audio_chunk) == 0:
193
  return audio_chunk
194
-
195
  fade_samples = int(sr * (fade_ms / 1000.0))
196
  n_samples = len(audio_chunk)
197
-
198
  if n_samples < 2 * fade_samples:
199
  fade_samples = n_samples // 2
200
  if fade_samples == 0: return audio_chunk
@@ -202,17 +202,17 @@ def apply_crossfade(audio_chunk, sr, fade_ms):
202
  window = np.hanning(2 * fade_samples)
203
  fade_in_window = window[:fade_samples]
204
  fade_out_window = window[fade_samples:]
205
-
206
  chunk_copy = audio_chunk.copy()
207
 
208
  if fade_samples > 0:
209
- if chunk_copy.ndim == 1:
210
  chunk_copy[:fade_samples] *= fade_in_window
211
  chunk_copy[-fade_samples:] *= fade_out_window
212
  else:
213
  chunk_copy[:fade_samples, :] *= fade_in_window[:, np.newaxis]
214
  chunk_copy[-fade_samples:] *= fade_out_window[:, np.newaxis]
215
-
216
  return chunk_copy
217
 
218
  def generate_waveform_preview(y, sr, slice_samples, stem_name, loop_type, temp_dir):
@@ -220,23 +220,23 @@ def generate_waveform_preview(y, sr, slice_samples, stem_name, loop_type, temp_d
220
  img_path = os.path.join(temp_dir, f"{stem_name}_preview_{int(time.time() * 1000)}.png")
221
 
222
  plt.figure(figsize=(10, 1.5))
223
-
224
  y_display = librosa.to_mono(y.T) if y.ndim > 1 else y
225
-
226
  librosa.display.waveshow(y_display, sr=sr, x_axis='time', color="#4a7098")
227
-
228
  slice_times = librosa.samples_to_time(slice_samples, sr=sr)
229
  for t in slice_times:
230
  plt.axvline(x=t, color='red', linestyle='--', linewidth=1, alpha=0.7)
231
-
232
  plt.title(f"{stem_name} Slices ({loop_type})", fontsize=10)
233
- plt.xlabel("")
234
  plt.yticks([])
235
  plt.tight_layout(pad=0)
236
-
237
  plt.savefig(img_path)
238
  plt.close()
239
-
240
  return img_path
241
 
242
  def apply_modulation(y, sr, bpm, rate, pan_depth, level_depth):
@@ -250,22 +250,22 @@ def apply_modulation(y, sr, bpm, rate, pan_depth, level_depth):
250
  duration_sec = N / sr
251
 
252
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
253
- beats_per_measure = rate_map.get(rate, 1)
254
  lfo_freq_hz = (bpm / 60.0) * (beats_per_measure / 4.0)
255
 
256
  t = np.linspace(0, duration_sec, N, endpoint=False)
257
-
258
  # Panning LFO
259
  if pan_depth > 0:
260
  pan_lfo = np.sin(2 * np.pi * lfo_freq_hz * t) * pan_depth
261
  L_mod = (1 - pan_lfo) / 2.0
262
  R_mod = (1 + pan_lfo) / 2.0
263
- y[:, 0] *= L_mod
264
  y[:, 1] *= R_mod
265
-
266
  # Level LFO (Tremolo)
267
  if level_depth > 0:
268
- level_lfo = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
269
  gain_multiplier = (1 - level_depth) + (level_depth * level_lfo)
270
  y[:, 0] *= gain_multiplier
271
  y[:, 1] *= gain_multiplier
@@ -276,14 +276,14 @@ def apply_normalization_dbfs(y, target_dbfs):
276
  """Applies peak normalization to match a target dBFS value. (unchanged)"""
277
  if target_dbfs >= 0:
278
  return y
279
-
280
  current_peak_amp = np.max(np.abs(y))
281
  target_peak_amp = 10**(target_dbfs / 20.0)
282
-
283
- if current_peak_amp > 1e-6:
284
  gain = target_peak_amp / current_peak_amp
285
  y_normalized = y * gain
286
- y_normalized = np.clip(y_normalized, -1.0, 1.0)
287
  return y_normalized
288
  else:
289
  return y
@@ -299,36 +299,36 @@ def apply_transient_shaping(y, sr, attack_gain, sustain_gain):
299
  y_mono = y
300
  else:
301
  y_mono = librosa.to_mono(y.T)
302
-
303
  rectified = np.abs(y_mono)
304
-
305
  # Filter/Window sizes based on typical transient/sustain times
306
  attack_samples = int(sr * 0.005) # 5ms
307
  sustain_samples = int(sr * 0.05) # 50ms
308
-
309
  # Envelope followers
310
  attack_window = windows.hann(attack_samples * 2); attack_window /= np.sum(attack_window)
311
  sustain_window = windows.hann(sustain_samples * 2); sustain_window /= np.sum(sustain_window)
312
-
313
  fast_envelope = convolve(rectified, attack_window, mode='same')
314
  slow_envelope = convolve(rectified, sustain_window, mode='same')
315
-
316
  # Ratio: how transient the signal is (fast envelope >> slow envelope)
317
- ratio = np.clip(fast_envelope / (slow_envelope + 1e-6), 1.0, 5.0)
318
-
319
  # Normalized ratio (0 to 1, where 1 is strong transient)
320
  # 4.0 comes from the ratio clip max 5.0 - min 1.0
321
- normalized_ratio = (ratio - 1.0) / 4.0
322
-
323
  # Gain is a blend between sustain_gain and attack_gain based on the normalized_ratio
324
  gain_envelope = (sustain_gain * (1 - normalized_ratio)) + (attack_gain * normalized_ratio)
325
-
326
  # Apply Gain
327
  if y.ndim == 1:
328
  y_out = y * gain_envelope
329
  else:
330
  y_out = y * gain_envelope[:, np.newaxis]
331
-
332
  return y_out
333
 
334
  # --- NEW UTILITY: FILTER MODULATION ---
@@ -339,51 +339,51 @@ def apply_filter_modulation(y, sr, bpm, rate, filter_type, freq, depth):
339
  """
340
  if depth == 0:
341
  return y
342
-
343
  # Ensure stereo for LFO application
344
  if y.ndim == 1:
345
  y = np.stack((y, y), axis=-1)
346
-
347
  N = len(y)
348
  duration_sec = N / sr
349
 
350
  # LFO Rate Calculation
351
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
352
- beats_per_measure = rate_map.get(rate, 1)
353
  lfo_freq_hz = (bpm / 60.0) * (beats_per_measure / 4.0)
354
 
355
  t = np.linspace(0, duration_sec, N, endpoint=False)
356
-
357
  # LFO: ranges from 0 to 1
358
- lfo_value = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
359
-
360
  # Modulate Cutoff Frequency: Cutoff = BaseFreq + (LFO * Depth)
361
  cutoff_modulation = freq + (lfo_value * depth)
362
  # Safety clip to prevent instability
363
- cutoff_modulation = np.clip(cutoff_modulation, 20.0, sr / 2.0 - 100)
364
 
365
  y_out = np.zeros_like(y)
366
  filter_type_b = filter_type.lower().replace('-pass', '')
367
  frame_size = 512 # Frame-based update for filter coefficients
368
-
369
  # Apply filter channel by channel
370
  for channel in range(y.shape[1]):
371
  zi = np.zeros(2) # Initial filter state (2nd order filter)
372
-
373
  for frame_start in range(0, N, frame_size):
374
  frame_end = min(frame_start + frame_size, N)
375
  frame = y[frame_start:frame_end, channel]
376
-
377
  # Use the average LFO cutoff for the frame
378
  avg_cutoff = np.mean(cutoff_modulation[frame_start:frame_end])
379
-
380
  # Calculate 2nd order Butterworth filter coefficients
381
  b, a = butter(2, avg_cutoff, btype=filter_type_b, fs=sr)
382
-
383
  # Apply filter to the frame, updating the state `zi`
384
  filtered_frame, zi = lfilter(b, a, frame, zi=zi)
385
  y_out[frame_start:frame_end, channel] = filtered_frame
386
-
387
  return y_out
388
 
389
  # --- CORE SEPARATION FUNCTION (Truncated for brevity, focus on analysis) ---
@@ -410,13 +410,13 @@ async def separate_stems(audio_file_path, selected_model, denoise_enabled, rever
410
  tempo, _ = librosa.beat.beat_track(y=y_mono, sr=sr_orig)
411
  detected_bpm = 120 if tempo is None or tempo == 0 else int(np.round(tempo).item())
412
  detected_key = detect_key(y_mono, sr_orig)
413
-
414
  harmonic_recommendations = get_harmonic_recommendations(detected_key)
415
 
416
  status_string = f"Detected Tempo: {detected_bpm} BPM. Detected Key: {detected_key}. Proceeding with separation...\n"
417
  log_history += status_string
418
- yield {
419
- status_log: log_history,
420
  detected_bpm_key: f"{detected_bpm} BPM, {detected_key}",
421
  harmonic_recs: harmonic_recommendations
422
  }
@@ -424,12 +424,12 @@ async def separate_stems(audio_file_path, selected_model, denoise_enabled, rever
424
  except Exception as e:
425
  log_history += f"⚠️ WARNING: Analysis failed ({e}). Defaulting to 120 BPM, Unknown Key.\n"
426
  harmonic_recommendations = "N/A (Analysis failed)"
427
- yield {
428
- status_log: log_history,
429
  detected_bpm_key: f"{detected_bpm} BPM, {detected_key}",
430
  harmonic_recs: harmonic_recommendations
431
  }
432
-
433
  # --- Truncated Demucs Output Placeholder (For Demonstrating Success) ---
434
  # Mock file paths and generation for demo purposes
435
  vocals_path = "separated/htdemucs/input/vocals.wav"
@@ -438,16 +438,16 @@ async def separate_stems(audio_file_path, selected_model, denoise_enabled, rever
438
  other_path = "separated/htdemucs/input/other.wav"
439
  guitar_path = None
440
  piano_path = None
441
-
442
  mock_sr = 44100
443
- mock_duration = 10
444
  mock_y = np.random.uniform(low=-0.5, high=0.5, size=(mock_sr * mock_duration, 2)).astype(np.float32)
445
  os.makedirs(os.path.dirname(vocals_path), exist_ok=True)
446
  sf.write(vocals_path, mock_y, mock_sr)
447
  sf.write(drums_path, mock_y, mock_sr)
448
  sf.write(bass_path, mock_y, mock_sr)
449
  sf.write(other_path, mock_y, mock_sr)
450
-
451
  # --- End Truncated Demucs Output Placeholder ---
452
 
453
  log_history += "✅ Stem separation complete! (Mock files generated for demo)\n"
@@ -469,57 +469,53 @@ async def separate_stems(audio_file_path, selected_model, denoise_enabled, rever
469
 
470
  def slice_stem_real(stem_audio_data, loop_choice, sensitivity, stem_name, manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_key, pan_depth, level_depth, modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth):
471
  """
472
- Slices a single stem, applies pitch shift, modulation, normalization,
473
  transient shaping, filter LFO, and generates MIDI/visualizations.
474
  """
475
  if stem_audio_data is None:
476
  return [], None
477
-
478
  sample_rate, y_int = stem_audio_data
479
- y = librosa.util.buf_to_float(y_int, dtype=np.float32)
480
-
481
  if y.ndim == 0: return [], None
482
-
483
  y_mono = librosa.to_mono(y.T) if y.ndim > 1 else y
484
-
485
  # --- 1. PITCH SHIFTING (if enabled) ---
486
  if transpose_semitones != 0:
487
  y_shifted = librosa.effects.pitch_shift(y, sr=sample_rate, n_steps=transpose_semitones)
488
  y = y_shifted
489
-
490
  # --- 2. TRANSIENT SHAPING (Drums Only) ---
491
  if stem_name == "drums" and (attack_gain != 1.0 or sustain_gain != 1.0):
492
  y = apply_transient_shaping(y, sample_rate, attack_gain, sustain_gain)
493
-
494
  # --- 3. FILTER MODULATION (LFO 2.0) ---
495
  if filter_depth > 0:
496
  y = apply_filter_modulation(y, sample_rate, manual_bpm, modulation_rate, filter_type, filter_freq, filter_depth)
497
-
498
  # --- 4. PAN/LEVEL MODULATION ---
499
  normalized_pan_depth = pan_depth / 100.0
500
  normalized_level_depth = level_depth / 100.0
501
-
502
  if normalized_pan_depth > 0 or normalized_level_depth > 0:
503
  y = apply_modulation(y, sample_rate, manual_bpm, modulation_rate, normalized_pan_depth, normalized_level_depth)
504
-
505
  # Check if any modification was applied for the RICH METADATA TAGGING
506
  is_modified = (
507
- transpose_semitones != 0 or
508
- normalized_pan_depth > 0 or normalized_level_depth > 0 or
509
- filter_depth > 0 or
510
  stem_name == "drums" and (attack_gain != 1.0 or sustain_gain != 1.0)
511
  )
512
  mod_tag = "_MOD" if is_modified else "" # Rich Tagging: Modification flag
513
 
514
- # --- 5. NORMALIZATION ---
515
- if target_dbfs < 0:
516
- y = apply_normalization_dbfs(y, target_dbfs)
517
-
518
  # --- 6. DETERMINE BPM & KEY (FOR RICH TAGGING) ---
519
  bpm_int = int(manual_bpm)
520
  bpm_tag = f"{bpm_int}BPM" # Rich Tagging: BPM
521
  time_sig_tag = time_signature.replace("/", "") # Rich Tagging: Time Signature
522
-
523
  key_tag = detected_key.replace(" ", "")
524
  if transpose_semitones != 0:
525
  root = detected_key.split(" ")[0]
@@ -537,8 +533,8 @@ def slice_stem_real(stem_audio_data, loop_choice, sensitivity, stem_name, manual
537
  output_files = []
538
  loops_dir = tempfile.mkdtemp()
539
  is_melodic = stem_name in ["vocals", "bass", "guitar", "piano", "other"]
540
-
541
- if is_melodic and ("Bar Loops" in loop_choice):
542
  try:
543
  # Use piptrack for a more robust (though less accurate than pYIN) general pitch detection
544
  pitches, magnitudes = librosa.piptrack(y=y_mono, sr=sample_rate)
@@ -546,33 +542,33 @@ def slice_stem_real(stem_audio_data, loop_choice, sensitivity, stem_name, manual
546
  for t in range(pitches.shape[1]):
547
  index = magnitudes[:, t].argmax()
548
  main_pitch_line[t] = pitches[index, t]
549
-
550
  notes_list = []
551
-
552
  # Simple note segmentation by pitch change
553
  i = 0
554
  while i < len(main_pitch_line):
555
  current_freq = main_pitch_line[i]
556
  current_midi = freq_to_midi(current_freq)
557
-
558
  j = i
559
  while j < len(main_pitch_line) and freq_to_midi(main_pitch_line[j]) == current_midi:
560
  j += 1
561
-
562
  duration_frames = j - i
563
-
564
  # Minimum duration filter to ignore extremely short notes
565
- if current_midi != 0 and duration_frames >= 2:
566
  start_sec = librosa.frames_to_time(i, sr=sample_rate, hop_length=512)
567
  duration_sec = librosa.frames_to_time(duration_frames, sr=sample_rate, hop_length=512)
568
  notes_list.append((current_midi, start_sec, duration_sec))
569
-
570
  i = j
571
-
572
  full_stem_midi_path = os.path.join(loops_dir, f"{stem_name}_MELODY_{key_tag}_{bpm_tag}{mod_tag}.mid")
573
  write_midi_file(notes_list, manual_bpm, full_stem_midi_path)
574
  output_files.append((full_stem_midi_path, loops_dir))
575
-
576
  except Exception as e:
577
  print(f"MIDI generation failed for {stem_name}: {e}")
578
  # Do not stop execution
@@ -580,59 +576,59 @@ def slice_stem_real(stem_audio_data, loop_choice, sensitivity, stem_name, manual
580
  # --- 8. CALCULATE TIMING & SLICING ---
581
  beats_per_bar = 4
582
  if time_signature == "3/4": beats_per_bar = 3
583
-
584
  slice_samples = []
585
-
586
  if "Bar Loops" in loop_choice:
587
  bars = int(loop_choice.split(" ")[0])
588
  loop_type_tag = f"{bars}Bar"
589
  loop_duration_samples = int((60.0 / bpm_int * beats_per_bar * bars) * sample_rate)
590
 
591
  if loop_duration_samples == 0: return [], loops_dir
592
-
593
  num_loops = len(y) // loop_duration_samples
594
-
595
  for i in range(num_loops):
596
  start_sample = i * loop_duration_samples
597
  end_sample = start_sample + loop_duration_samples
598
  slice_data = y[start_sample:end_sample]
599
-
600
  # Rich Metadata/Tagging via Filename Enhancement
601
  filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_tag}_{time_sig_tag}{mod_tag}.wav")
602
  sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
603
- output_files.append((filename, loops_dir))
604
  slice_samples.append(start_sample)
605
-
606
  elif "One-Shots" in loop_choice:
607
  loop_type_tag = "OneShot"
608
  onset_frames = librosa.onset.onset_detect(
609
- y=y_mono, sr=sample_rate, delta=sensitivity,
610
  wait=1, pre_avg=1, post_avg=1, post_max=1, units='frames'
611
  )
612
  onset_samples = librosa.frames_to_samples(onset_frames)
613
-
614
  if len(onset_samples) > 0:
615
  num_onsets = len(onset_samples)
616
- slice_samples = list(onset_samples)
617
-
618
  for i, start_sample in enumerate(onset_samples):
619
  end_sample = onset_samples[i+1] if i+1 < num_onsets else len(y)
620
- slice_data = y[start_sample:end_sample]
621
-
622
  if crossfade_ms > 0:
623
  slice_data = apply_crossfade(slice_data, sample_rate, crossfade_ms)
624
-
625
  # Rich Metadata/Tagging via Filename Enhancement
626
  filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_tag}{mod_tag}.wav")
627
  sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
628
  output_files.append((filename, loops_dir))
629
-
630
  if not output_files:
631
  return [], loops_dir
632
 
633
  # --- 9. VISUALIZATION GENERATION ---
634
  img_path = generate_waveform_preview(y, sample_rate, slice_samples, stem_name, loop_choice, loops_dir)
635
-
636
  # Return audio file path and the single visualization map
637
  return [(audio_file, img_path) for audio_file, _ in output_files if audio_file.endswith(('.wav', '.mid'))], loops_dir
638
 
@@ -644,17 +640,17 @@ async def slice_all_and_zip_real(vocals, drums, bass, other, guitar, piano, loop
644
  """
645
  log_history = "Starting batch slice...\n"
646
  yield { status_log: log_history }
647
- await asyncio.sleep(0.1)
648
 
649
  parts = detected_bpm_key_str.split(', ')
650
  key_str = parts[1] if len(parts) > 1 else "Unknown Key"
651
 
652
  stems_to_process = {
653
- "vocals": vocals, "drums": drums, "bass": bass,
654
  "other": other, "guitar": guitar, "piano": piano
655
  }
656
  zip_path = "Loop_Architect_Pack.zip"
657
-
658
  num_stems = sum(1 for data in stems_to_process.values() if data is not None)
659
  if num_stems == 0:
660
  raise gr.Error("No stems to process! Please separate stems first.")
@@ -669,17 +665,17 @@ async def slice_all_and_zip_real(vocals, drums, bass, other, guitar, piano, loop
669
  yield { status_log: log_history }
670
 
671
  sliced_files_and_viz, temp_dir = slice_stem_real(
672
- (data[0], data[1]), loop_choice, sensitivity, name,
673
  manual_bpm, time_signature, crossfade_ms, transpose_semitones, key_str,
674
  pan_depth, level_depth, modulation_rate, target_dbfs,
675
  attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
676
  )
677
-
678
  if sliced_files_and_viz:
679
  # Write both WAV and MIDI files to the ZIP
680
  midi_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.mid'))
681
  wav_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.wav'))
682
-
683
  log_history += f"Generated {wav_count} WAV slices and {midi_count} MIDI files for {name}.\n"
684
  all_temp_dirs.append(temp_dir)
685
  for loop_file, _ in sliced_files_and_viz:
@@ -689,7 +685,7 @@ async def slice_all_and_zip_real(vocals, drums, bass, other, guitar, piano, loop
689
  zf.write(loop_file, arcname)
690
  else:
691
  log_history += f"No slices generated for {name}.\n"
692
-
693
  processed_count += 1
694
  yield { status_log: log_history }
695
 
@@ -709,11 +705,13 @@ async def slice_all_and_zip_real(vocals, drums, bass, other, guitar, piano, loop
709
 
710
  # --- Create the full Gradio Interface ---
711
  with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red")) as demo:
712
- # State variables
 
713
  detected_bpm_key = gr.State(value="")
714
  harmonic_recs = gr.State(value="---")
715
-
716
- # Define outputs globally
 
717
  vocals_output = gr.Audio(label="Vocals", scale=4, visible=False)
718
  drums_output = gr.Audio(label="Drums", scale=4, visible=False)
719
  bass_output = gr.Audio(label="Bass", scale=4, visible=False)
@@ -723,51 +721,51 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red"))
723
  download_zip_file = gr.File(label="Download Your Loop Pack", visible=False)
724
  status_log = gr.Textbox(label="Status Log", lines=10, interactive=False)
725
 
 
726
  loop_gallery = gr.Gallery(
727
  label="Generated Loops Preview (Audio + Waveform Slice Map)",
728
  columns=8, object_fit="contain", height="auto", preview=True,
729
  type="numpy"
730
  )
731
-
 
732
  gr.Markdown("# 🎵 Loop Architect (Pro Edition)")
733
  gr.Markdown("Upload any song to separate it into stems, detect musical attributes, and then slice and tag the stems for instant use in a DAW.")
734
-
735
  with gr.Row():
 
736
  with gr.Column(scale=1):
 
737
  gr.Markdown("### 1. Separate Stems")
738
  audio_input = gr.Audio(type="filepath", label="Upload a Track")
739
-
740
  with gr.Row():
741
  reverb_reduction_option = gr.Checkbox(
742
  label="Dry Vocals",
743
  value=False,
744
  info="Reduce reverb on the vocal stem."
745
  )
746
-
747
  model_selector = gr.Radio(
748
  ["htdemucs (High Quality 4-Stem)", "hdemucs (Faster 4-Stem)", "htdemucs_6s (6-Stem)", "2-Stem (Vocals Only)"],
749
  label="Separation Model Control",
750
  value="htdemucs (High Quality 4-Stem)"
751
  )
752
-
753
  submit_button = gr.Button("Separate & Analyze Stems", variant="primary")
754
 
 
755
  gr.Markdown("### 2. Analysis & Transform")
756
-
757
- # Key/BPM Display
758
- gr.Textbox(label="Detected Tempo & Key", value="", interactive=False, elem_id="detected_bpm_key_output", placeholder="Run Separation to Analyze...", render=True, visible=True)
759
-
760
- # Harmonic Recommendations Display
761
- gr.Textbox(label="Harmonic Mixing Recommendations (Camelot Wheel)", value="---", interactive=False, elem_id="harmonic_recs_output", render=True, visible=True)
762
-
763
  # Transpose Control
764
  transpose_slider = gr.Slider(
765
  minimum=-12, maximum=12, value=0, step=1,
766
  label="Transpose Loops (Semitones)",
767
  info="Shift the pitch of all slices by +/- 1 octave. (Tags the file with `Shift`)"
768
  )
769
-
770
- # --- TRANSIENT SHAPING ---
771
  gr.Markdown("### Transient Shaping (Drums Only)")
772
  with gr.Group():
773
  attack_gain_slider = gr.Slider(
@@ -781,9 +779,9 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red"))
781
  info="Increase (>1.0) for longer tails/reverb."
782
  )
783
 
784
- # --- MODULATION (PAN/LEVEL) ---
785
  gr.Markdown("### Pan/Level Modulation (LFO 1.0)")
786
- with gr.Group():
787
  modulation_rate_radio = gr.Radio(
788
  ['1/2', '1/4', '1/8', '1/16'],
789
  label="Modulation Rate (Tempo Synced)",
@@ -800,10 +798,10 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red"))
800
  label="Level Modulation Depth (%)",
801
  info="Creates a tempo-synced tremolo (volume pulse)."
802
  )
803
-
804
- # --- FILTER MODULATION ---
805
  gr.Markdown("### Filter Modulation (LFO 2.0)")
806
- with gr.Group():
807
  filter_type_radio = gr.Radio(
808
  ['Low-Pass', 'High-Pass'],
809
  label="Filter Type",
@@ -820,7 +818,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red"))
820
  info="0 = Static filter at Base Cutoff. Modifying any value tags the file with `MOD`."
821
  )
822
 
823
-
824
  gr.Markdown("### 3. Slicing Options")
825
  with gr.Group():
826
  # Normalization Control
@@ -829,58 +827,56 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red"))
829
  label="Target Peak Level (dBFS)",
830
  info="Normalizes all exported loops to this peak volume."
831
  )
832
-
833
  loop_options_radio = gr.Radio(
834
  ["One-Shots (All Transients)", "4 Bar Loops", "8 Bar Loops"],
835
  label="Slice Type",
836
  value="One-Shots (All Transients)",
837
  info="Bar Loops include automatic MIDI generation for melodic stems."
838
  )
839
-
840
  with gr.Row():
841
  bpm_input = gr.Number(
842
- label="Manual BPM",
843
- value=120,
844
- minimum=40,
845
  maximum=300,
846
  info="Overrides auto-detect for loop timing."
847
  )
848
  time_sig_radio = gr.Radio(
849
- ["4/4", "3/4"],
850
- label="Time Signature",
851
  value="4/4",
852
  info="For correct bar length. (Tags the file with `44` or `34`)"
853
  )
854
-
855
  sensitivity_slider = gr.Slider(
856
  minimum=0.01, maximum=0.5, value=0.05, step=0.01,
857
  label="One-Shot Sensitivity",
858
  info="Lower values = more slices."
859
  )
860
-
861
  crossfade_ms_slider = gr.Slider(
862
  minimum=0, maximum=30, value=10, step=1,
863
  label="One-Shot Crossfade (ms)",
864
  info="Prevents clicks/pops on transient slices."
865
  )
866
 
 
867
  gr.Markdown("### 4. Create Pack (Rich Tagging & MIDI)")
868
  slice_all_button = gr.Button("Slice, Transform & Tag ALL Stems (Create ZIP)", variant="stop")
869
- download_zip_file
870
 
871
  gr.Markdown("### Status")
872
- status_log.render()
873
 
 
874
  with gr.Column(scale=2):
875
  with gr.Accordion("Separated Stems (Preview & Slice)", open=True):
876
-
877
- # Base slice inputs - ALL inputs for slice_stem_real
878
  slice_inputs = [
879
  loop_options_radio, sensitivity_slider, gr.Textbox(visible=False), # Placeholder for stem name
880
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_bpm_key,
881
- pan_depth_slider, level_depth_slider, modulation_rate_radio,
882
- lufs_target_slider,
883
- attack_gain_slider, sustain_gain_slider,
884
  filter_type_radio, filter_freq_slider, filter_depth_slider
885
  ]
886
 
@@ -888,42 +884,43 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red"))
888
  def slice_and_display_wrapper(stem_data, loop_choice, sensitivity, stem_name, manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_bpm_key_str, pan_depth, level_depth, modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth):
889
  if not detected_bpm_key_str:
890
  raise gr.Error("Please run 'Separate & Analyze Stems' first.")
891
-
892
  key_str = detected_bpm_key_str.split(', ')[1] if len(detected_bpm_key_str.split(', ')) > 1 else "Unknown Key"
893
-
894
  sliced_files_and_viz, temp_dir = slice_stem_real(
895
  stem_data, loop_choice, sensitivity, stem_name,
896
  manual_bpm, time_signature, crossfade_ms, transpose_semitones, key_str,
897
  pan_depth, level_depth, modulation_rate, target_dbfs,
898
  attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
899
  )
900
-
901
  gallery_output = []
902
-
903
  if sliced_files_and_viz:
904
  # Find the first visualization for the gallery
905
  first_image_path = sliced_files_and_viz[0][1] if sliced_files_and_viz else None
906
-
907
  wav_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.wav'))
908
  midi_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.mid'))
909
 
910
  for i, (audio_file, _) in enumerate(sliced_files_and_viz):
911
  if audio_file.endswith('.wav'):
912
  label = os.path.basename(audio_file).rsplit('.', 1)[0]
913
- gallery_output.append((audio_file, label, first_image_path))
914
-
915
  log_msg = f"✅ Sliced {stem_name} into {wav_count} WAVs and generated {midi_count} MIDIs. Waveform preview generated."
916
  else:
917
  log_msg = f"No slices generated for {stem_name}."
918
 
919
  if temp_dir and os.path.exists(temp_dir):
920
- pass
921
-
922
  return {
923
  loop_gallery: gr.update(value=gallery_output),
924
  status_log: log_msg
925
  }
926
 
 
927
  def update_output_visibility(selected_model):
928
  is_6_stem = "6-Stem" in selected_model
929
  is_2_stem = "2-Stem" in selected_model
@@ -931,66 +928,69 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red"))
931
  if is_2_stem: other_label = "Instrumental (No Vocals)"
932
  elif is_6_stem: other_label = "Other (No Guitar/Piano)"
933
  return (
934
- gr.update(visible=True),
935
- gr.update(visible=True if not is_2_stem else False),
936
- gr.update(visible=True if not is_2_stem else False),
937
  gr.update(visible=True, label=other_label),
938
- gr.update(visible=is_6_stem),
939
- gr.update(visible=is_6_stem),
940
- gr.update(visible=is_6_stem),
941
- gr.update(visible=is_6_stem)
942
  )
943
 
 
944
  with gr.Row():
945
- vocals_output.render()
946
  slice_vocals_btn = gr.Button("Slice Vocals", scale=1)
947
  with gr.Row():
948
- drums_output.render()
949
  slice_drums_btn = gr.Button("Slice Drums", scale=1)
950
  with gr.Row():
951
- bass_output.render()
952
  slice_bass_btn = gr.Button("Slice Bass", scale=1)
953
  with gr.Row():
954
- other_output.render()
955
  slice_other_btn = gr.Button("Slice Other", scale=1)
956
-
 
957
  with gr.Row(visible=False) as guitar_row:
958
- guitar_output.render()
959
  slice_guitar_btn = gr.Button("Slice Guitar", scale=1)
960
  with gr.Row(visible=False) as piano_row:
961
- piano_output.render()
962
  slice_piano_btn = gr.Button("Slice Piano", scale=1)
963
-
 
964
  gr.Markdown("### Sliced Loops / Samples (Preview)")
965
- loop_gallery.render()
966
 
967
- # --- MAIN EVENT LISTENERS ---
968
-
969
- # 1. Separation Event
970
  submit_button.click(
971
  fn=separate_stems,
972
- inputs=[gr.File(type="filepath"), model_selector, gr.Checkbox(visible=False), reverb_reduction_option],
973
  outputs=[
974
  vocals_output, drums_output, bass_output, other_output,
975
  guitar_output, piano_output,
976
- status_log, detected_bpm_key,
977
- gr.Textbox(elem_id="detected_bpm_key_output"),
978
- gr.Textbox(elem_id="harmonic_recs_output")
979
  ]
980
  )
981
 
982
- # 2. UI Visibility Event
983
  model_selector.change(
984
  fn=update_output_visibility,
985
  inputs=[model_selector],
986
  outputs=[
987
- vocals_output, drums_output, bass_output, other_output,
988
- guitar_output, piano_output,
989
  guitar_row, piano_row
990
  ]
991
  )
992
-
993
- # --- Single Slice Button Events ---
994
  slice_vocals_btn.click(fn=slice_and_display_wrapper, inputs=[vocals_output] + slice_inputs[:2] + [gr.Textbox("vocals", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
995
  slice_drums_btn.click(fn=slice_and_display_wrapper, inputs=[drums_output] + slice_inputs[:2] + [gr.Textbox("drums", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
996
  slice_bass_btn.click(fn=slice_and_display_wrapper, inputs=[bass_output] + slice_inputs[:2] + [gr.Textbox("bass", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
@@ -998,17 +998,16 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red"))
998
  slice_guitar_btn.click(fn=slice_and_display_wrapper, inputs=[guitar_output] + slice_inputs[:2] + [gr.Textbox("guitar", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
999
  slice_piano_btn.click(fn=slice_and_display_wrapper, inputs=[piano_output] + slice_inputs[:2] + [gr.Textbox("piano", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
1000
 
1001
- # 3. Slice All Event
1002
  slice_all_event = slice_all_button.click(
1003
  fn=slice_all_and_zip_real,
1004
  inputs=[
1005
- vocals_output, drums_output, bass_output, other_output, guitar_output, piano_output,
1006
- loop_options_radio, sensitivity_slider,
1007
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_bpm_key,
1008
  pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
1009
- attack_gain_slider, sustain_gain_slider,
1010
  filter_type_radio, filter_freq_slider, filter_depth_slider
1011
  ],
1012
  outputs=[download_zip_file, status_log]
1013
  )
1014
-
 
40
  # Note: Using a simple threshold for frequency detection to minimize noise
41
  if freq < 40: # Ignore frequencies below C2 (approx 65Hz)
42
  return 0
43
+
44
  return int(round(69 + 12 * np.log2(freq / 440.0)))
45
 
46
  def write_midi_file(notes_list, bpm, output_path):
 
54
  tempo_us_per_beat = int(60000000 / bpm)
55
  division = 96 # Ticks per quarter note
56
  seconds_per_tick = 60.0 / (bpm * division)
57
+
58
  midi_data = [
59
  # Track 0: Tempo and Time Sig
60
  struct.pack('>L', 0) + b'\xFF\x51\x03' + struct.pack('>L', tempo_us_per_beat)[1:], # Set Tempo
61
  struct.pack('>L', 0) + b'\xFF\x58\x04\x04\x02\x18\x08', # Time Signature (4/4)
62
  ]
63
+
64
  # Sort notes by start time
65
  notes_list.sort(key=lambda x: x[1])
66
 
67
  current_tick = 0
68
+
69
  for note, start_sec, duration_sec in notes_list:
70
  if note == 0: continue
71
 
 
73
  target_tick = int(start_sec / seconds_per_tick)
74
  delta_tick = target_tick - current_tick
75
  current_tick = target_tick
76
+
77
  # Note On event (Channel 1, Velocity 100)
78
+ note_on = b'\x90' + struct.pack('>B', note) + b'\x64'
79
  midi_data.append(encode_delta_time(delta_tick) + note_on)
80
+
81
  # Note Off event (Channel 1, Velocity 0)
82
  duration_ticks = int(duration_sec / seconds_per_tick)
83
+ note_off = b'\x80' + struct.pack('>B', note) + b'\x00'
84
+
85
  midi_data.append(encode_delta_time(duration_ticks) + note_off)
86
  current_tick += duration_ticks
87
+
88
  track_data = b"".join(midi_data)
89
 
90
  # 1. Header Chunk (MThd)
 
100
 
101
  # Mapping for standard key to Camelot Code
102
  KEY_TO_CAMELOT = {
103
+ "C Maj": "8B", "G Maj": "9B", "D Maj": "10B", "A Maj": "11B", "E Maj": "12B",
104
+ "B Maj": "1B", "F# Maj": "2B", "Db Maj": "3B", "Ab Maj": "4B", "Eb Maj": "5B",
105
+ "Bb Maj": "6B", "F Maj": "7B",
106
+ "A Min": "8A", "E Min": "9A", "B Min": "10A", "F# Min": "11A", "C# Min": "12A",
107
+ "G# Min": "1A", "D# Min": "2A", "Bb Min": "3A", "F Min": "4A", "C Min": "5A",
108
  "G Min": "6A", "D Min": "7A",
109
+ "Gb Maj": "2B", "Cb Maj": "7B", "A# Min": "3A", "D# Maj": "11B", "G# Maj": "3B"
110
  }
111
 
112
  def get_harmonic_recommendations(key_str):
 
115
  if code == "N/A": return "N/A (Key not recognized or 'Unknown Key' detected.)"
116
  try:
117
  num = int(code[:-1])
118
+ mode = code[-1]
119
  opposite_mode = 'B' if mode == 'A' else 'A'
120
  num_plus_one = (num % 12) + 1
121
  num_minus_one = 12 if num == 1 else num - 1
 
132
  chroma = librosa.feature.chroma_stft(y=y, sr=sr)
133
  chroma_sums = np.sum(chroma, axis=1)
134
  chroma_norm = chroma_sums / np.sum(chroma_sums)
135
+
136
  major_template = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
137
  minor_template = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
138
+
139
  pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
140
 
141
  major_correlations = [np.dot(chroma_norm, np.roll(major_template, i)) for i in range(12)]
142
  best_major_index = np.argmax(major_correlations)
143
+
144
  minor_correlations = [np.dot(chroma_norm, np.roll(minor_template, i)) for i in range(12)]
145
  best_minor_index = np.argmax(minor_correlations)
146
+
147
+ if major_correlations[best_major_index] > minor_correlations[best_major_index]:
148
  return pitch_classes[best_major_index] + " Maj"
149
  else:
150
  return pitch_classes[best_minor_index] + " Min"
 
156
  # Reverb reduction logic... (unchanged)
157
  try:
158
  y, sr = librosa.load(audio_path, sr=None)
159
+
160
  n_fft = 2048
161
  hop_length = 512
162
+
163
  D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
164
  mag = np.abs(D)
165
  phase = np.angle(D)
166
+
167
  ambient_floor = np.percentile(mag, 10, axis=1, keepdims=True)
168
+
169
  freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
170
+ dampening_factor = np.clip(1 - (freqs / 1000.0), 0.2, 1.0)[:, np.newaxis]
171
+ reduction_strength = 0.5
172
+
173
  ambient_reduction = ambient_floor * reduction_strength * dampening_factor
174
+
175
  mag_processed = np.maximum(mag - ambient_reduction, 0)
176
+
177
  D_processed = mag_processed * np.exp(1j * phase)
178
  y_processed = librosa.istft(D_processed, length=len(y), dtype=y.dtype, hop_length=hop_length)
179
 
 
191
  """Applies a simple Hanning crossfade (fade-in/fade-out) to an audio chunk. (unchanged)"""
192
  if fade_ms <= 0 or len(audio_chunk) == 0:
193
  return audio_chunk
194
+
195
  fade_samples = int(sr * (fade_ms / 1000.0))
196
  n_samples = len(audio_chunk)
197
+
198
  if n_samples < 2 * fade_samples:
199
  fade_samples = n_samples // 2
200
  if fade_samples == 0: return audio_chunk
 
202
  window = np.hanning(2 * fade_samples)
203
  fade_in_window = window[:fade_samples]
204
  fade_out_window = window[fade_samples:]
205
+
206
  chunk_copy = audio_chunk.copy()
207
 
208
  if fade_samples > 0:
209
+ if chunk_copy.ndim == 1:
210
  chunk_copy[:fade_samples] *= fade_in_window
211
  chunk_copy[-fade_samples:] *= fade_out_window
212
  else:
213
  chunk_copy[:fade_samples, :] *= fade_in_window[:, np.newaxis]
214
  chunk_copy[-fade_samples:] *= fade_out_window[:, np.newaxis]
215
+
216
  return chunk_copy
217
 
218
  def generate_waveform_preview(y, sr, slice_samples, stem_name, loop_type, temp_dir):
 
220
  img_path = os.path.join(temp_dir, f"{stem_name}_preview_{int(time.time() * 1000)}.png")
221
 
222
  plt.figure(figsize=(10, 1.5))
223
+
224
  y_display = librosa.to_mono(y.T) if y.ndim > 1 else y
225
+
226
  librosa.display.waveshow(y_display, sr=sr, x_axis='time', color="#4a7098")
227
+
228
  slice_times = librosa.samples_to_time(slice_samples, sr=sr)
229
  for t in slice_times:
230
  plt.axvline(x=t, color='red', linestyle='--', linewidth=1, alpha=0.7)
231
+
232
  plt.title(f"{stem_name} Slices ({loop_type})", fontsize=10)
233
+ plt.xlabel("")
234
  plt.yticks([])
235
  plt.tight_layout(pad=0)
236
+
237
  plt.savefig(img_path)
238
  plt.close()
239
+
240
  return img_path
241
 
242
  def apply_modulation(y, sr, bpm, rate, pan_depth, level_depth):
 
250
  duration_sec = N / sr
251
 
252
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
253
+ beats_per_measure = rate_map.get(rate, 1)
254
  lfo_freq_hz = (bpm / 60.0) * (beats_per_measure / 4.0)
255
 
256
  t = np.linspace(0, duration_sec, N, endpoint=False)
257
+
258
  # Panning LFO
259
  if pan_depth > 0:
260
  pan_lfo = np.sin(2 * np.pi * lfo_freq_hz * t) * pan_depth
261
  L_mod = (1 - pan_lfo) / 2.0
262
  R_mod = (1 + pan_lfo) / 2.0
263
+ y[:, 0] *= L_mod
264
  y[:, 1] *= R_mod
265
+
266
  # Level LFO (Tremolo)
267
  if level_depth > 0:
268
+ level_lfo = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
269
  gain_multiplier = (1 - level_depth) + (level_depth * level_lfo)
270
  y[:, 0] *= gain_multiplier
271
  y[:, 1] *= gain_multiplier
 
276
  """Applies peak normalization to match a target dBFS value. (unchanged)"""
277
  if target_dbfs >= 0:
278
  return y
279
+
280
  current_peak_amp = np.max(np.abs(y))
281
  target_peak_amp = 10**(target_dbfs / 20.0)
282
+
283
+ if current_peak_amp > 1e-6:
284
  gain = target_peak_amp / current_peak_amp
285
  y_normalized = y * gain
286
+ y_normalized = np.clip(y_normalized, -1.0, 1.0)
287
  return y_normalized
288
  else:
289
  return y
 
299
  y_mono = y
300
  else:
301
  y_mono = librosa.to_mono(y.T)
302
+
303
  rectified = np.abs(y_mono)
304
+
305
  # Filter/Window sizes based on typical transient/sustain times
306
  attack_samples = int(sr * 0.005) # 5ms
307
  sustain_samples = int(sr * 0.05) # 50ms
308
+
309
  # Envelope followers
310
  attack_window = windows.hann(attack_samples * 2); attack_window /= np.sum(attack_window)
311
  sustain_window = windows.hann(sustain_samples * 2); sustain_window /= np.sum(sustain_window)
312
+
313
  fast_envelope = convolve(rectified, attack_window, mode='same')
314
  slow_envelope = convolve(rectified, sustain_window, mode='same')
315
+
316
  # Ratio: how transient the signal is (fast envelope >> slow envelope)
317
+ ratio = np.clip(fast_envelope / (slow_envelope + 1e-6), 1.0, 5.0)
318
+
319
  # Normalized ratio (0 to 1, where 1 is strong transient)
320
  # 4.0 comes from the ratio clip max 5.0 - min 1.0
321
+ normalized_ratio = (ratio - 1.0) / 4.0
322
+
323
  # Gain is a blend between sustain_gain and attack_gain based on the normalized_ratio
324
  gain_envelope = (sustain_gain * (1 - normalized_ratio)) + (attack_gain * normalized_ratio)
325
+
326
  # Apply Gain
327
  if y.ndim == 1:
328
  y_out = y * gain_envelope
329
  else:
330
  y_out = y * gain_envelope[:, np.newaxis]
331
+
332
  return y_out
333
 
334
  # --- NEW UTILITY: FILTER MODULATION ---
 
339
  """
340
  if depth == 0:
341
  return y
342
+
343
  # Ensure stereo for LFO application
344
  if y.ndim == 1:
345
  y = np.stack((y, y), axis=-1)
346
+
347
  N = len(y)
348
  duration_sec = N / sr
349
 
350
  # LFO Rate Calculation
351
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
352
+ beats_per_measure = rate_map.get(rate, 1)
353
  lfo_freq_hz = (bpm / 60.0) * (beats_per_measure / 4.0)
354
 
355
  t = np.linspace(0, duration_sec, N, endpoint=False)
356
+
357
  # LFO: ranges from 0 to 1
358
+ lfo_value = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
359
+
360
  # Modulate Cutoff Frequency: Cutoff = BaseFreq + (LFO * Depth)
361
  cutoff_modulation = freq + (lfo_value * depth)
362
  # Safety clip to prevent instability
363
+ cutoff_modulation = np.clip(cutoff_modulation, 20.0, sr / 2.0 - 100)
364
 
365
  y_out = np.zeros_like(y)
366
  filter_type_b = filter_type.lower().replace('-pass', '')
367
  frame_size = 512 # Frame-based update for filter coefficients
368
+
369
  # Apply filter channel by channel
370
  for channel in range(y.shape[1]):
371
  zi = np.zeros(2) # Initial filter state (2nd order filter)
372
+
373
  for frame_start in range(0, N, frame_size):
374
  frame_end = min(frame_start + frame_size, N)
375
  frame = y[frame_start:frame_end, channel]
376
+
377
  # Use the average LFO cutoff for the frame
378
  avg_cutoff = np.mean(cutoff_modulation[frame_start:frame_end])
379
+
380
  # Calculate 2nd order Butterworth filter coefficients
381
  b, a = butter(2, avg_cutoff, btype=filter_type_b, fs=sr)
382
+
383
  # Apply filter to the frame, updating the state `zi`
384
  filtered_frame, zi = lfilter(b, a, frame, zi=zi)
385
  y_out[frame_start:frame_end, channel] = filtered_frame
386
+
387
  return y_out
388
 
389
  # --- CORE SEPARATION FUNCTION (Truncated for brevity, focus on analysis) ---
 
410
  tempo, _ = librosa.beat.beat_track(y=y_mono, sr=sr_orig)
411
  detected_bpm = 120 if tempo is None or tempo == 0 else int(np.round(tempo).item())
412
  detected_key = detect_key(y_mono, sr_orig)
413
+
414
  harmonic_recommendations = get_harmonic_recommendations(detected_key)
415
 
416
  status_string = f"Detected Tempo: {detected_bpm} BPM. Detected Key: {detected_key}. Proceeding with separation...\n"
417
  log_history += status_string
418
+ yield {
419
+ status_log: log_history,
420
  detected_bpm_key: f"{detected_bpm} BPM, {detected_key}",
421
  harmonic_recs: harmonic_recommendations
422
  }
 
424
  except Exception as e:
425
  log_history += f"⚠️ WARNING: Analysis failed ({e}). Defaulting to 120 BPM, Unknown Key.\n"
426
  harmonic_recommendations = "N/A (Analysis failed)"
427
+ yield {
428
+ status_log: log_history,
429
  detected_bpm_key: f"{detected_bpm} BPM, {detected_key}",
430
  harmonic_recs: harmonic_recommendations
431
  }
432
+
433
  # --- Truncated Demucs Output Placeholder (For Demonstrating Success) ---
434
  # Mock file paths and generation for demo purposes
435
  vocals_path = "separated/htdemucs/input/vocals.wav"
 
438
  other_path = "separated/htdemucs/input/other.wav"
439
  guitar_path = None
440
  piano_path = None
441
+
442
  mock_sr = 44100
443
+ mock_duration = 10
444
  mock_y = np.random.uniform(low=-0.5, high=0.5, size=(mock_sr * mock_duration, 2)).astype(np.float32)
445
  os.makedirs(os.path.dirname(vocals_path), exist_ok=True)
446
  sf.write(vocals_path, mock_y, mock_sr)
447
  sf.write(drums_path, mock_y, mock_sr)
448
  sf.write(bass_path, mock_y, mock_sr)
449
  sf.write(other_path, mock_y, mock_sr)
450
+
451
  # --- End Truncated Demucs Output Placeholder ---
452
 
453
  log_history += "✅ Stem separation complete! (Mock files generated for demo)\n"
 
469
 
470
  def slice_stem_real(stem_audio_data, loop_choice, sensitivity, stem_name, manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_key, pan_depth, level_depth, modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth):
471
  """
472
+ Slices a single stem, applies pitch shift, modulation, normalization,
473
  transient shaping, filter LFO, and generates MIDI/visualizations.
474
  """
475
  if stem_audio_data is None:
476
  return [], None
477
+
478
  sample_rate, y_int = stem_audio_data
479
+ y = librosa.util.buf_to_float(y_int, dtype=np.float32)
480
+
481
  if y.ndim == 0: return [], None
482
+
483
  y_mono = librosa.to_mono(y.T) if y.ndim > 1 else y
484
+
485
  # --- 1. PITCH SHIFTING (if enabled) ---
486
  if transpose_semitones != 0:
487
  y_shifted = librosa.effects.pitch_shift(y, sr=sample_rate, n_steps=transpose_semitones)
488
  y = y_shifted
489
+
490
  # --- 2. TRANSIENT SHAPING (Drums Only) ---
491
  if stem_name == "drums" and (attack_gain != 1.0 or sustain_gain != 1.0):
492
  y = apply_transient_shaping(y, sample_rate, attack_gain, sustain_gain)
493
+
494
  # --- 3. FILTER MODULATION (LFO 2.0) ---
495
  if filter_depth > 0:
496
  y = apply_filter_modulation(y, sample_rate, manual_bpm, modulation_rate, filter_type, filter_freq, filter_depth)
497
+
498
  # --- 4. PAN/LEVEL MODULATION ---
499
  normalized_pan_depth = pan_depth / 100.0
500
  normalized_level_depth = level_depth / 100.0
501
+
502
  if normalized_pan_depth > 0 or normalized_level_depth > 0:
503
  y = apply_modulation(y, sample_rate, manual_bpm, modulation_rate, normalized_pan_depth, normalized_level_depth)
504
+
505
  # Check if any modification was applied for the RICH METADATA TAGGING
506
  is_modified = (
507
+ transpose_semitones != 0 or
508
+ normalized_pan_depth > 0 or normalized_level_depth > 0 or
509
+ filter_depth > 0 or
510
  stem_name == "drums" and (attack_gain != 1.0 or sustain_gain != 1.0)
511
  )
512
  mod_tag = "_MOD" if is_modified else "" # Rich Tagging: Modification flag
513
 
 
 
 
 
514
  # --- 6. DETERMINE BPM & KEY (FOR RICH TAGGING) ---
515
  bpm_int = int(manual_bpm)
516
  bpm_tag = f"{bpm_int}BPM" # Rich Tagging: BPM
517
  time_sig_tag = time_signature.replace("/", "") # Rich Tagging: Time Signature
518
+
519
  key_tag = detected_key.replace(" ", "")
520
  if transpose_semitones != 0:
521
  root = detected_key.split(" ")[0]
 
533
  output_files = []
534
  loops_dir = tempfile.mkdtemp()
535
  is_melodic = stem_name in ["vocals", "bass", "guitar", "piano", "other"]
536
+
537
+ if is_melodic and ("Bar Loops" in loop_choice):
538
  try:
539
  # Use piptrack for a more robust (though less accurate than Pyin) general pitch detection
540
  pitches, magnitudes = librosa.piptrack(y=y_mono, sr=sample_rate)
 
542
  for t in range(pitches.shape[1]):
543
  index = magnitudes[:, t].argmax()
544
  main_pitch_line[t] = pitches[index, t]
545
+
546
  notes_list = []
547
+
548
  # Simple note segmentation by pitch change
549
  i = 0
550
  while i < len(main_pitch_line):
551
  current_freq = main_pitch_line[i]
552
  current_midi = freq_to_midi(current_freq)
553
+
554
  j = i
555
  while j < len(main_pitch_line) and freq_to_midi(main_pitch_line[j]) == current_midi:
556
  j += 1
557
+
558
  duration_frames = j - i
559
+
560
  # Minimum duration filter to ignore extremely short notes
561
+ if current_midi != 0 and duration_frames >= 2:
562
  start_sec = librosa.frames_to_time(i, sr=sample_rate, hop_length=512)
563
  duration_sec = librosa.frames_to_time(duration_frames, sr=sample_rate, hop_length=512)
564
  notes_list.append((current_midi, start_sec, duration_sec))
565
+
566
  i = j
567
+
568
  full_stem_midi_path = os.path.join(loops_dir, f"{stem_name}_MELODY_{key_tag}_{bpm_tag}{mod_tag}.mid")
569
  write_midi_file(notes_list, manual_bpm, full_stem_midi_path)
570
  output_files.append((full_stem_midi_path, loops_dir))
571
+
572
  except Exception as e:
573
  print(f"MIDI generation failed for {stem_name}: {e}")
574
  # Do not stop execution
 
576
  # --- 8. CALCULATE TIMING & SLICING ---
577
  beats_per_bar = 4
578
  if time_signature == "3/4": beats_per_bar = 3
579
+
580
  slice_samples = []
581
+
582
  if "Bar Loops" in loop_choice:
583
  bars = int(loop_choice.split(" ")[0])
584
  loop_type_tag = f"{bars}Bar"
585
  loop_duration_samples = int((60.0 / bpm_int * beats_per_bar * bars) * sample_rate)
586
 
587
  if loop_duration_samples == 0: return [], loops_dir
588
+
589
  num_loops = len(y) // loop_duration_samples
590
+
591
  for i in range(num_loops):
592
  start_sample = i * loop_duration_samples
593
  end_sample = start_sample + loop_duration_samples
594
  slice_data = y[start_sample:end_sample]
595
+
596
  # Rich Metadata/Tagging via Filename Enhancement
597
  filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_tag}_{time_sig_tag}{mod_tag}.wav")
598
  sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
599
+ output_files.append((filename, loops_dir))
600
  slice_samples.append(start_sample)
601
+
602
  elif "One-Shots" in loop_choice:
603
  loop_type_tag = "OneShot"
604
  onset_frames = librosa.onset.onset_detect(
605
+ y=y_mono, sr=sample_rate, delta=sensitivity,
606
  wait=1, pre_avg=1, post_avg=1, post_max=1, units='frames'
607
  )
608
  onset_samples = librosa.frames_to_samples(onset_frames)
609
+
610
  if len(onset_samples) > 0:
611
  num_onsets = len(onset_samples)
612
+ slice_samples = list(onset_samples)
613
+
614
  for i, start_sample in enumerate(onset_samples):
615
  end_sample = onset_samples[i+1] if i+1 < num_onsets else len(y)
616
+ slice_data = y[start_sample:end_sample]
617
+
618
  if crossfade_ms > 0:
619
  slice_data = apply_crossfade(slice_data, sample_rate, crossfade_ms)
620
+
621
  # Rich Metadata/Tagging via Filename Enhancement
622
  filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_tag}{mod_tag}.wav")
623
  sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
624
  output_files.append((filename, loops_dir))
625
+
626
  if not output_files:
627
  return [], loops_dir
628
 
629
  # --- 9. VISUALIZATION GENERATION ---
630
  img_path = generate_waveform_preview(y, sample_rate, slice_samples, stem_name, loop_choice, loops_dir)
631
+
632
  # Return audio file path and the single visualization map
633
  return [(audio_file, img_path) for audio_file, _ in output_files if audio_file.endswith(('.wav', '.mid'))], loops_dir
634
 
 
640
  """
641
  log_history = "Starting batch slice...\n"
642
  yield { status_log: log_history }
643
+ await asyncio.sleep(0.1)
644
 
645
  parts = detected_bpm_key_str.split(', ')
646
  key_str = parts[1] if len(parts) > 1 else "Unknown Key"
647
 
648
  stems_to_process = {
649
+ "vocals": vocals, "drums": drums, "bass": bass,
650
  "other": other, "guitar": guitar, "piano": piano
651
  }
652
  zip_path = "Loop_Architect_Pack.zip"
653
+
654
  num_stems = sum(1 for data in stems_to_process.values() if data is not None)
655
  if num_stems == 0:
656
  raise gr.Error("No stems to process! Please separate stems first.")
 
665
  yield { status_log: log_history }
666
 
667
  sliced_files_and_viz, temp_dir = slice_stem_real(
668
+ (data[0], data[1]), loop_choice, sensitivity, name,
669
  manual_bpm, time_signature, crossfade_ms, transpose_semitones, key_str,
670
  pan_depth, level_depth, modulation_rate, target_dbfs,
671
  attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
672
  )
673
+
674
  if sliced_files_and_viz:
675
  # Write both WAV and MIDI files to the ZIP
676
  midi_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.mid'))
677
  wav_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.wav'))
678
+
679
  log_history += f"Generated {wav_count} WAV slices and {midi_count} MIDI files for {name}.\n"
680
  all_temp_dirs.append(temp_dir)
681
  for loop_file, _ in sliced_files_and_viz:
 
685
  zf.write(loop_file, arcname)
686
  else:
687
  log_history += f"No slices generated for {name}.\n"
688
+
689
  processed_count += 1
690
  yield { status_log: log_history }
691
 
 
705
 
706
  # --- Create the full Gradio Interface ---
707
  with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red")) as demo:
708
+ # --- Global State Variables ---
709
+ # These store values across interactions but are not direct UI components themselves.
710
  detected_bpm_key = gr.State(value="")
711
  harmonic_recs = gr.State(value="---")
712
+
713
+ # --- Global Output Components ---
714
+ # These are defined once and referenced throughout the UI.
715
  vocals_output = gr.Audio(label="Vocals", scale=4, visible=False)
716
  drums_output = gr.Audio(label="Drums", scale=4, visible=False)
717
  bass_output = gr.Audio(label="Bass", scale=4, visible=False)
 
721
  download_zip_file = gr.File(label="Download Your Loop Pack", visible=False)
722
  status_log = gr.Textbox(label="Status Log", lines=10, interactive=False)
723
 
724
+ # The main gallery for displaying generated loops and their waveforms.
725
  loop_gallery = gr.Gallery(
726
  label="Generated Loops Preview (Audio + Waveform Slice Map)",
727
  columns=8, object_fit="contain", height="auto", preview=True,
728
  type="numpy"
729
  )
730
+
731
+ # --- Interface Layout ----
732
  gr.Markdown("# 🎵 Loop Architect (Pro Edition)")
733
  gr.Markdown("Upload any song to separate it into stems, detect musical attributes, and then slice and tag the stems for instant use in a DAW.")
734
+
735
  with gr.Row():
736
+ # --- Left Column: Input Controls and Analysis ---
737
  with gr.Column(scale=1):
738
+ # Section 1: Stem Separation
739
  gr.Markdown("### 1. Separate Stems")
740
  audio_input = gr.Audio(type="filepath", label="Upload a Track")
 
741
  with gr.Row():
742
  reverb_reduction_option = gr.Checkbox(
743
  label="Dry Vocals",
744
  value=False,
745
  info="Reduce reverb on the vocal stem."
746
  )
 
747
  model_selector = gr.Radio(
748
  ["htdemucs (High Quality 4-Stem)", "hdemucs (Faster 4-Stem)", "htdemucs_6s (6-Stem)", "2-Stem (Vocals Only)"],
749
  label="Separation Model Control",
750
  value="htdemucs (High Quality 4-Stem)"
751
  )
 
752
  submit_button = gr.Button("Separate & Analyze Stems", variant="primary")
753
 
754
+ # Section 2: Analysis & Transformations
755
  gr.Markdown("### 2. Analysis & Transform")
756
+ # Displays detected BPM and Key after separation
757
+ gr.Textbox(label="Detected Tempo & Key", value="", interactive=False, elem_id="detected_bpm_key_output", placeholder="Run Separation to Analyze...", visible=True)
758
+ # Displays harmonic mixing recommendations
759
+ gr.Textbox(label="Harmonic Mixing Recommendations (Camelot Wheel)", value="---", interactive=False, elem_id="harmonic_recs_output", visible=True)
760
+
 
 
761
  # Transpose Control
762
  transpose_slider = gr.Slider(
763
  minimum=-12, maximum=12, value=0, step=1,
764
  label="Transpose Loops (Semitones)",
765
  info="Shift the pitch of all slices by +/- 1 octave. (Tags the file with `Shift`)"
766
  )
767
+
768
+ # Transient Shaping (Drums Only Controls)
769
  gr.Markdown("### Transient Shaping (Drums Only)")
770
  with gr.Group():
771
  attack_gain_slider = gr.Slider(
 
779
  info="Increase (>1.0) for longer tails/reverb."
780
  )
781
 
782
+ # Pan/Level Modulation Controls
783
  gr.Markdown("### Pan/Level Modulation (LFO 1.0)")
784
+ with gr.Group():
785
  modulation_rate_radio = gr.Radio(
786
  ['1/2', '1/4', '1/8', '1/16'],
787
  label="Modulation Rate (Tempo Synced)",
 
798
  label="Level Modulation Depth (%)",
799
  info="Creates a tempo-synced tremolo (volume pulse)."
800
  )
801
+
802
+ # Filter Modulation Controls
803
  gr.Markdown("### Filter Modulation (LFO 2.0)")
804
+ with gr.Group():
805
  filter_type_radio = gr.Radio(
806
  ['Low-Pass', 'High-Pass'],
807
  label="Filter Type",
 
818
  info="0 = Static filter at Base Cutoff. Modifying any value tags the file with `MOD`."
819
  )
820
 
821
+ # Section 3: Slicing Options
822
  gr.Markdown("### 3. Slicing Options")
823
  with gr.Group():
824
  # Normalization Control
 
827
  label="Target Peak Level (dBFS)",
828
  info="Normalizes all exported loops to this peak volume."
829
  )
 
830
  loop_options_radio = gr.Radio(
831
  ["One-Shots (All Transients)", "4 Bar Loops", "8 Bar Loops"],
832
  label="Slice Type",
833
  value="One-Shots (All Transients)",
834
  info="Bar Loops include automatic MIDI generation for melodic stems."
835
  )
 
836
  with gr.Row():
837
  bpm_input = gr.Number(
838
+ label="Manual BPM",
839
+ value=120,
840
+ minimum=40,
841
  maximum=300,
842
  info="Overrides auto-detect for loop timing."
843
  )
844
  time_sig_radio = gr.Radio(
845
+ ["4/4", "3/4"],
846
+ label="Time Signature",
847
  value="4/4",
848
  info="For correct bar length. (Tags the file with `44` or `34`)"
849
  )
 
850
  sensitivity_slider = gr.Slider(
851
  minimum=0.01, maximum=0.5, value=0.05, step=0.01,
852
  label="One-Shot Sensitivity",
853
  info="Lower values = more slices."
854
  )
 
855
  crossfade_ms_slider = gr.Slider(
856
  minimum=0, maximum=30, value=10, step=1,
857
  label="One-Shot Crossfade (ms)",
858
  info="Prevents clicks/pops on transient slices."
859
  )
860
 
861
+ # Section 4: Create Pack & Status Display
862
  gr.Markdown("### 4. Create Pack (Rich Tagging & MIDI)")
863
  slice_all_button = gr.Button("Slice, Transform & Tag ALL Stems (Create ZIP)", variant="stop")
864
+ download_zip_file # Display the download link
865
 
866
  gr.Markdown("### Status")
867
+ status_log # Display the status log
868
 
869
+ # --- Right Column: Separated Stems & Loop Gallery ---
870
  with gr.Column(scale=2):
871
  with gr.Accordion("Separated Stems (Preview & Slice)", open=True):
872
+
873
+ # Base inputs list for individual stem slicing functions
874
  slice_inputs = [
875
  loop_options_radio, sensitivity_slider, gr.Textbox(visible=False), # Placeholder for stem name
876
+ bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_bpm_key,
877
+ pan_depth_slider, level_depth_slider, modulation_rate_radio,
878
+ lufs_target_slider,
879
+ attack_gain_slider, sustain_gain_slider,
880
  filter_type_radio, filter_freq_slider, filter_depth_slider
881
  ]
882
 
 
884
  def slice_and_display_wrapper(stem_data, loop_choice, sensitivity, stem_name, manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_bpm_key_str, pan_depth, level_depth, modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth):
885
  if not detected_bpm_key_str:
886
  raise gr.Error("Please run 'Separate & Analyze Stems' first.")
887
+
888
  key_str = detected_bpm_key_str.split(', ')[1] if len(detected_bpm_key_str.split(', ')) > 1 else "Unknown Key"
889
+
890
  sliced_files_and_viz, temp_dir = slice_stem_real(
891
  stem_data, loop_choice, sensitivity, stem_name,
892
  manual_bpm, time_signature, crossfade_ms, transpose_semitones, key_str,
893
  pan_depth, level_depth, modulation_rate, target_dbfs,
894
  attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
895
  )
896
+
897
  gallery_output = []
898
+
899
  if sliced_files_and_viz:
900
  # Find the first visualization for the gallery
901
  first_image_path = sliced_files_and_viz[0][1] if sliced_files_and_viz else None
902
+
903
  wav_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.wav'))
904
  midi_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.mid'))
905
 
906
  for i, (audio_file, _) in enumerate(sliced_files_and_viz):
907
  if audio_file.endswith('.wav'):
908
  label = os.path.basename(audio_file).rsplit('.', 1)[0]
909
+ gallery_output.append((audio_file, label, first_image_path))
910
+
911
  log_msg = f"✅ Sliced {stem_name} into {wav_count} WAVs and generated {midi_count} MIDIs. Waveform preview generated."
912
  else:
913
  log_msg = f"No slices generated for {stem_name}."
914
 
915
  if temp_dir and os.path.exists(temp_dir):
916
+ pass
917
+
918
  return {
919
  loop_gallery: gr.update(value=gallery_output),
920
  status_log: log_msg
921
  }
922
 
923
+ # Helper function to dynamically update visibility of stem outputs
924
  def update_output_visibility(selected_model):
925
  is_6_stem = "6-Stem" in selected_model
926
  is_2_stem = "2-Stem" in selected_model
 
928
  if is_2_stem: other_label = "Instrumental (No Vocals)"
929
  elif is_6_stem: other_label = "Other (No Guitar/Piano)"
930
  return (
931
+ gr.update(visible=True),
932
+ gr.update(visible=True if not is_2_stem else False),
933
+ gr.update(visible=True if not is_2_stem else False),
934
  gr.update(visible=True, label=other_label),
935
+ gr.update(visible=is_6_stem),
936
+ gr.update(visible=is_6_stem),
937
+ gr.update(visible=is_6_stem),
938
+ gr.update(visible=is_6_stem)
939
  )
940
 
941
+ # Individual Stem Audio Outputs and their Slice Buttons
942
  with gr.Row():
943
+ vocals_output # Display the global vocals_output Audio component
944
  slice_vocals_btn = gr.Button("Slice Vocals", scale=1)
945
  with gr.Row():
946
+ drums_output # Display the global drums_output Audio component
947
  slice_drums_btn = gr.Button("Slice Drums", scale=1)
948
  with gr.Row():
949
+ bass_output # Display the global bass_output Audio component
950
  slice_bass_btn = gr.Button("Slice Bass", scale=1)
951
  with gr.Row():
952
+ other_output # Display the global other_output Audio component
953
  slice_other_btn = gr.Button("Slice Other", scale=1)
954
+
955
+ # Guitar and Piano are conditionally visible (for 6-stem model)
956
  with gr.Row(visible=False) as guitar_row:
957
+ guitar_output # Display the global guitar_output Audio component
958
  slice_guitar_btn = gr.Button("Slice Guitar", scale=1)
959
  with gr.Row(visible=False) as piano_row:
960
+ piano_output # Display the global piano_output Audio component
961
  slice_piano_btn = gr.Button("Slice Piano", scale=1)
962
+
963
+ # Main Loop Gallery Display
964
  gr.Markdown("### Sliced Loops / Samples (Preview)")
965
+ loop_gallery # Display the global loop_gallery component
966
 
967
+ # --- Event Listeners (UI Interactions) ---
968
+
969
+ # 1. Event for when the user clicks 'Separate & Analyze Stems'
970
  submit_button.click(
971
  fn=separate_stems,
972
+ inputs=[gr.File(type="filepath"), model_selector, gr.Checkbox(visible=False), reverb_reduction_option],
973
  outputs=[
974
  vocals_output, drums_output, bass_output, other_output,
975
  guitar_output, piano_output,
976
+ status_log, detected_bpm_key,
977
+ gr.Textbox(elem_id="detected_bpm_key_output"),
978
+ gr.Textbox(elem_id="harmonic_recs_output")
979
  ]
980
  )
981
 
982
+ # 2. Event for when the user changes the 'Separation Model Control'
983
  model_selector.change(
984
  fn=update_output_visibility,
985
  inputs=[model_selector],
986
  outputs=[
987
+ vocals_output, drums_output, bass_output, other_output,
988
+ guitar_output, piano_output,
989
  guitar_row, piano_row
990
  ]
991
  )
992
+
993
+ # --- Individual Stem Slice Button Events ---
994
  slice_vocals_btn.click(fn=slice_and_display_wrapper, inputs=[vocals_output] + slice_inputs[:2] + [gr.Textbox("vocals", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
995
  slice_drums_btn.click(fn=slice_and_display_wrapper, inputs=[drums_output] + slice_inputs[:2] + [gr.Textbox("drums", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
996
  slice_bass_btn.click(fn=slice_and_display_wrapper, inputs=[bass_output] + slice_inputs[:2] + [gr.Textbox("bass", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
 
998
  slice_guitar_btn.click(fn=slice_and_display_wrapper, inputs=[guitar_output] + slice_inputs[:2] + [gr.Textbox("guitar", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
999
  slice_piano_btn.click(fn=slice_and_display_wrapper, inputs=[piano_output] + slice_inputs[:2] + [gr.Textbox("piano", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
1000
 
1001
+ # 3. Event for when the user clicks 'Slice, Transform & Tag ALL Stems (Create ZIP)'
1002
  slice_all_event = slice_all_button.click(
1003
  fn=slice_all_and_zip_real,
1004
  inputs=[
1005
+ vocals_output, drums_output, bass_output, other_output, guitar_output, piano_output,
1006
+ loop_options_radio, sensitivity_slider,
1007
+ bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_bpm_key,
1008
  pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
1009
+ attack_gain_slider, sustain_gain_slider,
1010
  filter_type_radio, filter_freq_slider, filter_depth_slider
1011
  ],
1012
  outputs=[download_zip_file, status_log]
1013
  )