SaltProphet commited on
Commit
8da4576
·
verified ·
1 Parent(s): 369f0ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +827 -705
app.py CHANGED
@@ -1,198 +1,149 @@
1
  import gradio as gr
2
- import numpy as np
 
 
3
  import librosa
4
  import librosa.display
5
  import soundfile as sf
6
- import os
7
- import tempfile
8
- import zipfile
9
  import time
10
- import matplotlib
 
11
  import matplotlib.pyplot as plt
12
- from scipy import signal
13
- from typing import Tuple, List, Any, Optional, Dict
14
- import shutil
15
 
16
- # Use a non-interactive backend for Matplotlib
17
  matplotlib.use('Agg')
18
 
19
- # --- CONSTANTS & DICTIONARIES ---
20
-
21
- KEY_TO_CAMELOT = {
22
- "C Maj": "8B", "G Maj": "9B", "D Maj": "10B", "A Maj": "11B", "E Maj": "12B",
23
- "B Maj": "1B", "F# Maj": "2B", "Db Maj": "3B", "Ab Maj": "4B", "Eb Maj": "5B",
24
- "Bb Maj": "6B", "F Maj": "7B",
25
- "A Min": "8A", "E Min": "9A", "B Min": "10A", "F# Min": "11A", "C# Min": "12A",
26
- "G# Min": "1A", "D# Min": "2A", "Bb Min": "3A", "F Min": "4A", "C Min": "5A",
27
- "G Min": "6A", "D Min": "7A",
28
- # Enharmonic equivalents
29
- "Gb Maj": "2B", "Cb Maj": "7B", "A# Min": "3A", "D# Maj": "5B", "G# Maj": "4B"
30
- }
31
-
32
- # Fixed reverse mapping to avoid "lossy" inversion
33
- CAMELOT_TO_KEY = {
34
- "8B": "C Maj", "9B": "G Maj", "10B": "D Maj", "11B": "A Maj", "12B": "E Maj",
35
- "1B": "B Maj", "2B": "F# Maj / Gb Maj", "3B": "Db Maj", "4B": "Ab Maj / G# Maj", "5B": "Eb Maj / D# Maj",
36
- "6B": "Bb Maj", "7B": "F Maj / Cb Maj",
37
- "8A": "A Min", "9A": "E Min", "10A": "B Min", "11A": "F# Min", "12A": "C# Min",
38
- "1A": "G# Min", "2A": "D# Min", "3A": "Bb Min / A# Min", "4A": "F Min", "5A": "C Min",
39
- "6A": "G Min", "7A": "D Min"
40
- }
41
-
42
- STEM_NAMES = ["vocals", "drums", "bass", "other", "guitar", "piano"]
43
-
44
- # --- UTILITY FUNCTIONS ---
45
-
46
- def freq_to_midi(freq: float) -> int:
47
  """Converts a frequency in Hz to a MIDI note number."""
 
48
  if freq <= 0:
49
  return 0
50
- # C1 is ~32.7 Hz. Let's set a reasonable floor.
51
- if freq < 32.0:
52
  return 0
 
53
  return int(round(69 + 12 * np.log2(freq / 440.0)))
54
 
55
- def write_midi_file(notes_list: List[Tuple[int, float, float]], bpm: float, output_path: str):
56
  """
57
- Writes a basic MIDI file from a list of notes.
58
- Note: This is a simplified MIDI writer and may have issues.
59
- Using a dedicated library like 'mido' is recommended for robust use.
60
  """
61
  if not notes_list:
62
  return
63
 
64
  tempo_us_per_beat = int(60000000 / bpm)
65
- division = 96 # Ticks per quarter note
66
  seconds_per_tick = 60.0 / (bpm * division)
67
-
 
 
 
 
 
 
68
  # Sort notes by start time
69
  notes_list.sort(key=lambda x: x[1])
70
 
71
  current_tick = 0
72
- midi_events = []
73
-
74
- # --- MIDI Track Header ---
75
- # Set Tempo: FF 51 03 TTTTTT (TTTTTT = tempo_us_per_beat)
76
- tempo_bytes = tempo_us_per_beat.to_bytes(3, 'big')
77
- track_data = b'\x00\xFF\x51\x03' + tempo_bytes
78
-
79
- # Set Time Signature: FF 58 04 NN DD CC BB (Using 4/4)
80
- track_data += b'\x00\xFF\x58\x04\x04\x02\x18\x08'
81
 
82
- # Set Track Name
83
- track_data += b'\x00\xFF\x03\x0BLoopArchitect' # 11 chars
84
-
85
  for note, start_sec, duration_sec in notes_list:
86
- if note == 0:
87
- continue
88
 
89
  # Calculate delta time from last event
90
- target_tick = int(round(start_sec / seconds_per_tick))
91
  delta_tick = target_tick - current_tick
92
  current_tick = target_tick
93
-
94
  # Note On event (Channel 1, Velocity 100)
95
- note_on = [0x90, note, 100]
96
- track_data += encode_delta_time(delta_tick) + bytes(note_on)
97
-
98
  # Note Off event (Channel 1, Velocity 0)
99
- duration_ticks = int(round(duration_sec / seconds_per_tick))
100
- if duration_ticks == 0:
101
- duration_ticks = 1 # Minimum duration
102
-
103
- note_off = [0x80, note, 0]
104
- track_data += encode_delta_time(duration_ticks) + bytes(note_off)
105
  current_tick += duration_ticks
 
 
106
 
107
- # End of track
108
- track_data += b'\x00\xFF\x2F\x00'
109
 
110
- # --- MIDI File Header ---
111
- # MThd, header_length (6), format (1), num_tracks (1), division
112
- header = b'MThd' + (6).to_bytes(4, 'big') + (1).to_bytes(2, 'big') + (1).to_bytes(2, 'big') + division.to_bytes(2, 'big')
113
-
114
- # MTrk, track_length, track_data
115
- track_chunk = b'MTrk' + len(track_data).to_bytes(4, 'big') + track_data
116
- midi_data = header + track_chunk
117
 
118
  with open(output_path, 'wb') as f:
119
- f.write(midi_data)
120
-
121
- def encode_delta_time(ticks: int) -> bytes:
122
- """Encodes an integer tick value into MIDI variable-length quantity."""
123
- buffer = ticks & 0x7F
124
- ticks >>= 7
125
- if ticks > 0:
126
- buffer |= 0x80
127
- while ticks > 0:
128
- buffer = (buffer << 8) | ((ticks & 0x7F) | 0x80)
129
- ticks >>= 7
130
- buffer = (buffer & 0xFFFFFF7F) # Clear MSB of last byte
131
-
132
- # Convert buffer to bytes
133
- byte_list = []
134
- while buffer > 0:
135
- byte_list.insert(0, buffer & 0xFF)
136
- buffer >>= 8
137
- if not byte_list:
138
- return b'\x00'
139
- return bytes(byte_list)
140
- else:
141
- return bytes([buffer])
142
 
143
- def get_harmonic_recommendations(key_str: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  """Calculates harmonically compatible keys based on the Camelot wheel."""
145
  code = KEY_TO_CAMELOT.get(key_str, "N/A")
146
- if code == "N/A":
147
- return "N/A (Key not recognized or 'Unknown Key' detected.)"
148
-
149
  try:
150
  num = int(code[:-1])
151
- mode = code[-1]
152
  opposite_mode = 'B' if mode == 'A' else 'A'
153
  num_plus_one = (num % 12) + 1
154
  num_minus_one = 12 if num == 1 else num - 1
155
-
156
- recs_codes = [
157
- f"{num}{opposite_mode}", # e.g., 8A (A Min) -> 8B (C Maj)
158
- f"{num_plus_one}{mode}", # e.g., 8A (A Min) -> 9A (E Min)
159
- f"{num_minus_one}{mode}" # e.g., 8A (A Min) -> 7A (D Min)
160
- ]
161
-
162
- rec_keys = [f"{CAMELOT_TO_KEY.get(r_code, f'Code {r_code}')} ({r_code})" for r_code in recs_codes]
163
  return " | ".join(rec_keys)
164
- except Exception as e:
165
- print(f"Error calculating recommendations: {e}")
166
  return "N/A (Error calculating recommendations.)"
167
 
168
- def detect_key(y: np.ndarray, sr: int) -> str:
169
  """Analyzes the audio to determine the most likely musical key."""
170
  try:
171
  chroma = librosa.feature.chroma_stft(y=y, sr=sr)
172
  chroma_sums = np.sum(chroma, axis=1)
173
-
174
- # Avoid division by zero if audio is silent
175
- if np.sum(chroma_sums) == 0:
176
- return "Unknown Key"
177
-
178
  chroma_norm = chroma_sums / np.sum(chroma_sums)
179
-
180
- # Krumhansl-Schmuckler key-finding algorithm templates
181
  major_template = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
182
  minor_template = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
183
 
184
- # Normalize templates
185
- major_template /= np.sum(major_template)
186
- minor_template /= np.sum(minor_template)
187
-
188
  pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
189
 
190
  major_correlations = [np.dot(chroma_norm, np.roll(major_template, i)) for i in range(12)]
191
  best_major_index = np.argmax(major_correlations)
192
-
193
  minor_correlations = [np.dot(chroma_norm, np.roll(minor_template, i)) for i in range(12)]
194
  best_minor_index = np.argmax(minor_correlations)
195
-
196
  if major_correlations[best_major_index] > minor_correlations[best_minor_index]:
197
  return pitch_classes[best_major_index] + " Maj"
198
  else:
@@ -201,692 +152,863 @@ def detect_key(y: np.ndarray, sr: int) -> str:
201
  print(f"Key detection failed: {e}")
202
  return "Unknown Key"
203
 
204
- def apply_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, pan_depth: float, level_depth: float) -> np.ndarray:
205
- """Applies tempo-synced LFOs for panning and volume modulation."""
206
- if y.ndim == 0:
207
- return y
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  if y.ndim == 1:
209
- y = np.stack((y, y), axis=-1) # Convert to stereo
 
 
210
 
211
  N = len(y)
212
  duration_sec = N / sr
213
 
214
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
215
- beats_per_measure = rate_map.get(rate, 1)
216
- # LFO frequency = (BPM / 60) * (beats_per_measure / 4.0) -- seems off.
217
- # Let's redefine: LFO freq in Hz = (BPM / 60) * (1 / (4 / beats_per_measure))
218
- # e.g., 1/4 rate at 120BPM = 2Hz. (120/60) * (1 / (4/1)) = 2 * (1/4) = 0.5Hz? No.
219
- # 120 BPM = 2 beats/sec. 1/4 note = 1 beat. So LFO should be 2 Hz.
220
- # 1/8 note = 4 Hz.
221
- # 1/16 note = 8 Hz.
222
- # 1/2 note = 1 Hz.
223
- # Formula: (BPM / 60) * (rate_map_value / 4)
224
- # 1/4 note: (120/60) * (1/4) = 0.5 Hz. Still wrong.
225
- # Let's try: (BPM / 60) * (rate_map_value)
226
- # 1/4 note @ 120BPM: (120/60) * 1 = 2 Hz. Correct.
227
- # 1/8 note @ 120BPM: (120/60) * 2 = 4 Hz. Correct.
228
- # 1/2 note @ 120BPM: (120/60) * 0.5 = 1 Hz. Correct.
229
- lfo_freq_hz = (bpm / 60.0) * rate_map.get(rate, 1)
230
 
231
  t = np.linspace(0, duration_sec, N, endpoint=False)
232
-
233
- # Panning LFO (Sine wave, -1 to 1)
234
  if pan_depth > 0:
235
  pan_lfo = np.sin(2 * np.pi * lfo_freq_hz * t) * pan_depth
236
- # L_mod/R_mod should be 0-1. (1-pan_lfo)/2 and (1+pan_lfo)/2 gives 0-1 range.
237
  L_mod = (1 - pan_lfo) / 2.0
238
  R_mod = (1 + pan_lfo) / 2.0
239
- # This is amplitude panning, not constant power. Good enough.
240
- y[:, 0] *= L_mod
241
  y[:, 1] *= R_mod
242
-
243
- # Level LFO (Tremolo) (Sine wave, 0 to 1)
244
  if level_depth > 0:
245
- level_lfo = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
246
- # gain_multiplier ranges from (1-level_depth) to 1
247
  gain_multiplier = (1 - level_depth) + (level_depth * level_lfo)
248
  y[:, 0] *= gain_multiplier
249
  y[:, 1] *= gain_multiplier
250
 
251
  return y
252
 
253
- def apply_normalization_dbfs(y: np.ndarray, target_dbfs: float) -> np.ndarray:
254
- """Applies peak normalization to match a target dBFS value."""
255
  if target_dbfs >= 0:
256
- return y # Don't normalize to 0dBFS or higher
257
-
258
- current_peak_amp = np.max(np.abs(y))
259
- if current_peak_amp < 1e-9: # Avoid division by zero on silence
260
  return y
261
 
 
262
  target_peak_amp = 10**(target_dbfs / 20.0)
 
 
 
 
 
 
 
 
263
 
264
- gain = target_peak_amp / current_peak_amp
265
- y_normalized = y * gain
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
- # Clip just in case of floating point inaccuracies
268
- y_normalized = np.clip(y_normalized, -1.0, 1.0)
269
- return y_normalized
 
 
 
 
 
 
 
270
 
271
- def apply_filter_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, filter_type: str, freq: float, depth: float) -> np.ndarray:
272
- """Applies a tempo-synced LFO to a 2nd order Butterworth filter cutoff frequency."""
273
- if depth == 0 or filter_type == "None":
274
- return y
275
 
 
 
 
 
 
 
 
276
  # Ensure stereo for LFO application
277
  if y.ndim == 1:
278
  y = np.stack((y, y), axis=-1)
279
- if y.ndim == 0:
280
- return y
281
-
282
  N = len(y)
283
  duration_sec = N / sr
284
 
285
  # LFO Rate Calculation
286
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
287
- lfo_freq_hz = (bpm / 60.0) * rate_map.get(rate, 1)
 
288
 
289
  t = np.linspace(0, duration_sec, N, endpoint=False)
290
-
291
  # LFO: ranges from 0 to 1
292
- lfo_value = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
293
-
294
  # Modulate Cutoff Frequency: Cutoff = BaseFreq + (LFO * Depth)
295
  cutoff_modulation = freq + (lfo_value * depth)
296
  # Safety clip to prevent instability
297
- nyquist = sr / 2.0
298
- cutoff_modulation = np.clip(cutoff_modulation, 20.0, nyquist - 100.0) # Keep away from Nyquist
299
 
300
  y_out = np.zeros_like(y)
 
 
301
 
302
- # --- BUG FIX ---
303
- # Was: filter_type.lower().replace('-pass', '') -> 'low' (ValueError)
304
- # Now: filter_type.lower().replace('-pass', 'pass') -> 'lowpass' (Correct)
305
- filter_type_b = filter_type.lower().replace('-pass', 'pass')
306
-
307
- frame_size = 512 # Frame-based update for filter coefficients
308
- if N < frame_size:
309
- frame_size = N # Handle very short audio
310
-
311
  # Apply filter channel by channel
312
  for channel in range(y.shape[1]):
313
- zi = signal.lfilter_zi(*signal.butter(2, 20.0, btype=filter_type_b, fs=sr))
314
-
315
  for frame_start in range(0, N, frame_size):
316
  frame_end = min(frame_start + frame_size, N)
317
- if frame_start == frame_end: continue # Skip empty frames
318
-
319
  frame = y[frame_start:frame_end, channel]
320
-
321
  # Use the average LFO cutoff for the frame
322
  avg_cutoff = np.mean(cutoff_modulation[frame_start:frame_end])
323
-
324
  # Calculate 2nd order Butterworth filter coefficients
325
- try:
326
- b, a = signal.butter(2, avg_cutoff, btype=filter_type_b, fs=sr)
327
- except ValueError as e:
328
- print(f"Butterworth filter error: {e}. Using last good coefficients.")
329
- # This can happen if avg_cutoff is bad, though we clip it.
330
- # If it still fails, we just re-use the last good b, a.
331
- # In the first frame, this is not robust.
332
- if 'b' not in locals():
333
- b, a = signal.butter(2, 20.0, btype=filter_type_b, fs=sr) # Failsafe
334
-
335
  # Apply filter to the frame, updating the state `zi`
336
- filtered_frame, zi = signal.lfilter(b, a, frame, zi=zi)
337
  y_out[frame_start:frame_end, channel] = filtered_frame
338
-
339
- return y_out
340
-
341
- def apply_crossfade(y: np.ndarray, fade_samples: int) -> np.ndarray:
342
- """Applies a linear fade-in and fade-out to a clip."""
343
- if fade_samples == 0:
344
- return y
345
-
346
- N = len(y)
347
- fade_samples = min(fade_samples, N // 2) # Fade can't be longer than half the clip
348
-
349
- if fade_samples == 0:
350
- return y # Clip is too short to fade
351
-
352
- # Create fade ramps
353
- fade_in = np.linspace(0, 1, fade_samples)
354
- fade_out = np.linspace(1, 0, fade_samples)
355
-
356
- y_out = y.copy()
357
 
358
- # Apply fades (handling mono/stereo)
359
- if y.ndim == 1:
360
- y_out[:fade_samples] *= fade_in
361
- y_out[-fade_samples:] *= fade_out
362
- else:
363
- y_out[:fade_samples, :] *= fade_in[:, np.newaxis]
364
- y_out[-fade_samples:, :] *= fade_out[:, np.newaxis]
365
-
366
  return y_out
367
 
368
- def apply_envelope(y: np.ndarray, sr: int, attack_gain_db: float, sustain_gain_db: float) -> np.ndarray:
369
- """Applies a simple attack/sustain gain envelope to one-shots."""
370
- N = len(y)
371
- if N == 0:
372
- return y
373
-
374
- # Simple fixed attack time of 10ms
375
- attack_time_sec = 0.01
376
- attack_samples = min(int(attack_time_sec * sr), N // 2)
377
-
378
- start_gain = 10**(attack_gain_db / 20.0)
379
- end_gain = 10**(sustain_gain_db / 20.0)
380
 
381
- # Envelope: Linear ramp from start_gain to end_gain over attack_samples, then hold end_gain
382
- envelope = np.ones(N) * end_gain
383
- if attack_samples > 0:
384
- attack_ramp = np.linspace(start_gain, end_gain, attack_samples)
385
- envelope[:attack_samples] = attack_ramp
386
-
387
- # Apply envelope (handling mono/stereo)
388
- if y.ndim == 1:
389
- y_out = y * envelope
390
- else:
391
- y_out = y * envelope[:, np.newaxis]
392
-
393
- return y_out
394
-
395
- # --- CORE PROCESSING FUNCTIONS ---
396
-
397
- def separate_stems(audio_file_path: str) -> Tuple[
398
- Optional[Tuple[int, np.ndarray]],
399
- Optional[Tuple[int, np.ndarray]],
400
- Optional[Tuple[int, np.ndarray]],
401
- Optional[Tuple[int, np.ndarray]],
402
- Optional[Tuple[int, np.ndarray]],
403
- Optional[Tuple[int, np.ndarray]],
404
- float, str, str
405
- ]:
406
  """
407
- Simulates stem separation and detects BPM and Key.
408
- Returns Gradio Audio tuples (sr, data) for each stem.
409
  """
410
  if audio_file_path is None:
411
  raise gr.Error("No audio file uploaded!")
412
 
 
 
 
 
 
 
 
413
  try:
414
- # Load audio
415
- y_orig, sr_orig = librosa.load(audio_file_path, sr=None, mono=False)
416
-
417
- # Ensure stereo for processing
418
- if y_orig.ndim == 1:
419
- y_orig = np.stack([y_orig, y_orig], axis=-1)
420
- # librosa.load with mono=False may return (N,) for mono files,
421
- # or (2, N). Need to ensure (N, 2) or (N,)
422
- if y_orig.ndim == 2 and y_orig.shape[0] < y_orig.shape[1]:
423
- y_orig = y_orig.T # Transpose to (N, 2)
424
-
425
- y_mono = librosa.to_mono(y_orig)
426
 
427
- # Detect tempo and key
428
  tempo, _ = librosa.beat.beat_track(y=y_mono, sr=sr_orig)
429
- detected_bpm = 120.0 if tempo is None or tempo.size == 0 or tempo[0] == 0 else float(np.round(tempo[0]))
430
  detected_key = detect_key(y_mono, sr_orig)
431
- harmonic_recs = get_harmonic_recommendations(detected_key)
432
-
433
- # Create mock separated stems
434
- # In a real app, you'd use Demucs, Spleeter, etc.
435
- # Here, we just return the original audio for each stem for demo purposes.
436
- stems_data: Dict[str, Optional[Tuple[int, np.ndarray]]] = {}
437
 
438
- # Convert to int16 for Gradio Audio component
439
- y_int16 = (y_orig * 32767).astype(np.int16)
440
-
441
- for name in STEM_NAMES:
442
- # We give each stem the full audio for this demo
443
- stems_data[name] = (sr_orig, y_int16.copy())
 
 
 
444
 
445
- return (
446
- stems_data["vocals"], stems_data["drums"], stems_data["bass"], stems_data["other"],
447
- stems_data["guitar"], stems_data["piano"],
448
- detected_bpm, detected_key, harmonic_recs
449
- )
450
  except Exception as e:
451
- print(f"Error processing audio: {e}")
452
- import traceback
453
- traceback.print_exc()
454
- raise gr.Error(f"Error processing audio: {str(e)}")
455
-
456
- def generate_waveform_preview(y: np.ndarray, sr: int, stem_name: str, temp_dir: str) -> str:
457
- """Generates a Matplotlib image showing the waveform."""
458
- img_path = os.path.join(temp_dir, f"{stem_name}_preview.png")
459
-
460
- plt.figure(figsize=(10, 3))
461
- y_display = librosa.to_mono(y.T) if y.ndim > 1 and y.shape[0] < y.shape[1] else y
462
- y_display = librosa.to_mono(y) if y.ndim > 1 else y
463
 
464
- librosa.display.waveshow(y_display, sr=sr, x_axis='time', color="#4a7098")
465
- plt.title(f"{stem_name} Waveform (Processed)")
466
- plt.ylabel("Amplitude")
467
- plt.tight_layout()
468
- plt.savefig(img_path)
469
- plt.close()
470
-
471
- return img_path
472
-
473
- def slice_stem_real(
474
- stem_audio_tuple: Optional[Tuple[int, np.ndarray]],
475
- loop_choice: str,
476
- sensitivity: float,
477
- stem_name: str,
478
- manual_bpm: float,
479
- time_signature: str,
480
- crossfade_ms: int,
481
- transpose_semitones: int,
482
- detected_key: str,
483
- pan_depth: float,
484
- level_depth: float,
485
- modulation_rate: str,
486
- target_dbfs: float,
487
- attack_gain: float,
488
- sustain_gain: float,
489
- filter_type: str,
490
- filter_freq: float,
491
- filter_depth: float
492
- ) -> Tuple[List[str], Optional[str]]:
 
 
 
 
 
 
 
 
 
493
  """
494
- Slices a single stem and applies transformations.
495
- Returns a list of filepaths and a path to a preview image.
496
  """
497
- if stem_audio_tuple is None:
498
  return [], None
499
-
500
- try:
501
- sample_rate, y_int = stem_audio_tuple
502
- # Convert from int16 array back to float
503
- y = y_int.astype(np.float32) / 32767.0
504
 
505
- if y.ndim == 0 or len(y) == 0:
506
- return [], None
507
-
508
- # --- 1. PITCH SHIFTING (if enabled) ---
509
- if transpose_semitones != 0:
510
- y = librosa.effects.pitch_shift(y, sr=sample_rate, n_steps=transpose_semitones)
511
-
512
- # --- 2. FILTER MODULATION ---
513
- if filter_depth > 0 and filter_type != "None":
514
- y = apply_filter_modulation(y, sample_rate, manual_bpm, modulation_rate, filter_type, filter_freq, filter_depth)
515
-
516
- # --- 3. PAN/LEVEL MODULATION ---
517
- normalized_pan_depth = pan_depth / 100.0
518
- normalized_level_depth = level_depth / 100.0
519
- if normalized_pan_depth > 0 or normalized_level_depth > 0:
520
- y = apply_modulation(y, sample_rate, manual_bpm, modulation_rate, normalized_pan_depth, normalized_level_depth)
521
-
522
- # --- 4. NORMALIZATION ---
523
- if target_dbfs < 0:
524
- y = apply_normalization_dbfs(y, target_dbfs)
525
-
526
- # --- 5. DETERMINE BPM & KEY ---
527
- bpm_int = int(round(manual_bpm))
528
- key_tag = "UnknownKey"
529
- if detected_key != "Unknown Key":
530
- key_tag = detected_key.replace(" ", "")
531
- if transpose_semitones != 0:
532
- root, mode = detected_key.split(" ")
533
- pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
534
- try:
535
- current_index = pitch_classes.index(root)
536
- new_index = (current_index + transpose_semitones) % 12
537
- new_key_root = pitch_classes[new_index]
538
- key_tag = f"{new_key_root}{mode}Shift"
539
- except ValueError:
540
- key_tag = f"Shifted{transpose_semitones}" # Fallback
541
 
542
- # --- 6. MIDI GENERATION (Melodic Stems) ---
543
- output_files = []
544
- loops_dir = tempfile.mkdtemp()
545
- is_melodic = stem_name in ["vocals", "bass", "guitar", "piano", "other"]
546
-
547
- if is_melodic and ("Bar Loops" in loop_choice):
548
- try:
549
- y_mono_for_midi = librosa.to_mono(y)
550
- # Use piptrack for pitch detection
551
- pitches, magnitudes = librosa.piptrack(y=y_mono_for_midi, sr=sample_rate)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
 
553
- # Get the dominant pitch at each frame
554
- main_pitch_line = np.zeros(pitches.shape[1])
555
- for t in range(pitches.shape[1]):
556
- index = magnitudes[:, t].argmax()
557
- main_pitch_line[t] = pitches[index, t]
558
-
559
- notes_list = []
560
- i = 0
561
- hop_length = 512 # Default hop for piptrack
562
 
563
- while i < len(main_pitch_line):
564
- current_freq = main_pitch_line[i]
565
- current_midi = freq_to_midi(current_freq)
566
- if current_midi == 0: # Skip silence/unpitched
567
- i += 1
568
- continue
569
-
570
- # Find end of this note
571
- j = i
572
- while j < len(main_pitch_line) and freq_to_midi(main_pitch_line[j]) == current_midi:
573
- j += 1
574
-
575
- duration_frames = j - i
576
- # Only add notes that are long enough (e.g., > 2 frames)
577
- if duration_frames >= 2:
578
- start_sec = librosa.frames_to_time(i, sr=sample_rate, hop_length=hop_length)
579
- duration_sec = librosa.frames_to_time(duration_frames, sr=sample_rate, hop_length=hop_length)
580
- notes_list.append((current_midi, start_sec, duration_sec))
581
-
582
- i = j
583
-
584
- if notes_list:
585
- full_stem_midi_path = os.path.join(loops_dir, f"{stem_name}_MELODY_{key_tag}_{bpm_int}BPM.mid")
586
- write_midi_file(notes_list, manual_bpm, full_stem_midi_path)
587
- output_files.append(full_stem_midi_path)
588
-
589
- except Exception as e:
590
- print(f"MIDI generation failed for {stem_name}: {e}")
591
-
592
- # --- 7. CALCULATE TIMING & SLICING ---
593
- beats_per_bar = 4
594
- if time_signature == "3/4":
595
- beats_per_bar = 3
596
-
597
- if "Bar Loops" in loop_choice:
598
- bars = int(loop_choice.split(" ")[0])
599
- loop_type_tag = f"{bars}Bar"
600
- loop_duration_samples = int((60.0 / manual_bpm * beats_per_bar * bars) * sample_rate)
601
- fade_samples = int((crossfade_ms / 1000.0) * sample_rate)
602
-
603
- if loop_duration_samples > 0 and len(y) > loop_duration_samples:
604
- num_loops = len(y) // loop_duration_samples
605
- for i in range(min(num_loops, 16)): # Limit to 16 loops
606
- start_sample = i * loop_duration_samples
607
- end_sample = min(start_sample + loop_duration_samples, len(y))
608
- slice_data = y[start_sample:end_sample]
609
-
610
- # Apply crossfade
611
- slice_data = apply_crossfade(slice_data, fade_samples)
612
-
613
- filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_int}BPM.wav")
614
- sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
615
- output_files.append(filename)
616
-
617
- elif "One-Shots" in loop_choice:
618
- loop_type_tag = "OneShot"
619
- y_mono_for_onsets = librosa.to_mono(y)
620
 
621
- # IMPLEMENTED: Use sensitivity to find onsets
622
- # Adjust 'wait' and 'delta' based on sensitivity (0-1)
623
- # Higher sensitivity = lower delta, shorter wait
624
- delta = 0.5 * (1.0 - sensitivity) # 0.0 -> 0.5
625
- wait_sec = 0.1 * (1.0 - sensitivity) # 0.0 -> 0.1
626
- wait_samples = int(wait_sec * sample_rate / 512) # in frames
 
 
 
 
 
 
 
 
 
 
 
 
627
 
628
- onset_frames = librosa.onset.onset_detect(
629
- y=y_mono_for_onsets,
630
- sr=sample_rate,
631
- units='frames',
632
- backtrack=True,
633
- delta=delta,
634
- wait=wait_samples
635
- )
636
- onset_samples = librosa.frames_to_samples(onset_frames)
637
 
638
- # Add end of file as the last "onset"
639
- onset_samples = np.append(onset_samples, len(y))
640
-
641
- for i in range(min(len(onset_samples) - 1, 40)): # Limit to 40 slices
642
- start_sample = onset_samples[i]
643
- end_sample = onset_samples[i+1]
644
- slice_data = y[start_sample:end_sample]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
 
646
- if len(slice_data) < 100: # Skip tiny fragments
647
- continue
648
-
649
- # IMPLEMENTED: Apply attack/sustain envelope
650
- slice_data = apply_envelope(slice_data, sample_rate, attack_gain, sustain_gain)
651
 
652
- # Apply short fade-out to prevent clicks
653
- slice_data = apply_crossfade(slice_data, int(0.005 * sample_rate)) # 5ms fade
654
-
655
- filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_int}BPM.wav")
656
  sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
657
- output_files.append(filename)
658
-
659
- # --- 8. VISUALIZATION GENERATION ---
660
- img_path = generate_waveform_preview(y, sample_rate, stem_name, loops_dir)
661
-
662
- # Clean up the temp dir for the *next* run
663
- # Gradio File components need the files to exist, so we don't delete loops_dir yet
664
- # A more robust solution would use gr.TempFile() or manage cleanup
665
-
666
- return output_files, img_path
667
-
668
- except Exception as e:
669
- print(f"Error processing stem {stem_name}: {e}")
670
- import traceback
671
- traceback.print_exc()
672
- return [], None # Return empty on error
673
-
674
-
675
- def slice_all_and_zip(
676
- vocals_audio: Optional[Tuple[int, np.ndarray]],
677
- drums_audio: Optional[Tuple[int, np.ndarray]],
678
- bass_audio: Optional[Tuple[int, np.ndarray]],
679
- other_audio: Optional[Tuple[int, np.ndarray]],
680
- guitar_audio: Optional[Tuple[int, np.ndarray]],
681
- piano_audio: Optional[Tuple[int, np.ndarray]],
682
- loop_choice: str,
683
- sensitivity: float,
684
- manual_bpm: float,
685
- time_signature: str,
686
- crossfade_ms: int,
687
- transpose_semitones: int,
688
- detected_key: str,
689
- pan_depth: float,
690
- level_depth: float,
691
- modulation_rate: str,
692
- target_dbfs: float,
693
- attack_gain: float,
694
- sustain_gain: float,
695
- filter_type: str,
696
- filter_freq: float,
697
- filter_depth: float,
698
- progress: gr.Progress
699
- ) -> Optional[str]:
700
- """Slices all available stems and packages them into a ZIP file."""
701
- try:
702
- stems_to_process = {
703
- "vocals": vocals_audio, "drums": drums_audio, "bass": bass_audio,
704
- "other": other_audio, "guitar": guitar_audio, "piano": piano_audio
705
- }
706
 
707
- # Filter out None stems
708
- valid_stems = {name: data for name, data in stems_to_process.items() if data is not None}
 
 
 
709
 
710
- if not valid_stems:
711
- raise gr.Error("No stems to process! Please separate stems first.")
712
 
713
- # Create temporary directory for all outputs
714
- temp_dir = tempfile.mkdtemp()
715
- zip_path = os.path.join(temp_dir, "Loop_Architect_Pack.zip")
716
-
717
- all_sliced_files = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
718
 
719
- # Use progress tracker
720
- progress(0, desc="Starting...")
721
-
722
- num_stems = len(valid_stems)
723
- for i, (name, data) in enumerate(valid_stems.items()):
724
- progress((i+1)/num_stems, desc=f"Slicing {name}...")
725
-
726
- # Process stem
727
- sliced_files, _ = slice_stem_real(
728
- data, loop_choice, sensitivity, name,
729
- manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_key,
730
- pan_depth, level_depth, modulation_rate, target_dbfs,
731
- attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
732
- )
733
- all_sliced_files.extend(sliced_files)
734
-
735
- progress(0.9, desc="Zipping files...")
736
- with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
737
- for file_path in all_sliced_files:
738
- if not file_path: continue
739
- # Create a sane folder structure in the ZIP
740
- file_type = os.path.splitext(file_path)[1][1:].upper() # WAV or MID
741
- arcname = os.path.join(file_type, os.path.basename(file_path))
742
- zf.write(file_path, arcname)
743
-
744
- progress(1.0, desc="Done!")
745
-
746
- # Clean up individual slice files (but not the zip dir)
747
- for file_path in all_sliced_files:
748
- if file_path and os.path.exists(file_path):
749
- os.remove(file_path)
 
 
750
 
751
- return zip_path
 
 
 
 
752
 
753
  except Exception as e:
754
- print(f"Error creating ZIP: {e}")
755
- import traceback
756
- traceback.print_exc()
757
- raise gr.Error(f"Error creating ZIP: {str(e)}")
758
-
759
- # --- GRADIO INTERFACE ---
760
-
761
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="red")) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
762
  gr.Markdown("# 🎵 Loop Architect (Pro Edition)")
763
  gr.Markdown("Upload any song to separate it into stems, detect musical attributes, and then slice and tag the stems for instant use in a DAW.")
764
-
765
- # State variables
766
- detected_bpm_state = gr.State(value=120.0)
767
- detected_key_state = gr.State(value="Unknown Key")
768
- harmonic_recs_state = gr.State(value="---")
769
-
770
- # Outputs for each stem (as gr.Audio tuples)
771
- vocals_audio = gr.Audio(visible=False, type="numpy")
772
- drums_audio = gr.Audio(visible=False, type="numpy")
773
- bass_audio = gr.Audio(visible=False, type="numpy")
774
- other_audio = gr.Audio(visible=False, type="numpy")
775
- guitar_audio = gr.Audio(visible=False, type="numpy")
776
- piano_audio = gr.Audio(visible=False, type="numpy")
777
 
778
- stem_audio_outputs = [vocals_audio, drums_audio, bass_audio, other_audio, guitar_audio, piano_audio]
779
-
780
  with gr.Row():
781
  with gr.Column(scale=1):
782
- # --- INPUT COLUMN ---
783
- gr.Markdown("### 1. Upload & Analyze")
784
- audio_input = gr.Audio(label="Upload Song", type="filepath")
785
- separate_button = gr.Button("Separate Stems & Analyze", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
786
 
787
- with gr.Accordion("Global Musical Settings", open=True):
788
- manual_bpm_input = gr.Number(label="BPM", value=120.0, step=0.1, interactive=True)
789
- key_display = gr.Textbox(label="Detected Key", value="Unknown Key", interactive=False)
790
- harmonic_recs_display = gr.Textbox(label="Harmonic Recommendations", value="---", interactive=False)
791
- transpose_semitones = gr.Slider(label="Transpose (Semitones)", minimum=-12, maximum=12, value=0, step=1)
792
- time_signature = gr.Radio(label="Time Signature", choices=["4/4", "3/4"], value="4/4")
793
-
794
- with gr.Accordion("Global Slicing Settings", open=True):
795
- loop_choice = gr.Radio(label="Loop Type", choices=["1 Bar Loops", "2 Bar Loops", "4 Bar Loops", "One-Shots"], value="4 Bar Loops")
796
- sensitivity = gr.Slider(label="One-Shot Sensitivity", minimum=0.0, maximum=1.0, value=0.5, info="Higher = more slices")
797
- crossfade_ms = gr.Slider(label="Loop Crossfade (ms)", minimum=0, maximum=50, value=10, step=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
798
 
799
- with gr.Accordion("Global FX Settings", open=False):
800
- target_dbfs = gr.Slider(label="Normalize Peak to (dBFS)", minimum=-24.0, maximum=-0.0, value=-1.0, step=0.1, info="-0.0 = Off")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801
 
802
- gr.Markdown("---")
803
- gr.Markdown("**LFO Modulation (Pan/Level)**")
804
- modulation_rate = gr.Radio(label="Modulation Rate", choices=["1/2", "1/4", "1/8", "1/16"], value="1/4")
805
- pan_depth = gr.Slider(label="Pan Depth (%)", minimum=0, maximum=100, value=0, step=1)
806
- level_depth = gr.Slider(label="Level Depth (%)", minimum=0, maximum=100, value=0, step=1, info="Tremolo effect")
 
807
 
808
- gr.Markdown("---")
809
- gr.Markdown("**LFO Modulation (Filter)**")
810
- filter_type = gr.Radio(label="Filter Type", choices=["None", "Low-pass", "High-pass"], value="None")
811
- filter_freq = gr.Slider(label="Filter Base Freq (Hz)", minimum=20, maximum=10000, value=5000, step=100)
812
- filter_depth = gr.Slider(label="Filter Mod Depth (Hz)", minimum=0, maximum=10000, value=0, step=100, info="LFO amount")
 
 
 
 
 
 
 
 
 
813
 
814
- gr.Markdown("---")
815
- gr.Markdown("**One-Shot Shaping**")
816
- attack_gain = gr.Slider(label="Attack Gain (dB)", minimum=-24.0, maximum=6.0, value=0.0, step=0.5, info="Gain at start of transient")
817
- sustain_gain = gr.Slider(label="Sustain Gain (dB)", minimum=-24.0, maximum=6.0, value=0.0, step=0.5, info="Gain for note body")
 
 
 
 
 
 
 
 
 
 
 
818
 
819
- gr.Markdown("### 3. Generate Pack")
820
- slice_all_button = gr.Button("SLICE ALL & GENERATE PACK", variant="primary")
821
- zip_file_output = gr.File(label="Download Your Loop Pack")
822
 
823
  with gr.Column(scale=2):
824
- # --- OUTPUT COLUMN ---
825
- gr.Markdown("### 2. Review Stems & Slices")
826
- with gr.Tabs():
827
- # Create a tab for each stem
828
- for i, name in enumerate(STEM_NAMES):
829
- with gr.Tab(name.capitalize()):
830
- with gr.Row():
831
- # The (hidden) audio output for this stem
832
- stem_audio_component = stem_audio_outputs[i]
833
-
834
- # Visible components
835
- preview_image = gr.Image(label="Processed Waveform", interactive=False)
836
- slice_files = gr.Files(label="Generated Slices & MIDI", interactive=False)
837
-
838
- # Add a button to slice just this one stem
839
- slice_one_button = gr.Button(f"Slice This {name.capitalize()} Stem")
840
-
841
- # Gather all global settings as inputs
842
- all_settings = [
843
- loop_choice, sensitivity, manual_bpm_input, time_signature, crossfade_ms,
844
- transpose_semitones, detected_key_state, pan_depth, level_depth, modulation_rate,
845
- target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
846
- ]
 
 
 
 
 
 
 
 
847
 
848
- # Wire up the "Slice One" button
849
- slice_one_button.click(
850
- fn=slice_stem_real,
851
- inputs=[stem_audio_component, gr.State(value=name)] + all_settings,
852
- outputs=[slice_files, preview_image]
853
- )
854
 
855
- # --- EVENT LISTENERS ---
 
 
 
 
 
 
 
856
 
857
- # 1. "Separate Stems" button click
858
- separate_button.click(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
859
  fn=separate_stems,
860
- inputs=[audio_input],
861
- outputs=stem_audio_outputs + [detected_bpm_state, detected_key_state, harmonic_recs_state]
 
 
 
 
 
 
862
  )
863
 
864
- # 2. When BPM state changes, update the visible input box
865
- detected_bpm_state.change(
866
- fn=lambda x: x,
867
- inputs=[detected_bpm_state],
868
- outputs=[manual_bpm_input]
 
 
 
 
869
  )
870
 
871
- # 3. When Key state changes, update the visible text boxes
872
- detected_key_state.change(
873
- fn=lambda x: x,
874
- inputs=[detected_key_state],
875
- outputs=[key_display]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
876
  )
877
- harmonic_recs_state.change(
878
- fn=lambda x: x,
879
- inputs=[harmonic_recs_state],
880
- outputs=[harmonic_recs_display]
881
- )
882
-
883
- # 4. "SLICE ALL" button click
884
- slice_all_button.click(
885
- fn=slice_all_and_zip,
886
- inputs=stem_audio_outputs + all_settings,
887
- outputs=[zip_file_output]
888
- )
889
-
890
 
891
- if __name__ == "__main__":
892
- demo.launch(debug=True)
 
1
  import gradio as gr
2
+ import os
3
+ import shutil
4
+ import asyncio
5
  import librosa
6
  import librosa.display
7
  import soundfile as sf
8
+ import numpy as np
 
 
9
  import time
10
+ import zipfile
11
+ import tempfile
12
  import matplotlib.pyplot as plt
13
+ import matplotlib
14
+ import struct
15
+ from scipy.signal import convolve, butter, lfilter, windows
16
 
17
+ # Use a non-interactive backend for Matplotlib for UI compatibility
18
  matplotlib.use('Agg')
19
 
20
+ # --- UTILITY: MIDI FILE WRITING ---
21
+
22
+ def encode_delta_time(time):
23
+ """Encodes a time value into MIDI variable-length quantity format."""
24
+ data = []
25
+ if time == 0:
26
+ return b'\x00'
27
+ while time > 0:
28
+ byte = time & 0x7F
29
+ time >>= 7
30
+ if time > 0:
31
+ byte |= 0x80
32
+ data.insert(0, byte)
33
+ return bytes(data)
34
+
35
def freq_to_midi(freq):
    """Converts a frequency in Hz to a MIDI note number (A4 = 440 Hz = MIDI 69)."""
    # Non-positive frequencies carry no pitch information.
    if freq <= 0:
        return 0
    # Treat anything under 40 Hz as rumble/noise rather than a pitch.
    if freq < 40:
        return 0
    semitones_above_a4 = 12 * np.log2(freq / 440.0)
    return int(round(69 + semitones_above_a4))
45
 
46
def write_midi_file(notes_list, bpm, output_path):
    """
    Writes a very basic, dependency-free MIDI file (.mid) from a list of notes.

    Parameters
    ----------
    notes_list : list of (midi_note, start_time_sec, duration_sec) tuples.
        Notes with pitch 0 or outside the 1..127 MIDI range are skipped
        (values > 127 would crash struct.pack('>B', ...)).
    bpm : tempo in beats per minute; must be > 0, otherwise nothing is written.
    output_path : destination path for the .mid file.

    Notes are serialized strictly sequentially (each note-on immediately
    followed by its note-off), so overlapping notes are flattened and any
    backwards delta time is clamped to zero to keep the stream valid.
    """
    if not notes_list or bpm <= 0:
        return

    tempo_us_per_beat = int(60000000 / bpm)
    division = 96  # Ticks per quarter note
    seconds_per_tick = 60.0 / (bpm * division)

    # Track 0 meta events. A zero delta time is a single 0x00 VLQ byte;
    # the previous struct.pack('>L', 0) emitted FOUR zero bytes, which a
    # parser reads as delta 0 followed by stray non-status bytes.
    midi_data = [
        b'\x00' + b'\xFF\x51\x03' + struct.pack('>L', tempo_us_per_beat)[1:],  # Set Tempo (3-byte value)
        b'\x00' + b'\xFF\x58\x04\x04\x02\x18\x08',  # Time Signature (4/4)
    ]

    # Sort notes by start time so delta times are mostly non-negative.
    notes_list.sort(key=lambda x: x[1])

    current_tick = 0
    for note, start_sec, duration_sec in notes_list:
        # Skip silence markers (0) and out-of-range pitches.
        if not 0 < note <= 127:
            continue

        # Delta from the previous event; clamped because the sequential
        # on/off layout can advance current_tick past the next note's start.
        target_tick = int(start_sec / seconds_per_tick)
        delta_tick = max(0, target_tick - current_tick)
        current_tick += delta_tick

        # Note On event (Channel 1, Velocity 100)
        note_on = b'\x90' + struct.pack('>B', note) + b'\x64'
        midi_data.append(encode_delta_time(delta_tick) + note_on)

        # Note Off event (Channel 1, Velocity 0) after the note's duration.
        duration_ticks = int(duration_sec / seconds_per_tick)
        note_off = b'\x80' + struct.pack('>B', note) + b'\x00'
        midi_data.append(encode_delta_time(duration_ticks) + note_off)

        current_tick += duration_ticks

    # End of Track must be counted inside the MTrk chunk length (the old
    # code appended it AFTER computing len(track_data), truncating the chunk).
    track_data = b"".join(midi_data) + b'\x00\xFF\x2F\x00'

    # 1. Header Chunk (MThd): format 1, one track, `division` ticks/beat.
    header = b'MThd' + struct.pack('>L', 6) + b'\x00\x01' + struct.pack('>H', 1) + struct.pack('>H', division)

    # 2. Track Chunk (MTrk)
    track_chunk = b'MTrk' + struct.pack('>L', len(track_data)) + track_data

    with open(output_path, 'wb') as f:
        f.write(header + track_chunk)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
# --- CONFIGURATION & UTILITY ---

# Mapping from standard key names to Camelot wheel codes. Enharmonic
# spellings map to the same code as their canonical equivalents
# (D# Maj == Eb Maj == 5B, G# Maj == Ab Maj == 4B — the previous table
# wrongly listed them as 11B and 3B).
KEY_TO_CAMELOT = {
    "C Maj": "8B", "G Maj": "9B", "D Maj": "10B", "A Maj": "11B", "E Maj": "12B",
    "B Maj": "1B", "F# Maj": "2B", "Db Maj": "3B", "Ab Maj": "4B", "Eb Maj": "5B",
    "Bb Maj": "6B", "F Maj": "7B",
    "A Min": "8A", "E Min": "9A", "B Min": "10A", "F# Min": "11A", "C# Min": "12A",
    "G# Min": "1A", "D# Min": "2A", "Bb Min": "3A", "F Min": "4A", "C Min": "5A",
    "G Min": "6A", "D Min": "7A",
    # Enharmonic equivalents
    "Gb Maj": "2B", "Cb Maj": "7B", "A# Min": "3A", "D# Maj": "5B", "G# Maj": "4B"
}

# Reverse lookup built once at import time instead of on every call.
# setdefault keeps the FIRST (canonical) spelling when enharmonic entries
# collide on the same Camelot code.
_CAMELOT_TO_KEY = {}
for _key_name, _camelot_code in KEY_TO_CAMELOT.items():
    _CAMELOT_TO_KEY.setdefault(_camelot_code, _key_name)

def get_harmonic_recommendations(key_str):
    """Calculates harmonically compatible keys based on the Camelot wheel.

    Returns the relative major/minor plus the two adjacent wheel positions,
    joined with ' | ', or an "N/A (...)" message for unrecognized keys.
    """
    code = KEY_TO_CAMELOT.get(key_str, "N/A")
    if code == "N/A":
        return "N/A (Key not recognized or 'Unknown Key' detected.)"
    try:
        num = int(code[:-1])
        mode = code[-1]
        opposite_mode = 'B' if mode == 'A' else 'A'
        # Neighbors wrap around the 12-position wheel.
        num_plus_one = (num % 12) + 1
        num_minus_one = 12 if num == 1 else num - 1
        recs = [f"{num}{opposite_mode}", f"{num_plus_one}{mode}", f"{num_minus_one}{mode}"]
        rec_keys = [f"{_CAMELOT_TO_KEY.get(r_code, f'Code {r_code}')} ({r_code})" for r_code in recs]
        return " | ".join(rec_keys)
    except (ValueError, IndexError):
        return "N/A (Error calculating recommendations.)"
128
 
129
def detect_key(y, sr):
    """Analyzes the audio to determine the most likely musical key.

    Correlates the time-averaged chroma vector against major/minor key
    profiles rotated through all 12 pitch classes and picks the best match.
    Returns e.g. "C Maj" or "A Min", or "Unknown Key" on failure/silence.
    """
    try:
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_sums = np.sum(chroma, axis=1)

        # Guard against silent input: dividing by a zero total would yield
        # NaNs and argmax would arbitrarily report "C Maj".
        total_energy = np.sum(chroma_sums)
        if total_energy <= 0:
            return "Unknown Key"
        chroma_norm = chroma_sums / total_energy

        # Krumhansl-style key profiles (relative pitch-class weights).
        major_template = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
        minor_template = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])

        pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

        major_correlations = [np.dot(chroma_norm, np.roll(major_template, i)) for i in range(12)]
        best_major_index = np.argmax(major_correlations)

        minor_correlations = [np.dot(chroma_norm, np.roll(minor_template, i)) for i in range(12)]
        best_minor_index = np.argmax(minor_correlations)

        if major_correlations[best_major_index] > minor_correlations[best_minor_index]:
            return pitch_classes[best_major_index] + " Maj"
        else:
            return pitch_classes[best_minor_index] + " Min"
    except Exception as e:
        print(f"Key detection failed: {e}")
        return "Unknown Key"
154
 
155
def reduce_reverb(audio_path, log_history):
    """Spectrally suppress ambience/reverb in a vocal file.

    Estimates a per-bin ambient floor (10th percentile magnitude over time),
    subtracts a frequency-weighted fraction of it, and writes the result next
    to the input as ``<name>_dry.<ext>``. Returns (processed_path, log) on
    success or (original_path, log) on failure — callers always get a usable
    path back.
    """
    try:
        y, sr = librosa.load(audio_path, sr=None)

        n_fft = 2048
        hop_length = 512

        D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
        mag = np.abs(D)
        phase = np.angle(D)

        # Per-frequency ambient floor: the quiet part of each bin over time.
        ambient_floor = np.percentile(mag, 10, axis=1, keepdims=True)

        # Reduce high frequencies less aggressively (factor ramps 1.0 -> 0.2
        # as frequency approaches/exceeds ~1 kHz).
        freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
        dampening_factor = np.clip(1 - (freqs / 1000.0), 0.2, 1.0)[:, np.newaxis]
        reduction_strength = 0.5

        ambient_reduction = ambient_floor * reduction_strength * dampening_factor

        # Spectral subtraction; magnitudes must stay non-negative.
        mag_processed = np.maximum(mag - ambient_reduction, 0)

        D_processed = mag_processed * np.exp(1j * phase)
        y_processed = librosa.istft(D_processed, length=len(y), dtype=y.dtype, hop_length=hop_length)

        # Build the output path with splitext. The previous
        # `replace(".wav", "_dry.wav")` was a no-op for non-.wav inputs,
        # which made sf.write OVERWRITE the original file.
        base, ext = os.path.splitext(audio_path)
        processed_path = base + "_dry" + (ext if ext else ".wav")
        sf.write(processed_path, y_processed, sr)

        log_history += "✅ Reverb reduction applied to vocals. Using dry vocal track.\n"
        return processed_path, log_history

    except Exception as e:
        log_history += f"⚠️ WARNING: Reverb reduction failed ({e}). Proceeding with wet vocal audio.\n"
        return audio_path, log_history
189
+
190
def apply_crossfade(audio_chunk, sr, fade_ms):
    """Apply a Hanning fade-in/fade-out to both ends of an audio chunk.

    Returns the input unchanged when the fade length is zero or the chunk is
    empty; otherwise returns a faded copy (the caller's array is not mutated).
    Handles mono (N,) and multichannel (N, C) arrays.
    """
    if fade_ms <= 0 or len(audio_chunk) == 0:
        return audio_chunk

    fade_samples = int(sr * (fade_ms / 1000.0))
    total_samples = len(audio_chunk)

    # Shrink the fade when the chunk is too short for two full ramps.
    if total_samples < 2 * fade_samples:
        fade_samples = total_samples // 2
        if fade_samples == 0:
            return audio_chunk

    faded = audio_chunk.copy()
    if fade_samples == 0:
        # Fade rounded down to zero samples: nothing to shape.
        return faded

    ramps = np.hanning(2 * fade_samples)
    ramp_in = ramps[:fade_samples]
    ramp_out = ramps[fade_samples:]

    if faded.ndim == 1:
        faded[:fade_samples] *= ramp_in
        faded[-fade_samples:] *= ramp_out
    else:
        faded[:fade_samples, :] *= ramp_in[:, np.newaxis]
        faded[-fade_samples:, :] *= ramp_out[:, np.newaxis]

    return faded
217
+
218
def generate_waveform_preview(y, sr, slice_samples, stem_name, loop_type, temp_dir):
    """Render the stem's waveform with a red dashed marker at each slice point.

    Saves a PNG into temp_dir (timestamped name to avoid collisions) and
    returns its path.
    """
    img_path = os.path.join(temp_dir, f"{stem_name}_preview_{int(time.time() * 1000)}.png")

    # waveshow expects a mono signal; collapse multichannel input first.
    mono_signal = librosa.to_mono(y.T) if y.ndim > 1 else y

    plt.figure(figsize=(10, 1.5))
    librosa.display.waveshow(mono_signal, sr=sr, x_axis='time', color="#4a7098")

    for marker_time in librosa.samples_to_time(slice_samples, sr=sr):
        plt.axvline(x=marker_time, color='red', linestyle='--', linewidth=1, alpha=0.7)

    plt.title(f"{stem_name} Slices ({loop_type})", fontsize=10)
    plt.xlabel("")
    plt.yticks([])
    plt.tight_layout(pad=0)
    plt.savefig(img_path)
    plt.close()

    return img_path
241
+
242
def apply_modulation(y, sr, bpm, rate, pan_depth, level_depth):
    """Apply tempo-synced sine LFOs for auto-pan and tremolo.

    Parameters: depths are normalized 0.0-1.0; `rate` is one of
    '1/2', '1/4', '1/8', '1/16'. Mono input is duplicated to stereo;
    the result is always (N, 2). Unlike the previous version, the
    caller's stereo buffer is never mutated in place — a copy is used.
    """
    if y.ndim == 1:
        y = np.stack((y, y), axis=-1)
    elif y.ndim == 0:
        return y
    else:
        # Work on a copy so in-place `*=` below cannot corrupt the
        # caller's array (the old code modified stereo input in place).
        y = y.copy()

    N = len(y)
    duration_sec = N / sr

    # NOTE(review): with this mapping, '1/4' at 120 BPM gives a 0.5 Hz LFO
    # (one cycle per 4/4 bar) — rate labels appear to be per-bar multipliers;
    # confirm against the UI's intent.
    rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
    beats_per_measure = rate_map.get(rate, 1)
    lfo_freq_hz = (bpm / 60.0) * (beats_per_measure / 4.0)

    t = np.linspace(0, duration_sec, N, endpoint=False)

    # Panning LFO: channel gains are (1∓lfo)/2, so they always sum to 1.
    if pan_depth > 0:
        pan_lfo = np.sin(2 * np.pi * lfo_freq_hz * t) * pan_depth
        L_mod = (1 - pan_lfo) / 2.0
        R_mod = (1 + pan_lfo) / 2.0
        y[:, 0] *= L_mod
        y[:, 1] *= R_mod

    # Level LFO (tremolo): gain oscillates between (1 - depth) and 1.
    if level_depth > 0:
        level_lfo = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
        gain_multiplier = (1 - level_depth) + (level_depth * level_lfo)
        y[:, 0] *= gain_multiplier
        y[:, 1] *= gain_multiplier

    return y
274
 
275
def apply_normalization_dbfs(y, target_dbfs):
    """Peak-normalize the signal to a target dBFS level.

    A target of 0 dBFS or above disables normalization (the input is
    returned as-is). Near-silent input is also returned untouched to avoid
    dividing by zero. The scaled output is clipped to [-1, 1].
    """
    if target_dbfs >= 0:
        return y

    current_peak = np.max(np.abs(y))
    if current_peak <= 1e-6:
        # Effectively silent; nothing meaningful to scale.
        return y

    desired_peak = 10**(target_dbfs / 20.0)
    scaled = y * (desired_peak / current_peak)
    return np.clip(scaled, -1.0, 1.0)
290
 
291
+ # --- NEW UTILITY: TRANSIENT SHAPING ---
292
+
293
def apply_transient_shaping(y, sr, attack_gain, sustain_gain):
    """
    Applies basic transient shaping to the audio signal (mono or stereo).

    `attack_gain` and `sustain_gain` are DECIBEL values (the UI sliders are
    labelled "Attack/Sustain Gain (dB)", default 0.0, range -24..+6) and are
    converted to linear amplitudes here. The previous implementation applied
    them as raw linear multipliers, so the 0.0 dB default silenced the
    signal. The caller only invokes this for the 'drums' stem.
    """
    # dB -> linear amplitude; 0.0 dB becomes unity gain (a no-op).
    attack_amp = 10.0 ** (attack_gain / 20.0)
    sustain_amp = 10.0 ** (sustain_gain / 20.0)

    if y.ndim == 1:
        y_mono = y
    else:
        y_mono = librosa.to_mono(y.T)

    rectified = np.abs(y_mono)

    # Envelope-follower window sizes based on typical transient/sustain times.
    attack_samples = int(sr * 0.005)   # 5 ms
    sustain_samples = int(sr * 0.05)   # 50 ms

    # Unit-area Hann windows act as smoothing (moving-average-like) kernels.
    attack_window = windows.hann(attack_samples * 2)
    attack_window /= np.sum(attack_window)
    sustain_window = windows.hann(sustain_samples * 2)
    sustain_window /= np.sum(sustain_window)

    fast_envelope = convolve(rectified, attack_window, mode='same')
    slow_envelope = convolve(rectified, sustain_window, mode='same')

    # Ratio: how transient the signal is (fast envelope >> slow envelope).
    ratio = np.clip(fast_envelope / (slow_envelope + 1e-6), 1.0, 5.0)

    # Normalized ratio in 0..1 (clip range is [1, 5], hence the divisor 4).
    normalized_ratio = (ratio - 1.0) / 4.0

    # Crossfade between the sustain and attack gains per sample.
    gain_envelope = (sustain_amp * (1 - normalized_ratio)) + (attack_amp * normalized_ratio)

    if y.ndim == 1:
        return y * gain_envelope
    return y * gain_envelope[:, np.newaxis]
333
 
334
+ # --- NEW UTILITY: FILTER MODULATION ---
 
 
 
335
 
336
def apply_filter_modulation(y, sr, bpm, rate, filter_type, freq, depth):
    """
    Applies a tempo-synced LFO to a 2nd-order Butterworth filter cutoff.

    `filter_type` is one of the UI radio values "None" / "Low-pass" /
    "High-pass". With "None" (or zero depth) the input is returned
    unchanged — previously "None" with a non-zero depth reached
    butter(btype='none') and crashed with a ValueError.
    Mono input is duplicated to stereo; the result is (N, 2).
    """
    if depth == 0 or filter_type == "None":
        return y

    # Ensure stereo for per-channel processing.
    if y.ndim == 1:
        y = np.stack((y, y), axis=-1)

    N = len(y)
    duration_sec = N / sr

    # Tempo-synced LFO rate (same mapping as the pan/level modulation).
    rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
    beats_per_measure = rate_map.get(rate, 1)
    lfo_freq_hz = (bpm / 60.0) * (beats_per_measure / 4.0)

    t = np.linspace(0, duration_sec, N, endpoint=False)

    # LFO in 0..1, modulating the cutoff: Cutoff = BaseFreq + (LFO * Depth).
    lfo_value = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
    cutoff_modulation = freq + (lfo_value * depth)
    # Keep the cutoff strictly inside (0, Nyquist) so butter() stays stable.
    cutoff_modulation = np.clip(cutoff_modulation, 20.0, sr / 2.0 - 100)

    y_out = np.zeros_like(y)
    btype = filter_type.lower().replace('-pass', '')  # "low" / "high"
    frame_size = 512  # Coefficients are recomputed once per frame.

    # Apply filter channel by channel.
    for channel in range(y.shape[1]):
        zi = np.zeros(2)  # 2nd-order filter state, carried across frames

        for frame_start in range(0, N, frame_size):
            frame_end = min(frame_start + frame_size, N)
            frame = y[frame_start:frame_end, channel]

            # One averaged cutoff per frame keeps coefficients piecewise
            # constant instead of recomputing them per sample.
            avg_cutoff = np.mean(cutoff_modulation[frame_start:frame_end])

            b, a = butter(2, avg_cutoff, btype=btype, fs=sr)

            # lfilter's zi carries the state so frames join without clicks.
            filtered_frame, zi = lfilter(b, a, frame, zi=zi)
            y_out[frame_start:frame_end, channel] = filtered_frame

    return y_out
388
 
389
+ # --- CORE SEPARATION FUNCTION (Truncated for brevity, focus on analysis) ---
 
 
 
 
 
 
 
 
 
 
 
390
 
391
async def separate_stems(audio_file_path, selected_model, denoise_enabled, reverb_reduction_enabled):
    """
    Separates audio, detects BPM and Key, and applies post-processing.
    (Function logic remains the same for separation, only the returns are relevant)

    Async generator for a Gradio event handler: each `yield` is a dict keyed
    by component objects (status_log, stem outputs, ...) to push incremental
    UI updates. In this truncated build the actual Demucs separation is
    replaced by random-noise mock WAVs; `selected_model`, `denoise_enabled`
    and `reverb_reduction_enabled` are accepted but unused here.
    Raises gr.Error when no file was uploaded.
    """
    if audio_file_path is None:
        raise gr.Error("No audio file uploaded!")

    log_history = "Starting separation...\n"
    yield { status_log: log_history, detected_bpm_key: "", harmonic_recs: "---" }

    # 1. Pre-process and analyze original audio
    # Defaults used when analysis fails (see except branch below).
    detected_bpm = 120
    detected_key = "Unknown Key"
    # ... (BPM and Key detection logic, including error handling) ...
    try:
        # sr=None preserves the file's native sample rate.
        y_orig, sr_orig = librosa.load(audio_file_path, sr=None)
        y_mono = librosa.to_mono(y_orig.T) if y_orig.ndim > 1 else y_orig

        tempo, _ = librosa.beat.beat_track(y=y_mono, sr=sr_orig)
        # Fall back to 120 BPM when the tracker returns nothing usable.
        detected_bpm = 120 if tempo is None or tempo == 0 else int(np.round(tempo).item())
        detected_key = detect_key(y_mono, sr_orig)

        harmonic_recommendations = get_harmonic_recommendations(detected_key)

        status_string = f"Detected Tempo: {detected_bpm} BPM. Detected Key: {detected_key}. Proceeding with separation...\n"
        log_history += status_string
        yield {
            status_log: log_history,
            detected_bpm_key: f"{detected_bpm} BPM, {detected_key}",
            harmonic_recs: harmonic_recommendations
        }

    except Exception as e:
        # Analysis is best-effort: report the failure but keep going with
        # the defaults so separation still runs.
        log_history += f"⚠️ WARNING: Analysis failed ({e}). Defaulting to 120 BPM, Unknown Key.\n"
        harmonic_recommendations = "N/A (Analysis failed)"
        yield {
            status_log: log_history,
            detected_bpm_key: f"{detected_bpm} BPM, {detected_key}",
            harmonic_recs: harmonic_recommendations
        }

    # --- Truncated Demucs Output Placeholder (For Demonstrating Success) ---
    # Mock file paths and generation for demo purposes
    vocals_path = "separated/htdemucs/input/vocals.wav"
    drums_path = "separated/htdemucs/input/drums.wav"
    bass_path = "separated/htdemucs/input/bass.wav"
    other_path = "separated/htdemucs/input/other.wav"
    guitar_path = None
    piano_path = None

    # 10 seconds of stereo noise stands in for each separated stem.
    mock_sr = 44100
    mock_duration = 10
    mock_y = np.random.uniform(low=-0.5, high=0.5, size=(mock_sr * mock_duration, 2)).astype(np.float32)
    os.makedirs(os.path.dirname(vocals_path), exist_ok=True)
    sf.write(vocals_path, mock_y, mock_sr)
    sf.write(drums_path, mock_y, mock_sr)
    sf.write(bass_path, mock_y, mock_sr)
    sf.write(other_path, mock_y, mock_sr)

    # --- End Truncated Demucs Output Placeholder ---

    log_history += "✅ Stem separation complete! (Mock files generated for demo)\n"
    # NOTE(review): the last two keys construct NEW gr.Textbox instances;
    # Gradio update dicts normally need references to EXISTING components,
    # so these entries likely never reach the UI — confirm and replace with
    # the real component variables.
    yield {
        status_log: log_history,
        vocals_output: gr.update(value=vocals_path, visible=True),
        drums_output: gr.update(value=drums_path, visible=True),
        bass_output: gr.update(value=bass_path, visible=True),
        other_output: gr.update(value=other_path, visible=True),
        guitar_output: gr.update(value=guitar_path, visible=False),
        piano_output: gr.update(value=piano_path, visible=False),
        detected_bpm_key: f"{detected_bpm} BPM, {detected_key}",
        gr.Textbox(elem_id="detected_bpm_key_output"): f"{detected_bpm} BPM, {detected_key}",
        gr.Textbox(elem_id="harmonic_recs_output"): harmonic_recommendations
    }
466
+
467
+
468
+ # --- CORE SLICING FUNCTION (UPDATED for MIDI and Rich Tagging) ---
469
+
470
def slice_stem_real(stem_audio_data, loop_choice, sensitivity, stem_name, manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_key, pan_depth, level_depth, modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth):
    """
    Slices a single stem, applies pitch shift, modulation, normalization,
    transient shaping, filter LFO, and generates MIDI/visualizations.

    stem_audio_data is a Gradio (sample_rate, int_array) tuple. Returns
    ([(file_path, preview_image_path), ...], temp_dir); the caller owns the
    temp_dir and must clean it up. Returns ([], None) for missing input.
    """
    if stem_audio_data is None:
        return [], None

    sample_rate, y_int = stem_audio_data
    # Gradio delivers integer PCM; convert to float32 for DSP.
    y = librosa.util.buf_to_float(y_int, dtype=np.float32)

    if y.ndim == 0: return [], None

    # Mono copy used for analysis (pitch tracking, onset detection).
    y_mono = librosa.to_mono(y.T) if y.ndim > 1 else y

    # --- 1. PITCH SHIFTING (if enabled) ---
    if transpose_semitones != 0:
        y_shifted = librosa.effects.pitch_shift(y, sr=sample_rate, n_steps=transpose_semitones)
        y = y_shifted

    # --- 2. TRANSIENT SHAPING (Drums Only) ---
    # NOTE(review): the UI sliders for attack/sustain default to 0.0 (dB),
    # so this `!= 1.0` guard fires at default settings — confirm whether the
    # intended "off" value is 0.0 and adjust the comparison accordingly.
    if stem_name == "drums" and (attack_gain != 1.0 or sustain_gain != 1.0):
        y = apply_transient_shaping(y, sample_rate, attack_gain, sustain_gain)

    # --- 3. FILTER MODULATION (LFO 2.0) ---
    if filter_depth > 0:
        y = apply_filter_modulation(y, sample_rate, manual_bpm, modulation_rate, filter_type, filter_freq, filter_depth)

    # --- 4. PAN/LEVEL MODULATION ---
    # UI depths arrive as percentages; the DSP helpers expect 0..1.
    normalized_pan_depth = pan_depth / 100.0
    normalized_level_depth = level_depth / 100.0

    if normalized_pan_depth > 0 or normalized_level_depth > 0:
        y = apply_modulation(y, sample_rate, manual_bpm, modulation_rate, normalized_pan_depth, normalized_level_depth)

    # Check if any modification was applied for the RICH METADATA TAGGING
    is_modified = (
        transpose_semitones != 0 or
        normalized_pan_depth > 0 or normalized_level_depth > 0 or
        filter_depth > 0 or
        stem_name == "drums" and (attack_gain != 1.0 or sustain_gain != 1.0)
    )
    mod_tag = "_MOD" if is_modified else "" # Rich Tagging: Modification flag

    # --- 5. NORMALIZATION ---
    if target_dbfs < 0:
        y = apply_normalization_dbfs(y, target_dbfs)

    # --- 6. DETERMINE BPM & KEY (FOR RICH TAGGING) ---
    bpm_int = int(manual_bpm)
    bpm_tag = f"{bpm_int}BPM" # Rich Tagging: BPM
    time_sig_tag = time_signature.replace("/", "") # Rich Tagging: Time Signature

    key_tag = detected_key.replace(" ", "")
    if transpose_semitones != 0:
        # Transpose the key tag alongside the audio so filenames stay honest.
        root = detected_key.split(" ")[0]
        mode = detected_key.split(" ")[1]
        pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
        try:
            current_index = pitch_classes.index(root)
            new_index = (current_index + transpose_semitones) % 12
            new_key_root = pitch_classes[new_index]
            key_tag = f"{new_key_root}{mode}Shift" # Rich Tagging: Transposed Key
        except ValueError:
            pass # Keep original key tag if root not found

    # --- 7. MIDI GENERATION (Melodic Stems) ---
    output_files = []
    loops_dir = tempfile.mkdtemp()
    is_melodic = stem_name in ["vocals", "bass", "guitar", "piano", "other"]

    if is_melodic and ("Bar Loops" in loop_choice):
        try:
            # Use piptrack for a more robust (though less accurate than Pyin) general pitch detection
            pitches, magnitudes = librosa.piptrack(y=y_mono, sr=sample_rate)
            # Keep only the strongest pitch candidate per frame.
            main_pitch_line = np.zeros(pitches.shape[1])
            for t in range(pitches.shape[1]):
                index = magnitudes[:, t].argmax()
                main_pitch_line[t] = pitches[index, t]

            notes_list = []

            # Simple note segmentation by pitch change: extend each note
            # while consecutive frames quantize to the same MIDI number.
            i = 0
            while i < len(main_pitch_line):
                current_freq = main_pitch_line[i]
                current_midi = freq_to_midi(current_freq)

                j = i
                while j < len(main_pitch_line) and freq_to_midi(main_pitch_line[j]) == current_midi:
                    j += 1

                duration_frames = j - i

                # Minimum duration filter to ignore extremely short notes
                if current_midi != 0 and duration_frames >= 2:
                    start_sec = librosa.frames_to_time(i, sr=sample_rate, hop_length=512)
                    duration_sec = librosa.frames_to_time(duration_frames, sr=sample_rate, hop_length=512)
                    notes_list.append((current_midi, start_sec, duration_sec))

                i = j

            full_stem_midi_path = os.path.join(loops_dir, f"{stem_name}_MELODY_{key_tag}_{bpm_tag}{mod_tag}.mid")
            write_midi_file(notes_list, manual_bpm, full_stem_midi_path)
            output_files.append((full_stem_midi_path, loops_dir))

        except Exception as e:
            print(f"MIDI generation failed for {stem_name}: {e}")
            # Do not stop execution

    # --- 8. CALCULATE TIMING & SLICING ---
    beats_per_bar = 4
    if time_signature == "3/4": beats_per_bar = 3

    slice_samples = []

    if "Bar Loops" in loop_choice:
        # Fixed-grid slicing: equal-length chunks of `bars` bars at the BPM.
        bars = int(loop_choice.split(" ")[0])
        loop_type_tag = f"{bars}Bar"
        loop_duration_samples = int((60.0 / bpm_int * beats_per_bar * bars) * sample_rate)

        if loop_duration_samples == 0: return [], loops_dir

        num_loops = len(y) // loop_duration_samples

        # NOTE(review): crossfade_ms is only applied in the One-Shots branch
        # below — bar loops are written without the configured crossfade.
        # Confirm whether that is intentional.
        for i in range(num_loops):
            start_sample = i * loop_duration_samples
            end_sample = start_sample + loop_duration_samples
            slice_data = y[start_sample:end_sample]

            # Rich Metadata/Tagging via Filename Enhancement
            filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_tag}_{time_sig_tag}{mod_tag}.wav")
            sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
            output_files.append((filename, loops_dir))
            slice_samples.append(start_sample)

    elif "One-Shots" in loop_choice:
        # Onset-driven slicing: one slice per detected transient.
        loop_type_tag = "OneShot"
        onset_frames = librosa.onset.onset_detect(
            y=y_mono, sr=sample_rate, delta=sensitivity,
            wait=1, pre_avg=1, post_avg=1, post_max=1, units='frames'
        )
        onset_samples = librosa.frames_to_samples(onset_frames)

        if len(onset_samples) > 0:
            num_onsets = len(onset_samples)
            slice_samples = list(onset_samples)

            for i, start_sample in enumerate(onset_samples):
                # Each slice runs to the next onset (or end of stem).
                end_sample = onset_samples[i+1] if i+1 < num_onsets else len(y)
                slice_data = y[start_sample:end_sample]

                if crossfade_ms > 0:
                    slice_data = apply_crossfade(slice_data, sample_rate, crossfade_ms)

                # Rich Metadata/Tagging via Filename Enhancement
                filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_tag}{mod_tag}.wav")
                sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
                output_files.append((filename, loops_dir))

    if not output_files:
        return [], loops_dir

    # --- 9. VISUALIZATION GENERATION ---
    img_path = generate_waveform_preview(y, sample_rate, slice_samples, stem_name, loop_choice, loops_dir)

    # Return audio file path and the single visualization map
    return [(audio_file, img_path) for audio_file, _ in output_files if audio_file.endswith(('.wav', '.mid'))], loops_dir
638
 
639
+ # --- SLICING HANDLERS (UPDATED for NEW Inputs) ---
 
640
 
641
async def slice_all_and_zip_real(vocals, drums, bass, other, guitar, piano, loop_choice, sensitivity, manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_bpm_key_str, pan_depth, level_depth, modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth):
    """
    Slices all available stems, applies all transformations, and packages them into a ZIP file.

    Async generator for a Gradio handler: yields dicts keyed by UI components
    to stream status updates, then a final update exposing the ZIP download.
    `detected_bpm_key_str` is the "<bpm> BPM, <key>" display string; the key
    part is parsed back out for filename tagging. Temp slice dirs are removed
    in `finally`.
    """
    log_history = "Starting batch slice...\n"
    yield { status_log: log_history }
    # Brief pause lets the first status update flush to the UI.
    await asyncio.sleep(0.1)

    parts = detected_bpm_key_str.split(', ')
    key_str = parts[1] if len(parts) > 1 else "Unknown Key"

    stems_to_process = {
        "vocals": vocals, "drums": drums, "bass": bass,
        "other": other, "guitar": guitar, "piano": piano
    }
    zip_path = "Loop_Architect_Pack.zip"

    num_stems = sum(1 for data in stems_to_process.values() if data is not None)
    if num_stems == 0:
        raise gr.Error("No stems to process! Please separate stems first.")

    all_temp_dirs = []
    try:
        with zipfile.ZipFile(zip_path, 'w') as zf:
            processed_count = 0
            for name, data in stems_to_process.items():
                if data is not None:
                    log_history += f"--- Slicing {name} stem ---\n"
                    yield { status_log: log_history }

                    sliced_files_and_viz, temp_dir = slice_stem_real(
                        (data[0], data[1]), loop_choice, sensitivity, name,
                        manual_bpm, time_signature, crossfade_ms, transpose_semitones, key_str,
                        pan_depth, level_depth, modulation_rate, target_dbfs,
                        attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
                    )

                    if sliced_files_and_viz:
                        # Write both WAV and MIDI files to the ZIP
                        midi_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.mid'))
                        wav_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.wav'))

                        log_history += f"Generated {wav_count} WAV slices and {midi_count} MIDI files for {name}.\n"
                        all_temp_dirs.append(temp_dir)
                        for loop_file, _ in sliced_files_and_viz:
                            # Create a subfolder for WAVs and a separate one for MIDIs in the zip
                            ext = 'MIDI' if loop_file.endswith('.mid') else name
                            arcname = os.path.join(ext, os.path.basename(loop_file))
                            zf.write(loop_file, arcname)
                    else:
                        # NOTE(review): on this path temp_dir is NOT added to
                        # all_temp_dirs, so an empty-result slice directory is
                        # never cleaned up — confirm and append it here too.
                        log_history += f"No slices generated for {name}.\n"

                    processed_count += 1
                    yield { status_log: log_history }

        log_history += "Packaging complete! WAVs and corresponding MIDIs are organized in the ZIP.\n"
        yield {
            status_log: log_history + "✅ Pack ready for download!",
            download_zip_file: gr.update(value=zip_path, visible=True)
        }

    except Exception as e:
        print(f"An error occurred during slice all: {e}")
        yield { status_log: log_history + f"❌ ERROR: {e}" }
    finally:
        # Remove per-stem temp directories regardless of success/failure.
        for d in all_temp_dirs:
            if d and os.path.exists(d):
                shutil.rmtree(d)
709
+
710
+ # --- Create the full Gradio Interface ---
711
+ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red")) as demo:
712
+ # State variables
713
+ detected_bpm_key = gr.State(value="")
714
+ harmonic_recs = gr.State(value="---")
715
+
716
+ # Define outputs globally
717
+ vocals_output = gr.Audio(label="Vocals", scale=4, visible=False)
718
+ drums_output = gr.Audio(label="Drums", scale=4, visible=False)
719
+ bass_output = gr.Audio(label="Bass", scale=4, visible=False)
720
+ other_output = gr.Audio(label="Other / Instrumental", scale=4, visible=False)
721
+ guitar_output = gr.Audio(label="Guitar", scale=4, visible=False)
722
+ piano_output = gr.Audio(label="Piano", scale=4, visible=False)
723
+ download_zip_file = gr.File(label="Download Your Loop Pack", visible=False)
724
+ status_log = gr.Textbox(label="Status Log", lines=10, interactive=False)
725
+
726
+ loop_gallery = gr.Gallery(
727
+ label="Generated Loops Preview (Audio + Waveform Slice Map)",
728
+ columns=8, object_fit="contain", height="auto", preview=True,
729
+ type="numpy"
730
+ )
731
+
732
  gr.Markdown("# 🎵 Loop Architect (Pro Edition)")
733
  gr.Markdown("Upload any song to separate it into stems, detect musical attributes, and then slice and tag the stems for instant use in a DAW.")
 
 
 
 
 
 
 
 
 
 
 
 
 
734
 
 
 
735
  with gr.Row():
736
  with gr.Column(scale=1):
737
+ gr.Markdown("### 1. Separate Stems")
738
+ audio_input = gr.Audio(type="filepath", label="Upload a Track")
739
+
740
+ with gr.Row():
741
+ reverb_reduction_option = gr.Checkbox(
742
+ label="Dry Vocals",
743
+ value=False,
744
+ info="Reduce reverb on the vocal stem."
745
+ )
746
+
747
+ model_selector = gr.Radio(
748
+ ["htdemucs (High Quality 4-Stem)", "hdemucs (Faster 4-Stem)", "htdemucs_6s (6-Stem)", "2-Stem (Vocals Only)"],
749
+ label="Separation Model Control",
750
+ value="htdemucs (High Quality 4-Stem)"
751
+ )
752
+
753
+ submit_button = gr.Button("Separate & Analyze Stems", variant="primary")
754
+
755
+ gr.Markdown("### 2. Analysis & Transform")
756
+
757
+ # Key/BPM Display
758
+ gr.Textbox(label="Detected Tempo & Key", value="", interactive=False, elem_id="detected_bpm_key_output", placeholder="Run Separation to Analyze...", render=True, visible=True)
759
+
760
+ # Harmonic Recommendations Display
761
+ gr.Textbox(label="Harmonic Mixing Recommendations (Camelot Wheel)", value="---", interactive=False, elem_id="harmonic_recs_output", render=True, visible=True)
762
+
763
+ # Transpose Control
764
+ transpose_slider = gr.Slider(
765
+ minimum=-12, maximum=12, value=0, step=1,
766
+ label="Transpose Loops (Semitones)",
767
+ info="Shift the pitch of all slices by +/- 1 octave. (Tags the file with `Shift`)"
768
+ )
769
 
770
+ # --- TRANSIENT SHAPING ---
771
+ gr.Markdown("### Transient Shaping (Drums Only)")
772
+ with gr.Group():
773
+ attack_gain_slider = gr.Slider(
774
+ minimum=0.5, maximum=1.5, value=1.0, step=0.1,
775
+ label="Attack Gain Multiplier",
776
+ info="Increase (>1.0) for punchier transients."
777
+ )
778
+ sustain_gain_slider = gr.Slider(
779
+ minimum=0.5, maximum=1.5, value=1.0, step=0.1,
780
+ label="Sustain Gain Multiplier",
781
+ info="Increase (>1.0) for longer tails/reverb."
782
+ )
783
+
784
+ # --- MODULATION (PAN/LEVEL) ---
785
+ gr.Markdown("### Pan/Level Modulation (LFO 1.0)")
786
+ with gr.Group():
787
+ modulation_rate_radio = gr.Radio(
788
+ ['1/2', '1/4', '1/8', '1/16'],
789
+ label="Modulation Rate (Tempo Synced)",
790
+ value='1/4',
791
+ info="The speed of the Pan/Level pulse."
792
+ )
793
+ pan_depth_slider = gr.Slider(
794
+ minimum=0, maximum=100, value=0, step=5,
795
+ label="Pan Modulation Depth (%)",
796
+ info="Creates a stereo auto-pan effect."
797
+ )
798
+ level_depth_slider = gr.Slider(
799
+ minimum=0, maximum=100, value=0, step=5,
800
+ label="Level Modulation Depth (%)",
801
+ info="Creates a tempo-synced tremolo (volume pulse)."
802
+ )
803
 
804
+ # --- FILTER MODULATION ---
805
+ gr.Markdown("### Filter Modulation (LFO 2.0)")
806
+ with gr.Group():
807
+ filter_type_radio = gr.Radio(
808
+ ['Low-Pass', 'High-Pass'],
809
+ label="Filter Type",
810
+ value='Low-Pass'
811
+ )
812
+ with gr.Row():
813
+ filter_freq_slider = gr.Slider(
814
+ minimum=20, maximum=10000, value=2000, step=10,
815
+ label="Base Cutoff Frequency (Hz)",
816
+ )
817
+ filter_depth_slider = gr.Slider(
818
+ minimum=0, maximum=5000, value=0, step=10,
819
+ label="Modulation Depth (Hz)",
820
+ info="0 = Static filter at Base Cutoff. Modifying any value tags the file with `MOD`."
821
+ )
822
+
823
+
824
+ gr.Markdown("### 3. Slicing Options")
825
+ with gr.Group():
826
+ # Normalization Control
827
+ lufs_target_slider = gr.Slider(
828
+ minimum=-18.0, maximum=-0.1, value=-3.0, step=0.1,
829
+ label="Target Peak Level (dBFS)",
830
+ info="Normalizes all exported loops to this peak volume."
831
+ )
832
 
833
+ loop_options_radio = gr.Radio(
834
+ ["One-Shots (All Transients)", "4 Bar Loops", "8 Bar Loops"],
835
+ label="Slice Type",
836
+ value="One-Shots (All Transients)",
837
+ info="Bar Loops include automatic MIDI generation for melodic stems."
838
+ )
839
 
840
+ with gr.Row():
841
+ bpm_input = gr.Number(
842
+ label="Manual BPM",
843
+ value=120,
844
+ minimum=40,
845
+ maximum=300,
846
+ info="Overrides auto-detect for loop timing."
847
+ )
848
+ time_sig_radio = gr.Radio(
849
+ ["4/4", "3/4"],
850
+ label="Time Signature",
851
+ value="4/4",
852
+ info="For correct bar length. (Tags the file with `44` or `34`)"
853
+ )
854
 
855
+ sensitivity_slider = gr.Slider(
856
+ minimum=0.01, maximum=0.5, value=0.05, step=0.01,
857
+ label="One-Shot Sensitivity",
858
+ info="Lower values = more slices."
859
+ )
860
+
861
+ crossfade_ms_slider = gr.Slider(
862
+ minimum=0, maximum=30, value=10, step=1,
863
+ label="One-Shot Crossfade (ms)",
864
+ info="Prevents clicks/pops on transient slices."
865
+ )
866
+
867
+ gr.Markdown("### 4. Create Pack (Rich Tagging & MIDI)")
868
+ slice_all_button = gr.Button("Slice, Transform & Tag ALL Stems (Create ZIP)", variant="stop")
869
+ download_zip_file
870
 
871
+ gr.Markdown("### Status")
872
+ status_log.render()
 
873
 
874
  with gr.Column(scale=2):
875
+ with gr.Accordion("Separated Stems (Preview & Slice)", open=True):
876
+
877
+ # Base slice inputs - ALL inputs for slice_stem_real
878
+ slice_inputs = [
879
+ loop_options_radio, sensitivity_slider, gr.Textbox(visible=False), # Placeholder for stem name
880
+ bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_bpm_key,
881
+ pan_depth_slider, level_depth_slider, modulation_rate_radio,
882
+ lufs_target_slider,
883
+ attack_gain_slider, sustain_gain_slider,
884
+ filter_type_radio, filter_freq_slider, filter_depth_slider
885
+ ]
886
+
887
+ # Wrapper function to call slice_stem_real and update the gallery
888
def slice_and_display_wrapper(stem_data, loop_choice, sensitivity, stem_name, manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_bpm_key_str, pan_depth, level_depth, modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type, filter_freq, filter_depth):
    """Slice a single stem and build the loop-gallery preview + status message.

    Thin UI wrapper around ``slice_stem_real``: validates that analysis has
    been run, extracts the detected key from the "BPM, Key" state string,
    slices the stem, and returns Gradio updates for ``loop_gallery`` and
    ``status_log``.

    Raises:
        gr.Error: if 'Separate & Analyze Stems' has not been run yet
            (``detected_bpm_key_str`` is empty).
    """
    if not detected_bpm_key_str:
        raise gr.Error("Please run 'Separate & Analyze Stems' first.")

    # State string is formatted "<bpm>, <key>"; split once and fall back
    # when the key portion is absent.
    parts = detected_bpm_key_str.split(', ')
    key_str = parts[1] if len(parts) > 1 else "Unknown Key"

    sliced_files_and_viz, temp_dir = slice_stem_real(
        stem_data, loop_choice, sensitivity, stem_name,
        manual_bpm, time_signature, crossfade_ms, transpose_semitones, key_str,
        pan_depth, level_depth, modulation_rate, target_dbfs,
        attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
    )

    gallery_output = []

    if sliced_files_and_viz:
        # All slices share one waveform slice-map image; reuse the first one.
        first_image_path = sliced_files_and_viz[0][1]

        wav_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.wav'))
        midi_count = sum(1 for f, _ in sliced_files_and_viz if f.endswith('.mid'))

        # Only WAV slices are previewable in the gallery; MIDI files are
        # ZIP-export only.
        for audio_file, _ in sliced_files_and_viz:
            if audio_file.endswith('.wav'):
                label = os.path.basename(audio_file).rsplit('.', 1)[0]
                gallery_output.append((audio_file, label, first_image_path))

        log_msg = f"✅ Sliced {stem_name} into {wav_count} WAVs and generated {midi_count} MIDIs. Waveform preview generated."
    else:
        log_msg = f"No slices generated for {stem_name}."

    # NOTE(review): temp_dir is deliberately NOT deleted here — the gallery
    # still serves the sliced files from it. It is never cleaned up later
    # either; consider tracking it for removal on the next slice run.

    return {
        loop_gallery: gr.update(value=gallery_output),
        status_log: log_msg
    }
926
+
927
def update_output_visibility(selected_model):
    """Toggle stem-player visibility and labels to match the chosen model.

    Returns eight ``gr.update`` objects in the order the ``.change()``
    listener wires them: vocals, drums, bass, other, guitar, piano,
    guitar_row, piano_row.
    """
    is_6_stem = "6-Stem" in selected_model
    is_2_stem = "2-Stem" in selected_model

    # The "other" player doubles as the instrumental bus in 2-stem mode.
    if is_2_stem:
        other_label = "Instrumental (No Vocals)"
    elif is_6_stem:
        other_label = "Other (No Guitar/Piano)"
    else:
        other_label = "Other"

    return (
        gr.update(visible=True),                     # vocals: always shown
        gr.update(visible=not is_2_stem),            # drums
        gr.update(visible=not is_2_stem),            # bass
        gr.update(visible=True, label=other_label),  # other / instrumental
        gr.update(visible=is_6_stem),                # guitar
        gr.update(visible=is_6_stem),                # piano
        gr.update(visible=is_6_stem),                # guitar_row
        gr.update(visible=is_6_stem)                 # piano_row
    )
943
+
944
+ with gr.Row():
945
+ vocals_output.render()
946
+ slice_vocals_btn = gr.Button("Slice Vocals", scale=1)
947
+ with gr.Row():
948
+ drums_output.render()
949
+ slice_drums_btn = gr.Button("Slice Drums", scale=1)
950
+ with gr.Row():
951
+ bass_output.render()
952
+ slice_bass_btn = gr.Button("Slice Bass", scale=1)
953
+ with gr.Row():
954
+ other_output.render()
955
+ slice_other_btn = gr.Button("Slice Other", scale=1)
956
+
957
+ with gr.Row(visible=False) as guitar_row:
958
+ guitar_output.render()
959
+ slice_guitar_btn = gr.Button("Slice Guitar", scale=1)
960
+ with gr.Row(visible=False) as piano_row:
961
+ piano_output.render()
962
+ slice_piano_btn = gr.Button("Slice Piano", scale=1)
963
+
964
+ gr.Markdown("### Sliced Loops / Samples (Preview)")
965
+ loop_gallery.render()
966
+
967
+ # --- MAIN EVENT LISTENERS ---
968
+
969
+ # 1. Separation Event
970
+ submit_button.click(
971
  fn=separate_stems,
972
+ inputs=[gr.File(type="filepath"), model_selector, gr.Checkbox(visible=False), reverb_reduction_option],
973
+ outputs=[
974
+ vocals_output, drums_output, bass_output, other_output,
975
+ guitar_output, piano_output,
976
+ status_log, detected_bpm_key,
977
+ gr.Textbox(elem_id="detected_bpm_key_output"),
978
+ gr.Textbox(elem_id="harmonic_recs_output")
979
+ ]
980
  )
981
 
982
+ # 2. UI Visibility Event
983
+ model_selector.change(
984
+ fn=update_output_visibility,
985
+ inputs=[model_selector],
986
+ outputs=[
987
+ vocals_output, drums_output, bass_output, other_output,
988
+ guitar_output, piano_output,
989
+ guitar_row, piano_row
990
+ ]
991
  )
992
 
993
+ # --- Single Slice Button Events ---
994
+ slice_vocals_btn.click(fn=slice_and_display_wrapper, inputs=[vocals_output] + slice_inputs[:2] + [gr.Textbox("vocals", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
995
+ slice_drums_btn.click(fn=slice_and_display_wrapper, inputs=[drums_output] + slice_inputs[:2] + [gr.Textbox("drums", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
996
+ slice_bass_btn.click(fn=slice_and_display_wrapper, inputs=[bass_output] + slice_inputs[:2] + [gr.Textbox("bass", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
997
+ slice_other_btn.click(fn=slice_and_display_wrapper, inputs=[other_output] + slice_inputs[:2] + [gr.Textbox("other", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
998
+ slice_guitar_btn.click(fn=slice_and_display_wrapper, inputs=[guitar_output] + slice_inputs[:2] + [gr.Textbox("guitar", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
999
+ slice_piano_btn.click(fn=slice_and_display_wrapper, inputs=[piano_output] + slice_inputs[:2] + [gr.Textbox("piano", visible=False)] + slice_inputs[3:], outputs=[loop_gallery, status_log])
1000
+
1001
+ # 3. Slice All Event
1002
+ slice_all_event = slice_all_button.click(
1003
+ fn=slice_all_and_zip_real,
1004
+ inputs=[
1005
+ vocals_output, drums_output, bass_output, other_output, guitar_output, piano_output,
1006
+ loop_options_radio, sensitivity_slider,
1007
+ bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_bpm_key,
1008
+ pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
1009
+ attack_gain_slider, sustain_gain_slider,
1010
+ filter_type_radio, filter_freq_slider, filter_depth_slider
1011
+ ],
1012
+ outputs=[download_zip_file, status_log]
1013
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
1014