SaltProphet commited on
Commit
4a9c475
·
verified ·
1 Parent(s): 899edf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +284 -568
app.py CHANGED
@@ -1,8 +1,7 @@
1
- # app.py
2
  import gradio as gr
3
  import numpy as np
4
  import librosa
5
- import librosa.display # Added missing import
6
  import soundfile as sf
7
  import os
8
  import tempfile
@@ -11,24 +10,54 @@ import time
11
  import matplotlib
12
  import matplotlib.pyplot as plt
13
  from scipy import signal
14
- from typing import Tuple, List, Any
15
  import shutil
16
 
17
  # Use a non-interactive backend for Matplotlib
18
  matplotlib.use('Agg')
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # --- UTILITY FUNCTIONS ---
21
 
22
  def freq_to_midi(freq: float) -> int:
23
  """Converts a frequency in Hz to a MIDI note number."""
24
  if freq <= 0:
25
  return 0
26
- if freq < 40: # Ignore frequencies below C2 (approx 65Hz)
 
27
  return 0
28
  return int(round(69 + 12 * np.log2(freq / 440.0)))
29
 
30
  def write_midi_file(notes_list: List[Tuple[int, float, float]], bpm: float, output_path: str):
31
- """Writes a basic MIDI file from a list of notes."""
 
 
 
 
32
  if not notes_list:
33
  return
34
 
@@ -42,67 +71,77 @@ def write_midi_file(notes_list: List[Tuple[int, float, float]], bpm: float, outp
42
  current_tick = 0
43
  midi_events = []
44
 
 
 
 
 
 
 
 
 
 
 
 
45
  for note, start_sec, duration_sec in notes_list:
46
  if note == 0:
47
  continue
48
 
49
  # Calculate delta time from last event
50
- target_tick = int(start_sec / seconds_per_tick)
51
  delta_tick = target_tick - current_tick
52
  current_tick = target_tick
53
 
54
  # Note On event (Channel 1, Velocity 100)
55
  note_on = [0x90, note, 100]
56
- midi_events.append((delta_tick, note_on))
57
 
58
  # Note Off event (Channel 1, Velocity 0)
59
- duration_ticks = int(duration_sec / seconds_per_tick)
 
 
 
60
  note_off = [0x80, note, 0]
61
- midi_events.append((duration_ticks, note_off))
62
  current_tick += duration_ticks
63
 
64
- # Build MIDI file
65
- header = b'MThd' + (6).to_bytes(4, 'big') + (1).to_bytes(2, 'big') + (1).to_bytes(2, 'big') + division.to_bytes(2, 'big')
66
-
67
- track_data = b''
68
- for delta, event in midi_events:
69
- # Encode delta time
70
- delta_bytes = []
71
- while True:
72
- delta_bytes.append(delta & 0x7F)
73
- if delta <= 0x7F:
74
- break
75
- delta >>= 7
76
- for i in range(len(delta_bytes)-1, -1, -1):
77
- if i > 0:
78
- track_data += bytes([delta_bytes[i] | 0x80])
79
- else:
80
- track_data += bytes([delta_bytes[i]])
81
-
82
- # Add event
83
- track_data += bytes(event)
84
-
85
  # End of track
86
  track_data += b'\x00\xFF\x2F\x00'
87
 
 
 
 
 
 
88
  track_chunk = b'MTrk' + len(track_data).to_bytes(4, 'big') + track_data
89
  midi_data = header + track_chunk
90
 
91
  with open(output_path, 'wb') as f:
92
  f.write(midi_data)
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def get_harmonic_recommendations(key_str: str) -> str:
95
  """Calculates harmonically compatible keys based on the Camelot wheel."""
96
- KEY_TO_CAMELOT = {
97
- "C Maj": "8B", "G Maj": "9B", "D Maj": "10B", "A Maj": "11B", "E Maj": "12B",
98
- "B Maj": "1B", "F# Maj": "2B", "Db Maj": "3B", "Ab Maj": "4B", "Eb Maj": "5B",
99
- "Bb Maj": "6B", "F Maj": "7B",
100
- "A Min": "8A", "E Min": "9A", "B Min": "10A", "F# Min": "11A", "C# Min": "12A",
101
- "G# Min": "1A", "D# Min": "2A", "Bb Min": "3A", "F Min": "4A", "C Min": "5A",
102
- "G Min": "6A", "D Min": "7A",
103
- "Gb Maj": "2B", "Cb Maj": "7B", "A# Min": "3A", "D# Maj": "11B", "G# Maj": "3B"
104
- }
105
-
106
  code = KEY_TO_CAMELOT.get(key_str, "N/A")
107
  if code == "N/A":
108
  return "N/A (Key not recognized or 'Unknown Key' detected.)"
@@ -113,11 +152,17 @@ def get_harmonic_recommendations(key_str: str) -> str:
113
  opposite_mode = 'B' if mode == 'A' else 'A'
114
  num_plus_one = (num % 12) + 1
115
  num_minus_one = 12 if num == 1 else num - 1
116
- recs = [f"{num}{opposite_mode}", f"{num_plus_one}{mode}", f"{num_minus_one}{mode}"]
117
- CAMELOT_TO_KEY = {v: k for k, v in KEY_TO_CAMELOT.items()}
118
- rec_keys = [f"{CAMELOT_TO_KEY.get(r_code, f'Code {r_code}')} ({r_code})" for r_code in recs]
 
 
 
 
 
119
  return " | ".join(rec_keys)
120
- except Exception:
 
121
  return "N/A (Error calculating recommendations.)"
122
 
123
  def detect_key(y: np.ndarray, sr: int) -> str:
@@ -125,10 +170,20 @@ def detect_key(y: np.ndarray, sr: int) -> str:
125
  try:
126
  chroma = librosa.feature.chroma_stft(y=y, sr=sr)
127
  chroma_sums = np.sum(chroma, axis=1)
 
 
 
 
 
128
  chroma_norm = chroma_sums / np.sum(chroma_sums)
129
 
 
130
  major_template = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
131
  minor_template = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
 
 
 
 
132
 
133
  pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
134
 
@@ -148,31 +203,47 @@ def detect_key(y: np.ndarray, sr: int) -> str:
148
 
149
  def apply_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, pan_depth: float, level_depth: float) -> np.ndarray:
150
  """Applies tempo-synced LFOs for panning and volume modulation."""
151
- if y.ndim == 1:
152
- y = np.stack((y, y), axis=-1)
153
- elif y.ndim == 0:
154
  return y
 
 
155
 
156
  N = len(y)
157
  duration_sec = N / sr
158
 
159
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
160
  beats_per_measure = rate_map.get(rate, 1)
161
- lfo_freq_hz = (bpm / 60.0) * (beats_per_measure / 4.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  t = np.linspace(0, duration_sec, N, endpoint=False)
164
 
165
- # Panning LFO
166
  if pan_depth > 0:
167
  pan_lfo = np.sin(2 * np.pi * lfo_freq_hz * t) * pan_depth
 
168
  L_mod = (1 - pan_lfo) / 2.0
169
  R_mod = (1 + pan_lfo) / 2.0
 
170
  y[:, 0] *= L_mod
171
  y[:, 1] *= R_mod
172
 
173
- # Level LFO (Tremolo)
174
  if level_depth > 0:
175
  level_lfo = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
 
176
  gain_multiplier = (1 - level_depth) + (level_depth * level_lfo)
177
  y[:, 0] *= gain_multiplier
178
  y[:, 1] *= gain_multiplier
@@ -182,35 +253,38 @@ def apply_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, pan_depth: f
182
  def apply_normalization_dbfs(y: np.ndarray, target_dbfs: float) -> np.ndarray:
183
  """Applies peak normalization to match a target dBFS value."""
184
  if target_dbfs >= 0:
185
- return y
186
 
187
  current_peak_amp = np.max(np.abs(y))
 
 
 
188
  target_peak_amp = 10**(target_dbfs / 20.0)
189
 
190
- if current_peak_amp > 1e-6:
191
- gain = target_peak_amp / current_peak_amp
192
- y_normalized = y * gain
193
- y_normalized = np.clip(y_normalized, -1.0, 1.0)
194
- return y_normalized
195
- else:
196
- return y
197
 
198
  def apply_filter_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, filter_type: str, freq: float, depth: float) -> np.ndarray:
199
  """Applies a tempo-synced LFO to a 2nd order Butterworth filter cutoff frequency."""
200
- if depth == 0:
201
  return y
202
 
203
  # Ensure stereo for LFO application
204
  if y.ndim == 1:
205
  y = np.stack((y, y), axis=-1)
 
 
206
 
207
  N = len(y)
208
  duration_sec = N / sr
209
 
210
  # LFO Rate Calculation
211
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
212
- beats_per_measure = rate_map.get(rate, 1)
213
- lfo_freq_hz = (bpm / 60.0) * (beats_per_measure / 4.0)
214
 
215
  t = np.linspace(0, duration_sec, N, endpoint=False)
216
 
@@ -220,25 +294,43 @@ def apply_filter_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, filte
220
  # Modulate Cutoff Frequency: Cutoff = BaseFreq + (LFO * Depth)
221
  cutoff_modulation = freq + (lfo_value * depth)
222
  # Safety clip to prevent instability
223
- cutoff_modulation = np.clip(cutoff_modulation, 20.0, sr / 2.0 - 100)
 
224
 
225
  y_out = np.zeros_like(y)
226
- filter_type_b = filter_type.lower().replace('-pass', '')
 
 
 
 
 
227
  frame_size = 512 # Frame-based update for filter coefficients
 
 
228
 
229
  # Apply filter channel by channel
230
  for channel in range(y.shape[1]):
231
- zi = np.zeros(2) # Initial filter state (2nd order filter)
232
 
233
  for frame_start in range(0, N, frame_size):
234
  frame_end = min(frame_start + frame_size, N)
 
 
235
  frame = y[frame_start:frame_end, channel]
236
 
237
  # Use the average LFO cutoff for the frame
238
  avg_cutoff = np.mean(cutoff_modulation[frame_start:frame_end])
239
 
240
  # Calculate 2nd order Butterworth filter coefficients
241
- b, a = signal.butter(2, avg_cutoff, btype=filter_type_b, fs=sr)
 
 
 
 
 
 
 
 
242
 
243
  # Apply filter to the frame, updating the state `zi`
244
  filtered_frame, zi = signal.lfilter(b, a, frame, zi=zi)
@@ -246,39 +338,119 @@ def apply_filter_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, filte
246
 
247
  return y_out
248
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  # --- CORE PROCESSING FUNCTIONS ---
250
 
251
- def separate_stems(audio_file_path: str) -> Tuple[str, str, str, str, str, str, float, str]:
252
- """Simulates stem separation and detects BPM and Key."""
 
 
 
 
 
 
 
 
 
 
 
253
  if audio_file_path is None:
254
  raise gr.Error("No audio file uploaded!")
255
 
256
  try:
257
  # Load audio
258
- y_orig, sr_orig = librosa.load(audio_file_path, sr=None)
259
- y_mono = librosa.to_mono(y_orig.T) if y_orig.ndim > 1 else y_orig
 
 
 
 
 
 
 
 
 
260
 
261
  # Detect tempo and key
262
  tempo, _ = librosa.beat.beat_track(y=y_mono, sr=sr_orig)
263
- detected_bpm = 120 if tempo is None or tempo == 0 else int(np.round(tempo).item())
264
  detected_key = detect_key(y_mono, sr_orig)
 
265
 
266
  # Create mock separated stems
267
- temp_dir = tempfile.mkdtemp()
268
- stems = {}
269
- stem_names = ["vocals", "drums", "bass", "other", "guitar", "piano"]
 
 
 
270
 
271
- for name in stem_names:
272
- stem_path = os.path.join(temp_dir, f"{name}.wav")
273
- # Create mock audio (just a portion of the original)
274
- sf.write(stem_path, y_orig[:min(len(y_orig), sr_orig*5)], sr_orig) # 5 seconds max
275
- stems[name] = stem_path
276
 
277
  return (
278
- stems["vocals"], stems["drums"], stems["bass"], stems["other"],
279
- stems["guitar"], stems["piano"], float(detected_bpm), detected_key
 
280
  )
281
  except Exception as e:
 
 
 
282
  raise gr.Error(f"Error processing audio: {str(e)}")
283
 
284
  def generate_waveform_preview(y: np.ndarray, sr: int, stem_name: str, temp_dir: str) -> str:
@@ -286,9 +458,12 @@ def generate_waveform_preview(y: np.ndarray, sr: int, stem_name: str, temp_dir:
286
  img_path = os.path.join(temp_dir, f"{stem_name}_preview.png")
287
 
288
  plt.figure(figsize=(10, 3))
289
- y_display = librosa.to_mono(y.T) if y.ndim > 1 else y
 
 
290
  librosa.display.waveshow(y_display, sr=sr, x_axis='time', color="#4a7098")
291
- plt.title(f"{stem_name} Waveform")
 
292
  plt.tight_layout()
293
  plt.savefig(img_path)
294
  plt.close()
@@ -296,7 +471,7 @@ def generate_waveform_preview(y: np.ndarray, sr: int, stem_name: str, temp_dir:
296
  return img_path
297
 
298
  def slice_stem_real(
299
- stem_audio_path: str,
300
  loop_choice: str,
301
  sensitivity: float,
302
  stem_name: str,
@@ -314,39 +489,33 @@ def slice_stem_real(
314
  filter_type: str,
315
  filter_freq: float,
316
  filter_depth: float
317
- ) -> Tuple[List[Tuple[str, str]], str]:
318
- """Slices a single stem and applies transformations."""
319
- if stem_audio_path is None:
320
- return [], ""
 
 
 
321
 
322
  try:
323
- # Load audio
324
- # Assuming stem_audio_path is a tuple (sample_rate, audio_array) from Gradio
325
- if isinstance(stem_audio_path, tuple) and len(stem_audio_path) == 2:
326
- sample_rate, y_int = stem_audio_path
327
- y = librosa.util.buf_to_float(y_int, dtype=np.float32)
328
- else:
329
- # Handle case where it's a filepath (from separate_stems)
330
- y, sample_rate = librosa.load(stem_audio_path, sr=None) # Fixed indentation
331
-
332
- if y.ndim == 0:
333
- return [], ""
334
-
335
- y_mono = librosa.to_mono(y.T) if y.ndim > 1 else y
336
 
337
  # --- 1. PITCH SHIFTING (if enabled) ---
338
  if transpose_semitones != 0:
339
- y_shifted = librosa.effects.pitch_shift(y, sr=sample_rate, n_steps=transpose_semitones)
340
- y = y_shifted
341
 
342
  # --- 2. FILTER MODULATION ---
343
- if filter_depth > 0:
344
  y = apply_filter_modulation(y, sample_rate, manual_bpm, modulation_rate, filter_type, filter_freq, filter_depth)
345
 
346
  # --- 3. PAN/LEVEL MODULATION ---
347
  normalized_pan_depth = pan_depth / 100.0
348
  normalized_level_depth = level_depth / 100.0
349
-
350
  if normalized_pan_depth > 0 or normalized_level_depth > 0:
351
  y = apply_modulation(y, sample_rate, manual_bpm, modulation_rate, normalized_pan_depth, normalized_level_depth)
352
 
@@ -355,467 +524,14 @@ def slice_stem_real(
355
  y = apply_normalization_dbfs(y, target_dbfs)
356
 
357
  # --- 5. DETERMINE BPM & KEY ---
358
- bpm_int = int(manual_bpm)
359
- key_tag = detected_key.replace(" ", "")
360
- if transpose_semitones != 0:
361
- root = detected_key.split(" ")[0]
362
- mode = detected_key.split(" ")[1]
363
- pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
364
- try:
365
- current_index = pitch_classes.index(root)
366
- new_index = (current_index + transpose_semitones) % 12
367
- new_key_root = pitch_classes[new_index]
368
- key_tag = f"{new_key_root}{mode}Shift"
369
- except ValueError:
370
- pass
371
-
372
- # --- 6. MIDI GENERATION (Melodic Stems) ---
373
- output_files = []
374
- loops_dir = tempfile.mkdtemp()
375
- is_melodic = stem_name in ["vocals", "bass", "guitar", "piano", "other"]
376
-
377
- if is_melodic and ("Bar Loops" in loop_choice):
378
- try:
379
- # Use piptrack for pitch detection
380
- pitches, magnitudes = librosa.piptrack(y=y_mono, sr=sample_rate)
381
- main_pitch_line = np.zeros(pitches.shape[1])
382
- for t in range(pitches.shape[1]):
383
- index = magnitudes[:, t].argmax()
384
- main_pitch_line[t] = pitches[index, t]
385
-
386
- notes_list = []
387
- i = 0
388
- while i < len(main_pitch_line):
389
- current_freq = main_pitch_line[i]
390
- current_midi = freq_to_midi(current_freq)
391
-
392
- j = i
393
- while j < len(main_pitch_line) and freq_to_midi(main_pitch_line[j]) == current_midi:
394
- j += 1
395
-
396
- duration_frames = j - i
397
- if current_midi != 0 and duration_frames >= 2:
398
- start_sec = librosa.frames_to_time(i, sr=sample_rate, hop_length=512)
399
- duration_sec = librosa.frames_to_time(duration_frames, sr=sample_rate, hop_length=512)
400
- notes_list.append((current_midi, start_sec, duration_sec))
401
-
402
- i = j
403
-
404
- full_stem_midi_path = os.path.join(loops_dir, f"{stem_name}_MELODY_{key_tag}_{bpm_int}BPM.mid")
405
- write_midi_file(notes_list, manual_bpm, full_stem_midi_path)
406
- output_files.append((full_stem_midi_path, "MIDI"))
407
-
408
- except Exception as e:
409
- print(f"MIDI generation failed for {stem_name}: {e}")
410
-
411
- # --- 7. CALCULATE TIMING & SLICING ---
412
- beats_per_bar = 4
413
- if time_signature == "3/4":
414
- beats_per_bar = 3
415
-
416
- if "Bar Loops" in loop_choice:
417
- bars = int(loop_choice.split(" ")[0])
418
- loop_type_tag = f"{bars}Bar"
419
- loop_duration_samples = int((60.0 / bpm_int * beats_per_bar * bars) * sample_rate)
420
-
421
- if loop_duration_samples > 0 and len(y) > loop_duration_samples:
422
- num_loops = len(y) // loop_duration_samples
423
-
424
- for i in range(min(num_loops, 10)): # Limit to 10 loops
425
- start_sample = i * loop_duration_samples
426
- end_sample = min(start_sample + loop_duration_samples, len(y))
427
- slice_data = y[start_sample:end_sample]
428
-
429
- filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_int}BPM.wav")
430
- sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
431
- output_files.append((filename, "WAV"))
432
-
433
- elif "One-Shots" in loop_choice:
434
- loop_type_tag = "OneShot"
435
- # Simple slicing at regular intervals for demo
436
- slice_length = int(sample_rate * 0.5) # 0.5 second slices
437
- num_slices = len(y) // slice_length
438
-
439
- for i in range(min(num_slices, 20)): # Limit to 20 slices
440
- start_sample = i * slice_length
441
- end_sample = min(start_sample + slice_length, len(y))
442
- slice_data = y[start_sample:end_sample]
443
-
444
- filename = os.path.join(loops_dir, f"{stem_name}_{loop_type_tag}_{i+1:03d}_{key_tag}_{bpm_int}BPM.wav")
445
- sf.write(filename, slice_data, sample_rate, subtype='PCM_16')
446
- output_files.append((filename, "WAV"))
447
-
448
- # --- 8. VISUALIZATION GENERATION ---
449
- img_path = generate_waveform_preview(y, sample_rate, stem_name, loops_dir)
450
-
451
- return output_files, img_path
452
-
453
- except Exception as e:
454
- raise gr.Error(f"Error processing stem: {str(e)}")
455
-
456
- def slice_all_and_zip(
457
- vocals: Tuple[int, np.ndarray],
458
- drums: Tuple[int, np.ndarray],
459
- bass: Tuple[int, np.ndarray],
460
- other: Tuple[int, np.ndarray],
461
- guitar: Tuple[int, np.ndarray],
462
- piano: Tuple[int, np.ndarray],
463
- loop_choice: str,
464
- sensitivity: float,
465
- manual_bpm: float,
466
- time_signature: str,
467
- crossfade_ms: int,
468
- transpose_semitones: int,
469
- detected_key: str,
470
- pan_depth: float,
471
- level_depth: float,
472
- modulation_rate: str,
473
- target_dbfs: float,
474
- attack_gain: float,
475
- sustain_gain: float,
476
- filter_type: str,
477
- filter_freq: float,
478
- filter_depth: float
479
- ) -> str:
480
- """Slices all available stems and packages them into a ZIP file."""
481
- try:
482
- stems_to_process = {
483
- "vocals": vocals, "drums": drums, "bass": bass,
484
- "other": other, "guitar": guitar, "piano": piano
485
- }
486
-
487
- # Filter out None stems
488
- valid_stems = {name: data for name, data in stems_to_process.items() if data is not None}
489
-
490
- if not valid_stems:
491
- raise gr.Error("No stems to process! Please separate stems first.")
492
-
493
- # Create temporary directory for all outputs
494
- temp_dir = tempfile.mkdtemp()
495
- zip_path = os.path.join(temp_dir, "Loop_Architect_Pack.zip")
496
-
497
- with zipfile.ZipFile(zip_path, 'w') as zf:
498
- for name, data in valid_stems.items():
499
- # Process stem
500
- sliced_files, _ = slice_stem_real(
501
- data, loop_choice, sensitivity, name,
502
- manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_key,
503
- pan_depth, level_depth, modulation_rate, target_dbfs,
504
- attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
505
- )
506
-
507
- # Add files to ZIP
508
- for file_path, file_type in sliced_files:
509
- arcname = os.path.join(file_type, os.path.basename(file_path))
510
- zf.write(file_path, arcname)
511
-
512
- return zip_path
513
-
514
- except Exception as e:
515
- raise gr.Error(f"Error creating ZIP: {str(e)}")
516
-
517
- # --- GRADIO INTERFACE ---
518
-
519
- with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red")) as demo:
520
- gr.Markdown("# 🎵 Loop Architect (Pro Edition)")
521
- gr.Markdown("Upload any song to separate it into stems, detect musical attributes, and then slice and tag the stems for instant use in a DAW.")
522
-
523
- # State variables
524
- detected_bpm_state = gr.State(value=120.0)
525
- detected_key_state = gr.State(value="Unknown Key")
526
- harmonic_recs_state = gr.State(value="---")
527
-
528
- with gr.Row():
529
- with gr.Column(scale=1):
530
- gr.Markdown("### 1. Separate Stems")
531
- audio_input = gr.Audio(type="filepath", label="Upload a Track")
532
- separate_btn = gr.Button("Separate & Analyze Stems", variant="primary")
533
-
534
- # Outputs for separated stems
535
- vocals_output = gr.Audio(label="Vocals", visible=False)
536
- drums_output = gr.Audio(label="Drums", visible=False)
537
- bass_output = gr.Audio(label="Bass", visible=False)
538
- other_output = gr.Audio(label="Other / Instrumental", visible=False)
539
- guitar_output = gr.Audio(label="Guitar", visible=False)
540
- piano_output = gr.Audio(label="Piano", visible=False)
541
-
542
- # Analysis results
543
- with gr.Group():
544
- gr.Markdown("### 2. Analysis & Transform")
545
- detected_bpm_key = gr.Textbox(label="Detected Tempo & Key", value="", interactive=False)
546
- harmonic_recs = gr.Textbox(label="Harmonic Mixing Recommendations", value="", interactive=False)
547
-
548
- transpose_slider = gr.Slider(
549
- minimum=-12, maximum=12, value=0, step=1,
550
- label="Transpose Loops (Semitones)",
551
- info="Shift the pitch of all slices by +/- 1 octave."
552
- )
553
-
554
- # Transient Shaping
555
- gr.Markdown("### Transient Shaping (Drums Only)")
556
- with gr.Group():
557
- attack_gain_slider = gr.Slider(
558
- minimum=0.5, maximum=1.5, value=1.0, step=0.1,
559
- label="Attack Gain Multiplier",
560
- info="Increase (>1.0) for punchier transients."
561
- )
562
- sustain_gain_slider = gr.Slider(
563
- minimum=0.5, maximum=1.5, value=1.0, step=0.1,
564
- label="Sustain Gain Multiplier",
565
- info="Increase (>1.0) for longer tails/reverb."
566
- )
567
-
568
- # Modulation
569
- gr.Markdown("### Pan/Level Modulation (LFO 1.0)")
570
- with gr.Group():
571
- modulation_rate_radio = gr.Radio(
572
- ['1/2', '1/4', '1/8', '1/16'],
573
- label="Modulation Rate (Tempo Synced)",
574
- value='1/4'
575
- )
576
- pan_depth_slider = gr.Slider(
577
- minimum=0, maximum=100, value=0, step=5,
578
- label="Pan Modulation Depth (%)",
579
- info="Creates a stereo auto-pan effect."
580
- )
581
- level_depth_slider = gr.Slider(
582
- minimum=0, maximum=100, value=0, step=5,
583
- label="Level Modulation Depth (%)",
584
- info="Creates a tempo-synced tremolo (volume pulse)."
585
- )
586
-
587
- # Filter Modulation
588
- gr.Markdown("### Filter Modulation (LFO 2.0)")
589
- with gr.Group():
590
- filter_type_radio = gr.Radio(
591
- ['low', 'high'],
592
- label="Filter Type",
593
- value='low'
594
- )
595
- with gr.Row():
596
- filter_freq_slider = gr.Slider(
597
- minimum=20, maximum=10000, value=2000, step=10,
598
- label="Base Cutoff Frequency (Hz)",
599
- )
600
- filter_depth_slider = gr.Slider(
601
- minimum=0, maximum=5000, value=0, step=10,
602
- label="Modulation Depth (Hz)",
603
- info="0 = Static filter at Base Cutoff."
604
- )
605
-
606
- # Slicing Options
607
- gr.Markdown("### 3. Slicing Options")
608
- with gr.Group():
609
- lufs_target_slider = gr.Slider(
610
- minimum=-18.0, maximum=-0.1, value=-3.0, step=0.1,
611
- label="Target Peak Level (dBFS)",
612
- info="Normalizes all exported loops to this peak volume."
613
- )
614
-
615
- loop_options_radio = gr.Radio(
616
- ["One-Shots", "4 Bar Loops", "8 Bar Loops"],
617
- label="Slice Type",
618
- value="One-Shots",
619
- info="Bar Loops include automatic MIDI generation for melodic stems."
620
- )
621
-
622
- with gr.Row():
623
- bpm_input = gr.Number(
624
- label="Manual BPM",
625
- value=120,
626
- minimum=40,
627
- maximum=300
628
- )
629
- time_sig_radio = gr.Radio(
630
- ["4/4", "3/4"],
631
- label="Time Signature",
632
- value="4/4"
633
- )
634
-
635
- sensitivity_slider = gr.Slider(
636
- minimum=0.01, maximum=0.5, value=0.05, step=0.01,
637
- label="One-Shot Sensitivity",
638
- info="Lower values = more slices."
639
- )
640
-
641
- crossfade_ms_slider = gr.Slider(
642
- minimum=0, maximum=30, value=10, step=1,
643
- label="One-Shot Crossfade (ms)",
644
- info="Prevents clicks/pops on transient slices."
645
- )
646
-
647
- # Create Pack
648
- gr.Markdown("### 4. Create Pack")
649
- slice_all_btn = gr.Button("Slice, Transform & Tag ALL Stems (Create ZIP)", variant="stop")
650
- download_zip_file = gr.File(label="Download Your Loop Pack", visible=False)
651
-
652
- with gr.Column(scale=2):
653
- gr.Markdown("### Separated Stems")
654
- with gr.Row():
655
- with gr.Column():
656
- vocals_output # Place component in layout
657
- slice_vocals_btn = gr.Button("Slice Vocals")
658
- with gr.Column():
659
- drums_output # Place component in layout
660
- slice_drums_btn = gr.Button("Slice Drums")
661
- with gr.Row():
662
- with gr.Column():
663
- bass_output # Place component in layout
664
- slice_bass_btn = gr.Button("Slice Bass")
665
- with gr.Column():
666
- other_output # Place component in layout
667
- slice_other_btn = gr.Button("Slice Other")
668
- with gr.Row():
669
- with gr.Column():
670
- guitar_output # Place component in layout
671
- slice_guitar_btn = gr.Button("Slice Guitar")
672
- with gr.Column():
673
- piano_output # Place component in layout
674
- slice_piano_btn = gr.Button("Slice Piano")
675
-
676
- # Gallery for previews
677
- gr.Markdown("### Sliced Loops Preview")
678
- loop_gallery = gr.Gallery(
679
- label="Generated Loops",
680
- columns=4,
681
- object_fit="contain",
682
- height="auto"
683
- )
684
-
685
- # Status textboxes for individual slicing
686
- status_textbox = gr.Textbox(label="Status", visible=True)
687
-
688
- # --- EVENT HANDLERS ---
689
-
690
- # Stem separation
691
- separate_btn.click(
692
- fn=separate_stems,
693
- inputs=[audio_input],
694
- outputs=[
695
- vocals_output, drums_output, bass_output, other_output,
696
- guitar_output, piano_output,
697
- detected_bpm_state, detected_key_state
698
- ]
699
- ).then(
700
- fn=lambda bpm, key: (f"{bpm} BPM, {key}", get_harmonic_recommendations(key)),
701
- inputs=[detected_bpm_state, detected_key_state],
702
- outputs=[detected_bpm_key, harmonic_recs_state]
703
- ).then(
704
- fn=lambda bpm, key: gr.update(value=f"{bpm} BPM, {key}"),
705
- inputs=[detected_bpm_state, detected_key_state],
706
- outputs=[detected_bpm_key]
707
- ).then(
708
- fn=get_harmonic_recommendations,
709
- inputs=[detected_key_state],
710
- outputs=[harmonic_recs]
711
- )
712
-
713
- # Individual stem slicing
714
- def slice_and_display(stem_data, loop_choice, sensitivity, stem_name, manual_bpm, time_signature,
715
- crossfade_ms, transpose_semitones, detected_key, pan_depth, level_depth,
716
- modulation_rate, target_dbfs, attack_gain, sustain_gain, filter_type,
717
- filter_freq, filter_depth):
718
- if stem_data is None:
719
- return [], "No stem data available"
720
-
721
- try:
722
- files, img_path = slice_stem_real(
723
- stem_data, loop_choice, sensitivity, stem_name,
724
- manual_bpm, time_signature, crossfade_ms, transpose_semitones, detected_key,
725
- pan_depth, level_depth, modulation_rate, target_dbfs,
726
- attack_gain, sustain_gain, filter_type, filter_freq, filter_depth
727
- )
728
-
729
- # Return only WAV files for gallery display
730
- wav_files = [f[0] for f in files if f[1] == "WAV"]
731
- return wav_files + [img_path], f"Generated {len(wav_files)} slices for {stem_name}"
732
- except Exception as e:
733
- return [], f"Error: {str(e)}"
734
-
735
- slice_vocals_btn.click(
736
- fn=slice_and_display,
737
- inputs=[
738
- vocals_output, loop_options_radio, sensitivity_slider, gr.Textbox(value="vocals", visible=False),
739
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_key_state,
740
- pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
741
- attack_gain_slider, sustain_gain_slider, filter_type_radio, filter_freq_slider, filter_depth_slider
742
- ],
743
- outputs=[loop_gallery, status_textbox]
744
- )
745
-
746
- slice_drums_btn.click(
747
- fn=slice_and_display,
748
- inputs=[
749
- drums_output, loop_options_radio, sensitivity_slider, gr.Textbox(value="drums", visible=False),
750
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_key_state,
751
- pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
752
- attack_gain_slider, sustain_gain_slider, filter_type_radio, filter_freq_slider, filter_depth_slider
753
- ],
754
- outputs=[loop_gallery, status_textbox]
755
- )
756
-
757
- slice_bass_btn.click(
758
- fn=slice_and_display,
759
- inputs=[
760
- bass_output, loop_options_radio, sensitivity_slider, gr.Textbox(value="bass", visible=False),
761
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_key_state,
762
- pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
763
- attack_gain_slider, sustain_gain_slider, filter_type_radio, filter_freq_slider, filter_depth_slider
764
- ],
765
- outputs=[loop_gallery, status_textbox]
766
- )
767
-
768
- slice_other_btn.click(
769
- fn=slice_and_display,
770
- inputs=[
771
- other_output, loop_options_radio, sensitivity_slider, gr.Textbox(value="other", visible=False),
772
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_key_state,
773
- pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
774
- attack_gain_slider, sustain_gain_slider, filter_type_radio, filter_freq_slider, filter_depth_slider
775
- ],
776
- outputs=[loop_gallery, status_textbox]
777
- )
778
-
779
- slice_guitar_btn.click(
780
- fn=slice_and_display,
781
- inputs=[
782
- guitar_output, loop_options_radio, sensitivity_slider, gr.Textbox(value="guitar", visible=False),
783
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_key_state,
784
- pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
785
- attack_gain_slider, sustain_gain_slider, filter_type_radio, filter_freq_slider, filter_depth_slider
786
- ],
787
- outputs=[loop_gallery, status_textbox]
788
- )
789
-
790
- slice_piano_btn.click(
791
- fn=slice_and_display,
792
- inputs=[
793
- piano_output, loop_options_radio, sensitivity_slider, gr.Textbox(value="piano", visible=False),
794
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_key_state,
795
- pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
796
- attack_gain_slider, sustain_gain_slider, filter_type_radio, filter_freq_slider, filter_depth_slider
797
- ],
798
- outputs=[loop_gallery, status_textbox]
799
- )
800
-
801
- # Slice all stems and create ZIP
802
- slice_all_btn.click(
803
- fn=slice_all_and_zip,
804
- inputs=[
805
- vocals_output, drums_output, bass_output, other_output, guitar_output, piano_output,
806
- loop_options_radio, sensitivity_slider,
807
- bpm_input, time_sig_radio, crossfade_ms_slider, transpose_slider, detected_key_state,
808
- pan_depth_slider, level_depth_slider, modulation_rate_radio, lufs_target_slider,
809
- attack_gain_slider, sustain_gain_slider,
810
- filter_type_radio, filter_freq_slider, filter_depth_slider
811
- ],
812
- outputs=[download_zip_file]
813
- ).then(
814
- fn=lambda: gr.update(visible=True),
815
- inputs=None,
816
- outputs=[download_zip_file]
817
- )
818
-
819
- # Launch the app
820
- if __name__ == "__main__":
821
- demo.launch()
 
 
1
  import gradio as gr
2
  import numpy as np
3
  import librosa
4
+ import librosa.display
5
  import soundfile as sf
6
  import os
7
  import tempfile
 
10
  import matplotlib
11
  import matplotlib.pyplot as plt
12
  from scipy import signal
13
+ from typing import Tuple, List, Any, Optional, Dict
14
  import shutil
15
 
16
  # Use a non-interactive backend for Matplotlib
17
  matplotlib.use('Agg')
18
 
19
# --- CONSTANTS & DICTIONARIES ---

# Camelot wheel mapping: musical key -> Camelot code used for harmonic
# mixing. Numbers 1-12 wrap around the wheel; 'B' codes are major keys
# (outer ring) and 'A' codes are minor keys (inner ring).
KEY_TO_CAMELOT = {
    "C Maj": "8B", "G Maj": "9B", "D Maj": "10B", "A Maj": "11B", "E Maj": "12B",
    "B Maj": "1B", "F# Maj": "2B", "Db Maj": "3B", "Ab Maj": "4B", "Eb Maj": "5B",
    "Bb Maj": "6B", "F Maj": "7B",
    "A Min": "8A", "E Min": "9A", "B Min": "10A", "F# Min": "11A", "C# Min": "12A",
    "G# Min": "1A", "D# Min": "2A", "Bb Min": "3A", "F Min": "4A", "C Min": "5A",
    "G Min": "6A", "D Min": "7A",
    # Enharmonic equivalents.
    # BUG FIX: Cb Maj is enharmonically B Maj, i.e. 1B (it was wrongly 7B).
    "Gb Maj": "2B", "Cb Maj": "1B", "A# Min": "3A", "D# Maj": "5B", "G# Maj": "4B"
}

# Explicit reverse mapping (mechanically inverting KEY_TO_CAMELOT would be
# lossy because several enharmonic keys share one code). Alternate
# spellings are joined with " / " for display.
CAMELOT_TO_KEY = {
    "8B": "C Maj", "9B": "G Maj", "10B": "D Maj", "11B": "A Maj", "12B": "E Maj",
    "1B": "B Maj / Cb Maj", "2B": "F# Maj / Gb Maj", "3B": "Db Maj",
    "4B": "Ab Maj / G# Maj", "5B": "Eb Maj / D# Maj",
    "6B": "Bb Maj", "7B": "F Maj",
    "8A": "A Min", "9A": "E Min", "10A": "B Min", "11A": "F# Min", "12A": "C# Min",
    "1A": "G# Min", "2A": "D# Min", "3A": "Bb Min / A# Min", "4A": "F Min", "5A": "C Min",
    "6A": "G Min", "7A": "D Min"
}

# Stem slot names, in the order the UI components expect them.
STEM_NAMES = ["vocals", "drums", "bass", "other", "guitar", "piano"]
+
44
  # --- UTILITY FUNCTIONS ---
45
 
46
def freq_to_midi(freq: float) -> int:
    """Converts a frequency in Hz to a MIDI note number.

    Frequencies at or below 0 Hz, or under the ~32 Hz floor (roughly C1),
    map to 0 instead of a note number.
    """
    # Single guard covers both non-positive and sub-floor frequencies.
    if freq < 32.0:
        return 0
    # Standard equal-temperament formula anchored at A4 = 440 Hz = MIDI 69.
    return int(round(69 + 12 * np.log2(freq / 440.0)))
54
 
55
  def write_midi_file(notes_list: List[Tuple[int, float, float]], bpm: float, output_path: str):
56
+ """
57
+ Writes a basic MIDI file from a list of notes.
58
+ Note: This is a simplified MIDI writer and may have issues.
59
+ Using a dedicated library like 'mido' is recommended for robust use.
60
+ """
61
  if not notes_list:
62
  return
63
 
 
71
  current_tick = 0
72
  midi_events = []
73
 
74
+ # --- MIDI Track Header ---
75
+ # Set Tempo: FF 51 03 TTTTTT (TTTTTT = tempo_us_per_beat)
76
+ tempo_bytes = tempo_us_per_beat.to_bytes(3, 'big')
77
+ track_data = b'\x00\xFF\x51\x03' + tempo_bytes
78
+
79
+ # Set Time Signature: FF 58 04 NN DD CC BB (Using 4/4)
80
+ track_data += b'\x00\xFF\x58\x04\x04\x02\x18\x08'
81
+
82
+ # Set Track Name
83
+ track_data += b'\x00\xFF\x03\x0BLoopArchitect' # 11 chars
84
+
85
  for note, start_sec, duration_sec in notes_list:
86
  if note == 0:
87
  continue
88
 
89
  # Calculate delta time from last event
90
+ target_tick = int(round(start_sec / seconds_per_tick))
91
  delta_tick = target_tick - current_tick
92
  current_tick = target_tick
93
 
94
  # Note On event (Channel 1, Velocity 100)
95
  note_on = [0x90, note, 100]
96
+ track_data += encode_delta_time(delta_tick) + bytes(note_on)
97
 
98
  # Note Off event (Channel 1, Velocity 0)
99
+ duration_ticks = int(round(duration_sec / seconds_per_tick))
100
+ if duration_ticks == 0:
101
+ duration_ticks = 1 # Minimum duration
102
+
103
  note_off = [0x80, note, 0]
104
+ track_data += encode_delta_time(duration_ticks) + bytes(note_off)
105
  current_tick += duration_ticks
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  # End of track
108
  track_data += b'\x00\xFF\x2F\x00'
109
 
110
+ # --- MIDI File Header ---
111
+ # MThd, header_length (6), format (1), num_tracks (1), division
112
+ header = b'MThd' + (6).to_bytes(4, 'big') + (1).to_bytes(2, 'big') + (1).to_bytes(2, 'big') + division.to_bytes(2, 'big')
113
+
114
+ # MTrk, track_length, track_data
115
  track_chunk = b'MTrk' + len(track_data).to_bytes(4, 'big') + track_data
116
  midi_data = header + track_chunk
117
 
118
  with open(output_path, 'wb') as f:
119
  f.write(midi_data)
120
 
121
def encode_delta_time(ticks: int) -> bytes:
    """Encodes a tick count as a MIDI variable-length quantity (VLQ).

    Per the Standard MIDI File specification, the value is split into 7-bit
    groups emitted most-significant group first; every byte except the last
    has its high bit (0x80) set as a continuation flag.

    BUG FIX: the previous implementation emitted the groups in reversed
    order with the continuation bits on the wrong bytes (e.g. 128 encoded
    as 80 01, which decodes as 1; the spec requires 81 00), and it looped
    forever on negative input.

    Args:
        ticks: Number of ticks to encode. Negative values are clamped to 0,
            since MIDI delta times cannot be negative.

    Returns:
        The VLQ-encoded bytes (one byte per 7-bit group, at least one byte).
    """
    if ticks < 0:
        ticks = 0

    # Least-significant group first; its continuation bit stays clear.
    encoded = [ticks & 0x7F]
    ticks >>= 7

    # Prepend remaining groups with the continuation bit set, so the
    # most-significant group ends up first in the output.
    while ticks > 0:
        encoded.insert(0, (ticks & 0x7F) | 0x80)
        ticks >>= 7

    return bytes(encoded)
142
+
143
  def get_harmonic_recommendations(key_str: str) -> str:
144
  """Calculates harmonically compatible keys based on the Camelot wheel."""
 
 
 
 
 
 
 
 
 
 
145
  code = KEY_TO_CAMELOT.get(key_str, "N/A")
146
  if code == "N/A":
147
  return "N/A (Key not recognized or 'Unknown Key' detected.)"
 
152
  opposite_mode = 'B' if mode == 'A' else 'A'
153
  num_plus_one = (num % 12) + 1
154
  num_minus_one = 12 if num == 1 else num - 1
155
+
156
+ recs_codes = [
157
+ f"{num}{opposite_mode}", # e.g., 8A (A Min) -> 8B (C Maj)
158
+ f"{num_plus_one}{mode}", # e.g., 8A (A Min) -> 9A (E Min)
159
+ f"{num_minus_one}{mode}" # e.g., 8A (A Min) -> 7A (D Min)
160
+ ]
161
+
162
+ rec_keys = [f"{CAMELOT_TO_KEY.get(r_code, f'Code {r_code}')} ({r_code})" for r_code in recs_codes]
163
  return " | ".join(rec_keys)
164
+ except Exception as e:
165
+ print(f"Error calculating recommendations: {e}")
166
  return "N/A (Error calculating recommendations.)"
167
 
168
  def detect_key(y: np.ndarray, sr: int) -> str:
 
170
  try:
171
  chroma = librosa.feature.chroma_stft(y=y, sr=sr)
172
  chroma_sums = np.sum(chroma, axis=1)
173
+
174
+ # Avoid division by zero if audio is silent
175
+ if np.sum(chroma_sums) == 0:
176
+ return "Unknown Key"
177
+
178
  chroma_norm = chroma_sums / np.sum(chroma_sums)
179
 
180
+ # Krumhansl-Schmuckler key-finding algorithm templates
181
  major_template = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
182
  minor_template = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
183
+
184
+ # Normalize templates
185
+ major_template /= np.sum(major_template)
186
+ minor_template /= np.sum(minor_template)
187
 
188
  pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
189
 
 
203
 
204
  def apply_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, pan_depth: float, level_depth: float) -> np.ndarray:
205
  """Applies tempo-synced LFOs for panning and volume modulation."""
206
+ if y.ndim == 0:
 
 
207
  return y
208
+ if y.ndim == 1:
209
+ y = np.stack((y, y), axis=-1) # Convert to stereo
210
 
211
  N = len(y)
212
  duration_sec = N / sr
213
 
214
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
215
  beats_per_measure = rate_map.get(rate, 1)
216
+ # LFO frequency = (BPM / 60) * (beats_per_measure / 4.0) -- seems off.
217
+ # Let's redefine: LFO freq in Hz = (BPM / 60) * (1 / (4 / beats_per_measure))
218
+ # e.g., 1/4 rate at 120BPM = 2Hz. (120/60) * (1 / (4/1)) = 2 * (1/4) = 0.5Hz? No.
219
+ # 120 BPM = 2 beats/sec. 1/4 note = 1 beat. So LFO should be 2 Hz.
220
+ # 1/8 note = 4 Hz.
221
+ # 1/16 note = 8 Hz.
222
+ # 1/2 note = 1 Hz.
223
+ # Formula: (BPM / 60) * (rate_map_value / 4)
224
+ # 1/4 note: (120/60) * (1/4) = 0.5 Hz. Still wrong.
225
+ # Let's try: (BPM / 60) * (rate_map_value)
226
+ # 1/4 note @ 120BPM: (120/60) * 1 = 2 Hz. Correct.
227
+ # 1/8 note @ 120BPM: (120/60) * 2 = 4 Hz. Correct.
228
+ # 1/2 note @ 120BPM: (120/60) * 0.5 = 1 Hz. Correct.
229
+ lfo_freq_hz = (bpm / 60.0) * rate_map.get(rate, 1)
230
 
231
  t = np.linspace(0, duration_sec, N, endpoint=False)
232
 
233
+ # Panning LFO (Sine wave, -1 to 1)
234
  if pan_depth > 0:
235
  pan_lfo = np.sin(2 * np.pi * lfo_freq_hz * t) * pan_depth
236
+ # L_mod/R_mod should be 0-1. (1-pan_lfo)/2 and (1+pan_lfo)/2 gives 0-1 range.
237
  L_mod = (1 - pan_lfo) / 2.0
238
  R_mod = (1 + pan_lfo) / 2.0
239
+ # This is amplitude panning, not constant power. Good enough.
240
  y[:, 0] *= L_mod
241
  y[:, 1] *= R_mod
242
 
243
+ # Level LFO (Tremolo) (Sine wave, 0 to 1)
244
  if level_depth > 0:
245
  level_lfo = (np.sin(2 * np.pi * lfo_freq_hz * t) + 1) / 2.0
246
+ # gain_multiplier ranges from (1-level_depth) to 1
247
  gain_multiplier = (1 - level_depth) + (level_depth * level_lfo)
248
  y[:, 0] *= gain_multiplier
249
  y[:, 1] *= gain_multiplier
 
253
def apply_normalization_dbfs(y: np.ndarray, target_dbfs: float) -> np.ndarray:
    """Peak-normalizes audio so its loudest sample matches a target dBFS.

    Args:
        y: Audio samples (mono or multi-channel), float, nominally in [-1, 1].
        target_dbfs: Target peak level in dBFS; must be negative to apply.

    Returns:
        The gain-adjusted (and clipped) signal, or ``y`` unchanged when the
        target is >= 0 dBFS or the input is empty or effectively silent.
    """
    if target_dbfs >= 0:
        return y  # Don't normalize to 0 dBFS or higher.

    # BUG FIX: np.max on an empty array raises ValueError; bail out early.
    if y.size == 0:
        return y

    current_peak_amp = np.max(np.abs(y))
    if current_peak_amp < 1e-9:  # Avoid division by zero on silence.
        return y

    target_peak_amp = 10 ** (target_dbfs / 20.0)
    gain = target_peak_amp / current_peak_amp

    # Clip to guard against floating-point overshoot past full scale.
    return np.clip(y * gain, -1.0, 1.0)
 
270
 
271
  def apply_filter_modulation(y: np.ndarray, sr: int, bpm: float, rate: str, filter_type: str, freq: float, depth: float) -> np.ndarray:
272
  """Applies a tempo-synced LFO to a 2nd order Butterworth filter cutoff frequency."""
273
+ if depth == 0 or filter_type == "None":
274
  return y
275
 
276
  # Ensure stereo for LFO application
277
  if y.ndim == 1:
278
  y = np.stack((y, y), axis=-1)
279
+ if y.ndim == 0:
280
+ return y
281
 
282
  N = len(y)
283
  duration_sec = N / sr
284
 
285
  # LFO Rate Calculation
286
  rate_map = {'1/2': 0.5, '1/4': 1, '1/8': 2, '1/16': 4}
287
+ lfo_freq_hz = (bpm / 60.0) * rate_map.get(rate, 1)
 
288
 
289
  t = np.linspace(0, duration_sec, N, endpoint=False)
290
 
 
294
  # Modulate Cutoff Frequency: Cutoff = BaseFreq + (LFO * Depth)
295
  cutoff_modulation = freq + (lfo_value * depth)
296
  # Safety clip to prevent instability
297
+ nyquist = sr / 2.0
298
+ cutoff_modulation = np.clip(cutoff_modulation, 20.0, nyquist - 100.0) # Keep away from Nyquist
299
 
300
  y_out = np.zeros_like(y)
301
+
302
+ # --- BUG FIX ---
303
+ # Was: filter_type.lower().replace('-pass', '') -> 'low' (ValueError)
304
+ # Now: filter_type.lower().replace('-pass', 'pass') -> 'lowpass' (Correct)
305
+ filter_type_b = filter_type.lower().replace('-pass', 'pass')
306
+
307
  frame_size = 512 # Frame-based update for filter coefficients
308
+ if N < frame_size:
309
+ frame_size = N # Handle very short audio
310
 
311
  # Apply filter channel by channel
312
  for channel in range(y.shape[1]):
313
+ zi = signal.lfilter_zi(*signal.butter(2, 20.0, btype=filter_type_b, fs=sr))
314
 
315
  for frame_start in range(0, N, frame_size):
316
  frame_end = min(frame_start + frame_size, N)
317
+ if frame_start == frame_end: continue # Skip empty frames
318
+
319
  frame = y[frame_start:frame_end, channel]
320
 
321
  # Use the average LFO cutoff for the frame
322
  avg_cutoff = np.mean(cutoff_modulation[frame_start:frame_end])
323
 
324
  # Calculate 2nd order Butterworth filter coefficients
325
+ try:
326
+ b, a = signal.butter(2, avg_cutoff, btype=filter_type_b, fs=sr)
327
+ except ValueError as e:
328
+ print(f"Butterworth filter error: {e}. Using last good coefficients.")
329
+ # This can happen if avg_cutoff is bad, though we clip it.
330
+ # If it still fails, we just re-use the last good b, a.
331
+ # In the first frame, this is not robust.
332
+ if 'b' not in locals():
333
+ b, a = signal.butter(2, 20.0, btype=filter_type_b, fs=sr) # Failsafe
334
 
335
  # Apply filter to the frame, updating the state `zi`
336
  filtered_frame, zi = signal.lfilter(b, a, frame, zi=zi)
 
338
 
339
  return y_out
340
 
341
def apply_crossfade(y: np.ndarray, fade_samples: int) -> np.ndarray:
    """Returns a copy of the clip with linear fade-in and fade-out ramps.

    The fade length is capped at half the clip so the two ramps never
    overlap; clips too short to fade (or a zero fade) come back untouched.
    """
    if fade_samples == 0:
        return y

    total = len(y)
    ramp_len = min(fade_samples, total // 2)  # Fade at most half the clip.
    if ramp_len == 0:
        return y  # Clip is too short to fade.

    ramp_in = np.linspace(0, 1, ramp_len)
    ramp_out = np.linspace(1, 0, ramp_len)

    shaped = y.copy()
    if shaped.ndim == 1:
        shaped[:ramp_len] *= ramp_in
        shaped[-ramp_len:] *= ramp_out
    else:
        # Broadcast the ramps across all channels.
        shaped[:ramp_len, :] *= ramp_in[:, np.newaxis]
        shaped[-ramp_len:, :] *= ramp_out[:, np.newaxis]

    return shaped
367
+
368
def apply_envelope(y: np.ndarray, sr: int, attack_gain_db: float, sustain_gain_db: float) -> np.ndarray:
    """Shapes a one-shot with a two-stage gain envelope.

    A fixed 10 ms linear ramp moves the gain from attack_gain_db to
    sustain_gain_db; the sustain gain then holds for the rest of the clip.
    """
    n_samples = len(y)
    if n_samples == 0:
        return y

    # Fixed 10 ms attack, capped at half the clip length.
    ramp_samples = min(int(0.01 * sr), n_samples // 2)

    gain_start = 10 ** (attack_gain_db / 20.0)
    gain_hold = 10 ** (sustain_gain_db / 20.0)

    # Hold the sustain gain everywhere, then overwrite the attack ramp.
    envelope = np.ones(n_samples) * gain_hold
    if ramp_samples > 0:
        envelope[:ramp_samples] = np.linspace(gain_start, gain_hold, ramp_samples)

    # Broadcast the envelope across channels for multi-channel input.
    if y.ndim == 1:
        return y * envelope
    return y * envelope[:, np.newaxis]
394
+
395
  # --- CORE PROCESSING FUNCTIONS ---
396
 
397
def separate_stems(audio_file_path: str) -> Tuple[
    Optional[Tuple[int, np.ndarray]],
    Optional[Tuple[int, np.ndarray]],
    Optional[Tuple[int, np.ndarray]],
    Optional[Tuple[int, np.ndarray]],
    Optional[Tuple[int, np.ndarray]],
    Optional[Tuple[int, np.ndarray]],
    float, str, str
]:
    """
    Simulates stem separation and detects BPM and Key.

    NOTE: This demo copies the full mix into every stem slot; a real
    implementation would call a separator such as Demucs or Spleeter.

    Args:
        audio_file_path: Path to the uploaded audio file.

    Returns:
        Six Gradio audio tuples (sample_rate, int16 samples) — one per stem
        in STEM_NAMES order — followed by the detected BPM, the detected
        key string, and the harmonic-mix recommendation string.

    Raises:
        gr.Error: If no file was provided or the audio cannot be processed.
    """
    if audio_file_path is None:
        raise gr.Error("No audio file uploaded!")

    try:
        # Load at native sample rate; mono=False keeps channels if present.
        # librosa returns shape (n,) for mono or (channels, n) otherwise.
        y_orig, sr_orig = librosa.load(audio_file_path, sr=None, mono=False)

        # BUG FIX: build the mono mix while channels are still the LEADING
        # axis — librosa.to_mono averages over leading axes, so calling it
        # after reshaping to (n, 2) would average over time and return a
        # 2-sample array instead of a mono signal.
        y_mono = librosa.to_mono(y_orig)

        # Normalize layout to (n, 2) for the downstream Gradio components.
        if y_orig.ndim == 1:
            y_orig = np.stack([y_orig, y_orig], axis=-1)
        elif y_orig.shape[0] < y_orig.shape[1]:
            y_orig = y_orig.T  # (channels, n) -> (n, channels)

        # Detect tempo. beat_track may return a scalar or an array
        # depending on the librosa version, so normalize with atleast_1d.
        tempo, _ = librosa.beat.beat_track(y=y_mono, sr=sr_orig)
        tempo_arr = np.atleast_1d(tempo) if tempo is not None else np.array([])
        if tempo_arr.size == 0 or float(tempo_arr[0]) <= 0:
            detected_bpm = 120.0  # Fallback when tempo is undetectable.
        else:
            detected_bpm = float(np.round(tempo_arr[0]))

        detected_key = detect_key(y_mono, sr_orig)
        harmonic_recs = get_harmonic_recommendations(detected_key)

        # Mock "separated" stems: each slot gets a copy of the full mix.
        # Clip before the int16 cast so out-of-range floats saturate
        # instead of wrapping around.
        y_int16 = (np.clip(y_orig, -1.0, 1.0) * 32767).astype(np.int16)
        stems_data: Dict[str, Optional[Tuple[int, np.ndarray]]] = {
            name: (sr_orig, y_int16.copy()) for name in STEM_NAMES
        }

        return (
            stems_data["vocals"], stems_data["drums"], stems_data["bass"], stems_data["other"],
            stems_data["guitar"], stems_data["piano"],
            detected_bpm, detected_key, harmonic_recs
        )
    except Exception as e:
        print(f"Error processing audio: {e}")
        import traceback
        traceback.print_exc()
        raise gr.Error(f"Error processing audio: {str(e)}")
455
 
456
  def generate_waveform_preview(y: np.ndarray, sr: int, stem_name: str, temp_dir: str) -> str:
 
458
  img_path = os.path.join(temp_dir, f"{stem_name}_preview.png")
459
 
460
  plt.figure(figsize=(10, 3))
461
+ y_display = librosa.to_mono(y.T) if y.ndim > 1 and y.shape[0] < y.shape[1] else y
462
+ y_display = librosa.to_mono(y) if y.ndim > 1 else y
463
+
464
  librosa.display.waveshow(y_display, sr=sr, x_axis='time', color="#4a7098")
465
+ plt.title(f"{stem_name} Waveform (Processed)")
466
+ plt.ylabel("Amplitude")
467
  plt.tight_layout()
468
  plt.savefig(img_path)
469
  plt.close()
 
471
  return img_path
472
 
473
  def slice_stem_real(
474
+ stem_audio_tuple: Optional[Tuple[int, np.ndarray]],
475
  loop_choice: str,
476
  sensitivity: float,
477
  stem_name: str,
 
489
  filter_type: str,
490
  filter_freq: float,
491
  filter_depth: float
492
+ ) -> Tuple[List[str], Optional[str]]:
493
+ """
494
+ Slices a single stem and applies transformations.
495
+ Returns a list of filepaths and a path to a preview image.
496
+ """
497
+ if stem_audio_tuple is None:
498
+ return [], None
499
 
500
  try:
501
+ sample_rate, y_int = stem_audio_tuple
502
+ # Convert from int16 array back to float
503
+ y = y_int.astype(np.float32) / 32767.0
504
+
505
+ if y.ndim == 0 or len(y) == 0:
506
+ return [], None
 
 
 
 
 
 
 
507
 
508
  # --- 1. PITCH SHIFTING (if enabled) ---
509
  if transpose_semitones != 0:
510
+ y = librosa.effects.pitch_shift(y, sr=sample_rate, n_steps=transpose_semitones)
 
511
 
512
  # --- 2. FILTER MODULATION ---
513
+ if filter_depth > 0 and filter_type != "None":
514
  y = apply_filter_modulation(y, sample_rate, manual_bpm, modulation_rate, filter_type, filter_freq, filter_depth)
515
 
516
  # --- 3. PAN/LEVEL MODULATION ---
517
  normalized_pan_depth = pan_depth / 100.0
518
  normalized_level_depth = level_depth / 100.0
 
519
  if normalized_pan_depth > 0 or normalized_level_depth > 0:
520
  y = apply_modulation(y, sample_rate, manual_bpm, modulation_rate, normalized_pan_depth, normalized_level_depth)
521
 
 
524
  y = apply_normalization_dbfs(y, target_dbfs)
525
 
526
  # --- 5. DETERMINE BPM & KEY ---
527
+ bpm_int = int(round(manual_bpm))
528
+ key_tag = "UnknownKey"
529
+ if detected_key != "Unknown Key":
530
+ key_tag = detected_key.replace(" ", "")
531
+ if transpose_semitones != 0:
532
+ root, mode = detected_key.split(" ")
533
+ pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
534
+ try:
535
+ current_index = pitch_classes.index(root)
536
+ new_index = (current_index + transpose_semitones) % 12
537
+ new_key_root = pitch_cla