SaltProphet commited on
Commit
b1477ac
·
verified ·
1 Parent(s): 55c17c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +291 -377
app.py CHANGED
@@ -1,385 +1,299 @@
1
  import gradio as gr
2
- import os
3
- import shutil
4
- import asyncio
5
  import librosa
6
- import librosa.display
7
- import soundfile as sf
8
  import numpy as np
9
- import time
 
10
  import zipfile
11
  import tempfile
12
- import matplotlib.pyplot as plt
13
- import matplotlib
14
- matplotlib.use('Agg')
15
-
16
temp_files = []  # temp WAV/ZIP paths handed to download buttons; purged on new upload


def cleanup_temp_files():
    """Delete every tracked temporary file and reset the registry.

    Removal is best-effort: a file that has already disappeared (or cannot be
    removed) is skipped instead of crashing the UI callback.  The previous
    exists-then-remove pair was racy (TOCTOU): the file could vanish between
    the `os.path.exists` check and the `os.remove` call.
    """
    global temp_files
    for file_path in temp_files:
        try:
            os.remove(file_path)
        except OSError:
            # Already gone or not removable -- cleanup is best-effort.
            pass
    temp_files = []
24
-
25
def update_output_visibility(choice):
    """Toggle the four stem players to match the selected separation mode.

    2-stem mode shows only Vocals plus the "Other" player relabelled as the
    instrumental; 4-stem mode shows all four players with their plain labels.
    Returns a component->update mapping for Gradio, or None for an
    unrecognised choice.
    """
    if "2 Stems" in choice:
        two_stem_mode = True
    elif "4 Stems" in choice:
        two_stem_mode = False
    else:
        return None

    middle_visible = not two_stem_mode
    other_label = "Instrumental (No Vocals)" if two_stem_mode else "Other"
    return {
        vocals_output: gr.update(visible=True),
        drums_output: gr.update(visible=middle_visible),
        bass_output: gr.update(visible=middle_visible),
        other_output: gr.update(visible=True, label=other_label),
    }
40
-
41
async def separate_stems(audio_file_path, stem_choice, progress=gr.Progress(track_tqdm=True)):
    """Run Demucs on the uploaded track and return stem paths plus bar times.

    Args:
        audio_file_path: Path of the uploaded file (gr.Audio, type="filepath").
        stem_choice: Radio selection; "2 Stems ..." switches Demucs to
            --two-stems=vocals, otherwise the default 4-stem split is used.
        progress: Gradio progress tracker.

    Returns:
        (vocals_path, drums_path, bass_path, other_path,
         vocals_bar_times, drums_bar_times, bass_bar_times, other_bar_times);
        paths are None for stems the chosen mode did not produce.

    Raises:
        gr.Error: on missing upload, Demucs failure, or any unexpected error.
    """
    if audio_file_path is None:
        raise gr.Error("No audio file uploaded!")
    progress(0, desc="Starting...")

    stable_input_path = None
    try:
        progress(0.05, desc="Preparing audio file...")
        # Copy to a predictable local name so the Demucs output folder is easy
        # to locate.  NOTE(review): the copy keeps the original bytes; the
        # ".wav" suffix is only a name -- confirm Demucs sniffs the real format.
        original_filename_base = os.path.basename(audio_file_path).rsplit('.', 1)[0]
        stable_input_path = f"stable_input_{original_filename_base}.wav"
        shutil.copy(audio_file_path, stable_input_path)

        output_dir = "separated"
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)

        # Build an argv list and run WITHOUT a shell: filenames containing
        # quotes, spaces, or shell metacharacters can no longer break the
        # command or inject into it (the old f-string shell command could).
        args = ["python3", "-m", "demucs"]
        if "2 Stems" in stem_choice:
            args.append("--two-stems=vocals")
        args += ["-o", output_dir, stable_input_path]

        progress(0.2, desc="Running Demucs (this can take a minute)...")
        process = await asyncio.create_subprocess_exec(
            *args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE)
        stdout, stderr = await process.communicate()

        if process.returncode != 0:
            raise gr.Error(f"Demucs failed to run. Error: {stderr.decode()[:500]}")

        progress(0.8, desc="Locating separated stem files...")
        stable_filename_base = os.path.basename(stable_input_path).rsplit('.', 1)[0]
        # Demucs layout: separated/<model_name>/<track_name>/<stem>.wav
        model_folder_name = next(os.walk(output_dir))[1][0]
        stems_path = os.path.join(output_dir, model_folder_name, stable_filename_base)

        if not os.path.exists(stems_path):
            raise gr.Error("Demucs finished, but the output directory was not found!")

        def _existing(filename):
            # Return the stem's path when Demucs produced it, else None.
            path = os.path.join(stems_path, filename)
            return path if os.path.exists(path) else None

        vocals_path = _existing("vocals.wav")
        drums_path = _existing("drums.wav")
        bass_path = _existing("bass.wav")
        # Two-stem mode names the instrumental "no_vocals.wav".
        other_path = _existing("no_vocals.wav" if "2 Stems" in stem_choice else "other.wav")

        def _bar_times(path):
            # BUG FIX: sf.read returns (samples, sample_rate) but detect_bars
            # expects the gr.Audio layout (sample_rate, samples) -- the old
            # code passed the raw sf.read tuple straight through, swapping the
            # two.  dtype='int16' matches the integer buffers detect_bars
            # converts with librosa.util.buf_to_float.
            if path is None:
                return None
            samples, sample_rate = sf.read(path, dtype='int16')
            _, _, bar_times = detect_bars((sample_rate, samples))
            return bar_times

        progress(0.9, desc="Detecting bars in each stem...")
        vocals_bar_times = _bar_times(vocals_path)
        drums_bar_times = _bar_times(drums_path)
        bass_bar_times = _bar_times(bass_path)
        other_bar_times = _bar_times(other_path)

        return (vocals_path, drums_path, bass_path, other_path,
                vocals_bar_times, drums_bar_times, bass_bar_times, other_bar_times)

    except Exception as e:
        print(f"An error occurred: {e}")
        raise gr.Error(str(e))
    finally:
        # Always remove the temporary copy -- the old code leaked it whenever
        # Demucs failed, because the os.remove only ran on the success path.
        if stable_input_path and os.path.exists(stable_input_path):
            os.remove(stable_input_path)
108
-
109
def visualize_slices(stem_audio_data, sensitivity, progress=gr.Progress(track_tqdm=True)):
    """Detect onsets in a stem and render its waveform with slice markers.

    Returns (matplotlib Figure, onset times in seconds, the untouched input
    tuple) so the plot, the onset state and the active-stem state all update
    from one call.  Returns (None, None, None) when the stem is empty.
    """
    if stem_audio_data is None:
        gr.Warning("This stem is empty. Cannot visualize.")
        return None, None, None

    sample_rate, raw_samples = stem_audio_data
    samples = librosa.util.buf_to_float(raw_samples)
    mono = librosa.to_mono(samples.T) if samples.ndim > 1 else samples

    progress(0.3, desc="Finding transients...")
    # Higher `sensitivity` (delta) means a louder transient is required.
    detected_frames = librosa.onset.onset_detect(
        y=mono, sr=sample_rate,
        wait=1, pre_avg=1, post_avg=1, post_max=1, delta=sensitivity)
    onset_times = librosa.frames_to_time(detected_frames, sr=sample_rate)

    progress(0.7, desc="Generating waveform plot...")
    fig, ax = plt.subplots(figsize=(10, 3))
    fig.patch.set_facecolor('#1f2937')
    ax.set_facecolor('#111827')
    librosa.display.waveshow(samples, sr=sample_rate, ax=ax, color='#32f6ff', alpha=0.7)
    for marker in onset_times:
        ax.axvline(x=marker, color='#ff3b3b', linestyle='--', linewidth=1)
    ax.tick_params(colors='gray')
    ax.xaxis.label.set_color('gray')
    ax.yaxis.label.set_color('gray')
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Amplitude")
    ax.set_title("Detected Slices", color='white')
    plt.tight_layout()

    progress(1, desc="Done!")
    return fig, onset_times, stem_audio_data
134
-
135
def preview_slice(active_stem_audio, onset_times, evt: gr.SelectData):
    """Map a click on the waveform image to the slice between adjacent onsets.

    Returns a (sample_rate, samples) tuple for the preview player, or None
    when there is no active stem / onset data yet.
    """
    if active_stem_audio is None or onset_times is None:
        return None

    sample_rate, samples = active_stem_audio
    total_seconds = len(samples) / sample_rate

    # Translate the click's x pixel into seconds: evt.index[0] is the click's
    # x coordinate, evt.target[0] the plot width in pixels.
    # NOTE(review): relies on gr.SelectData exposing `target` as a width --
    # confirm against the installed Gradio version.
    clicked_time = evt.index[0] * total_seconds / evt.target[0]

    earlier = onset_times[onset_times <= clicked_time]
    later = onset_times[onset_times > clicked_time]

    # The slice runs from the nearest onset at/before the click to the next
    # onset after it, defaulting to the clip boundaries.
    start_time = earlier[-1] if len(earlier) > 0 else 0
    end_time = later[0] if len(later) > 0 else total_seconds

    start_sample = librosa.time_to_samples(start_time, sr=sample_rate)
    end_sample = librosa.time_to_samples(end_time, sr=sample_rate)

    if start_sample >= end_sample:
        # Degenerate region (click at/after the final onset): preview from the
        # last onset to the end, or the whole clip when nothing was detected.
        if len(onset_times) > 0:
            start_sample = librosa.time_to_samples(onset_times[-1], sr=sample_rate)
        else:
            start_sample = 0
        end_sample = len(samples)

    return (sample_rate, samples[start_sample:end_sample])
179
-
180
def download_slice(sliced_audio_data):
    """Persist the previewed slice as a temp WAV and return its path.

    The path feeds the DownloadButton; the file is registered in `temp_files`
    so the next upload's cleanup removes it.  Returns None with a warning when
    no preview exists yet.
    """
    if sliced_audio_data is None:
        gr.Warning("No slice preview available to download.")
        return None

    global temp_files
    sample_rate, samples = sliced_audio_data
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False, prefix="slice_") as tmp_file:
        sf.write(tmp_file.name, samples, sample_rate)
        temp_files.append(tmp_file.name)
        return tmp_file.name
192
-
193
def detect_bars(stem_audio_data):
    """Estimate tempo, beat times, and bar-start times for one stem.

    Accepts the gr.Audio tuple layout (sample_rate, integer samples) and
    returns (tempo, beat_times, bar_times), or (None, None, None) for an
    empty stem.  Bars are taken as every 4th beat.
    NOTE(review): assumes 4/4 time and that the first detected beat is a
    downbeat -- confirm for the intended material.
    """
    if stem_audio_data is None:
        return None, None, None

    sample_rate, raw_samples = stem_audio_data
    samples = librosa.util.buf_to_float(raw_samples)
    mono = librosa.to_mono(samples.T) if samples.ndim > 1 else samples

    tempo, beat_frames = librosa.beat.beat_track(y=mono, sr=sample_rate)
    beat_times = librosa.frames_to_time(beat_frames, sr=sample_rate)

    return tempo, beat_times, beat_times[::4]
211
-
212
def create_loop(stem_audio_data, bar_times, loop_length):
    """Cut an N-bar mono loop starting at the first detected bar.

    BUG FIX: this callback is wired to TWO outputs (the hidden loop player and
    its hidden download button) but previously returned a single value, so
    Gradio errored on every click and the components never became visible.
    It now returns an update for each output, revealing both.

    Args:
        stem_audio_data: (sample_rate, integer samples) tuple from gr.Audio.
        bar_times: bar start times in seconds (at least two are required to
            measure one bar's duration).
        loop_length: dropdown string such as "4 Bars".

    Returns:
        (audio player update, download button update).
    """
    if stem_audio_data is None or bar_times is None or len(bar_times) < 2:
        gr.Warning("Insufficient data to create a loop.")
        # No-op updates keep the outputs unchanged on failure.
        return gr.update(), gr.update()

    sample_rate, raw_samples = stem_audio_data
    samples = librosa.util.buf_to_float(raw_samples)
    mono = librosa.to_mono(samples.T) if samples.ndim > 1 else samples

    # "4 Bars" -> 4
    num_bars = int(loop_length.split(" ")[0])

    # Loop starts at the first detected bar; one bar's duration comes from the
    # spacing of the first two bar marks (assumes a steady tempo).
    start_time = bar_times[0]
    bar_duration = bar_times[1] - bar_times[0]
    end_time = min(start_time + num_bars * bar_duration, len(mono) / sample_rate)

    start_sample = librosa.time_to_samples(start_time, sr=sample_rate)
    end_sample = librosa.time_to_samples(end_time, sr=sample_rate)
    looped_audio = mono[start_sample:end_sample]

    global temp_files
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False, prefix="loop_") as tmp_file:
        sf.write(tmp_file.name, looped_audio, sample_rate)
        temp_files.append(tmp_file.name)
        # Reveal the hidden player and button, both pointing at the new file.
        return (gr.update(value=tmp_file.name, visible=True),
                gr.update(value=tmp_file.name, visible=True))
252
-
253
def cut_all_oneshots(stem_audio_data, onset_times):
    """Slice the active stem at every detected onset and zip the pieces.

    Each one-shot runs from one onset to the next (the last runs to the end of
    the stem).  Returns the path of a ZIP of one-shot WAVs for the download
    button, or None when there is nothing to cut.
    """
    if stem_audio_data is None or onset_times is None or len(onset_times) < 1:
        gr.Warning("Insufficient data or onsets detected to cut one-shots.")
        return None

    sample_rate, raw_samples = stem_audio_data
    samples = librosa.util.buf_to_float(raw_samples)
    mono = librosa.to_mono(samples.T) if samples.ndim > 1 else samples
    total_seconds = len(mono) / sample_rate

    oneshot_files = []
    for i, start_time in enumerate(onset_times):
        end_time = onset_times[i + 1] if i < len(onset_times) - 1 else total_seconds

        start_sample = librosa.time_to_samples(start_time, sr=sample_rate)
        end_sample = librosa.time_to_samples(end_time, sr=sample_rate)

        if start_sample >= end_sample:
            # Degenerate slice: pad 10ms past the onset, clamped to the buffer.
            end_sample = min(start_sample + int(0.01 * sample_rate), len(mono))

        segment = mono[start_sample:end_sample]

        with tempfile.NamedTemporaryFile(suffix=f"_{i}.wav", delete=False, prefix="oneshot_") as tmp_file:
            sf.write(tmp_file.name, segment, sample_rate)
            oneshot_files.append(tmp_file.name)

    if not oneshot_files:
        gr.Warning("No one-shots were successfully cut.")
        return None

    # Bundle every one-shot into a single archive for the download button.
    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False, prefix="oneshots_archive_") as zip_file:
        with zipfile.ZipFile(zip_file.name, 'w') as zipf:
            for file_path in oneshot_files:
                zipf.write(file_path, os.path.basename(file_path))

    # Register everything so the next upload's cleanup removes it.
    global temp_files
    temp_files.extend(oneshot_files)
    temp_files.append(zip_file.name)
    return zip_file.name
301
-
302
-
303
# --- Gradio UI: Loop Architect ---
# Left column: upload + separation mode.  Right column: one row per stem with
# its player, slice/loop controls, and (initially hidden) loop outputs.
# Below: the shared slice editor operating on whichever stem was last sliced.
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red")) as demo:
    gr.Markdown("# 🎵 Loop Architect")
    # Shared state: onset times + audio of the stem currently in the slice
    # editor, and per-stem bar times filled in by separate_stems.
    onset_times_state = gr.State(value=None)
    active_stem_state = gr.State(value=None)
    vocals_bar_times_state = gr.State(value=None)
    drums_bar_times_state = gr.State(value=None)
    bass_bar_times_state = gr.State(value=None)
    other_bar_times_state = gr.State(value=None)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Separate Stems")
            audio_input = gr.Audio(type="filepath", label="Upload a Track")
            stem_options = gr.Radio(["4 Stems (Vocals, Drums, Bass, Other)", "2 Stems (Vocals + Instrumental)"], label="Separation Type", value="4 Stems (Vocals, Drums, Bass, Other)")
            submit_button = gr.Button("Separate Stems")

        with gr.Column(scale=2):
            with gr.Accordion("Separated Stems", open=True):
                with gr.Row():
                    vocals_output = gr.Audio(label="Vocals", scale=2)
                    with gr.Column(scale=1):
                        slice_vocals_btn = gr.Button("Visualize Slices")
                        vocals_loop_length = gr.Dropdown(choices=["4 Bars", "8 Bars", "16 Bars"], label="Loop Length", value="4 Bars")
                        create_vocals_loop_btn = gr.Button("Create Loop")
                    # Hidden until a loop is created.
                    vocals_loop_output = gr.Audio(label="Vocals Loop", visible=False, scale=2)
                    vocals_loop_download_btn = gr.DownloadButton(value="Download Loop", visible=False)
                with gr.Row():
                    drums_output = gr.Audio(label="Drums", scale=2)
                    with gr.Column(scale=1):
                        slice_drums_btn = gr.Button("Visualize Slices")
                        drums_loop_length = gr.Dropdown(choices=["4 Bars", "8 Bars", "16 Bars"], label="Loop Length", value="4 Bars")
                        create_drums_loop_btn = gr.Button("Create Loop")
                    drums_loop_output = gr.Audio(label="Drums Loop", visible=False, scale=2)
                    drums_loop_download_btn = gr.DownloadButton(value="Download Loop", visible=False)
                with gr.Row():
                    bass_output = gr.Audio(label="Bass", scale=2)
                    with gr.Column(scale=1):
                        slice_bass_btn = gr.Button("Visualize Slices")
                        bass_loop_length = gr.Dropdown(choices=["4 Bars", "8 Bars", "16 Bars"], label="Loop Length", value="4 Bars")
                        create_bass_loop_btn = gr.Button("Create Loop")
                    bass_loop_output = gr.Audio(label="Bass Loop", visible=False, scale=2)
                    bass_loop_download_btn = gr.DownloadButton(value="Download Loop", visible=False)
                with gr.Row():
                    other_output = gr.Audio(label="Other / Instrumental", scale=2)
                    with gr.Column(scale=1):
                        slice_other_btn = gr.Button("Visualize Slices")
                        other_loop_length = gr.Dropdown(choices=["4 Bars", "8 Bars", "16 Bars"], label="Loop Length", value="4 Bars")
                        create_other_loop_btn = gr.Button("Create Loop")
                    other_loop_output = gr.Audio(label="Other Loop", visible=False, scale=2)
                    other_loop_download_btn = gr.DownloadButton(value="Download Loop", visible=False)

    gr.Markdown("### Slice Editor")
    sensitivity_slider = gr.Slider(minimum=0, maximum=1, value=0.5, label="Onset Sensitivity")
    # NOTE(review): visualize_slices returns a matplotlib Figure, but this is a
    # gr.Image component -- gr.Plot may be required for the figure to render;
    # confirm against the installed Gradio version.
    slice_plot = gr.Image(label="Click a region on the waveform to preview a slice")
    preview_player = gr.Audio(label="Slice Preview")
    download_slice_btn = gr.DownloadButton(value="Download Slice", visible=False)
    cut_all_oneshots_btn = gr.Button(value="Cut All Oneshots")
    cut_oneshots_download_btn = gr.DownloadButton(value="Download All Oneshots", visible=False)

    # --- Event wiring ---
    # A new upload purges temp files from the previous session.
    audio_input.change(fn=cleanup_temp_files)
    submit_button.click(fn=separate_stems, inputs=[audio_input, stem_options], outputs=[vocals_output, drums_output, bass_output, other_output, vocals_bar_times_state, drums_bar_times_state, bass_bar_times_state, other_bar_times_state])
    stem_options.change(fn=update_output_visibility, inputs=stem_options, outputs=[vocals_output, drums_output, bass_output, other_output])

    # Each "Visualize Slices" button routes its stem into the shared editor.
    slice_vocals_btn.click(fn=visualize_slices, inputs=[vocals_output, sensitivity_slider], outputs=[slice_plot, onset_times_state, active_stem_state])
    slice_drums_btn.click(fn=visualize_slices, inputs=[drums_output, sensitivity_slider], outputs=[slice_plot, onset_times_state, active_stem_state])
    slice_bass_btn.click(fn=visualize_slices, inputs=[bass_output, sensitivity_slider], outputs=[slice_plot, onset_times_state, active_stem_state])
    slice_other_btn.click(fn=visualize_slices, inputs=[other_output, sensitivity_slider], outputs=[slice_plot, onset_times_state, active_stem_state])

    # Clicking the plot previews a slice, then reveals the download button.
    slice_plot.select(fn=preview_slice, inputs=[active_stem_state, onset_times_state], outputs=preview_player).then(lambda: gr.update(visible=True), outputs=download_slice_btn)

    # NOTE(review): each handler below is wired to TWO outputs -- confirm
    # create_loop returns two values (player update + download-button update).
    create_vocals_loop_btn.click(fn=create_loop, inputs=[vocals_output, vocals_bar_times_state, vocals_loop_length], outputs=[vocals_loop_output, vocals_loop_download_btn])
    create_drums_loop_btn.click(fn=create_loop, inputs=[drums_output, drums_bar_times_state, drums_loop_length], outputs=[drums_loop_output, drums_loop_download_btn])
    create_bass_loop_btn.click(fn=create_loop, inputs=[bass_output, bass_bar_times_state, bass_loop_length], outputs=[bass_loop_output, bass_loop_download_btn])
    create_other_loop_btn.click(fn=create_loop, inputs=[other_output, other_bar_times_state, other_loop_length], outputs=[other_loop_output, other_loop_download_btn])

    # NOTE(review): writing download_slice's returned path back into the
    # DownloadButton itself (not a separate file output) -- verify this is the
    # intended DownloadButton usage for this Gradio version.
    download_slice_btn.click(fn=download_slice, inputs=preview_player, outputs=download_slice_btn)
    cut_all_oneshots_btn.click(fn=cut_all_oneshots, inputs=[active_stem_state, onset_times_state], outputs=cut_oneshots_download_btn)


demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
2
  import librosa
 
 
3
  import numpy as np
4
+ import os
5
+ import shutil
6
  import zipfile
7
  import tempfile
8
+ import soundfile as sf
9
+ import traceback
10
+ import subprocess # Necessary for running Spleeter
11
+ from typing import Tuple, List
12
+
13
+ # --- Configuration ---
14
+ OUTPUT_FOLDER_NAME = "PRO_LOOP_PACK"
15
+
16
+ # Mapping of model selection to Spleeter config and resulting stem types
17
+ STEM_MODELS = {
18
+ '2-Stems (Vocals/Inst)': {
19
+ 'spleeter_config': '2stems',
20
+ 'stems': ['vocals', 'accompaniment'], # Spleeter output names
21
+ 'display_stems': ['Vocals', 'Instrumental'] # User-facing names
22
+ },
23
+ '4-Stems (Drums, Bass, Vocals, Other)': {
24
+ 'spleeter_config': '4stems',
25
+ 'stems': ['vocals', 'drums', 'bass', 'other'],
26
+ 'display_stems': ['Vocals', 'Drums', 'Bass', 'Other']
27
+ },
28
+ '5-Stems (Drums, Bass, Vocals, Piano, Other)': {
29
+ 'spleeter_config': '5stems',
30
+ 'stems': ['vocals', 'drums', 'bass', 'piano', 'other'],
31
+ 'display_stems': ['Vocals', 'Drums', 'Bass', 'Piano', 'Other']
32
+ },
33
+ }
34
+ LOOP_BAR_LENGTHS = [4, 6, 8]
35
+
36
+ # Key Detection Templates (as defined previously)
37
+ KEY_TEMPLATES = {
38
+ 'major': [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.16, 3.61, 3.28, 2.91],
39
+ 'minor': [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.91, 3.03, 3.34]
40
+ }
41
+ NOTES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
42
+
43
+ # --- Utility Functions ---
44
+
45
def save_segment(filepath: str, audio_data: np.ndarray, sr: int):
    """Utility function to save a NumPy audio array as a WAV file."""
    # Spleeter outputs 44100Hz audio, so we explicitly set the sample rate
    # PCM_16 writes 16-bit integer samples; presumably soundfile scales float
    # input in [-1, 1] to the int16 range and clips values outside it -- TODO
    # confirm against the soundfile docs.
    sf.write(filepath, audio_data, sr, format='WAV', subtype='PCM_16')
49
+
50
def detect_key_and_mode(y: np.ndarray, sr: int) -> str:
    """Estimate the musical key of a signal, e.g. 'CMajor' or 'AMinor'.

    Correlates the time-averaged chroma vector against the major/minor key
    profiles in KEY_TEMPLATES, rotated to each of the 12 pitch classes, and
    keeps the best match (major wins ties, matching the original check order).

    Returns:
        The best key with spaces stripped (e.g. "CMajor"),
        "KeyDetectionAmbiguous" when no profile correlates convincingly
        (or the input is silent), or "KeyDetectionFailed" on any error.
    """
    try:
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_mean = np.mean(chroma, axis=1)

        # BUG FIX: silent / zero-energy input made chroma_mean.sum() zero, so
        # the normalisation divided by zero and fed NaNs (plus runtime
        # warnings) into every correlation.  Bail out explicitly instead.
        total = chroma_mean.sum()
        if total == 0:
            return "KeyDetectionAmbiguous"
        chroma_mean /= total

        best_key = "Unknown"
        max_correlation = -1.0

        for i, note in enumerate(NOTES):
            # Rotate each template so its tonic lands on pitch class i;
            # 'major' is checked first, so ties resolve to the major key.
            for mode, template in KEY_TEMPLATES.items():
                rotated = np.roll(template, i)
                correlation = np.dot(chroma_mean, rotated)
                if correlation > max_correlation:
                    max_correlation = correlation
                    best_key = f"{note} {mode.capitalize()}"

        # A weak best match means the key is effectively undetermined.
        if max_correlation < 0.2:
            return "KeyDetectionAmbiguous"

        return best_key.replace(' ', '')

    except Exception as e:
        print(f"Key Detection Failed: {e}")
        return "KeyDetectionFailed"
85
+
86
def separate_stems(audio_path: str, model_name: str, output_dir: str) -> str:
    """
    Executes Spleeter separation via subprocess and returns the folder
    containing the separated stem WAVs.

    Requires the 'spleeter' package/CLI to be installed in the environment.

    Args:
        audio_path: Path of the mix to separate.
        model_name: Key into STEM_MODELS selecting the Spleeter configuration.
        output_dir: Root folder Spleeter writes into; stems land in a
            subfolder named after the input file (without extension).

    Raises:
        RuntimeError: when the Spleeter CLI fails, is missing, or its
            expected output directory does not appear.
    """
    spleeter_config = STEM_MODELS[model_name]['spleeter_config']

    # Spleeter command: spleeter separate -o {output_dir} -p {config} {input_file}
    # (argv list form, no shell: odd filenames cannot break the command)
    command = [
        "spleeter", "separate",
        "-o", output_dir,
        "-p", f"spleeter:{spleeter_config}",
        audio_path
    ]

    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True)
        print("Spleeter Output:", result.stdout)
        print("Spleeter Errors:", result.stderr)
    except subprocess.CalledProcessError as e:
        # Chain the original error (`from e`) so the CLI's captured output is
        # preserved in the traceback -- the old code discarded the cause.
        raise RuntimeError(f"Spleeter command failed. Check if 'spleeter' is installed. Output: {e.stdout}, Error: {e.stderr}") from e
    except OSError as e:
        # e.g. the 'spleeter' executable is not on PATH at all.
        raise RuntimeError(f"Error during Spleeter execution: {e}") from e

    # Spleeter creates a sub-directory named after the input file; locate it.
    base_filename = os.path.splitext(os.path.basename(audio_path))[0]
    spleeter_output_path = os.path.join(output_dir, base_filename)

    if not os.path.isdir(spleeter_output_path):
        # Previously raised as FileNotFoundError, then caught by a blanket
        # `except Exception` and re-wrapped under a misleading "execution
        # error" message; raise the precise failure directly instead.
        raise RuntimeError(f"Spleeter output directory not found at: {spleeter_output_path}")

    return spleeter_output_path
124
+
125
+ # --- Main Processing Function ---
126
+
127
+ def create_market_ready_pack(
128
+ audio_file_path: str,
129
+ one_shot_sensitivity: float,
130
+ stem_model_selection: str,
131
+ progress=gr.Progress()
132
+ ) -> Tuple[str | None, str]:
133
+ """
134
+ Processes the input audio file, generates loops and one-shots,
135
+ and packages them into a market-ready ZIP file.
136
+ """
137
+ temp_dir = None
138
+
139
+ if not audio_file_path:
140
+ return None, "Error: Please upload an audio file before proceeding."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
+ try:
143
+ # 1. Setup Temporary Directories
144
+ temp_dir = tempfile.mkdtemp()
145
+ output_root = os.path.join(temp_dir, OUTPUT_FOLDER_NAME)
146
+ os.makedirs(output_root, exist_ok=True)
147
+
148
+ progress(0.05, desc="Loading and Verifying Audio...")
149
+
150
+ # Robust Audio Loading (Load full mix for analysis)
151
+ y_full, sr = librosa.load(audio_file_path, sr=None, mono=True)
152
+ if y_full.size == 0:
153
+ raise ValueError("Loaded audio is empty.")
154
+
155
+ # 2. Advanced Audio Analysis (Tempo and Key)
156
+ progress(0.15, desc="Analyzing Tempo and Musical Key...")
157
+
158
+ tempo = 120.0
159
+ start_sample = 0
160
+ key_mode_name = "120BPM_UnknownKey"
161
+
162
+ try:
163
+ tempo, beat_frames = librosa.beat.beat_track(y=y_full, sr=sr, trim=True)
164
+ key_mode_name = detect_key_and_mode(y_full, sr)
165
+
166
+ samples_per_beat = int((60 / tempo) * sr)
167
+ start_sample = librosa.frames_to_samples(beat_frames[0]) if beat_frames.size > 0 else 0
168
+
169
+ gr.Info(f"Analysis Complete: {int(tempo)} BPM, {key_mode_name}.")
170
+ key_mode_name = f"{int(tempo)}BPM_{key_mode_name}"
171
+
172
+ except Exception as e:
173
+ gr.Warning(f"Warning: Tempo or Key detection failed ({e}). Using default 120 BPM and 'Unknown Key'.")
174
+ samples_per_beat = int((60 / 120.0) * sr) # Fallback beat timing
175
+
176
+ # 3. REAL STEM SEPARATION using Spleeter
177
+ progress(0.25, desc=f"Separating Stems using {stem_model_selection} model...")
178
+
179
+ spleeter_output_path = separate_stems(audio_file_path, stem_model_selection, output_root)
180
+ spleeter_stems = STEM_MODELS[stem_model_selection]['stems']
181
+ display_stems = STEM_MODELS[stem_model_selection]['display_stems']
182
+
183
+ # Dictionary to hold the audio data for each stem from Spleeter's output
184
+ stem_audio_data = {}
185
+ for spleeter_name, display_name in zip(spleeter_stems, display_stems):
186
+ stem_filepath = os.path.join(spleeter_output_path, f"{spleeter_name}.wav")
187
+ if not os.path.exists(stem_filepath):
188
+ gr.Warning(f"Stem file not found for {display_name}. Skipping this stem.")
189
+ continue
190
+
191
+ # Load the separated stem audio (it will be aligned and resampled by Spleeter)
192
+ # We enforce mono loading for consistent processing later
193
+ y_stem, sr_stem = librosa.load(stem_filepath, sr=sr, mono=True)
194
+
195
+ # Align the start of the stem using the previously detected global beat
196
+ y_stem_aligned = y_stem[start_sample:]
197
+ stem_audio_data[display_name] = y_stem_aligned
198
+
199
+ # Clean up Spleeter's intermediate directory
200
+ shutil.rmtree(spleeter_output_path)
201
+
202
+ if not stem_audio_data:
203
+ raise RuntimeError("No separated stems were successfully processed. Check Spleeter output.")
204
+
205
+ # 4. Generate Loops (4, 6, 8 Bars)
206
+ progress(0.45, desc="Generating Time-Aligned Loops...")
207
+
208
+ for stem_name, y_stem in stem_audio_data.items():
209
+ loops_dir = os.path.join(output_root, 'LOOPS', stem_name)
210
+ os.makedirs(loops_dir, exist_ok=True)
211
+
212
+ samples_per_bar = samples_per_beat * 4 # Assuming 4/4 time signature
213
+
214
+ for num_bars in LOOP_BAR_LENGTHS:
215
+ samples_per_loop = samples_per_bar * num_bars
216
+
217
+ for i in range(0, len(y_stem) - samples_per_loop + 1, samples_per_loop):
218
+ try:
219
+ loop_segment = y_stem[i:i + samples_per_loop]
220
+
221
+ if len(loop_segment) < samples_per_loop * 0.9:
222
+ continue
223
+
224
+ index = i // samples_per_loop + 1
225
+ # Naming convention: {BPM_Key}_{Stem}_{Bars}Bar_{Index}.wav
226
+ filename = f"{key_mode_name}_{stem_name}_{num_bars}Bar_{index:02d}.wav"
227
+ save_segment(os.path.join(loops_dir, filename), loop_segment, sr)
228
+ except Exception as e:
229
+ gr.Warning(f"Error slicing {num_bars}-bar loop for {stem_name}: {e}")
230
+ continue
231
+
232
+ # 5. Generate One-Shots (Transient Detection)
233
+ progress(0.70, desc="Generating One-Shots (Transient Detection)...")
234
+
235
+ # Sensitivity mapping: 1=Few/Loud (large pre_max), 10=Many/Quiet (small pre_max)
236
+ pre_max_frames = int(12 - one_shot_sensitivity)
237
+ if pre_max_frames < 2: pre_max_frames = 2
238
+
239
+ pre_slice_samples = int(sr * 0.05)
240
+ post_slice_samples = int(sr * 0.25)
241
+
242
+ for stem_name, y_stem in stem_audio_data.items():
243
+ shots_dir = os.path.join(output_root, 'ONESHOTS', stem_name)
244
+ os.makedirs(shots_dir, exist_ok=True)
245
+
246
+ try:
247
+ o_env = librosa.onset.onset_strength(y=y_stem, sr=sr, aggregate=np.median)
248
+ onset_frames = librosa.onset.onset_detect(
249
+ onset_envelope=o_env,
250
+ sr=sr,
251
+ units='frames',
252
+ pre_max=pre_max_frames,
253
+ post_max=pre_max_frames // 2,
254
+ wait=10
255
+ )
256
+ onset_samples = librosa.frames_to_samples(onset_frames)
257
+
258
+ for i, sample_index in enumerate(onset_samples):
259
+ start = max(0, sample_index - pre_slice_samples)
260
+ end = min(len(y_stem), sample_index + post_slice_samples)
261
+
262
+ shot_segment = y_stem[start:end]
263
+
264
+ if len(shot_segment) > int(sr * 0.05):
265
+ filename = f"{key_mode_name}_{stem_name}_OneShot_{i+1:03d}.wav"
266
+ save_segment(os.path.join(shots_dir, filename), shot_segment, sr)
267
+ except Exception as e:
268
+ gr.Warning(f"Error during One-Shot detection for {stem_name}. Skipping. Details: {e}")
269
+ continue
270
+
271
+
272
+ # 6. Packaging (License and ZIP)
273
+ progress(0.90, desc="Creating License and Packaging Files...")
274
+
275
+ # Create the License.txt file
276
+ license_content = f"""
277
+ -- PROFESSIONAL LOOP PACK LICENSE AGREEMENT --
278
+
279
+ Product: {OUTPUT_FOLDER_NAME}
280
+ BPM/Key Reference: {key_mode_name}
281
+ Separation Model Used: {stem_model_selection}
282
+
283
+ 1. Royalty-Free Use: All sounds, loops, and one-shots within this pack are
284
+ 100% royalty-free for commercial use in musical compositions, sound design,
285
+ and public performances. You may use them in your own tracks and sell those
286
+ tracks without owing any additional royalties to the creator.
287
+
288
+ 2. Restrictions: Redistribution, repackaging, or re-selling of the individual
289
+ sounds or loops as part of another sound library or sample pack is strictly
290
+ prohibited.
291
+
292
+ 3. Generated: {os.uname().nodename}
293
+ """
294
+
295
+ license_filepath = os.path.join(output_root, 'License.txt')
296
+ with open(license_filepath, 'w') as f:
297
+ f.write(license_content.strip())
298
+
299
+ # Create the final ZIP file