Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -84,6 +84,7 @@ def extract_phantom_center(input_file, rdf=0.99999):
|
|
| 84 |
|
| 85 |
|
| 86 |
def create_5_1_surround(input_file, preset="music"):
|
|
|
|
| 87 |
p = gr.Progress()
|
| 88 |
# Preset-based parameters
|
| 89 |
# Reverberance (50%) HF-damping (50%) room-scale (100%) stereo-depth (100%) pre-delay (0ms) wet-gain (0dB)
|
|
@@ -192,9 +193,9 @@ def send_mvsep_audio_job(
|
|
| 192 |
'output_format': str(output_format)
|
| 193 |
}
|
| 194 |
if addopt1:
|
| 195 |
-
data['add_opt1'] = addopt1
|
| 196 |
if addopt2:
|
| 197 |
-
data['add_opt2'] = addopt2
|
| 198 |
|
| 199 |
# Step 3: Send creation request
|
| 200 |
response = requests.post(url, files=files, data=data)
|
|
@@ -244,6 +245,7 @@ def download_wav(url, target_fs=None):
|
|
| 244 |
|
| 245 |
# Smart mode workflow
|
| 246 |
def smart_mode_process(input_file, api_key, multi_singer=False):
|
|
|
|
| 247 |
p = gr.Progress()
|
| 248 |
import shutil
|
| 249 |
|
|
@@ -255,6 +257,7 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
|
|
| 255 |
data, fs = sf.read(wav, dtype='float32')
|
| 256 |
os.unlink(wav)
|
| 257 |
p((0,7), "Loading File")
|
|
|
|
| 258 |
|
| 259 |
if data.ndim != 2:
|
| 260 |
raise gr.Error("Expected stereo input")
|
|
@@ -263,10 +266,12 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
|
|
| 263 |
|
| 264 |
# Step 1: LFE from lowpass
|
| 265 |
p((1,7), "Processing LFE")
|
|
|
|
| 266 |
bass = sox_filter(0.5 * (L + R), fs, 'lowpass', 120)
|
| 267 |
|
| 268 |
# Step 2: Highpass for MVSep
|
| 269 |
p((2,7), "Processing Speech, Music and SFX")
|
|
|
|
| 270 |
hp_left = sox_filter(L, fs, 'highpass', 120)
|
| 271 |
hp_right = sox_filter(R, fs, 'highpass', 120)
|
| 272 |
hp_stereo = np.column_stack([hp_left, hp_right])
|
|
@@ -280,12 +285,14 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
|
|
| 280 |
)
|
| 281 |
os.unlink(hp_buf.name)
|
| 282 |
|
|
|
|
| 283 |
dialog, _ = download_wav(demucs_resp['files'][0]['url'], target_fs=fs)
|
| 284 |
sfx, _ = download_wav(demucs_resp['files'][2]['url'], target_fs=fs)
|
| 285 |
music, _ = download_wav(demucs_resp['files'][1]['url'], target_fs=fs)
|
| 286 |
|
| 287 |
# Step 3: Extract crowd
|
| 288 |
p((3,7), "Extracting Crowd")
|
|
|
|
| 289 |
music_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
|
| 290 |
sf.write(music_buf.name, music, fs, format='FLAC', subtype='PCM_16')
|
| 291 |
music_buf.close()
|
|
@@ -295,9 +302,11 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
|
|
| 295 |
os.unlink(music_buf.name)
|
| 296 |
crowd, _ = download_wav(crowd_resp['files'][0]['url'], target_fs=fs)
|
| 297 |
other, _ = download_wav(crowd_resp['files'][1]['url'], target_fs=fs)
|
|
|
|
| 298 |
|
| 299 |
# Step 4: Extract vocals
|
| 300 |
p((4,7), "Extracting Vocals")
|
|
|
|
| 301 |
other_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
|
| 302 |
sf.write(other_buf.name, other, fs, format='FLAC', subtype='PCM_16')
|
| 303 |
other_buf.close()
|
|
@@ -309,9 +318,11 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
|
|
| 309 |
vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
|
| 310 |
vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
|
| 311 |
instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
|
|
|
|
| 312 |
|
| 313 |
# Step 5: Phantom center for lead vocals
|
| 314 |
p((5,7), "Distributing Front Vocal Channels")
|
|
|
|
| 315 |
vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
| 316 |
sf.write(vl_buf.name, vocals_full if multi_singer else vocals_lead, fs, subtype='FLOAT')
|
| 317 |
vl_buf.close()
|
|
@@ -320,6 +331,7 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
|
|
| 320 |
|
| 321 |
# Step 6: Map channels and pad
|
| 322 |
p((6,7), "Mapping Channels")
|
|
|
|
| 323 |
def match_len(x, length): return np.pad(x, (0, length - len(x)))
|
| 324 |
lens = [len(FL_vl), len(FR_vl), len(FC_vl), len(bass), sfx.shape[0], crowd.shape[0], vocals_back.shape[0], instr.shape[0]]
|
| 325 |
length = max(lens)
|
|
@@ -338,6 +350,7 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
|
|
| 338 |
|
| 339 |
# Step 7: Encode to 5.1 OGG
|
| 340 |
p((7,7), "Processing Step 7, Encoding")
|
|
|
|
| 341 |
multich = np.column_stack([out_L, out_R, out_C, out_LFE, SL, SR])
|
| 342 |
out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
|
| 343 |
sf.write(out_wav.name, multich, fs, subtype='FLOAT')
|
|
@@ -353,8 +366,8 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
|
|
| 353 |
|
| 354 |
# ========== Gradio UI ==========
|
| 355 |
with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
|
| 356 |
-
gr.Markdown("# 🎧 Stereo to 5.1
|
| 357 |
-
gr.Markdown("
|
| 358 |
|
| 359 |
inp = gr.Audio(label="Upload stereo audio", type="filepath")
|
| 360 |
smart_mode = gr.Checkbox(label="Enable Smart Mode", value=False)
|
|
@@ -372,8 +385,8 @@ with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
|
|
| 372 |
with gr.Column(visible=False) as smart_section:
|
| 373 |
api_key = gr.Textbox(label="MVSep API Key", type="password")
|
| 374 |
multi_singer = gr.Checkbox(label="Multi Singer Mode", value=False)
|
| 375 |
-
smart_btn = gr.Button("
|
| 376 |
-
smart_out = gr.File(label="Output
|
| 377 |
|
| 378 |
# Logic for toggling sections
|
| 379 |
def toggle_mode(enabled):
|
|
|
|
| 84 |
|
| 85 |
|
| 86 |
def create_5_1_surround(input_file, preset="music"):
|
| 87 |
+
print("Starting Normal Processing")
|
| 88 |
p = gr.Progress()
|
| 89 |
# Preset-based parameters
|
| 90 |
# Reverberance (50%) HF-damping (50%) room-scale (100%) stereo-depth (100%) pre-delay (0ms) wet-gain (0dB)
|
|
|
|
| 193 |
'output_format': str(output_format)
|
| 194 |
}
|
| 195 |
if addopt1:
|
| 196 |
+
data['add_opt1'] = str(addopt1)
|
| 197 |
if addopt2:
|
| 198 |
+
data['add_opt2'] = str(addopt2)
|
| 199 |
|
| 200 |
# Step 3: Send creation request
|
| 201 |
response = requests.post(url, files=files, data=data)
|
|
|
|
| 245 |
|
| 246 |
# Smart mode workflow
|
| 247 |
def smart_mode_process(input_file, api_key, multi_singer=False):
|
| 248 |
+
print("Starting Smartmode")
|
| 249 |
p = gr.Progress()
|
| 250 |
import shutil
|
| 251 |
|
|
|
|
| 257 |
data, fs = sf.read(wav, dtype='float32')
|
| 258 |
os.unlink(wav)
|
| 259 |
p((0,7), "Loading File")
|
| 260 |
+
print("Loading File")
|
| 261 |
|
| 262 |
if data.ndim != 2:
|
| 263 |
raise gr.Error("Expected stereo input")
|
|
|
|
| 266 |
|
| 267 |
# Step 1: LFE from lowpass
|
| 268 |
p((1,7), "Processing LFE")
|
| 269 |
+
print("Processing LFE")
|
| 270 |
bass = sox_filter(0.5 * (L + R), fs, 'lowpass', 120)
|
| 271 |
|
| 272 |
# Step 2: Highpass for MVSep
|
| 273 |
p((2,7), "Processing Speech, Music and SFX")
|
| 274 |
+
print("Speech, Music, SFX")
|
| 275 |
hp_left = sox_filter(L, fs, 'highpass', 120)
|
| 276 |
hp_right = sox_filter(R, fs, 'highpass', 120)
|
| 277 |
hp_stereo = np.column_stack([hp_left, hp_right])
|
|
|
|
| 285 |
)
|
| 286 |
os.unlink(hp_buf.name)
|
| 287 |
|
| 288 |
+
print(demucs_resp)
|
| 289 |
dialog, _ = download_wav(demucs_resp['files'][0]['url'], target_fs=fs)
|
| 290 |
sfx, _ = download_wav(demucs_resp['files'][2]['url'], target_fs=fs)
|
| 291 |
music, _ = download_wav(demucs_resp['files'][1]['url'], target_fs=fs)
|
| 292 |
|
| 293 |
# Step 3: Extract crowd
|
| 294 |
p((3,7), "Extracting Crowd")
|
| 295 |
+
print("Crowd")
|
| 296 |
music_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
|
| 297 |
sf.write(music_buf.name, music, fs, format='FLAC', subtype='PCM_16')
|
| 298 |
music_buf.close()
|
|
|
|
| 302 |
os.unlink(music_buf.name)
|
| 303 |
crowd, _ = download_wav(crowd_resp['files'][0]['url'], target_fs=fs)
|
| 304 |
other, _ = download_wav(crowd_resp['files'][1]['url'], target_fs=fs)
|
| 305 |
+
print(crowd_resp)
|
| 306 |
|
| 307 |
# Step 4: Extract vocals
|
| 308 |
p((4,7), "Extracting Vocals")
|
| 309 |
+
print("Vocals")
|
| 310 |
other_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
|
| 311 |
sf.write(other_buf.name, other, fs, format='FLAC', subtype='PCM_16')
|
| 312 |
other_buf.close()
|
|
|
|
| 318 |
vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
|
| 319 |
vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
|
| 320 |
instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
|
| 321 |
+
print(karaoke_resp)
|
| 322 |
|
| 323 |
# Step 5: Phantom center for lead vocals
|
| 324 |
p((5,7), "Distributing Front Vocal Channels")
|
| 325 |
+
print("Front Vocal Channels")
|
| 326 |
vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
| 327 |
sf.write(vl_buf.name, vocals_full if multi_singer else vocals_lead, fs, subtype='FLOAT')
|
| 328 |
vl_buf.close()
|
|
|
|
| 331 |
|
| 332 |
# Step 6: Map channels and pad
|
| 333 |
p((6,7), "Mapping Channels")
|
| 334 |
+
print("Mapping")
|
| 335 |
def match_len(x, length): return np.pad(x, (0, length - len(x)))
|
| 336 |
lens = [len(FL_vl), len(FR_vl), len(FC_vl), len(bass), sfx.shape[0], crowd.shape[0], vocals_back.shape[0], instr.shape[0]]
|
| 337 |
length = max(lens)
|
|
|
|
| 350 |
|
| 351 |
# Step 7: Encode to 5.1 OGG
|
| 352 |
p((7,7), "Processing Step 7, Encoding")
|
| 353 |
+
print("Encoding")
|
| 354 |
multich = np.column_stack([out_L, out_R, out_C, out_LFE, SL, SR])
|
| 355 |
out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
|
| 356 |
sf.write(out_wav.name, multich, fs, subtype='FLOAT')
|
|
|
|
| 366 |
|
| 367 |
# ========== Gradio UI ==========
|
| 368 |
with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
|
| 369 |
+
gr.Markdown("# 🎧 Stereo to 5.1 Converter")
|
| 370 |
+
gr.Markdown("Convert A Stereo File Into Surround")
|
| 371 |
|
| 372 |
inp = gr.Audio(label="Upload stereo audio", type="filepath")
|
| 373 |
smart_mode = gr.Checkbox(label="Enable Smart Mode", value=False)
|
|
|
|
| 385 |
with gr.Column(visible=False) as smart_section:
|
| 386 |
api_key = gr.Textbox(label="MVSep API Key", type="password")
|
| 387 |
multi_singer = gr.Checkbox(label="Multi Singer Mode", value=False)
|
| 388 |
+
smart_btn = gr.Button("Convert")
|
| 389 |
+
smart_out = gr.File(label="Output")
|
| 390 |
|
| 391 |
# Logic for toggling sections
|
| 392 |
def toggle_mode(enabled):
|