ziqiangao committed on
Commit
bcdabcf
·
verified ·
1 Parent(s): 0a346a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -6
app.py CHANGED
@@ -84,6 +84,7 @@ def extract_phantom_center(input_file, rdf=0.99999):
84
 
85
 
86
  def create_5_1_surround(input_file, preset="music"):
 
87
  p = gr.Progress()
88
  # Preset-based parameters
89
  # Reverberance (50%) HF-damping (50%) room-scale (100%) stereo-depth (100%) pre-delay (0ms) wet-gain (0dB)
@@ -192,9 +193,9 @@ def send_mvsep_audio_job(
192
  'output_format': str(output_format)
193
  }
194
  if addopt1:
195
- data['add_opt1'] = addopt1
196
  if addopt2:
197
- data['add_opt2'] = addopt2
198
 
199
  # Step 3: Send creation request
200
  response = requests.post(url, files=files, data=data)
@@ -244,6 +245,7 @@ def download_wav(url, target_fs=None):
244
 
245
  # Smart mode workflow
246
  def smart_mode_process(input_file, api_key, multi_singer=False):
 
247
  p = gr.Progress()
248
  import shutil
249
 
@@ -255,6 +257,7 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
255
  data, fs = sf.read(wav, dtype='float32')
256
  os.unlink(wav)
257
  p((0,7), "Loading File")
 
258
 
259
  if data.ndim != 2:
260
  raise gr.Error("Expected stereo input")
@@ -263,10 +266,12 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
263
 
264
  # Step 1: LFE from lowpass
265
  p((1,7), "Processing LFE")
 
266
  bass = sox_filter(0.5 * (L + R), fs, 'lowpass', 120)
267
 
268
  # Step 2: Highpass for MVSep
269
  p((2,7), "Processing Speech, Music and SFX")
 
270
  hp_left = sox_filter(L, fs, 'highpass', 120)
271
  hp_right = sox_filter(R, fs, 'highpass', 120)
272
  hp_stereo = np.column_stack([hp_left, hp_right])
@@ -280,12 +285,14 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
280
  )
281
  os.unlink(hp_buf.name)
282
 
 
283
  dialog, _ = download_wav(demucs_resp['files'][0]['url'], target_fs=fs)
284
  sfx, _ = download_wav(demucs_resp['files'][2]['url'], target_fs=fs)
285
  music, _ = download_wav(demucs_resp['files'][1]['url'], target_fs=fs)
286
 
287
  # Step 3: Extract crowd
288
  p((3,7), "Extracting Crowd")
 
289
  music_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
290
  sf.write(music_buf.name, music, fs, format='FLAC', subtype='PCM_16')
291
  music_buf.close()
@@ -295,9 +302,11 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
295
  os.unlink(music_buf.name)
296
  crowd, _ = download_wav(crowd_resp['files'][0]['url'], target_fs=fs)
297
  other, _ = download_wav(crowd_resp['files'][1]['url'], target_fs=fs)
 
298
 
299
  # Step 4: Extract vocals
300
  p((4,7), "Extracting Vocals")
 
301
  other_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
302
  sf.write(other_buf.name, other, fs, format='FLAC', subtype='PCM_16')
303
  other_buf.close()
@@ -309,9 +318,11 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
309
  vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
310
  vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
311
  instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
 
312
 
313
  # Step 5: Phantom center for lead vocals
314
  p((5,7), "Distributing Front Vocal Channels")
 
315
  vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
316
  sf.write(vl_buf.name, vocals_full if multi_singer else vocals_lead, fs, subtype='FLOAT')
317
  vl_buf.close()
@@ -320,6 +331,7 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
320
 
321
  # Step 6: Map channels and pad
322
  p((6,7), "Mapping Channels")
 
323
  def match_len(x, length): return np.pad(x, (0, length - len(x)))
324
  lens = [len(FL_vl), len(FR_vl), len(FC_vl), len(bass), sfx.shape[0], crowd.shape[0], vocals_back.shape[0], instr.shape[0]]
325
  length = max(lens)
@@ -338,6 +350,7 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
338
 
339
  # Step 7: Encode to 5.1 OGG
340
  p((7,7), "Processing Step 7, Encoding")
 
341
  multich = np.column_stack([out_L, out_R, out_C, out_LFE, SL, SR])
342
  out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
343
  sf.write(out_wav.name, multich, fs, subtype='FLOAT')
@@ -353,8 +366,8 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
353
 
354
  # ========== Gradio UI ==========
355
  with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
356
- gr.Markdown("# 🎧 Stereo to 5.1 OGG Converter")
357
- gr.Markdown("Choose music or speech preset for surround processing")
358
 
359
  inp = gr.Audio(label="Upload stereo audio", type="filepath")
360
  smart_mode = gr.Checkbox(label="Enable Smart Mode", value=False)
@@ -372,8 +385,8 @@ with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
372
  with gr.Column(visible=False) as smart_section:
373
  api_key = gr.Textbox(label="MVSep API Key", type="password")
374
  multi_singer = gr.Checkbox(label="Multi Singer Mode", value=False)
375
- smart_btn = gr.Button("Start")
376
- smart_out = gr.File(label="Output from Smart Mode")
377
 
378
  # Logic for toggling sections
379
  def toggle_mode(enabled):
 
84
 
85
 
86
  def create_5_1_surround(input_file, preset="music"):
87
+ print("Starting Normal Processing")
88
  p = gr.Progress()
89
  # Preset-based parameters
90
  # Reverberance (50%) HF-damping (50%) room-scale (100%) stereo-depth (100%) pre-delay (0ms) wet-gain (0dB)
 
193
  'output_format': str(output_format)
194
  }
195
  if addopt1:
196
+ data['add_opt1'] = str(addopt1)
197
  if addopt2:
198
+ data['add_opt2'] = str(addopt2)
199
 
200
  # Step 3: Send creation request
201
  response = requests.post(url, files=files, data=data)
 
245
 
246
  # Smart mode workflow
247
  def smart_mode_process(input_file, api_key, multi_singer=False):
248
+ print("Starting Smartmode")
249
  p = gr.Progress()
250
  import shutil
251
 
 
257
  data, fs = sf.read(wav, dtype='float32')
258
  os.unlink(wav)
259
  p((0,7), "Loading File")
260
+ print("Loading File")
261
 
262
  if data.ndim != 2:
263
  raise gr.Error("Expected stereo input")
 
266
 
267
  # Step 1: LFE from lowpass
268
  p((1,7), "Processing LFE")
269
+ print("Processing LFE")
270
  bass = sox_filter(0.5 * (L + R), fs, 'lowpass', 120)
271
 
272
  # Step 2: Highpass for MVSep
273
  p((2,7), "Processing Speech, Music and SFX")
274
+ print("Speech, Music, SFX")
275
  hp_left = sox_filter(L, fs, 'highpass', 120)
276
  hp_right = sox_filter(R, fs, 'highpass', 120)
277
  hp_stereo = np.column_stack([hp_left, hp_right])
 
285
  )
286
  os.unlink(hp_buf.name)
287
 
288
+ print(demucs_resp)
289
  dialog, _ = download_wav(demucs_resp['files'][0]['url'], target_fs=fs)
290
  sfx, _ = download_wav(demucs_resp['files'][2]['url'], target_fs=fs)
291
  music, _ = download_wav(demucs_resp['files'][1]['url'], target_fs=fs)
292
 
293
  # Step 3: Extract crowd
294
  p((3,7), "Extracting Crowd")
295
+ print("Crowd")
296
  music_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
297
  sf.write(music_buf.name, music, fs, format='FLAC', subtype='PCM_16')
298
  music_buf.close()
 
302
  os.unlink(music_buf.name)
303
  crowd, _ = download_wav(crowd_resp['files'][0]['url'], target_fs=fs)
304
  other, _ = download_wav(crowd_resp['files'][1]['url'], target_fs=fs)
305
+ print(crowd_resp)
306
 
307
  # Step 4: Extract vocals
308
  p((4,7), "Extracting Vocals")
309
+ print("Vocals")
310
  other_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
311
  sf.write(other_buf.name, other, fs, format='FLAC', subtype='PCM_16')
312
  other_buf.close()
 
318
  vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
319
  vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
320
  instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
321
+ print(karaoke_resp)
322
 
323
  # Step 5: Phantom center for lead vocals
324
  p((5,7), "Distributing Front Vocal Channels")
325
+ print("Front Vocal Channels")
326
  vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
327
  sf.write(vl_buf.name, vocals_full if multi_singer else vocals_lead, fs, subtype='FLOAT')
328
  vl_buf.close()
 
331
 
332
  # Step 6: Map channels and pad
333
  p((6,7), "Mapping Channels")
334
+ print("Mapping")
335
  def match_len(x, length): return np.pad(x, (0, length - len(x)))
336
  lens = [len(FL_vl), len(FR_vl), len(FC_vl), len(bass), sfx.shape[0], crowd.shape[0], vocals_back.shape[0], instr.shape[0]]
337
  length = max(lens)
 
350
 
351
  # Step 7: Encode to 5.1 OGG
352
  p((7,7), "Processing Step 7, Encoding")
353
+ print("Encoding")
354
  multich = np.column_stack([out_L, out_R, out_C, out_LFE, SL, SR])
355
  out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
356
  sf.write(out_wav.name, multich, fs, subtype='FLOAT')
 
366
 
367
  # ========== Gradio UI ==========
368
  with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
369
+ gr.Markdown("# 🎧 Stereo to 5.1 Converter")
370
+ gr.Markdown("Convert A Stereo File Into Surround")
371
 
372
  inp = gr.Audio(label="Upload stereo audio", type="filepath")
373
  smart_mode = gr.Checkbox(label="Enable Smart Mode", value=False)
 
385
  with gr.Column(visible=False) as smart_section:
386
  api_key = gr.Textbox(label="MVSep API Key", type="password")
387
  multi_singer = gr.Checkbox(label="Multi Singer Mode", value=False)
388
+ smart_btn = gr.Button("Convert")
389
+ smart_out = gr.File(label="Output")
390
 
391
  # Logic for toggling sections
392
  def toggle_mode(enabled):