Spaces:

ziqiangao
/

surroundify

Sleeping

App Files Community

ziqiangao committed on Aug 3, 2025

Commit

bcdabcf

verified ·

1 Parent(s): 0a346a3

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -6

app.py CHANGED Viewed

@@ -84,6 +84,7 @@ def extract_phantom_center(input_file, rdf=0.99999):
 def create_5_1_surround(input_file, preset="music"):
     p = gr.Progress()
     # Preset-based parameters
     # Reverberance (50%) HF-damping (50%) room-scale (100%) stereo-depth (100%) pre-delay (0ms) wet-gain (0dB)
@@ -192,9 +193,9 @@ def send_mvsep_audio_job(
         'output_format': str(output_format)
     }
     if addopt1:
-        data['add_opt1'] = addopt1
     if addopt2:
-        data['add_opt2'] = addopt2
     # Step 3: Send creation request
     response = requests.post(url, files=files, data=data)
@@ -244,6 +245,7 @@ def download_wav(url, target_fs=None):
 # Smart mode workflow
 def smart_mode_process(input_file, api_key, multi_singer=False):
     p = gr.Progress()
     import shutil
@@ -255,6 +257,7 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     data, fs = sf.read(wav, dtype='float32')
     os.unlink(wav)
     p((0,7), "Loading File")
     if data.ndim != 2:
         raise gr.Error("Expected stereo input")
@@ -263,10 +266,12 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     # Step 1: LFE from lowpass
     p((1,7), "Processing LFE")
     bass = sox_filter(0.5 * (L + R), fs, 'lowpass', 120)
     # Step 2: Highpass for MVSep
     p((2,7), "Processing Speech, Music and SFX")
     hp_left = sox_filter(L, fs, 'highpass', 120)
     hp_right = sox_filter(R, fs, 'highpass', 120)
     hp_stereo = np.column_stack([hp_left, hp_right])
@@ -280,12 +285,14 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     )
     os.unlink(hp_buf.name)
     dialog, _ = download_wav(demucs_resp['files'][0]['url'], target_fs=fs)
     sfx, _ = download_wav(demucs_resp['files'][2]['url'], target_fs=fs)
     music, _ = download_wav(demucs_resp['files'][1]['url'], target_fs=fs)
     # Step 3: Extract crowd
     p((3,7), "Extracting Crowd")
     music_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
     sf.write(music_buf.name, music, fs, format='FLAC', subtype='PCM_16')
     music_buf.close()
@@ -295,9 +302,11 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     os.unlink(music_buf.name)
     crowd, _ = download_wav(crowd_resp['files'][0]['url'], target_fs=fs)
     other, _ = download_wav(crowd_resp['files'][1]['url'], target_fs=fs)
     # Step 4: Extract vocals
     p((4,7), "Extracting Vocals")
     other_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
     sf.write(other_buf.name, other, fs, format='FLAC', subtype='PCM_16')
     other_buf.close()
@@ -309,9 +318,11 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
     vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
     instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
     # Step 5: Phantom center for lead vocals
     p((5,7), "Distributing Front Vocal Channels")
     vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
     sf.write(vl_buf.name, vocals_full if multi_singer else vocals_lead, fs, subtype='FLOAT')
     vl_buf.close()
@@ -320,6 +331,7 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     # Step 6: Map channels and pad
     p((6,7), "Mapping Channels")
     def match_len(x, length):
         """Zero-pad ``x`` at the end of its first axis up to ``length`` entries.

         Only axis 0 is extended; any remaining axes keep their size. Passing
         a bare ``(0, n)`` pair to ``np.pad`` would broadcast the padding to
         every axis, so a 2-D input would also gain ``n`` extra zero columns.

         Raises:
             ValueError: from ``np.pad`` when ``x`` is already longer than
                 ``length`` (the pad amount would be negative).
         """
         x = np.asarray(x)
         tail = length - len(x)
         # One (before, after) pair per axis: pad the tail of axis 0 only.
         pad_width = [(0, tail)] + [(0, 0)] * (x.ndim - 1)
         return np.pad(x, pad_width)
     lens = [len(FL_vl), len(FR_vl), len(FC_vl), len(bass), sfx.shape[0], crowd.shape[0], vocals_back.shape[0], instr.shape[0]]
     length = max(lens)
@@ -338,6 +350,7 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     # Step 7: Encode to 5.1 OGG
     p((7,7), "Processing Step 7, Encoding")
     multich = np.column_stack([out_L, out_R, out_C, out_LFE, SL, SR])
     out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
     sf.write(out_wav.name, multich, fs, subtype='FLOAT')
@@ -353,8 +366,8 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
 # ========== Gradio UI ==========
 with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
-    gr.Markdown("# 🎧 Stereo to 5.1 OGG Converter")
-    gr.Markdown("Choose music or speech preset for surround processing")
     inp = gr.Audio(label="Upload stereo audio", type="filepath")
     smart_mode = gr.Checkbox(label="Enable Smart Mode", value=False)
@@ -372,8 +385,8 @@ with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
     with gr.Column(visible=False) as smart_section:
         api_key = gr.Textbox(label="MVSep API Key", type="password")
         multi_singer = gr.Checkbox(label="Multi Singer Mode", value=False)
-        smart_btn = gr.Button("Start")
-        smart_out = gr.File(label="Output from Smart Mode")
     # Logic for toggling sections
     def toggle_mode(enabled):

 def create_5_1_surround(input_file, preset="music"):
+    print("Starting Normal Processing")
     p = gr.Progress()
     # Preset-based parameters
     # Reverberance (50%) HF-damping (50%) room-scale (100%) stereo-depth (100%) pre-delay (0ms) wet-gain (0dB)
         'output_format': str(output_format)
     }
     if addopt1:
+        data['add_opt1'] = str(addopt1)
     if addopt2:
+        data['add_opt2'] = str(addopt2)
     # Step 3: Send creation request
     response = requests.post(url, files=files, data=data)
 # Smart mode workflow
 def smart_mode_process(input_file, api_key, multi_singer=False):
+    print("Starting Smartmode")
     p = gr.Progress()
     import shutil
     data, fs = sf.read(wav, dtype='float32')
     os.unlink(wav)
     p((0,7), "Loading File")
+    print("Loading File")
     if data.ndim != 2:
         raise gr.Error("Expected stereo input")
     # Step 1: LFE from lowpass
     p((1,7), "Processing LFE")
+    print("Processing LFE")
     bass = sox_filter(0.5 * (L + R), fs, 'lowpass', 120)
     # Step 2: Highpass for MVSep
     p((2,7), "Processing Speech, Music and SFX")
+    print("Speech, Music, SFX")
     hp_left = sox_filter(L, fs, 'highpass', 120)
     hp_right = sox_filter(R, fs, 'highpass', 120)
     hp_stereo = np.column_stack([hp_left, hp_right])
     )
     os.unlink(hp_buf.name)
+    print(demucs_resp)
     dialog, _ = download_wav(demucs_resp['files'][0]['url'], target_fs=fs)
     sfx, _ = download_wav(demucs_resp['files'][2]['url'], target_fs=fs)
     music, _ = download_wav(demucs_resp['files'][1]['url'], target_fs=fs)
     # Step 3: Extract crowd
     p((3,7), "Extracting Crowd")
+    print("Crowd")
     music_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
     sf.write(music_buf.name, music, fs, format='FLAC', subtype='PCM_16')
     music_buf.close()
     os.unlink(music_buf.name)
     crowd, _ = download_wav(crowd_resp['files'][0]['url'], target_fs=fs)
     other, _ = download_wav(crowd_resp['files'][1]['url'], target_fs=fs)
+    print(crowd_resp)
     # Step 4: Extract vocals
     p((4,7), "Extracting Vocals")
+    print("Vocals")
     other_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
     sf.write(other_buf.name, other, fs, format='FLAC', subtype='PCM_16')
     other_buf.close()
     vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
     vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
     instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
+    print(karaoke_resp)
     # Step 5: Phantom center for lead vocals
     p((5,7), "Distributing Front Vocal Channels")
+    print("Front Vocal Channels")
     vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
     sf.write(vl_buf.name, vocals_full if multi_singer else vocals_lead, fs, subtype='FLOAT')
     vl_buf.close()
     # Step 6: Map channels and pad
     p((6,7), "Mapping Channels")
+    print("Mapping")
     def match_len(x, length):
         """Zero-pad ``x`` at the end of its first axis up to ``length`` entries.

         Only axis 0 is extended; any remaining axes keep their size. Passing
         a bare ``(0, n)`` pair to ``np.pad`` would broadcast the padding to
         every axis, so a 2-D input would also gain ``n`` extra zero columns.

         Raises:
             ValueError: from ``np.pad`` when ``x`` is already longer than
                 ``length`` (the pad amount would be negative).
         """
         x = np.asarray(x)
         tail = length - len(x)
         # One (before, after) pair per axis: pad the tail of axis 0 only.
         pad_width = [(0, tail)] + [(0, 0)] * (x.ndim - 1)
         return np.pad(x, pad_width)
     lens = [len(FL_vl), len(FR_vl), len(FC_vl), len(bass), sfx.shape[0], crowd.shape[0], vocals_back.shape[0], instr.shape[0]]
     length = max(lens)
     # Step 7: Encode to 5.1 OGG
     p((7,7), "Processing Step 7, Encoding")
+    print("Encoding")
     multich = np.column_stack([out_L, out_R, out_C, out_LFE, SL, SR])
     out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
     sf.write(out_wav.name, multich, fs, subtype='FLOAT')
 # ========== Gradio UI ==========
 with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
+    gr.Markdown("# 🎧 Stereo to 5.1 Converter")
+    gr.Markdown("Convert A Stereo File Into Surround")
     inp = gr.Audio(label="Upload stereo audio", type="filepath")
     smart_mode = gr.Checkbox(label="Enable Smart Mode", value=False)
     with gr.Column(visible=False) as smart_section:
         api_key = gr.Textbox(label="MVSep API Key", type="password")
         multi_singer = gr.Checkbox(label="Multi Singer Mode", value=False)
+        smart_btn = gr.Button("Convert")
+        smart_out = gr.File(label="Output")
     # Logic for toggling sections
     def toggle_mode(enabled):