Spaces:

ziqiangao
/

surroundify

Sleeping

App Files Files Community

ziqiangao committed on Jul 23, 2025

Commit

26c1442

1 Parent(s): b5743db

app implementation

Browse files

Files changed (1) hide show

app.py +132 -68

app.py CHANGED Viewed

@@ -1,75 +1,139 @@
-import gradio as gr
 import numpy as np
 import soundfile as sf
 from scipy import signal
-def extract_phantom_center_test(input_file, output_format, rdf=0.99999):
-    output_file = f"other.{output_format}"
-    output_center_file = f"center.{output_format}"
-    data, samplerate = sf.read(input_file)
     if data.ndim != 2 or data.shape[1] != 2:
-        raise ValueError("A stereo file is required (2 channels)")
-    left = data[:, 0]
-    right = data[:, 1]
-    mono = np.mean(data, axis=1)
-    nperseg = samplerate  # window size
-    noverlap = nperseg // 2  # overlap
-    f, t, Z_left = signal.stft(left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
-    f, t, Z_right = signal.stft(right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
-    f, t, Z_mono = signal.stft(mono, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
-    Z_common_left = np.minimum(np.abs(Z_left), np.abs(Z_right)) * np.exp(1j * np.angle(Z_mono))
-    Z_common_right = np.minimum(np.abs(Z_left), np.abs(Z_right)) * np.exp(1j * np.angle(Z_mono))
-    reduction_factor = rdf
-    Z_new_left = Z_left - Z_common_left * reduction_factor
-    Z_new_right = Z_right - Z_common_right * reduction_factor
-    _, new_left = signal.istft(Z_new_left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
-    _, new_right = signal.istft(Z_new_right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
-    _, common_signal_left = signal.istft(Z_common_left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
-    _, common_signal_right = signal.istft(Z_common_right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
-    new_left = new_left[:len(left)]
-    new_right = new_right[:len(right)]
-    common_signal_left = common_signal_left[:len(left)]
-    common_signal_right = common_signal_right[:len(right)]
-    peak = max(np.max(np.abs(new_left)), np.max(np.abs(new_right)))
-    if peak > 1.0:
-        new_left /= peak
-        new_right /= peak
-    sf.write(output_file, np.column_stack((new_left, new_right)), samplerate)
-    sf.write(output_center_file, np.column_stack((common_signal_left, common_signal_right)), samplerate)
-    return output_file, output_center_file
-with gr.Blocks(title="Phantom Center Extraction", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# Phantom Center Extraction")
-    input_audio = gr.Audio(label="Upload stereo audio", type="filepath")
-    reduction_f = gr.Slider(0.1, 1.0, value=0.99999, step=0.00001, label="Reduction Factor (rdf)", interactive=True)
-    output_format = gr.Dropdown(choices=["flac", "wav"], value="flac", label="Export format")
-    extract_btn = gr.Button("Separate")
-    with gr.Row():
-        side_audio = gr.Audio(label="Other audio", type="filepath", interactive=False)
-        center_audio = gr.Audio(label="Phantom center audio", type="filepath", interactive=False)
-    extract_btn.click(
-        fn=extract_phantom_center_test,
-        inputs=[input_audio, output_format, reduction_f],
-        outputs=[side_audio, center_audio]
-    )
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0")

 import numpy as np
 import soundfile as sf
+import subprocess
+import tempfile
+import os
+import gradio as gr
 from scipy import signal
+# ========== Processing Functions ==========
+def convert_to_wav_float(input_file):
+    """
+    Convert any input audio to 32-bit float WAV to preserve full dynamic range.
+    """
+    temp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+    temp_wav.close()
+    # 32-bit float little-endian PCM (pcm_f32le) preserves the full dynamic range without clipping
+    subprocess.run([
+        "ffmpeg", "-y", "-i", input_file,
+        "-c:a", "pcm_f32le", "-f", "wav", temp_wav.name
+    ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
+    return temp_wav.name
+def apply_reverb_wet_only(audio, samplerate):
+    """
+    Apply wet-only reverb using SoX to a single channel.
+    """
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tin, \
+         tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tout:
+        sf.write(tin.name, audio, samplerate, subtype='FLOAT')
+        subprocess.run(
+            ["sox", tin.name, tout.name, "reverb", '-w', '85', '50', '100', '95', '10', '-2'],
+            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
+        )
+        wet, _ = sf.read(tout.name, dtype='float32')
+    os.unlink(tin.name)
+    os.unlink(tout.name)
+    return wet
+def sox_filter(audio, samplerate, filter_type, cutoff):
+    """
+    Apply highpass or lowpass filter via SoX.
+    filter_type: 'highpass' or 'lowpass'; cutoff in Hz.
+    """
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tin, \
+         tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tout:
+        sf.write(tin.name, audio, samplerate, subtype='FLOAT')
+        subprocess.run(
+            ["sox", tin.name, tout.name, filter_type, str(cutoff)],
+            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
+        )
+        out, _ = sf.read(tout.name, dtype='float32')
+    os.unlink(tin.name)
+    os.unlink(tout.name)
+    return out
+def extract_phantom_center(input_file, rdf=0.99999):
+    """
+    Returns fs (sample rate), FL (front left without centre), FR, and FC (phantom centre).
+    """
+    wav = convert_to_wav_float(input_file)
+    data, fs = sf.read(wav, dtype='float32')
+    os.unlink(wav)
     if data.ndim != 2 or data.shape[1] != 2:
+        raise ValueError("Input must be stereo 2-channel")
+    L, R = data[:,0], data[:,1]
+    M = (L + R) / 2
+    nperseg = fs
+    noverlap = nperseg // 2
+    _, _, ZL = signal.stft(L, fs=fs, nperseg=nperseg, noverlap=noverlap)
+    _, _, ZR = signal.stft(R, fs=fs, nperseg=nperseg, noverlap=noverlap)
+    _, _, ZM = signal.stft(M, fs=fs, nperseg=nperseg, noverlap=noverlap)
+    Zc = np.minimum(np.abs(ZL), np.abs(ZR)) * np.exp(1j * np.angle(ZM))
+    Zl_res = ZL - Zc * rdf
+    Zr_res = ZR - Zc * rdf
+    _, FL = signal.istft(Zl_res, fs=fs, nperseg=nperseg, noverlap=noverlap)
+    _, FR = signal.istft(Zr_res, fs=fs, nperseg=nperseg, noverlap=noverlap)
+    _, FC = signal.istft(Zc, fs=fs, nperseg=nperseg, noverlap=noverlap)
+    return fs, FL[:len(L)], FR[:len(R)], FC[:len(M)]
+def create_5_1_surround(input_file):
+    # 1. Extract fronts and centre
+    fs, FL, FR, FC = extract_phantom_center(input_file)
+    # 2. Read original for reverb and LFE source as float WAV
+    wav = convert_to_wav_float(input_file)
+    stereo, _ = sf.read(wav, dtype='float32')
+    os.unlink(wav)
+    L_orig, R_orig = stereo[:,0], stereo[:,1]
+    # 3. Wet-only reverb for surrounds using original L/R
+    SL = apply_reverb_wet_only(L_orig, fs)
+    SR = apply_reverb_wet_only(R_orig, fs)
+    # 4. Apply a 120 Hz high-pass filter to every channel except LFE
+    FL_hp = sox_filter(FL, fs, 'highpass', 120)
+    FR_hp = sox_filter(FR, fs, 'highpass', 120)
+    FC_hp = sox_filter(FC, fs, 'highpass', 120)
+    SL_hp = sox_filter(SL, fs, 'highpass', 120)
+    SR_hp = sox_filter(SR, fs, 'highpass', 120)
+    # 5. LFE: 120 Hz low-pass of the FL/FR average taken before the high-pass step
+    bass_sum = 0.5 * (FL + FR)
+    LFE = sox_filter(bass_sum, fs, 'lowpass', 120)
+    # 6. Pad to equal length
+    channels = [FL_hp, FR_hp, FC_hp, LFE, SL_hp, SR_hp]
+    length = max(len(ch) for ch in channels)
+    def pad(x): return np.pad(x, (0, length - len(x)))
+    multich = np.column_stack([pad(ch) for ch in channels])
+    # 7. Write and encode
+    out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+    sf.write(out_wav.name, multich, fs, subtype='FLOAT')
+    out_wav.close()
+    out_ogg = tempfile.NamedTemporaryFile(suffix='.ogg', delete=False)
+    out_ogg.close()
+    subprocess.run([
+        "ffmpeg", "-y", "-i", out_wav.name,
+        "-c:a", "libvorbis", "-ac", "6", "-channel_layout", "5.1", out_ogg.name
+    ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
+    os.unlink(out_wav.name)
+    return out_ogg.name
+# ========== Gradio UI ==========
+with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
+    gr.Markdown("# 🎧 Stereo to 5.1 OGG Converter")
+    gr.Markdown("Phantom centre extraction + wet-only reverb on original L/R + sox highpass/lowpass + float I/O")
+    inp = gr.Audio(label="Upload stereo audio", type="filepath")
+    btn = gr.Button("Convert to 5.1 OGG")
+    out = gr.File(label="Download 5.1 OGG")
+    btn.click(fn=create_5_1_surround, inputs=[inp], outputs=[out])
 if __name__ == "__main__":
+    demo.launch()