Spaces:

noblebarkrr
/

phantom_center_extraction

Sleeping

App Files Community

noblebarkrr committed on Apr 2, 2025

Commit

93c3ca9

verified ·

1 Parent(s): 204c40e

Fixed phantom center extraction

Browse files

Files changed (1) hide show

app.py +31 -16

app.py CHANGED Viewed

@@ -1,12 +1,13 @@
 import os
 import gradio as gr
 import numpy as np
 import soundfile as sf
 from scipy import signal
-def extract_phantom_center(input_file, reduction_db=0.01):
-    output_file = "other.wav"
-    output_center_file = "center.wav"
     data, samplerate = sf.read(input_file)
     if data.ndim != 2 or data.shape[1] != 2:
@@ -14,28 +15,33 @@ def extract_phantom_center(input_file, reduction_db=0.01):
     left = data[:, 0]
     right = data[:, 1]
     nperseg = samplerate  # Размер окна
     noverlap = nperseg // 2  # Перекрытие окон
     f, t, Z_left = signal.stft(left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
     f, t, Z_right = signal.stft(right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
-    Z_common = np.minimum(np.abs(Z_left), np.abs(Z_right)) * np.exp(1j*np.angle(Z_left))
-    reduction_factor = 10**(-reduction_db/20)
-    Z_new_left = Z_left - Z_common * reduction_factor
-    Z_new_right = Z_right - Z_common * reduction_factor
     _, new_left = signal.istft(Z_new_left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
     _, new_right = signal.istft(Z_new_right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
-    _, common_signal = signal.istft(Z_common, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
     new_left = new_left[:len(left)]
     new_right = new_right[:len(right)]
-    common_signal = common_signal[:len(left)]
     peak = np.max([np.abs(new_left).max(), np.abs(new_right).max()])
     if peak > 1.0:
@@ -44,17 +50,18 @@ def extract_phantom_center(input_file, reduction_db=0.01):
     sf.write(output_file, np.column_stack((new_left, new_right)), samplerate)
-    inverted_center = -common_signal
-    mixed_left = left + inverted_center
-    mixed_right = right + inverted_center
     peak_mixed = np.max([np.abs(mixed_left).max(), np.abs(mixed_right).max()])
     if peak_mixed > 1.0:
         mixed_left = mixed_left / peak_mixed
         mixed_right = mixed_right / peak_mixed
-    sf.write(output_center_file, np.column_stack((common_signal, common_signal)), samplerate)  # Моно фантомный центр в оба канала
     # sf.write(output_file, np.column_stack((mixed_left, mixed_right)), samplerate)
     return (output_file, output_center_file)
@@ -63,16 +70,24 @@ with gr.Blocks(title="Phantom Center Extraction", theme=gr.themes.Soft()) as dem
     gr.Markdown("# Phantom Center Extraction")
     with gr.Row():
         input_audio = gr.Audio(label="Upload audio", type="filepath")
     with gr.Row():
         extract_btn = gr.Button("Separate")
     with gr.Row():
         mid = gr.Audio(type="filepath", interactive=False, label="Phantom Center", visible=True)
         side = gr.Audio(type="filepath", interactive=False, label="Other", visible=True)
     extract_btn.click(
-        fn=extract_phantom_center,
-        inputs=[input_audio],
         outputs=[side, mid]
     )
 if __name__ == "__main__":
-    demo.launch(share=True)

 import os
 import gradio as gr
 import numpy as np
 import soundfile as sf
 from scipy import signal
+def extract_phantom_center_test(input_file, output_format, rdf=0.99999):
+    output_file = f"other.{output_format}"
+    output_center_file = f"center.{output_format}"
     data, samplerate = sf.read(input_file)
     if data.ndim != 2 or data.shape[1] != 2:
     left = data[:, 0]
     right = data[:, 1]
+    mono = np.mean(data, axis=1)
     nperseg = samplerate  # Размер окна
     noverlap = nperseg // 2  # Перекрытие окон
     f, t, Z_left = signal.stft(left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
     f, t, Z_right = signal.stft(right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
+    f, t, Z_mono = signal.stft(mono, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
+    Z_common_left = np.minimum(np.abs(Z_left), np.abs(Z_right)) * np.exp(1j*np.angle(Z_mono))
+    Z_common_right = np.minimum(np.abs(Z_left), np.abs(Z_right)) * np.exp(1j*np.angle(Z_mono))
+    reduction_factor = rdf
+    Z_new_left = Z_left - Z_common_left * reduction_factor
+    Z_new_right = Z_right - Z_common_right * reduction_factor
     _, new_left = signal.istft(Z_new_left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
     _, new_right = signal.istft(Z_new_right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
+    _, common_signal_left = signal.istft(Z_common_left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
+    _, common_signal_right = signal.istft(Z_common_right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
     new_left = new_left[:len(left)]
     new_right = new_right[:len(right)]
+    common_signal_left = common_signal_left[:len(left)]
+    common_signal_right = common_signal_right[:len(right)]
     peak = np.max([np.abs(new_left).max(), np.abs(new_right).max()])
     if peak > 1.0:
     sf.write(output_file, np.column_stack((new_left, new_right)), samplerate)
+    inverted_center_left = -common_signal_left
+    inverted_center_right = -common_signal_right
+    mixed_left = left + inverted_center_left
+    mixed_right = right + inverted_center_right
     peak_mixed = np.max([np.abs(mixed_left).max(), np.abs(mixed_right).max()])
     if peak_mixed > 1.0:
         mixed_left = mixed_left / peak_mixed
         mixed_right = mixed_right / peak_mixed
+    sf.write(output_center_file, np.column_stack((common_signal_left, common_signal_right)), samplerate)  # Моно фантомный центр в оба канала
     # sf.write(output_file, np.column_stack((mixed_left, mixed_right)), samplerate)
     return (output_file, output_center_file)
     gr.Markdown("# Phantom Center Extraction")
     with gr.Row():
         input_audio = gr.Audio(label="Upload audio", type="filepath")
+    with gr.Row():
+        reduction_f = gr.Slider(0.1, 10, value=1, step=0.1, label="Reduction dB", interactive=True, visible=False)
+    with gr.Row():
+        format = gr.Dropdown(
+                            ["flac", "wav"],
+                            value="flac",
+                            label="Export format"
+                        )
     with gr.Row():
         extract_btn = gr.Button("Separate")
     with gr.Row():
         mid = gr.Audio(type="filepath", interactive=False, label="Phantom Center", visible=True)
         side = gr.Audio(type="filepath", interactive=False, label="Other", visible=True)
     extract_btn.click(
+        fn=extract_phantom_center_test,
+        inputs=[input_audio, format, reduction_f],
         outputs=[side, mid]
     )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0")