noblebarkrr committed on
Commit
93c3ca9
·
verified ·
1 Parent(s): 204c40e

Fixed phantom center extraction

Browse files
Files changed (1) hide show
  1. app.py +31 -16
app.py CHANGED
@@ -1,12 +1,13 @@
1
  import os
2
  import gradio as gr
 
3
  import numpy as np
4
  import soundfile as sf
5
  from scipy import signal
6
 
7
- def extract_phantom_center(input_file, reduction_db=0.01):
8
- output_file = "other.wav"
9
- output_center_file = "center.wav"
10
  data, samplerate = sf.read(input_file)
11
 
12
  if data.ndim != 2 or data.shape[1] != 2:
@@ -14,28 +15,33 @@ def extract_phantom_center(input_file, reduction_db=0.01):
14
 
15
  left = data[:, 0]
16
  right = data[:, 1]
 
17
 
18
  nperseg = samplerate # Размер окна
19
  noverlap = nperseg // 2 # Перекрытие окон
20
 
21
  f, t, Z_left = signal.stft(left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
22
  f, t, Z_right = signal.stft(right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
 
23
 
24
- Z_common = np.minimum(np.abs(Z_left), np.abs(Z_right)) * np.exp(1j*np.angle(Z_left))
 
25
 
26
- reduction_factor = 10**(-reduction_db/20)
27
 
28
- Z_new_left = Z_left - Z_common * reduction_factor
29
- Z_new_right = Z_right - Z_common * reduction_factor
30
 
31
  _, new_left = signal.istft(Z_new_left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
32
  _, new_right = signal.istft(Z_new_right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
33
 
34
- _, common_signal = signal.istft(Z_common, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
 
35
 
36
  new_left = new_left[:len(left)]
37
  new_right = new_right[:len(right)]
38
- common_signal = common_signal[:len(left)]
 
39
 
40
  peak = np.max([np.abs(new_left).max(), np.abs(new_right).max()])
41
  if peak > 1.0:
@@ -44,17 +50,18 @@ def extract_phantom_center(input_file, reduction_db=0.01):
44
 
45
  sf.write(output_file, np.column_stack((new_left, new_right)), samplerate)
46
 
47
- inverted_center = -common_signal
 
48
 
49
- mixed_left = left + inverted_center
50
- mixed_right = right + inverted_center
51
 
52
  peak_mixed = np.max([np.abs(mixed_left).max(), np.abs(mixed_right).max()])
53
  if peak_mixed > 1.0:
54
  mixed_left = mixed_left / peak_mixed
55
  mixed_right = mixed_right / peak_mixed
56
 
57
- sf.write(output_center_file, np.column_stack((common_signal, common_signal)), samplerate) # Моно фантомный центр в оба канала
58
  # sf.write(output_file, np.column_stack((mixed_left, mixed_right)), samplerate)
59
 
60
  return (output_file, output_center_file)
@@ -63,16 +70,24 @@ with gr.Blocks(title="Phantom Center Extraction", theme=gr.themes.Soft()) as dem
63
  gr.Markdown("# Phantom Center Extraction")
64
  with gr.Row():
65
  input_audio = gr.Audio(label="Upload audio", type="filepath")
 
 
 
 
 
 
 
 
66
  with gr.Row():
67
  extract_btn = gr.Button("Separate")
68
  with gr.Row():
69
  mid = gr.Audio(type="filepath", interactive=False, label="Phantom Center", visible=True)
70
  side = gr.Audio(type="filepath", interactive=False, label="Other", visible=True)
71
  extract_btn.click(
72
- fn=extract_phantom_center,
73
- inputs=[input_audio],
74
  outputs=[side, mid]
75
  )
76
 
77
  if __name__ == "__main__":
78
- demo.launch(share=True)
 
1
  import os
2
  import gradio as gr
3
+
4
  import numpy as np
5
  import soundfile as sf
6
  from scipy import signal
7
 
8
+ def extract_phantom_center_test(input_file, output_format, rdf=0.99999):
9
+ output_file = f"other.{output_format}"
10
+ output_center_file = f"center.{output_format}"
11
  data, samplerate = sf.read(input_file)
12
 
13
  if data.ndim != 2 or data.shape[1] != 2:
 
15
 
16
  left = data[:, 0]
17
  right = data[:, 1]
18
+ mono = np.mean(data, axis=1)
19
 
20
  nperseg = samplerate # Размер окна
21
  noverlap = nperseg // 2 # Перекрытие окон
22
 
23
  f, t, Z_left = signal.stft(left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
24
  f, t, Z_right = signal.stft(right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
25
+ f, t, Z_mono = signal.stft(mono, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
26
 
27
+ Z_common_left = np.minimum(np.abs(Z_left), np.abs(Z_right)) * np.exp(1j*np.angle(Z_mono))
28
+ Z_common_right = np.minimum(np.abs(Z_left), np.abs(Z_right)) * np.exp(1j*np.angle(Z_mono))
29
 
30
+ reduction_factor = rdf
31
 
32
+ Z_new_left = Z_left - Z_common_left * reduction_factor
33
+ Z_new_right = Z_right - Z_common_right * reduction_factor
34
 
35
  _, new_left = signal.istft(Z_new_left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
36
  _, new_right = signal.istft(Z_new_right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
37
 
38
+ _, common_signal_left = signal.istft(Z_common_left, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
39
+ _, common_signal_right = signal.istft(Z_common_right, fs=samplerate, nperseg=nperseg, noverlap=noverlap)
40
 
41
  new_left = new_left[:len(left)]
42
  new_right = new_right[:len(right)]
43
+ common_signal_left = common_signal_left[:len(left)]
44
+ common_signal_right = common_signal_right[:len(right)]
45
 
46
  peak = np.max([np.abs(new_left).max(), np.abs(new_right).max()])
47
  if peak > 1.0:
 
50
 
51
  sf.write(output_file, np.column_stack((new_left, new_right)), samplerate)
52
 
53
+ inverted_center_left = -common_signal_left
54
+ inverted_center_right = -common_signal_right
55
 
56
+ mixed_left = left + inverted_center_left
57
+ mixed_right = right + inverted_center_right
58
 
59
  peak_mixed = np.max([np.abs(mixed_left).max(), np.abs(mixed_right).max()])
60
  if peak_mixed > 1.0:
61
  mixed_left = mixed_left / peak_mixed
62
  mixed_right = mixed_right / peak_mixed
63
 
64
+ sf.write(output_center_file, np.column_stack((common_signal_left, common_signal_right)), samplerate) # Моно фантомный центр в оба канала
65
  # sf.write(output_file, np.column_stack((mixed_left, mixed_right)), samplerate)
66
 
67
  return (output_file, output_center_file)
 
70
  gr.Markdown("# Phantom Center Extraction")
71
  with gr.Row():
72
  input_audio = gr.Audio(label="Upload audio", type="filepath")
73
+ with gr.Row():
74
+ reduction_f = gr.Slider(0.1, 10, value=1, step=0.1, label="Reduction dB", interactive=True, visible=False)
75
+ with gr.Row():
76
+ format = gr.Dropdown(
77
+ ["flac", "wav"],
78
+ value="flac",
79
+ label="Export format"
80
+ )
81
  with gr.Row():
82
  extract_btn = gr.Button("Separate")
83
  with gr.Row():
84
  mid = gr.Audio(type="filepath", interactive=False, label="Phantom Center", visible=True)
85
  side = gr.Audio(type="filepath", interactive=False, label="Other", visible=True)
86
  extract_btn.click(
87
+ fn=extract_phantom_center_test,
88
+ inputs=[input_audio, format, reduction_f],
89
  outputs=[side, mid]
90
  )
91
 
92
  if __name__ == "__main__":
93
+ demo.launch(server_name="0.0.0.0")