ziqiangao committed on
Commit
26c1442
·
1 Parent(s): b5743db

app implementation

Browse files
Files changed (1) hide show
  1. app.py +132 -68
app.py CHANGED
@@ -1,75 +1,139 @@
1
- import gradio as gr
2
  import numpy as np
3
  import soundfile as sf
 
 
 
 
4
  from scipy import signal
5
 
6
def extract_phantom_center_test(input_file, output_format, rdf=0.99999):
    """Split a stereo file into a phantom-centre track and the residual.

    The centre estimate takes, per STFT bin, the smaller of the left/right
    magnitudes combined with the phase of the mono (L+R)/2 mix. That
    estimate, scaled by ``rdf``, is subtracted from each channel to form
    the residual ("other") signal.

    Args:
        input_file: Path of a 2-channel audio file readable by soundfile.
        output_format: File extension used for both outputs (e.g. "flac").
        rdf: Reduction factor applied to the centre estimate before it is
            subtracted from the left and right channels.

    Returns:
        Tuple ``(output_file, output_center_file)`` with the paths of the
        residual and the centre files.

    Raises:
        ValueError: If the input is not 2-channel or contains no samples.
    """
    # NOTE(review): both outputs use fixed names in the current working
    # directory, so concurrent calls overwrite each other's results.
    output_file = f"other.{output_format}"
    output_center_file = f"center.{output_format}"
    data, samplerate = sf.read(input_file)

    if data.ndim != 2 or data.shape[1] != 2:
        raise ValueError("A stereo file is required (2 channels)")

    left = data[:, 0]
    right = data[:, 1]
    n_samples = len(left)
    if n_samples == 0:
        raise ValueError("The input file contains no audio samples")
    mono = np.mean(data, axis=1)

    # One-second analysis window with 50% overlap. Clamp the window to the
    # clip length so that stft and istft agree on nperseg for short clips.
    nperseg = min(samplerate, n_samples)
    noverlap = nperseg // 2
    stft_args = dict(fs=samplerate, nperseg=nperseg, noverlap=noverlap)

    _, _, Z_left = signal.stft(left, **stft_args)
    _, _, Z_right = signal.stft(right, **stft_args)
    _, _, Z_mono = signal.stft(mono, **stft_args)

    # The centre estimate is the same for both channels; compute it once.
    Z_common = (
        np.minimum(np.abs(Z_left), np.abs(Z_right))
        * np.exp(1j * np.angle(Z_mono))
    )

    _, new_left = signal.istft(Z_left - Z_common * rdf, **stft_args)
    _, new_right = signal.istft(Z_right - Z_common * rdf, **stft_args)
    _, common_signal = signal.istft(Z_common, **stft_args)

    # istft output is padded; trim back to the input length.
    new_left = new_left[:n_samples]
    new_right = new_right[:n_samples]
    common_signal = common_signal[:n_samples]

    # Only the residual is normalised, and only when it would clip.
    peak = max(np.max(np.abs(new_left)), np.max(np.abs(new_right)))
    if peak > 1.0:
        new_left /= peak
        new_right /= peak

    sf.write(output_file, np.column_stack((new_left, new_right)), samplerate)
    sf.write(
        output_center_file,
        np.column_stack((common_signal, common_signal)),
        samplerate,
    )

    return output_file, output_center_file
54
-
55
# Gradio front end: upload a stereo file, pick a reduction factor and an
# export format, then listen to the residual and the phantom-centre tracks.
with gr.Blocks(title="Phantom Center Extraction", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Phantom Center Extraction")

    input_audio = gr.Audio(label="Upload stereo audio", type="filepath")
    reduction_f = gr.Slider(
        0.1,
        1.0,
        value=0.99999,
        step=0.00001,
        label="Reduction Factor (rdf)",
        interactive=True,
    )
    output_format = gr.Dropdown(
        choices=["flac", "wav"],
        value="flac",
        label="Export format",
    )

    extract_btn = gr.Button("Separate")

    # Results are shown side by side and are read-only.
    with gr.Row():
        side_audio = gr.Audio(
            label="Other audio", type="filepath", interactive=False
        )
        center_audio = gr.Audio(
            label="Phantom center audio", type="filepath", interactive=False
        )

    # Input order matches the (input_file, output_format, rdf) signature.
    extract_btn.click(
        fn=extract_phantom_center_test,
        inputs=[input_audio, output_format, reduction_f],
        outputs=[side_audio, center_audio],
    )


if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")
 
 
1
  import numpy as np
2
  import soundfile as sf
3
+ import subprocess
4
+ import tempfile
5
+ import os
6
+ import gradio as gr
7
  from scipy import signal
8
 
9
+ # ========== Processing Functions ==========
10
+
11
def convert_to_wav_float(input_file):
    """Decode an audio file to a temporary 32-bit float WAV via ffmpeg.

    Args:
        input_file: Path of the source audio file.

    Returns:
        Path of a newly created temporary ``.wav`` file. The caller owns
        the file and is responsible for deleting it.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Reserve a unique path, then close the handle so ffmpeg can write to it.
    temp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    temp_wav.close()

    converted = False
    try:
        # pcm_f32le stores 32-bit little-endian *float* samples, so values
        # outside [-1, 1] survive the conversion instead of being clipped.
        subprocess.run(
            [
                "ffmpeg", "-y", "-i", input_file,
                "-c:a", "pcm_f32le", "-f", "wav", temp_wav.name,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
        converted = True
    finally:
        # Do not leave an orphaned temp file behind when conversion fails.
        if not converted and os.path.exists(temp_wav.name):
            os.unlink(temp_wav.name)
    return temp_wav.name
23
+
24
+
25
def apply_reverb_wet_only(audio, samplerate):
    """Run audio through SoX's ``reverb`` effect and return only the wet signal.

    The audio is written to a temporary float WAV, processed with
    ``sox ... reverb -w 85 50 100 95 10 -2`` and read back.

    Args:
        audio: Sample array accepted by ``soundfile.write``.
        samplerate: Sample rate in Hz used to write the temporary file.

    Returns:
        The processed audio as a float32 array.

    Raises:
        subprocess.CalledProcessError: If SoX exits with a non-zero status.
        FileNotFoundError: If the ``sox`` executable cannot be found.
    """
    # Reserve two unique paths and close the handles right away, so that
    # soundfile and SoX are the only writers of those files.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tin, \
            tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tout:
        in_path, out_path = tin.name, tout.name

    try:
        sf.write(in_path, audio, samplerate, subtype='FLOAT')
        subprocess.run(
            ["sox", in_path, out_path,
             "reverb", '-w', '85', '50', '100', '95', '10', '-2'],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
        wet, _ = sf.read(out_path, dtype='float32')
    finally:
        # Clean up on success and on failure alike.
        for path in (in_path, out_path):
            if os.path.exists(path):
                os.unlink(path)
    return wet
40
+
41
+
42
def sox_filter(audio, samplerate, filter_type, cutoff):
    """Filter audio with a single-argument SoX effect.

    The audio is written to a temporary float WAV, processed with
    ``sox <in> <out> <filter_type> <cutoff>`` and read back.

    Args:
        audio: Sample array accepted by ``soundfile.write``.
        samplerate: Sample rate in Hz used to write the temporary file.
        filter_type: SoX effect name, 'highpass' or 'lowpass'.
        cutoff: Cutoff frequency in Hz; converted with ``str()``.

    Returns:
        The filtered audio as a float32 array.

    Raises:
        subprocess.CalledProcessError: If SoX exits with a non-zero status.
        FileNotFoundError: If the ``sox`` executable cannot be found.
    """
    # Reserve two unique paths and close the handles right away, so that
    # soundfile and SoX are the only writers of those files.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tin, \
            tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tout:
        in_path, out_path = tin.name, tout.name

    try:
        sf.write(in_path, audio, samplerate, subtype='FLOAT')
        subprocess.run(
            ["sox", in_path, out_path, filter_type, str(cutoff)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
        out, _ = sf.read(out_path, dtype='float32')
    finally:
        # Clean up on success and on failure alike.
        for path in (in_path, out_path):
            if os.path.exists(path):
                os.unlink(path)
    return out
58
+
59
+
60
def extract_phantom_center(input_file, rdf=0.99999):
    """Split a stereo file into front-left, front-right and phantom centre.

    The centre estimate takes, per STFT bin, the smaller of the left/right
    magnitudes combined with the phase of the mid signal (L+R)/2. That
    estimate, scaled by ``rdf``, is subtracted from each channel.

    Args:
        input_file: Path of a stereo audio file that ffmpeg can decode.
        rdf: Reduction factor applied to the centre estimate before it is
            subtracted from the left and right channels.

    Returns:
        Tuple ``(fs, FL, FR, FC)``: the sample rate, the left and right
        residuals (centre removed) and the centre estimate, each trimmed
        to the input length.

    Raises:
        ValueError: If the input is not 2-channel or contains no samples.
    """
    wav = convert_to_wav_float(input_file)
    try:
        data, fs = sf.read(wav, dtype='float32')
    finally:
        # The converted copy is only needed for this read.
        os.unlink(wav)

    if data.ndim != 2 or data.shape[1] != 2:
        raise ValueError("Input must be stereo 2-channel")

    left, right = data[:, 0], data[:, 1]
    n_samples = len(left)
    if n_samples == 0:
        raise ValueError("Input contains no audio samples")
    mid = (left + right) / 2

    # One-second analysis window with 50% overlap. Clamp the window to the
    # clip length so that stft and istft agree on nperseg for short clips.
    nperseg = min(fs, n_samples)
    noverlap = nperseg // 2
    stft_args = dict(fs=fs, nperseg=nperseg, noverlap=noverlap)

    _, _, z_left = signal.stft(left, **stft_args)
    _, _, z_right = signal.stft(right, **stft_args)
    _, _, z_mid = signal.stft(mid, **stft_args)

    z_center = (
        np.minimum(np.abs(z_left), np.abs(z_right))
        * np.exp(1j * np.angle(z_mid))
    )

    _, front_left = signal.istft(z_left - z_center * rdf, **stft_args)
    _, front_right = signal.istft(z_right - z_center * rdf, **stft_args)
    _, front_center = signal.istft(z_center, **stft_args)

    # istft output is padded; trim back to the input length.
    return (
        fs,
        front_left[:n_samples],
        front_right[:n_samples],
        front_center[:n_samples],
    )
83
+
84
+
85
def create_5_1_surround(input_file):
    """Upmix a stereo file to a 6-channel (5.1) Ogg Vorbis file.

    Channels are assembled in the order FL, FR, FC, LFE, SL, SR:
    fronts and centre come from ``extract_phantom_center``, the surrounds
    are wet-only reverb of the original left/right channels, every channel
    except LFE is high-passed at 120 Hz, and LFE is the 120 Hz low-pass of
    the averaged front residuals.

    Args:
        input_file: Path of a stereo audio file that ffmpeg can decode.

    Returns:
        Path of a newly created temporary ``.ogg`` file. The caller owns
        the file.

    Raises:
        ValueError: If the input is not stereo (from
            ``extract_phantom_center``).
        subprocess.CalledProcessError: If ffmpeg or SoX fails.
    """
    # 1. Extract fronts and centre
    fs, FL, FR, FC = extract_phantom_center(input_file)

    # 2. Read the original stereo signal as float WAV (source for the
    #    surround reverb)
    wav = convert_to_wav_float(input_file)
    try:
        stereo, _ = sf.read(wav, dtype='float32')
    finally:
        os.unlink(wav)
    L_orig, R_orig = stereo[:, 0], stereo[:, 1]

    # 3. Wet-only reverb for surrounds using original L/R
    SL = apply_reverb_wet_only(L_orig, fs)
    SR = apply_reverb_wet_only(R_orig, fs)

    # 4. High-pass filter to all but LFE
    FL_hp = sox_filter(FL, fs, 'highpass', 120)
    FR_hp = sox_filter(FR, fs, 'highpass', 120)
    FC_hp = sox_filter(FC, fs, 'highpass', 120)
    SL_hp = sox_filter(SL, fs, 'highpass', 120)
    SR_hp = sox_filter(SR, fs, 'highpass', 120)

    # 5. LFE from the unfiltered sum of FL/FR
    # NOTE(review): FL/FR already have the phantom centre subtracted, so
    # bass present equally in both input channels is reduced here; confirm
    # whether the original L/R was the intended LFE source.
    bass_sum = 0.5 * (FL + FR)
    LFE = sox_filter(bass_sum, fs, 'lowpass', 120)

    # 6. Zero-pad every channel to the longest one and stack as columns
    channels = [FL_hp, FR_hp, FC_hp, LFE, SL_hp, SR_hp]
    length = max(len(ch) for ch in channels)
    multich = np.column_stack(
        [np.pad(ch, (0, length - len(ch))) for ch in channels]
    )

    # 7. Write an intermediate float WAV and encode it with ffmpeg
    out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    out_wav.close()
    ogg_path = None
    encoded = False
    try:
        sf.write(out_wav.name, multich, fs, subtype='FLOAT')
        out_ogg = tempfile.NamedTemporaryFile(suffix='.ogg', delete=False)
        out_ogg.close()
        ogg_path = out_ogg.name
        subprocess.run(
            [
                "ffmpeg", "-y", "-i", out_wav.name,
                "-c:a", "libvorbis", "-ac", "6",
                "-channel_layout", "5.1", ogg_path,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
        encoded = True
    finally:
        # The intermediate WAV is never needed afterwards; the OGG is only
        # kept when encoding succeeded.
        if os.path.exists(out_wav.name):
            os.unlink(out_wav.name)
        if not encoded and ogg_path is not None and os.path.exists(ogg_path):
            os.unlink(ogg_path)
    return ogg_path
128
+
129
+ # ========== Gradio UI ==========
130
# Single-page UI: one audio upload, one button, one downloadable result.
with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
    gr.Markdown("# 🎧 Stereo to 5.1 OGG Converter")
    gr.Markdown("Phantom centre extraction + wet-only reverb on original L/R + sox highpass/lowpass + float I/O")

    stereo_upload = gr.Audio(label="Upload stereo audio", type="filepath")
    convert_button = gr.Button("Convert to 5.1 OGG")
    ogg_download = gr.File(label="Download 5.1 OGG")

    # The uploaded file path goes in; the path of the encoded OGG comes out.
    convert_button.click(
        fn=create_5_1_surround,
        inputs=[stereo_upload],
        outputs=[ogg_download],
    )


if __name__ == "__main__":
    demo.launch()