dghhvc committed on
Commit
a3c7947
·
verified ·
1 Parent(s): e57c68d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -0
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import numpy as np
4
+ import soundfile as sf
5
+ import librosa
6
+ import pyloudnorm as pyln
7
+ import noisereduce as nr
8
+ from scipy.signal import butter, sosfiltfilt
9
+ import gradio as gr
10
+ from pydub import AudioSegment
11
+
12
+
13
+ # ----------------- Utility Filters -----------------
14
def butter_bandpass(lowcut, highcut, sr, order=4):
    """Design a Butterworth band-pass filter as second-order sections.

    Args:
        lowcut: Lower band edge, in the same unit as ``sr``.
        highcut: Upper band edge, in the same unit as ``sr``.
        sr: Sampling rate; the edges are normalised by ``sr / 2``.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        The SOS coefficient array returned by ``scipy.signal.butter``.

    Raises:
        ValueError: If the edges do not satisfy
            ``0 < lowcut < highcut < sr / 2``.
    """
    nyquist = sr / 2
    # Validate up front so the error names the offending values in the
    # caller's units rather than as normalised frequencies.
    if not 0 < lowcut < highcut < nyquist:
        raise ValueError(
            "band edges must satisfy 0 < lowcut < highcut < Nyquist "
            f"({nyquist}); got lowcut={lowcut}, highcut={highcut}"
        )
    return butter(
        order,
        [lowcut / nyquist, highcut / nyquist],
        btype='band',
        output='sos',
    )
17
+
18
def butter_highpass(cutoff, sr, order=4):
    """Design a Butterworth high-pass filter as second-order sections.

    Args:
        cutoff: Corner frequency, in the same unit as ``sr``.
        sr: Sampling rate; the cutoff is normalised by ``sr / 2``.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        The SOS coefficient array returned by ``scipy.signal.butter``.

    Raises:
        ValueError: If ``cutoff`` is not strictly between 0 and ``sr / 2``.
    """
    nyquist = sr / 2
    # Validate up front so the error names the offending value in the
    # caller's units rather than as a normalised frequency.
    if not 0 < cutoff < nyquist:
        raise ValueError(
            f"cutoff must satisfy 0 < cutoff < Nyquist ({nyquist}); "
            f"got cutoff={cutoff}"
        )
    return butter(order, cutoff / nyquist, btype='highpass', output='sos')
21
+
22
def butter_lowpass(cutoff, sr, order=4):
    """Design a Butterworth low-pass filter as second-order sections.

    Args:
        cutoff: Corner frequency, in the same unit as ``sr``.
        sr: Sampling rate; the cutoff is normalised by ``sr / 2``.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        The SOS coefficient array returned by ``scipy.signal.butter``.

    Raises:
        ValueError: If ``cutoff`` is not strictly between 0 and ``sr / 2``.
    """
    nyquist = sr / 2
    # Validate up front so the error names the offending value in the
    # caller's units rather than as a normalised frequency.
    if not 0 < cutoff < nyquist:
        raise ValueError(
            f"cutoff must satisfy 0 < cutoff < Nyquist ({nyquist}); "
            f"got cutoff={cutoff}"
        )
    return butter(order, cutoff / nyquist, btype='lowpass', output='sos')
25
+
26
+
27
+ # ----------------- DSP Core -----------------
28
class VoiceProcessor:
    """Voice clean-up chain that runs at one fixed sampling rate.

    ``process`` applies, in order: mono fold-down, DC removal with peak
    normalisation, noise reduction, high-/low-pass filtering, de-essing,
    three-band compression, loudness normalisation and a final peak ceiling.

    The default filter edges reach 16 kHz, so the default settings require
    ``sr`` above 32 kHz (the edges must stay below ``sr / 2``).
    """

    def __init__(self, sr):
        """Store the sampling rate and build the pyloudnorm meter for it."""
        self.sr = sr
        self.meter = pyln.Meter(sr)

    def to_mono(self, x):
        """Average a multi-channel signal down to one channel.

        The shorter axis is treated as the channel axis. This covers the
        ``(channels, samples)`` layout that ``librosa.load(mono=False)``
        returns as well as a ``(samples, channels)`` layout. Averaging a
        fixed ``axis=1`` on the librosa layout would collapse the time axis
        and leave one value per channel.

        One-dimensional input is returned unchanged.
        """
        x = np.asarray(x)
        if x.ndim <= 1:
            return x
        channel_axis = int(np.argmin(x.shape))
        return np.mean(x, axis=channel_axis)

    def dc_remove_and_peak_norm(self, x, target_peak=0.98):
        """Subtract the mean, then scale so the absolute peak is ``target_peak``."""
        x = x - np.mean(x)
        # The epsilon keeps the division finite for an all-zero signal.
        peak = np.max(np.abs(x)) + 1e-12
        return x * (target_peak / peak)

    def adaptive_noise_reduction(self, x, amount=0.6):
        """Run noisereduce in stationary mode with ``prop_decrease=amount``."""
        # Stationary noise: use the stationary mode.
        return nr.reduce_noise(y=x, sr=self.sr, stationary=True, prop_decrease=amount)

    def dynamic_deesser(self, x, band=(5000, 9000), reduction_db=8):
        """Attenuate samples whose sibilance-band magnitude is in the top 15%.

        The signal is band-passed to ``band`` and rectified; every sample
        where that magnitude exceeds its 85th percentile has the full-band
        signal scaled by ``-reduction_db`` dB. The gain switches per sample
        with no smoothing.
        """
        # Detect sibilance from the energy in the high band.
        sos = butter_bandpass(band[0], band[1], self.sr, order=2)
        sib_energy = np.abs(sosfiltfilt(sos, x))
        threshold = np.percentile(sib_energy, 85)  # top 15%
        gain = np.ones_like(x)
        gain[sib_energy > threshold] = 10 ** (-reduction_db / 20.0)
        return x * gain

    def multiband_compression(self, x, low_band=(80, 300), mid_band=(300, 4000), high_band=(4000, 16000),
                              ratios=(2, 2.5, 1.5), thresholds=(-24, -20, -18)):
        """Compress three band-passed copies of ``x`` and sum them.

        Args:
            x: One-dimensional signal.
            low_band, mid_band, high_band: ``(low, high)`` band edges.
            ratios: Compression ratio for the low, mid and high band.
            thresholds: Threshold in dB for the low, mid and high band.

        Returns:
            The sum of the three compressed bands.
        """
        def band_compress(signal, band, ratio, thr_db):
            sos = butter_bandpass(band[0], band[1], self.sr, order=2)
            b = sosfiltfilt(sos, signal)
            # Moving-RMS level detector. np.convolve(mode='same') returns
            # max(len(b), window) samples, so the window is capped at the
            # signal length to keep the gain curve aligned with ``b``.
            window = max(1, min(1024, b.size))
            rms = np.sqrt(np.convolve(b**2, np.ones(window) / window, mode='same') + 1e-12)
            level_db = 20 * np.log10(rms + 1e-12)
            over_db = level_db - thr_db
            # Above threshold, reduce the overshoot by (1 - 1/ratio).
            gain_db = np.where(over_db > 0, -over_db * (1 - 1 / ratio), 0)
            gain_lin = 10 ** (gain_db / 20)
            return b * gain_lin

        low = band_compress(x, low_band, ratios[0], thresholds[0])
        mid = band_compress(x, mid_band, ratios[1], thresholds[1])
        high = band_compress(x, high_band, ratios[2], thresholds[2])

        return low + mid + high

    def eq_tone_shaping(self, x):
        """Apply an 80 high-pass followed by a 16000 low-pass (order 2, zero-phase)."""
        # HPF
        sos_hp = butter_highpass(80, self.sr, order=2)
        x = sosfiltfilt(sos_hp, x)
        # LPF
        sos_lp = butter_lowpass(16000, self.sr, order=2)
        x = sosfiltfilt(sos_lp, x)
        return x

    def loudness_normalize(self, x, target_lufs=-16.0):
        """Scale ``x`` so its measured integrated loudness equals ``target_lufs``.

        If the measurement is not finite (for example on silent input) the
        signal is returned unchanged, because the resulting gain would be
        inf/NaN.
        """
        loudness = self.meter.integrated_loudness(x)
        if not np.isfinite(loudness):
            return x
        return pyln.normalize.loudness(x, loudness, target_lufs)

    def true_peak_limiter(self, x, ceiling=0.97):
        """Scale the whole signal down when its sample peak exceeds ``ceiling``.

        This is a static gain based on the largest absolute sample. No
        oversampling is done, so inter-sample peaks are not measured.
        """
        peak = np.max(np.abs(x))
        if peak > ceiling:
            x = x * (ceiling / peak)
        return x

    def process(self, audio_path, nr_amount=0.6, target_lufs=-16.0):
        """Load ``audio_path`` and run the full processing chain.

        Args:
            audio_path: File passed to ``librosa.load`` with ``sr=self.sr``.
            nr_amount: ``prop_decrease`` for the noise-reduction stage.
            target_lufs: Target for the loudness-normalisation stage.

        Returns:
            The processed one-dimensional signal.
        """
        x, _ = librosa.load(audio_path, sr=self.sr, mono=False)
        x = self.to_mono(x)
        x = self.dc_remove_and_peak_norm(x)
        x = self.adaptive_noise_reduction(x, amount=nr_amount)
        x = self.eq_tone_shaping(x)
        x = self.dynamic_deesser(x)
        x = self.multiband_compression(x)
        x = self.loudness_normalize(x, target_lufs=target_lufs)
        x = self.true_peak_limiter(x)
        return x
103
+
104
+
105
+ # ----------------- File Save -----------------
106
def _reserve_temp_path(suffix):
    """Create an empty temporary file and return its path, handle closed."""
    # Closing right away avoids leaking a descriptor per export and lets
    # other writers reopen the path by name (required on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        return handle.name


def save_outputs(y, sr, export_mp3=True, export_flac=True):
    """Write the processed signal to temporary files and return their paths.

    A WAV file is always written with soundfile. MP3 (192k) and FLAC copies
    are produced on request by re-encoding that WAV through pydub.

    Args:
        y: Audio samples passed to ``soundfile.write``.
        sr: Sampling rate passed to ``soundfile.write``.
        export_mp3: Also write an MP3 copy.
        export_flac: Also write a FLAC copy.

    Returns:
        A list of file paths: the WAV first, then MP3 and FLAC if requested.
        The files are not deleted by this function.
    """
    # WAV
    wav_path = _reserve_temp_path(".wav")
    sf.write(wav_path, y, sr)
    files = [wav_path]

    if export_mp3 or export_flac:
        # Decode the WAV once and reuse it for every extra format.
        segment = AudioSegment.from_wav(wav_path)

        # MP3
        if export_mp3:
            mp3_path = _reserve_temp_path(".mp3")
            # export() returns an open file object; close it explicitly.
            segment.export(mp3_path, format="mp3", bitrate="192k").close()
            files.append(mp3_path)

        # FLAC
        if export_flac:
            flac_path = _reserve_temp_path(".flac")
            segment.export(flac_path, format="flac").close()
            files.append(flac_path)

    return files
127
+
128
+
129
+ # ----------------- Gradio Interface -----------------
130
def process_audio(input_file, nr_amount, target_lufs, export_mp3, export_flac):
    """Gradio callback: process one audio file and export the result.

    Args:
        input_file: Path of the uploaded or recorded audio.
        nr_amount: Noise-reduction strength forwarded to the processor.
        target_lufs: Loudness target forwarded to the processor.
        export_mp3: Whether to also export an MP3 file.
        export_flac: Whether to also export a FLAC file.

    Returns:
        A tuple ``((sample_rate, float32_samples), file_paths)`` matching
        the audio player and file-list outputs.

    Raises:
        gr.Error: If no audio was provided.
    """
    if not input_file:
        # Clicking the button without audio passes no path; show a clear
        # message ("Please upload or record an audio file first.").
        raise gr.Error("لطفاً ابتدا یک فایل صوتی آپلود یا ضبط کنید.")

    proc = VoiceProcessor(sr=48000)
    y = proc.process(input_file, nr_amount=nr_amount, target_lufs=target_lufs)
    outputs = save_outputs(y, proc.sr, export_mp3, export_flac)
    return (proc.sr, y.astype(np.float32)), outputs
135
+
136
+
137
# Build the single-tab UI: inputs, a process button, then the outputs.
with gr.Blocks(title="Voice Softener Studio Pro") as demo:
    gr.Markdown("# 🎙️ Voice Softener Studio Pro")
    with gr.Tab("حالت ساده"):
        audio_in = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="آپلود یا ضبط صدا",
        )
        nr_amount = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.6,
            step=0.05,
            label="شدت کاهش نویز",
        )
        target_lufs = gr.Slider(
            minimum=-28,
            maximum=-12,
            value=-16,
            step=1,
            label="هدف لودنس (LUFS)",
        )
        export_mp3 = gr.Checkbox(value=True, label="MP3 خروجی")
        export_flac = gr.Checkbox(value=False, label="FLAC خروجی")
        btn = gr.Button("پردازش کن", variant="primary")
        out_audio = gr.Audio(label="خروجی پردازش‌شده", type="numpy")
        out_files = gr.Files(label="دانلود فایل‌ها")

        # The order here must match process_audio's parameters and returns.
        callback_inputs = [audio_in, nr_amount, target_lufs, export_mp3, export_flac]
        callback_outputs = [out_audio, out_files]
        btn.click(fn=process_audio, inputs=callback_inputs, outputs=callback_outputs)

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    listen_port = int(os.getenv("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)