import torch
import librosa
import soundfile as sf
import numpy as np
import tempfile
import gradio as gr
from denoiser import pretrained
from denoiser.dsp import convert_audio
from pydub import AudioSegment, silence
from tqdm import tqdm
# -----------------------------
# Load model ONCE at import time so every request reuses the same weights
# -----------------------------
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = pretrained.dns64().to(device)
# -----------------------------
# Silence trimming helpers
# -----------------------------
def safe_append(base, chunk, crossfade_ms=30):
    """Concatenate two pydub segments, crossfading only when it is safe.

    A crossfade cannot be longer than either operand, so the requested
    ``crossfade_ms`` is clamped to the shorter segment. When either segment
    is empty (or the clamped fade is zero) a plain concatenation is used.
    """
    # Guard: an empty operand cannot be crossfaded at all.
    if not len(base) or not len(chunk):
        return base + chunk
    fade = min(crossfade_ms, len(base), len(chunk))
    if fade <= 0:
        return base + chunk
    return base.append(chunk, crossfade=fade)
def shorten_silences(audio, silence_thresh=-50, min_silence_len=400, max_keep_silence=1500, crossfade_ms=30):
    """Compress long pauses in *audio* while leaving natural short ones intact.

    Silences are detected with pydub and replaced by shorter synthetic
    silence using a stepped reduction:

    - pauses under 500 ms pass through unchanged;
    - longer pauses keep 500 ms plus 500 ms per full extra second,
      capped at ``max_keep_silence``.

    Segments are joined with :func:`safe_append` to avoid clicks at the seams.
    """
    gaps = silence.detect_silence(
        audio,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh
    )
    result = AudioSegment.silent(duration=0)
    cursor = 0
    for gap_start, gap_end in gaps:
        # Non-silent audio preceding this gap.
        result = safe_append(result, audio[cursor:gap_start], crossfade_ms)
        gap_len = gap_end - gap_start
        # Stepped reduction: starts at 500 ms, adds 500 ms for every full
        # second beyond the first, never exceeding max_keep_silence.
        keep = gap_len if gap_len < 500 else min(max_keep_silence, (gap_len // 1000) * 500 + 500)
        result = safe_append(result, AudioSegment.silent(duration=int(keep)), crossfade_ms)
        cursor = gap_end
    # Whatever remains after the last detected gap.
    return safe_append(result, audio[cursor:], crossfade_ms)
# -----------------------------
# Main processing function
# -----------------------------
def denoise_audio(audio_file, trim_silence, silence_thresh, min_silence_len, max_keep_silence):
    """Denoise an audio file with the DNS64 model, optionally trimming silences.

    Parameters
    ----------
    audio_file : str | None
        Path to the input audio file (any format librosa can read).
    trim_silence : bool
        When True, run shorten_silences() on the denoised output.
    silence_thresh, min_silence_len, max_keep_silence :
        Forwarded to shorten_silences().

    Returns
    -------
    str | None
        Path to a temporary WAV file with the processed audio, or None
        when no input was given or the input contains no samples.
    """
    if audio_file is None:
        return None
    # Load mono at 16 kHz (the rate the DNS64 model expects).
    wav, sr = librosa.load(audio_file, sr=16000)
    if len(wav) == 0:
        # np.concatenate would raise on an empty chunk list.
        return None
    chunk_size = 16000 * 10  # 10-second chunks to avoid OOM on long files
    denoised_chunks = []
    # Inference only: disable autograd once for the whole loop.
    with torch.no_grad():
        for i in range(0, len(wav), chunk_size):
            chunk = wav[i:i + chunk_size]
            wav_tensor = torch.tensor(chunk).unsqueeze(0).to(device)
            wav_tensor = convert_audio(
                wav_tensor, sr, model.sample_rate, model.chin
            )
            denoised = model(wav_tensor)[0]
            denoised_chunks.append(
                denoised.squeeze().cpu().numpy()
            )
    denoised_np = np.concatenate(denoised_chunks)
    # Save denoised audio to a temp file. Close the handle immediately so the
    # path can be reopened by sf.write (required on Windows) and so the file
    # descriptor is not leaked; delete=False keeps the file for Gradio.
    tmp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_wav.close()
    sf.write(tmp_wav.name, denoised_np, model.sample_rate)
    if trim_silence:
        audio = AudioSegment.from_file(tmp_wav.name, format="wav")
        processed = shorten_silences(
            audio,
            silence_thresh=silence_thresh,
            min_silence_len=min_silence_len,
            max_keep_silence=max_keep_silence
        )
        final_file = tempfile.NamedTemporaryFile(
            suffix="_final.wav", delete=False
        )
        final_file.close()  # same Windows/file-descriptor concern as above
        processed.export(final_file.name, format="wav")
        return final_file.name
    return tmp_wav.name
# -----------------------------
# Gradio UI
# -----------------------------
# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks(title="🎧 Advanced Audio Denoiser") as demo:
    gr.Markdown("# 🎧 Audio Denoiser (Demucs DNS64)")
    gr.Markdown("Upload audio to remove noise and intelligently shorten silences.")
    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(type="filepath", label="Upload Audio")
            # Tunable knobs for the pydub silence-trimming pass.
            with gr.Accordion("Silence Trimming Settings", open=True):
                trim_toggle = gr.Checkbox(value=True, label="Enable Silence Trimming")
                db_threshold = gr.Slider(
                    label="Silence Threshold (dB)",
                    info="Lower is stricter (quieter sounds count as silence)",
                    minimum=-70,
                    maximum=-20,
                    value=-50,
                    step=1,
                )
                min_pause = gr.Slider(
                    label="Min Silence Duration (ms)",
                    info="How long a pause must be to be considered 'silence'",
                    minimum=100,
                    maximum=2000,
                    value=400,
                    step=50,
                )
                keep_cap = gr.Slider(
                    label="Max Silence to Keep (ms)",
                    info="Longer silences will be shortened towards this value",
                    minimum=0,
                    maximum=5000,
                    value=1500,
                    step=100,
                )
            run_button = gr.Button("Process Audio", variant="primary")
        with gr.Column():
            output_audio = gr.Audio(label="Denoised & Trimmed Output")
    # Wire the button to the processing pipeline.
    run_button.click(
        fn=denoise_audio,
        inputs=[input_audio, trim_toggle, db_threshold, min_pause, keep_cap],
        outputs=output_audio,
    )

if __name__ == "__main__":
    demo.launch()