| | import torch |
| | import librosa |
| | import soundfile as sf |
| | import numpy as np |
| | import tempfile |
| | import gradio as gr |
| |
|
| | from denoiser import pretrained |
| | from denoiser.dsp import convert_audio |
| | from pydub import AudioSegment, silence |
| | from tqdm import tqdm |
| |
|
| |
|
| | |
| | |
| | |
# Run inference on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# DNS64 pretrained denoiser from the `denoiser` package; weights are
# fetched on first use — NOTE(review): this downloads at import time.
model = pretrained.dns64().to(device)
| |
|
| |
|
| | |
| | |
| | |
def safe_append(base, chunk, crossfade_ms=30):
    """Concatenate two audio segments, crossfading when both are non-empty.

    The crossfade is clamped to the length of the shorter operand so the
    underlying ``append`` never receives a crossfade longer than either
    segment; empty operands fall back to plain concatenation.
    """
    if not len(base) or not len(chunk):
        return base + chunk
    fade = min(crossfade_ms, len(base), len(chunk))
    if fade <= 0:
        return base + chunk
    return base.append(chunk, crossfade=fade)
| |
|
| |
|
def shorten_silences(audio, silence_thresh=-50, min_silence_len=400, max_keep_silence=1500, crossfade_ms=30):
    """Compress long pauses in *audio* while leaving speech untouched.

    Pauses shorter than 500 ms are kept verbatim; longer ones are replaced
    with generated silence using a stepped formula (500 ms per whole second
    of original pause, plus 500 ms), capped at ``max_keep_silence``.
    """
    gaps = silence.detect_silence(
        audio,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh
    )

    result = AudioSegment.silent(duration=0)
    cursor = 0

    for gap_start, gap_end in gaps:
        # Copy the non-silent audio leading up to this pause.
        result = safe_append(result, audio[cursor:gap_start], crossfade_ms)

        gap_len = gap_end - gap_start
        if gap_len < 500:
            # Short pause: keep it at its original length.
            keep = gap_len
        else:
            # Stepped reduction, capped at max_keep_silence.
            keep = min(max_keep_silence, (gap_len // 1000) * 500 + 500)

        result = safe_append(
            result,
            AudioSegment.silent(duration=int(keep)),
            crossfade_ms
        )
        cursor = gap_end

    # Append whatever follows the last detected pause.
    return safe_append(result, audio[cursor:], crossfade_ms)
| |
|
| |
|
| | |
| | |
| | |
def denoise_audio(audio_file, trim_silence, silence_thresh, min_silence_len, max_keep_silence):
    """Denoise an audio file with DNS64 and optionally compress silences.

    Parameters
    ----------
    audio_file : str or None
        Path to the input audio (any format librosa can decode).
    trim_silence : bool
        When True, run the silence-shortening pass on the denoised audio.
    silence_thresh, min_silence_len, max_keep_silence :
        Forwarded to ``shorten_silences``.

    Returns
    -------
    str or None
        Path to a temporary WAV file with the processed audio, or None
        when no input was supplied or the input decoded to zero samples.
    """
    if audio_file is None:
        return None

    # Load mono at 16 kHz to match the DNS64 model's sample rate.
    wav, sr = librosa.load(audio_file, sr=16000)
    if len(wav) == 0:
        # Guard: np.concatenate below would raise on an empty chunk list.
        return None

    # Process in 10-second chunks to bound peak memory usage.
    chunk_size = 16000 * 10
    denoised_chunks = []

    for i in range(0, len(wav), chunk_size):
        chunk = wav[i:i + chunk_size]
        # from_numpy shares the buffer instead of copying like torch.tensor.
        wav_tensor = torch.from_numpy(chunk).unsqueeze(0).to(device)
        wav_tensor = convert_audio(
            wav_tensor, sr, model.sample_rate, model.chin
        )

        with torch.no_grad():
            denoised = model(wav_tensor)[0]

        denoised_chunks.append(denoised.squeeze().cpu().numpy())

    denoised_np = np.concatenate(denoised_chunks)

    # Close the handle before writing: Windows forbids re-opening an open
    # NamedTemporaryFile, and this also avoids leaking the descriptor.
    tmp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_wav.close()
    sf.write(tmp_wav.name, denoised_np, model.sample_rate)

    if not trim_silence:
        return tmp_wav.name

    audio = AudioSegment.from_file(tmp_wav.name, format="wav")
    processed = shorten_silences(
        audio,
        silence_thresh=silence_thresh,
        min_silence_len=min_silence_len,
        max_keep_silence=max_keep_silence
    )
    final_file = tempfile.NamedTemporaryFile(
        suffix="_final.wav", delete=False
    )
    final_file.close()
    processed.export(final_file.name, format="wav")
    return final_file.name
| |
|
| |
|
| | |
| | |
| | |
# --- Gradio UI -------------------------------------------------------------
# Layout: left column holds the upload widget and silence-trimming settings,
# right column holds the processed output player.
with gr.Blocks(title="🎧 Advanced Audio Denoiser") as demo:
    gr.Markdown("# 🎧 Audio Denoiser (Demucs DNS64)")
    gr.Markdown("Upload audio to remove noise and intelligently shorten silences.")

    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(type="filepath", label="Upload Audio")

            with gr.Accordion("Silence Trimming Settings", open=True):
                do_trim = gr.Checkbox(label="Enable Silence Trimming", value=True)

                # Slider defaults mirror shorten_silences' keyword defaults
                # (-50 dB threshold, 400 ms minimum, 1500 ms maximum kept).
                threshold = gr.Slider(
                    minimum=-70, maximum=-20, value=-50, step=1,
                    label="Silence Threshold (dB)",
                    info="Lower is stricter (quieter sounds count as silence)"
                )

                min_len = gr.Slider(
                    minimum=100, maximum=2000, value=400, step=50,
                    label="Min Silence Duration (ms)",
                    info="How long a pause must be to be considered 'silence'"
                )

                max_keep = gr.Slider(
                    minimum=0, maximum=5000, value=1500, step=100,
                    label="Max Silence to Keep (ms)",
                    info="Longer silences will be shortened towards this value"
                )

            submit_btn = gr.Button("Process Audio", variant="primary")

        with gr.Column():
            output_audio = gr.Audio(label="Denoised & Trimmed Output")

    # Wire the button to the processing pipeline; input order must match
    # denoise_audio's parameter order.
    submit_btn.click(
        fn=denoise_audio,
        inputs=[input_audio, do_trim, threshold, min_len, max_keep],
        outputs=output_audio
    )

if __name__ == "__main__":
    demo.launch()
| |
|