hmdlohar committed on
Commit
f032a60
·
1 Parent(s): fe82cbf

Initial audio denoiser gradio app

Browse files
Files changed (2) hide show
  1. app.py +121 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import librosa
3
+ import soundfile as sf
4
+ import numpy as np
5
+ import tempfile
6
+ import gradio as gr
7
+
8
+ from denoiser import pretrained
9
+ from denoiser.dsp import convert_audio
10
+ from pydub import AudioSegment, silence
11
+ from tqdm import tqdm
12
+
13
+
14
+ # -----------------------------
15
+ # Load model ONCE (important!)
16
+ # -----------------------------
17
# Prefer the GPU when CUDA is available; the model and every input tensor are
# placed on this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the pretrained DNS64 denoiser once at import time so that every
# request reuses the same weights. eval() switches the module to inference
# mode; the app never trains the model.
model = pretrained.dns64().to(device).eval()
19
+
20
+
21
+ # -----------------------------
22
+ # Silence trimming helpers
23
+ # -----------------------------
24
def safe_append(base, chunk, crossfade_ms=30):
    """Append ``chunk`` to ``base``, crossfading only when it is safe to do so.

    The crossfade length is clamped to the length of the shorter operand so
    that ``base.append`` is never asked for a crossfade longer than either
    segment. When either operand is empty, or the clamped crossfade is not
    positive, the two are joined with plain concatenation instead.

    Args:
        base: Segment accumulated so far (a pydub ``AudioSegment`` in this app).
        chunk: Segment to add after ``base``.
        crossfade_ms: Requested crossfade length; ``len()`` of the operands is
            compared against it, so it uses the same unit as their length.

    Returns:
        The combined segment.
    """
    # Nothing to blend when one side is empty.
    if not len(base) or not len(chunk):
        return base + chunk

    overlap = min(crossfade_ms, len(base), len(chunk))
    if overlap <= 0:
        return base + chunk

    return base.append(chunk, crossfade=overlap)
30
+
31
+
32
def _kept_silence_ms(silence_len):
    """Return how much of a detected silence (in ms) is kept in the output."""
    if silence_len < 500:
        return silence_len
    if silence_len <= 1500:
        return 500
    if silence_len <= 2500:
        return 1000
    return 1500


def shorten_silences(audio, silence_thresh=-50, crossfade_ms=30,
                     min_silence_len=400):
    """Return a copy of ``audio`` with long silent stretches shortened.

    Silent ranges are located with ``silence.detect_silence``. The audio
    between them is kept as is, and each silent range is replaced by generated
    silence whose length is capped in steps (see ``_kept_silence_ms``). All
    pieces are joined with ``safe_append`` so short crossfades smooth the
    seams.

    Args:
        audio: The pydub ``AudioSegment`` to process.
        silence_thresh: Threshold forwarded to ``detect_silence``.
        crossfade_ms: Crossfade length requested at every join.
        min_silence_len: Minimum silence length forwarded to
            ``detect_silence``; defaults to the previously hard-coded 400.

    Returns:
        The rebuilt ``AudioSegment``.
    """
    silent_ranges = silence.detect_silence(
        audio,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
    )

    # Generate silence at the input's own frame rate. pydub's default is
    # 11025 Hz, which makes every append resample and, for inputs below that
    # rate, would raise the output's frame rate.
    frame_rate = audio.frame_rate
    output = AudioSegment.silent(duration=0, frame_rate=frame_rate)
    prev_end = 0

    for start, end in silent_ranges:
        # Audio between the previous silence and this one is kept untouched.
        output = safe_append(output, audio[prev_end:start], crossfade_ms)

        padding = AudioSegment.silent(
            duration=_kept_silence_ms(end - start),
            frame_rate=frame_rate,
        )
        output = safe_append(output, padding, crossfade_ms)
        prev_end = end

    # Whatever follows the last detected silence.
    return safe_append(output, audio[prev_end:], crossfade_ms)
65
+
66
+
67
+ # -----------------------------
68
+ # Main processing function
69
+ # -----------------------------
70
def denoise_audio(audio_file, trim_silence):
    """Denoise an audio file and optionally shorten its silences.

    The file is loaded at 16 kHz with librosa, passed through the module-level
    ``model`` in 10-second chunks, and written to a temporary WAV file. When
    ``trim_silence`` is true the result is additionally run through
    ``shorten_silences`` and written to a second temporary WAV file; the
    intermediate file is removed.

    Args:
        audio_file: Path of the uploaded audio, as delivered by the
            ``gr.Audio(type="filepath")`` input. May be empty when nothing
            was uploaded.
        trim_silence: Whether to shorten silences after denoising.

    Returns:
        Path of the resulting WAV file. The file is not deleted here; the
        caller (Gradio) reads it after this function returns.

    Raises:
        gr.Error: If no file was provided or the file holds no samples.
    """
    import os  # local import: only needed for temp-file cleanup below

    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    wav, sr = librosa.load(audio_file, sr=16000)
    if len(wav) == 0:
        # np.concatenate below would otherwise fail on an empty chunk list.
        raise gr.Error("The uploaded audio contains no samples.")

    chunk_size = 16000 * 10  # 10 seconds at the 16 kHz load rate
    denoised_chunks = []

    with torch.no_grad():
        for offset in range(0, len(wav), chunk_size):
            piece = wav[offset:offset + chunk_size]
            wav_tensor = torch.tensor(piece).unsqueeze(0).to(device)
            wav_tensor = convert_audio(
                wav_tensor, sr, model.sample_rate, model.chin
            )
            denoised = model(wav_tensor)[0]
            # reshape(-1) always yields a 1-D array; squeeze() would give a
            # 0-d array for a one-sample chunk, which cannot be concatenated.
            denoised_chunks.append(denoised.reshape(-1).cpu().numpy())

    denoised_np = np.concatenate(denoised_chunks)

    # Reserve a path and close the handle right away so no descriptor leaks
    # and soundfile can open the path itself.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
        denoised_path = tmp_wav.name
    sf.write(denoised_path, denoised_np, model.sample_rate)

    if not trim_silence:
        return denoised_path

    try:
        audio = AudioSegment.from_file(denoised_path, format="wav")
        processed = shorten_silences(audio)
        with tempfile.NamedTemporaryFile(
            suffix="_final.wav", delete=False
        ) as final_file:
            final_path = final_file.name
        # export() hands back the file object it wrote to; close it.
        processed.export(final_path, format="wav").close()
    finally:
        # The intermediate denoised file is no longer needed once trimming
        # has been attempted.
        os.remove(denoised_path)

    return final_path
105
+
106
+
107
+ # -----------------------------
108
+ # Gradio UI
109
+ # -----------------------------
110
# Two inputs (audio file path, trim-silence flag) feed denoise_audio; the
# returned file path is rendered by the output audio component.
demo = gr.Interface(
    fn=denoise_audio,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Checkbox(label="Trim silence after denoising", value=True),
    ],
    outputs=gr.Audio(label="Denoised Output"),
    title="🎧 Audio Denoiser (Demucs DNS64)",
    description="Upload an audio file, optionally trim silences, and get clean audio.",
)

# Start the server only when run as a script, so importing this module
# (for example to reuse denoise_audio) does not launch the UI.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/facebookresearch/denoiser.git
2
+ torch
3
+ torchaudio
4
+ librosa
5
+ soundfile
6
+ pydub
7
+ tqdm
8
+ gradio
9
+ numpy