DeepFilterNet2

Sleeping

App Files Files Community

ongudidan committed on Oct 3, 2025

Commit

6024d1d

verified ·

1 Parent(s): faa83be

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -36

app.py CHANGED Viewed

@@ -8,6 +8,10 @@ from typing import List, Optional, Tuple, Union
 import subprocess
 # import os
 import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
@@ -112,71 +116,82 @@ def ensure_wav(filepath: str) -> str:
     return filepath
def demo_fn(speech_upl: str, noise_type: str, snr: int, mic_input: Optional[str] = None):
    """Optionally add noise to a speech clip, denoise it, and return audio plus spectrograms.

    Args:
        speech_upl: Path of the uploaded speech file, or ``None`` to fall back
            to the bundled sample ``samples/p232_013_clean.wav``.
        noise_type: Key into the module-level ``NOISES`` mapping. A ``None``
            entry means no noise is mixed in.
        snr: Signal-to-noise ratio forwarded to ``mix_at_snr`` (cast to ``int``).
        mic_input: Path of a microphone recording. When truthy it replaces
            ``speech_upl``.

    Returns:
        A 4-tuple ``(noisy_wav, noisy_im, enhanced_wav, enh_im)``: the paths of
        the saved noisy and enhanced WAV files and the values returned by
        ``spec_im`` for each of them.
    """
    if mic_input:
        speech_upl = mic_input
    sr = config("sr", 48000, int, section="df")
    logger.info(f"Got parameters speech_upl: {speech_upl}, noise: {noise_type}, snr: {snr}")
    snr = int(snr)
    noise_fn = NOISES[noise_type]
    max_s = 3600  # allow up to 1 hour (3600 seconds)
    max_len = max_s * sr
    if speech_upl is not None:
        # Ensure compatible WAV input
        speech_upl = ensure_wav(speech_upl)
        sample, meta = load_audio(speech_upl, sr)
        if sample.shape[-1] > max_len:
            # Too long: keep a randomly positioned window of max_len samples.
            start = torch.randint(0, sample.shape[-1] - max_len, ()).item()
            sample = sample[..., start : start + max_len]
    else:
        sample, meta = load_audio("samples/p232_013_clean.wav", sr)
        sample = sample[..., :max_len]
    if sample.dim() > 1 and sample.shape[0] > 1:
        assert sample.shape[1] > sample.shape[0], f"Expecting channels first, but got {sample.shape}"
        # Average over dim 0 to get a single channel.
        sample = sample.mean(dim=0, keepdim=True)
    logger.info(f"Loaded sample with shape {sample.shape}")
    if noise_fn is not None:
        noise, _ = load_audio(noise_fn, sr)  # type: ignore
        logger.info(f"Loaded noise with shape {noise.shape}")
        # Only the third element of the returned triple (the mixture) is used.
        _, _, sample = mix_at_snr(sample, noise, snr)
    logger.info("Start denoising audio")
    enhanced = enhance(model, df, sample)
    logger.info("Denoising finished")

    # Linear 0 -> 1 ramp over the first 0.15 s, then unity gain. The ramp is
    # truncated to the clip length so clips shorter than 0.15 s no longer ask
    # torch.ones() for a negative size.
    n_samples = enhanced.shape[1]
    ramp = torch.linspace(0.0, 1.0, int(sr * 0.15))[:n_samples]
    lim = torch.cat((ramp, torch.ones(n_samples - ramp.shape[0]))).unsqueeze(0)
    enhanced = enhanced * lim

    if meta.sample_rate != sr:
        # Return audio at the sample rate reported for the loaded file.
        enhanced = resample(enhanced, sr, meta.sample_rate)
        sample = resample(sample, sr, meta.sample_rate)
        sr = meta.sample_rate

    # Only the names are needed; close the handles right away instead of
    # leaving them open until garbage collection.
    with tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False) as tmp:
        noisy_wav = tmp.name
    save_audio(noisy_wav, sample, sr)
    with tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False) as tmp:
        enhanced_wav = tmp.name
    save_audio(enhanced_wav, enhanced, sr)
    logger.info(f"saved audios: {noisy_wav}, {enhanced_wav}")

    # The figures/axes are module-level and reused between calls.
    ax_noisy.clear()
    ax_enh.clear()
    noisy_im = spec_im(sample, sr=sr, figure=fig_noisy, ax=ax_noisy)
    enh_im = spec_im(enhanced, sr=sr, figure=fig_enh, ax=ax_enh)

    # NOTE(review): presumably the paths cleanup_tmp must leave in place,
    # since the returned outputs are in the list - confirm against cleanup_tmp.
    paths = [speech_upl, noisy_wav, enhanced_wav]
    if mic_input is not None and mic_input != "":
        paths.append(mic_input)
    cleanup_tmp(paths)
    return noisy_wav, noisy_im, enhanced_wav, enh_im

 import subprocess
 # import os
+import asyncio
+# from typing import Optional
 import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
     return filepath
async def ensure_wav_async(filepath: str) -> str:
    """Convert an MP3 file to WAV with FFmpeg without blocking the event loop.

    Args:
        filepath: Path of the input audio file. ``None`` or an empty string
            (nothing uploaded) is passed through untouched.

    Returns:
        The path of the converted file (the input path with its extension
        replaced by ``.wav``) when ``filepath`` ends in ``.mp3``,
        case-insensitively; otherwise ``filepath`` unchanged.

    Raises:
        subprocess.CalledProcessError: If ``ffmpeg`` exits with a non-zero
            status (``check=True``).
    """
    # Guard first: calling .lower() on None would raise AttributeError.
    if not filepath or not filepath.lower().endswith(".mp3"):
        return filepath
    wav_path = filepath.rsplit(".", 1)[0] + ".wav"
    # Run the blocking ffmpeg call in a worker thread; "-y" lets ffmpeg
    # overwrite an existing output file instead of prompting.
    await asyncio.to_thread(
        subprocess.run,
        ["ffmpeg", "-y", "-i", filepath, wav_path],
        check=True,
    )
    return wav_path
async def demo_fn(speech_upl: str, noise_type: str, snr: int, mic_input: Optional[str] = None, progress=gr.Progress()):
    """Denoise a speech clip asynchronously and report progress to the Gradio UI.

    Blocking steps (load, mix, enhance, resample, save, spectrograms) are
    pushed to worker threads with ``asyncio.to_thread``.

    Args:
        speech_upl: Path of the uploaded speech file. When empty or ``None``
            the bundled sample ``samples/p232_013_clean.wav`` is used.
        noise_type: Key into the module-level ``NOISES`` mapping. A ``None``
            entry means no noise is mixed in.
        snr: Signal-to-noise ratio forwarded to ``mix_at_snr`` (cast to ``int``).
        mic_input: Path of a microphone recording. When truthy it replaces
            ``speech_upl``.
        progress: Gradio progress tracker, called with a fraction in [0, 1].

    Returns:
        A 4-tuple ``(noisy_wav, noisy_im, enhanced_wav, enh_im)``: the paths of
        the saved noisy and enhanced WAV files and the values returned by
        ``spec_im`` for each of them.
    """
    if mic_input:
        speech_upl = mic_input
    sr = config("sr", 48000, int, section="df")
    snr = int(snr)
    noise_fn = NOISES[noise_type]
    max_s = 3600  # 1 hour
    max_len = max_s * sr

    # Stage 1 + 2: convert (if needed) and load audio.
    # gr.Progress takes a completion fraction between 0 and 1, not a percentage.
    progress(0.0, desc="Converting audio...")
    if speech_upl:
        speech_upl = await ensure_wav_async(speech_upl)
        progress(0.1, desc="Loading audio...")
        sample, meta = await asyncio.to_thread(load_audio, speech_upl, sr)
        if sample.shape[-1] > max_len:
            # Too long: keep a randomly positioned window of max_len samples.
            start = torch.randint(0, sample.shape[-1] - max_len, ()).item()
            sample = sample[..., start : start + max_len]
    else:
        # Nothing uploaded or recorded: fall back to the bundled sample.
        progress(0.1, desc="Loading audio...")
        sample, meta = await asyncio.to_thread(load_audio, "samples/p232_013_clean.wav", sr)
        sample = sample[..., :max_len]
    if sample.dim() > 1 and sample.shape[0] > 1:
        # Average over dim 0 to get a single channel.
        sample = sample.mean(dim=0, keepdim=True)

    # Stage 3: Mix noise if applicable
    progress(0.3, desc="Mixing noise...")
    if noise_fn is not None:
        noise, _ = await asyncio.to_thread(load_audio, noise_fn, sr)
        # Only the third element of the returned triple (the mixture) is used.
        _, _, sample = await asyncio.to_thread(mix_at_snr, sample, noise, snr)

    # Stage 4: Denoising
    progress(0.6, desc="Denoising...")
    enhanced = await asyncio.to_thread(enhance, model, df, sample)

    # Linear 0 -> 1 ramp over the first 0.15 s, then unity gain. The ramp is
    # truncated to the clip length so clips shorter than 0.15 s no longer ask
    # torch.ones() for a negative size.
    n_samples = enhanced.shape[1]
    ramp = torch.linspace(0.0, 1.0, int(sr * 0.15))[:n_samples]
    lim = torch.cat((ramp, torch.ones(n_samples - ramp.shape[0]))).unsqueeze(0)
    enhanced = enhanced * lim

    if meta.sample_rate != sr:
        # Return audio at the sample rate reported for the loaded file.
        enhanced = await asyncio.to_thread(resample, enhanced, sr, meta.sample_rate)
        sample = await asyncio.to_thread(resample, sample, sr, meta.sample_rate)
        sr = meta.sample_rate

    # Stage 5: Save outputs
    progress(0.9, desc="Saving files...")
    # Only the names are needed; close the handles right away instead of
    # leaving them open until garbage collection.
    with tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False) as tmp:
        noisy_wav = tmp.name
    with tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False) as tmp:
        enhanced_wav = tmp.name
    await asyncio.to_thread(save_audio, noisy_wav, sample, sr)
    await asyncio.to_thread(save_audio, enhanced_wav, enhanced, sr)

    # Stage 6: Spectrograms. The figures/axes are module-level and reused
    # between calls, so clear them before drawing the new images.
    ax_noisy.clear()
    ax_enh.clear()
    noisy_im = await asyncio.to_thread(spec_im, sample, sr=sr, figure=fig_noisy, ax=ax_noisy)
    enh_im = await asyncio.to_thread(spec_im, enhanced, sr=sr, figure=fig_enh, ax=ax_enh)
    progress(1.0, desc="Done!")

    # Cleanup temp files
    # NOTE(review): presumably the paths cleanup_tmp must leave in place,
    # since the returned outputs are in the list - confirm against cleanup_tmp.
    paths = [speech_upl, noisy_wav, enhanced_wav]
    if mic_input:
        # After an MP3 conversion speech_upl no longer equals the original
        # recording path, so list the recording explicitly.
        paths.append(mic_input)
    cleanup_tmp(paths)
    return noisy_wav, noisy_im, enhanced_wav, enh_im