Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,9 +8,11 @@ from typing import List, Optional, Tuple, Union
|
|
| 8 |
import subprocess
|
| 9 |
# import os
|
| 10 |
|
| 11 |
-
import
|
|
|
|
|
|
|
| 12 |
# from typing import Optional
|
| 13 |
-
|
| 14 |
|
| 15 |
import gradio as gr
|
| 16 |
import matplotlib.pyplot as plt
|
|
@@ -107,30 +109,17 @@ def load_audio_gradio(
|
|
| 107 |
return audio, meta
|
| 108 |
|
| 109 |
|
| 110 |
-
def ensure_wav(filepath: str) -> str:
    """Convert MP3 (or other formats) to WAV using ffmpeg if needed.

    Any path that does not end in ``.mp3`` (case-insensitive) is returned
    unchanged; an MP3 is transcoded next to the original and the new WAV
    path is returned.
    """
    if not filepath.lower().endswith(".mp3"):
        # Nothing to transcode; hand the path straight back.
        return filepath
    stem, _sep, _ext = filepath.rpartition(".")
    target = stem + ".wav"
    # -y overwrites any stale output from a previous run; check=True surfaces
    # ffmpeg failures as CalledProcessError instead of silently continuing.
    subprocess.run(["ffmpeg", "-y", "-i", filepath, target], check=True)
    return target
|
| 117 |
-
|
| 118 |
-
|
| 119 |
|
| 120 |
|
| 121 |
-
async def ensure_wav_async(filepath: str) -> str:
    """Async wrapper for FFmpeg conversion.

    If *filepath* ends in ``.mp3`` (case-insensitive), transcode it to WAV
    without blocking the event loop and return the new path; otherwise
    return the path unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits non-zero.
    """
    if not filepath.lower().endswith(".mp3"):
        return filepath
    wav_path = filepath.rsplit(".", 1)[0] + ".wav"
    # asyncio.to_thread matches how the rest of this file offloads blocking
    # work, replacing the explicit get_running_loop()/run_in_executor/lambda
    # dance with the equivalent one-liner.
    await asyncio.to_thread(
        subprocess.run, ["ffmpeg", "-y", "-i", filepath, wav_path], check=True
    )
    return wav_path
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
-
|
| 133 |
-
|
| 134 |
if mic_input:
|
| 135 |
speech_upl = mic_input
|
| 136 |
|
|
@@ -140,60 +129,70 @@ async def demo_fn(speech_upl: str, noise_type: str, snr: int, mic_input: Optiona
|
|
| 140 |
meta = AudioMetaData(-1, -1, -1, -1, "")
|
| 141 |
|
| 142 |
max_s = 3600 # 1 hour
|
|
|
|
|
|
|
| 143 |
|
| 144 |
-
#
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
# Stage 2: Load audio
|
| 149 |
-
progress(10, desc="Loading audio...")
|
| 150 |
-
sample, meta = await asyncio.to_thread(load_audio, speech_upl, sr)
|
| 151 |
|
| 152 |
-
|
| 153 |
-
if sample.shape[-1] >
|
| 154 |
-
|
| 155 |
-
sample = sample[...,
|
| 156 |
|
|
|
|
| 157 |
if sample.dim() > 1 and sample.shape[0] > 1:
|
| 158 |
sample = sample.mean(dim=0, keepdim=True)
|
| 159 |
|
| 160 |
-
#
|
| 161 |
-
progress(30, desc="Mixing noise...")
|
| 162 |
if noise_fn is not None:
|
| 163 |
-
noise, _ =
|
| 164 |
-
_, _, sample =
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
-
#
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
| 169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
|
| 171 |
lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
|
| 172 |
enhanced = enhanced * lim
|
| 173 |
|
|
|
|
| 174 |
if meta.sample_rate != sr:
|
| 175 |
-
enhanced =
|
| 176 |
-
sample =
|
| 177 |
sr = meta.sample_rate
|
| 178 |
|
| 179 |
-
#
|
| 180 |
-
progress(90, desc="Saving files...")
|
| 181 |
noisy_wav = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
|
| 182 |
enhanced_wav = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
|
|
|
|
|
|
|
| 183 |
|
| 184 |
-
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
-
|
|
|
|
| 188 |
|
| 189 |
-
# Optional: generate spectrograms (can also be offloaded to thread)
|
| 190 |
-
noisy_im = await asyncio.to_thread(spec_im, sample, sr=sr, figure=fig_noisy, ax=ax_noisy)
|
| 191 |
-
enh_im = await asyncio.to_thread(spec_im, enhanced, sr=sr, figure=fig_enh, ax=ax_enh)
|
| 192 |
|
| 193 |
-
# Cleanup temp files
|
| 194 |
-
cleanup_tmp([speech_upl, noisy_wav, enhanced_wav])
|
| 195 |
|
| 196 |
-
return noisy_wav, noisy_im, enhanced_wav, enh_im
|
| 197 |
|
| 198 |
def specshow(
|
| 199 |
spec,
|
|
|
|
| 8 |
import subprocess
|
| 9 |
# import os
|
| 10 |
|
| 11 |
+
# import torch
|
| 12 |
+
# import numpy as np
|
| 13 |
+
# import tempfile
|
| 14 |
# from typing import Optional
|
| 15 |
+
# import gradio as gr
|
| 16 |
|
| 17 |
import gradio as gr
|
| 18 |
import matplotlib.pyplot as plt
|
|
|
|
| 109 |
return audio, meta
|
| 110 |
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
+
def chunk_audio(sample: torch.Tensor, chunk_size: int):
    """Yield `(chunk, start, total_len)` triples covering `sample`'s last dim.

    Args:
        sample: Audio tensor; chunking is along the last (time) dimension.
        chunk_size: Maximum samples per chunk; must be >= 1.

    Yields:
        Tuples of ``(chunk, start, total_len)`` where ``chunk`` is a view of
        ``sample[..., start:start + chunk_size]`` (the final chunk may be
        shorter) and ``total_len`` is ``sample.shape[-1]``.

    Raises:
        ValueError: If ``chunk_size`` is not positive (the bare ``range``
            call would otherwise raise a confusing error for 0 and silently
            yield nothing for negatives).
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
    total_len = sample.shape[-1]
    for start in range(0, total_len, chunk_size):
        end = min(start + chunk_size, total_len)
        yield sample[..., start:end], start, total_len
|
| 121 |
|
| 122 |
+
def demo_fn(speech_upl: str, noise_type: str, snr: int, mic_input: Optional[str] = None, progress=gr.Progress()):
|
|
|
|
| 123 |
if mic_input:
|
| 124 |
speech_upl = mic_input
|
| 125 |
|
|
|
|
| 129 |
meta = AudioMetaData(-1, -1, -1, -1, "")
|
| 130 |
|
| 131 |
max_s = 3600 # 1 hour
|
| 132 |
+
chunk_s = 10 # process in 10-second chunks
|
| 133 |
+
chunk_len = chunk_s * sr
|
| 134 |
|
| 135 |
+
# Load audio
|
| 136 |
+
speech_upl = ensure_wav(speech_upl)
|
| 137 |
+
sample, meta = load_audio(speech_upl, sr)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
+
# Limit to max_s
|
| 140 |
+
if sample.shape[-1] > max_s * sr:
|
| 141 |
+
start_idx = torch.randint(0, sample.shape[-1] - max_s*sr, ()).item()
|
| 142 |
+
sample = sample[..., start_idx:start_idx + max_s*sr]
|
| 143 |
|
| 144 |
+
# Convert to mono if needed
|
| 145 |
if sample.dim() > 1 and sample.shape[0] > 1:
|
| 146 |
sample = sample.mean(dim=0, keepdim=True)
|
| 147 |
|
| 148 |
+
# Mix noise if applicable
|
|
|
|
| 149 |
if noise_fn is not None:
|
| 150 |
+
noise, _ = load_audio(noise_fn, sr)
|
| 151 |
+
_, _, sample = mix_at_snr(sample, noise, snr)
|
| 152 |
+
|
| 153 |
+
# Prepare output tensor
|
| 154 |
+
enhanced_chunks = []
|
| 155 |
|
| 156 |
+
# Process audio in chunks
|
| 157 |
+
for i, (chunk, start, total_len) in enumerate(chunk_audio(sample, chunk_len)):
|
| 158 |
+
# Denoise the chunk
|
| 159 |
+
enhanced_chunk = enhance(model, df, chunk)
|
| 160 |
+
enhanced_chunks.append(enhanced_chunk)
|
| 161 |
|
| 162 |
+
# Update progress
|
| 163 |
+
progress((start + chunk.shape[-1]) / total_len * 100, desc="Denoising audio...")
|
| 164 |
+
|
| 165 |
+
# Concatenate all chunks
|
| 166 |
+
enhanced = torch.cat(enhanced_chunks, dim=-1)
|
| 167 |
+
|
| 168 |
+
# Optional: apply fade or limiter
|
| 169 |
lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
|
| 170 |
lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
|
| 171 |
enhanced = enhanced * lim
|
| 172 |
|
| 173 |
+
# Resample if needed
|
| 174 |
if meta.sample_rate != sr:
|
| 175 |
+
enhanced = resample(enhanced, sr, meta.sample_rate)
|
| 176 |
+
sample = resample(sample, sr, meta.sample_rate)
|
| 177 |
sr = meta.sample_rate
|
| 178 |
|
| 179 |
+
# Save outputs
|
|
|
|
| 180 |
noisy_wav = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
|
| 181 |
enhanced_wav = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
|
| 182 |
+
save_audio(noisy_wav, sample, sr)
|
| 183 |
+
save_audio(enhanced_wav, enhanced, sr)
|
| 184 |
|
| 185 |
+
# Spectrograms
|
| 186 |
+
ax_noisy.clear()
|
| 187 |
+
ax_enh.clear()
|
| 188 |
+
noisy_im = spec_im(sample, sr=sr, figure=fig_noisy, ax=ax_noisy)
|
| 189 |
+
enh_im = spec_im(enhanced, sr=sr, figure=fig_enh, ax=ax_enh)
|
| 190 |
|
| 191 |
+
cleanup_tmp([speech_upl, noisy_wav, enhanced_wav])
|
| 192 |
+
return noisy_wav, noisy_im, enhanced_wav, enh_im
|
| 193 |
|
|
|
|
|
|
|
|
|
|
| 194 |
|
|
|
|
|
|
|
| 195 |
|
|
|
|
| 196 |
|
| 197 |
def specshow(
|
| 198 |
spec,
|