Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| from pathlib import Path | |
| import gradio as gr | |
| import numpy as np | |
| import soundfile as sf | |
| import librosa | |
| import torch | |
| from pyharp import ModelCard, build_endpoint | |
| from df import enhance, init_df | |
# -----------------------------
# Model metadata for HARP
# -----------------------------
# Card shown by HARP plugin hosts: name, blurb, author, and search tags.
_CARD_DESCRIPTION = (
    "Background noise suppression / speech enhancement using DeepFilterNet3. "
    "Input is converted to mono 48kHz. Slider controls strength through wet/dry blend."
)

model_card = ModelCard(
    name="Background Noise Remover (DeepFilterNet3)",
    description=_CARD_DESCRIPTION,
    author="Derek Llanes",
    tags=["denoise", "speech enhancement", "deepfilternet", "v3"],
)
# -----------------------------
# Device & Model Initialization
# -----------------------------
# Prefer a GPU when one is visible (e.g. on a Hugging Face GPU Space),
# otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE_STR = str(DEVICE)

# Initialize DeepFilterNet once at import time; every request reuses the
# same weights and DF state from these module-level globals.
MODEL, DF_STATE, _ = init_df()
MODEL = MODEL.to(DEVICE)
def load_audio_mono_48k(path: str):
    """Load audio from *path* as mono float32 at 48 kHz.

    Tries soundfile first and falls back to librosa for formats soundfile
    cannot decode.

    Args:
        path: filesystem path to an audio file.

    Returns:
        (audio, sr): 1-D float32 numpy array and the sample rate (48000).
    """
    try:
        # soundfile returns (frames,) or (frames, channels): channels on axis 1
        audio, sr = sf.read(path, always_2d=False)
        channel_axis = 1
    except Exception:
        # librosa with mono=False returns (channels, samples): channels on axis 0
        audio, sr = librosa.load(path, sr=None, mono=False)
        channel_axis = 0
    audio = np.asarray(audio)
    # Multichannel -> mono by averaging across the channel axis.
    # BUG FIX: the original averaged axis=1 unconditionally, which in the
    # librosa fallback averages over *time* (samples) instead of channels.
    if audio.ndim == 2:
        audio = audio.mean(axis=channel_axis)
    audio = audio.astype(np.float32)
    # Resample to DeepFilterNet's native 48 kHz rate if needed.
    if sr != 48000:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=48000)
        sr = 48000
    return audio, sr
def apply_attenuation_db(noisy: np.ndarray, enhanced: np.ndarray, noise_atten_db: float, max_db: float = 30.0):
    """Crossfade between the original and enhanced signals.

    The slider value in dB is mapped linearly onto a wet/dry strength in
    [0, 1] (0 -> all original, ``max_db`` -> all enhanced). Inputs are
    truncated to their common length before blending.

    Args:
        noisy: original mono signal.
        enhanced: denoised mono signal.
        noise_atten_db: slider value; clamped to [0, max_db].
        max_db: slider value that maps to full-strength enhancement.

    Returns:
        float32 array of the blended signal.
    """
    # dB slider -> clamped blend strength in [0, 1]
    strength = min(1.0, max(0.0, float(noise_atten_db) / float(max_db)))
    # Trim both signals to their shared length.
    length = min(len(noisy), len(enhanced))
    dry = noisy[:length]
    wet = enhanced[:length]
    blended = (1.0 - strength) * dry + strength * wet
    return blended.astype(np.float32)
def process_fn(input_audio_path: str, noise_atten_db: float) -> str:
    """Denoise one audio file and return the path of the rendered WAV.

    Args:
        input_audio_path: path to the uploaded audio file.
        noise_atten_db: slider value; 0 keeps the input unchanged.

    Returns:
        Path to a unique output WAV file.

    Raises:
        ValueError: if no input path was supplied.
    """
    if not input_audio_path:
        raise ValueError("No input audio provided.")
    # Load and normalize to mono / 48 kHz (DeepFilterNet's native rate).
    noisy, sr = load_audio_mono_48k(input_audio_path)
    # numpy [T] -> torch [1, T] on the inference device
    noisy_t = torch.from_numpy(noisy).float().unsqueeze(0).to(DEVICE)
    # FIX: run inference under no_grad so no autograd graph is built;
    # the original relied on .detach() only after the graph already existed.
    with torch.no_grad():
        enhanced_t = enhance(MODEL, DF_STATE, noisy_t)
    enhanced = enhanced_t.squeeze(0).cpu().numpy()
    # Wet/dry blend controlled by the slider
    out = apply_attenuation_db(noisy, enhanced, noise_atten_db, max_db=30.0)
    out_dir = Path(tempfile.gettempdir()) / "pyharp_dfnet_outputs"
    out_dir.mkdir(parents=True, exist_ok=True)
    # FIX: write each result to a unique file. The original always wrote to
    # "denoised.wav", so concurrent requests clobbered each other's output.
    with tempfile.NamedTemporaryFile(
        prefix="denoised_", suffix=".wav", dir=out_dir, delete=False
    ) as tmp:
        out_path = tmp.name
    sf.write(out_path, out, sr)
    return out_path
# -----------------------------
# Gradio endpoint
# -----------------------------
with gr.Blocks() as demo:
    # Input widgets: the audio file (required by HARP) and the strength slider.
    audio_in = gr.Audio(type="filepath", label="Input Audio").harp_required(True)
    atten_slider = gr.Slider(
        minimum=0,
        maximum=30,
        step=1,
        value=12,
        label="Noise Attenuation (dB)",
        info="0 = no change, 30 = strongest. Implemented as wet/dry strength.",
    )
    # Output widget: path to the rendered WAV.
    audio_out = gr.Audio(type="filepath", label="Output Audio").set_info(
        "Denoised audio output."
    )

    # Wire the components and process_fn into a HARP-compatible endpoint.
    app = build_endpoint(
        model_card=model_card,
        input_components=[audio_in, atten_slider],
        output_components=[audio_out],
        process_fn=process_fn,
    )

demo.queue().launch(show_error=False, pwa=True)