# denoiser / app.py
# Hugging Face Space page header: harp-dev — "Create app.py" (commit bc9667c, verified)
import os
import tempfile
from pathlib import Path
import gradio as gr
import numpy as np
import soundfile as sf
import librosa
import torch
from pyharp import ModelCard, build_endpoint
from df import enhance, init_df
# -----------------------------
# Model metadata for HARP
# -----------------------------
# Description shown to HARP clients for this endpoint.
_DESCRIPTION = (
    "Background noise suppression / speech enhancement using DeepFilterNet3. "
    "Input is converted to mono 48kHz. Slider controls strength through wet/dry blend."
)

# Card identifying the model to the HARP plugin host.
model_card = ModelCard(
    name="Background Noise Remover (DeepFilterNet3)",
    description=_DESCRIPTION,
    author="Derek Llanes",
    tags=["denoise", "speech enhancement", "deepfilternet", "v3"],
)
# -----------------------------
# Device & Model Initialization
# -----------------------------
# Prefer a CUDA device when one is visible (e.g. a GPU Space on
# Hugging Face); otherwise run inference on CPU.
if torch.cuda.is_available():
    DEVICE_STR = "cuda"
else:
    DEVICE_STR = "cpu"
DEVICE = torch.device(DEVICE_STR)

# Load DeepFilterNet once at import time; module globals let every
# request reuse the same weights instead of re-initializing per call.
MODEL, DF_STATE, _ = init_df()
MODEL = MODEL.to(DEVICE)
def load_audio_mono_48k(path: str):
    """Load an audio file as mono float32 at 48 kHz.

    Tries soundfile first; falls back to librosa for formats soundfile
    cannot decode.

    Args:
        path: Filesystem path to the input audio file.

    Returns:
        Tuple ``(audio, sr)`` where ``audio`` is a 1-D float32 numpy
        array and ``sr`` is always 48000.
    """
    try:
        # soundfile returns (frames,) for mono, (frames, channels) otherwise.
        audio, sr = sf.read(path, always_2d=False)
    except Exception:
        # BUGFIX: librosa's multichannel layout is (channels, frames),
        # so the shared mean(axis=1) below would average over TIME and
        # collapse the signal. Let librosa do the mono downmix itself.
        audio, sr = librosa.load(path, sr=None, mono=True)
    audio = np.asarray(audio)
    # Downmix multichannel soundfile output: channel axis is the last one.
    if audio.ndim == 2:
        audio = audio.mean(axis=1)
    audio = audio.astype(np.float32)
    # Resample to the 48 kHz rate DeepFilterNet was configured for.
    if sr != 48000:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=48000)
        sr = 48000
    return audio, sr
def apply_attenuation_db(noisy: np.ndarray, enhanced: np.ndarray, noise_atten_db: float, max_db: float = 30.0):
    """
    Translate the Noise Attenuation (dB) slider into a wet/dry mix.

    The slider value is normalized against ``max_db`` to a strength in
    [0, 1] and used to linearly crossfade between the unprocessed
    (``noisy``) and denoised (``enhanced``) signals.
    """
    # Normalize the dB value to [0, 1], clamping out-of-range input.
    strength = min(1.0, max(0.0, float(noise_atten_db) / float(max_db)))
    # Trim both signals to their common length before blending.
    length = min(len(noisy), len(enhanced))
    dry = noisy[:length]
    wet = enhanced[:length]
    mix = (1.0 - strength) * dry + strength * wet
    return mix.astype(np.float32)
@torch.inference_mode()
def process_fn(input_audio_path: str, noise_atten_db: float) -> str:
    """HARP processing endpoint: denoise one uploaded audio file.

    Args:
        input_audio_path: Path to the uploaded audio file.
        noise_atten_db: Slider value; 0 leaves the audio untouched and
            30 returns the fully enhanced signal (wet/dry blend).

    Returns:
        Path to the denoised 48 kHz mono WAV in a temp directory.

    Raises:
        ValueError: If no input path was provided.
    """
    if not input_audio_path:
        raise ValueError("No input audio provided.")
    # Load and normalize to mono float32 @ 48 kHz.
    noisy, sr = load_audio_mono_48k(input_audio_path)
    # numpy -> torch, add channel dim: [T] -> [1, T].
    noisy_t = torch.from_numpy(noisy).float().unsqueeze(0).to(DEVICE)
    # Denoise, then drop the channel dim and move back to numpy.
    enhanced_t = enhance(MODEL, DF_STATE, noisy_t)
    enhanced = enhanced_t.squeeze(0).detach().cpu().numpy()
    # Apply the slider as a wet/dry blend.
    out = apply_attenuation_db(noisy, enhanced, noise_atten_db, max_db=30.0)
    # BUGFIX: write to a unique file per request. The previous fixed
    # "denoised.wav" name meant concurrent requests overwrote each
    # other's output before the client could fetch it.
    out_dir = Path(tempfile.gettempdir()) / "pyharp_dfnet_outputs"
    out_dir.mkdir(parents=True, exist_ok=True)
    with tempfile.NamedTemporaryFile(
        prefix="denoised_", suffix=".wav", dir=out_dir, delete=False
    ) as tmp:
        out_path = tmp.name
    sf.write(out_path, out, sr)
    return str(out_path)
# -----------------------------
# Gradio endpoint
# -----------------------------
with gr.Blocks() as demo:
    # HARP input widgets: the audio file plus the attenuation slider.
    audio_in = gr.Audio(type="filepath", label="Input Audio").harp_required(True)
    atten_slider = gr.Slider(
        minimum=0,
        maximum=30,
        step=1,
        value=12,
        label="Noise Attenuation (dB)",
        info="0 = no change, 30 = strongest. Implemented as wet/dry strength.",
    )
    # Output widget holding the path of the denoised WAV.
    audio_out = gr.Audio(type="filepath", label="Output Audio").set_info(
        "Denoised audio output."
    )

    # Wire the widgets to process_fn through pyharp's endpoint builder.
    app = build_endpoint(
        model_card=model_card,
        input_components=[audio_in, atten_slider],
        output_components=[audio_out],
        process_fn=process_fn,
    )

demo.queue().launch(show_error=False, pwa=True)