# Source: PoC_ASR_v6_dev / app/services/denoiser.py
# Last commit: "Update app/services/denoiser.py" (d975a12, verified) by vyluong
from pathlib import Path
import asyncio
import logging
import torch
import torchaudio
from app.core.config import get_settings

logger = logging.getLogger(__name__)

# DeepFilterNet is an optional dependency. Any failure while importing it is
# tolerated so the module still loads, but the reason is logged (with the
# traceback) instead of being discarded, so that a later
# "DeepFilterNet is not available" error can be traced back to its cause.
try:
    from df.enhance import enhance, init_df

    DF_AVAILABLE = True
except Exception:
    logger.warning(
        "DeepFilterNet could not be imported; denoising is unavailable",
        exc_info=True,
    )
    DF_AVAILABLE = False

# Application settings, resolved once when this module is imported.
settings = get_settings()
class DenoiserService:
    """Speech denoising backed by a lazily loaded, shared DeepFilterNet model.

    The model, its DeepFilterNet state object and the selected torch device
    are cached as class attributes, so a successful load is reused by every
    later call.

    NOTE(review): loading is not guarded by a lock. Concurrent first calls
    from the executor may each initialise the model; confirm whether that
    cost matters for this deployment.
    """

    _model = None
    _df_state = None
    _device = None

    @classmethod
    def _load_model(cls):
        """Initialise DeepFilterNet and cache it on the class.

        Does nothing when a model is already cached.

        Raises:
            ImportError: If the DeepFilterNet package could not be imported.
        """
        if cls._model is not None:
            return

        if not DF_AVAILABLE:
            raise ImportError("DeepFilterNet is not available")

        logger.info("Loading DeepFilterNet...")
        model, df_state, _ = init_df()

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        model.eval()

        # `_model` doubles as the "ready" flag checked above, so it is
        # assigned last: a thread that observes it set also finds the
        # state object and device in place.
        cls._device = device
        cls._df_state = df_state
        cls._model = model

        logger.info("DeepFilterNet READY on %s", device)

    @classmethod
    async def enhance_audio(cls, input_path: Path) -> Path:
        """Denoise an audio file without blocking the event loop.

        Args:
            input_path: Path of the audio file to enhance.

        Returns:
            ``input_path`` unchanged when ``settings.enable_denoiser`` is
            falsy; otherwise whatever ``_run_enhancement`` returns after
            running in the loop's default executor.
        """
        if not settings.enable_denoiser:
            return input_path

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, cls._run_enhancement, input_path)

    @classmethod
    def _run_enhancement(cls, input_path: Path) -> Path:
        """Run DeepFilterNet on one file and write the result as WAV.

        This is best-effort: any exception is logged with its traceback and
        the original path is returned so the caller can continue with the
        unprocessed audio.

        Args:
            input_path: Path of the audio file to enhance.

        Returns:
            Path of ``<stem>_enhanced.wav`` inside ``settings.processed_dir``
            on success, or ``input_path`` on failure. The written file keeps
            the sample rate of the input.
        """
        try:
            cls._load_model()

            audio, sr = torchaudio.load(str(input_path))

            # Down-mix multi-channel recordings to a single channel.
            if audio.shape[0] > 1:
                audio = torch.mean(audio, dim=0, keepdim=True)
            audio = audio.float()

            # The model works at its own sample rate; feeding audio recorded
            # at another rate would be interpreted at the wrong speed.
            model_sr = cls._df_state.sr()
            needs_resample = sr != model_sr
            if needs_resample:
                audio = torchaudio.functional.resample(audio, sr, model_sr)

            # The waveform stays on the CPU: DeepFilterNet's enhance()
            # converts it to NumPy for feature extraction and places the
            # features on the model's device itself, so a CUDA tensor here
            # would make every GPU run fail.
            with torch.no_grad():
                enhanced = enhance(cls._model, cls._df_state, audio)

            enhanced = enhanced.cpu()
            if needs_resample:
                # Restore the input rate so downstream consumers see the
                # same sample rate as before denoising.
                enhanced = torchaudio.functional.resample(enhanced, model_sr, sr)

            output_path = settings.processed_dir / f"{input_path.stem}_enhanced.wav"
            output_path.parent.mkdir(parents=True, exist_ok=True)
            torchaudio.save(str(output_path), enhanced, sr)

            logger.info("Denoised audio saved: %s", output_path)
            return output_path
        except Exception:
            logger.exception("DeepFilterNet enhancement failed")
            # Fall back to the original, unprocessed file.
            return input_path