vera6
/

sn105_denoising_35

Model card Files Files and versions

xet

Community

vera6 committed on Oct 13, 2025

Commit

e343fe3

verified ·

1 Parent(s): 01e806a

Update app/app.py

Browse files

Files changed (1) hide show

app/app.py +148 -145

app/app.py CHANGED Viewed

@@ -1,12 +1,12 @@
-import fastapi
-import shutil
-import os
-import zipfile
-import io
-import uvicorn
 import threading
 import glob
-from typing import List
 import torch
 import gdown
 from soundfile import write
@@ -14,97 +14,25 @@ from torchaudio import load
 from librosa import resample
 import logging
-import librosa
-import numpy as np
-from scipy.signal import butter, filtfilt
-from scipy.ndimage import uniform_filter1d
-from scipy.signal import hilbert
 logging.basicConfig(level=logging.DEBUG)
 from sgmse import ScoreModel
 from sgmse.util.other import pad_spec
-def gentle_noise_reduction(audio, sr):
-    """Very gentle noise reduction - only remove obvious noise"""
-    # Only target very quiet background noise
-    abs_audio = np.abs(audio)
-    noise_threshold = np.percentile(abs_audio, 5)  # Bottom 5% only
-    # Very conservative gating - only suppress very quiet parts
-    gate_threshold = noise_threshold * 1.5  # Very low threshold
-    mask = abs_audio > gate_threshold
-    # Smooth the mask heavily to avoid artifacts
-    window_size = int(0.05 * sr)  # 50ms smoothing
-    if window_size % 2 == 0:
-        window_size += 1
-    mask_smooth = uniform_filter1d(mask.astype(float), size=window_size)
-    mask_smooth = np.clip(mask_smooth, 0.8, 1.0)  # Never go below 80%
-    return audio * mask_smooth
-def minimal_speech_boost(audio, sr):
-    """Minimal boost to speech frequencies"""
-    # Very light boost to mid frequencies (1-3 kHz) - critical for PESQ
-    nyquist = sr / 2
-    low_freq = 1000 / nyquist
-    high_freq = 3000 / nyquist
-    # Design a very gentle bandpass filter
-    b, a = butter(2, [low_freq, high_freq], btype='band')  # Order 2 only
-    mid_freq_content = filtfilt(b, a, audio)
-    # Very small boost - only 2%
-    boost_amount = 0.02
-    enhanced_audio = audio + boost_amount * mid_freq_content
-    return enhanced_audio
-def conservative_enhancement(enhanced_file):
-    """Very conservative enhancement - minimal processing for small improvements"""
-    audio, sr = librosa.load(enhanced_file, sr=16000)
-    original_audio = audio.copy()
-    original_length = len(audio)
-    # Step 1: Very light noise reduction (only remove obvious noise)
-    audio_denoised = gentle_noise_reduction(audio, sr)
-    # Step 2: Minimal speech clarity boost
-    audio_enhanced = minimal_speech_boost(audio_denoised, sr)
-    # Ensure same length
-    if len(audio_enhanced) != original_length:
-        if len(audio_enhanced) > original_length:
-            audio_enhanced = audio_enhanced[:original_length]
-        else:
-            audio_enhanced = np.pad(audio_enhanced, (0, original_length - len(audio_enhanced)), mode='constant')
-    # Very conservative blending - mostly keep original
-    blend_ratio = 0.15  # Only 15% enhancement, 85% original
-    audio_result = blend_ratio * audio_enhanced + (1 - blend_ratio) * original_audio
-    return audio_result
 class ModelAPI:
     def __init__(self, host, port):
-        self.host = host
         self.port = port
         self.base_path = os.path.join(os.path.expanduser("~"), ".modelapi")
         self.noisy_audio_path = os.path.join(self.base_path, "noisy_audio")
         self.enhanced_audio_path = os.path.join(self.base_path, "enhanced_audio")
         app_dir = os.path.dirname(os.path.abspath(__file__))
-        self.ckpt_path = os.path.join(app_dir,"miner_49.ckpt")
-        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
         self.corrector = "ald"
         self.corrector_steps = 1
         self.snr = 0.5
@@ -113,10 +41,10 @@ class ModelAPI:
         for audio_path in [self.noisy_audio_path, self.enhanced_audio_path]:
             if not os.path.exists(audio_path):
                 os.makedirs(audio_path)
             for filename in os.listdir(audio_path):
                 file_path = os.path.join(audio_path, filename)
                 try:
                     if os.path.isfile(file_path) or os.path.islink(file_path):
                         os.unlink(file_path)
@@ -124,29 +52,45 @@ class ModelAPI:
                         shutil.rmtree(file_path)
                 except Exception as e:
                     raise e
         self.app = fastapi.FastAPI()
         self._setup_routes()
     def _prepare(self):
         self.model = ScoreModel.load_from_checkpoint(self.ckpt_path, self.device)
         self.model.t_eps = 0.03
         self.model.eval()
     def _enhance(self):
-        if self.model.backbone == 'ncsnpp_48k':
             target_sr = 48000
             pad_mode = "reflection"
-        elif self.model.backbone == 'ncsnpp_v2':
             target_sr = 16000
             pad_mode = "reflection"
         else:
             target_sr = 16000
             pad_mode = "zero_pad"
-        noisy_files = sorted(glob.glob(os.path.join(self.noisy_audio_path, '*.wav')))
-        for noisy_file in noisy_files:
             filename = noisy_file.replace(self.noisy_audio_path, "")
             filename = filename[1:] if filename.startswith("/") else filename
@@ -155,91 +99,115 @@ class ModelAPI:
             if sr != target_sr:
                 y = torch.tensor(resample(y.numpy(), orig_sr=sr, target_sr=target_sr))
-            T_orig = y.size(1)
             # Normalize
             norm_factor = y.abs().max()
             y = y / norm_factor
             # Prepare DNN input
-            Y = torch.unsqueeze(self.model._forward_transform(self.model._stft(y.to(self.device))), 0)
             Y = pad_spec(Y, mode=pad_mode)
             # Reverse sampling
-            if self.model.sde.__class__.__name__ == 'OUVESDE':
-                if self.model.sde.sampler_type == 'pc':
-                    sampler = self.model.get_pc_sampler('reverse_diffusion', self.corrector, Y.to(self.device), N=self.N,
-                        corrector_steps=self.corrector_steps, snr=self.snr)
-                elif self.model.sde.sampler_type == 'ode':
                     sampler = self.model.get_ode_sampler(Y.to(self.device), N=self.N)
                 else:
                     raise ValueError(f"Sampler type {args.sampler_type} not supported")
-            elif self.model.sde.__class__.__name__ == 'SBVESDE':
-                sampler_type = 'ode' if self.model.sde.sampler_type == 'pc' else self.model.sde.sampler_type
-                sampler = self.model.get_sb_sampler(sde=self.model.sde, y=Y.cuda(), sampler_type=sampler_type)
             else:
-                raise ValueError(f"SDE {self.model.sde.__class__.__name__} not supported")
             sample, _ = sampler()
             x_hat = self.model.to_audio(sample.squeeze(), T_orig)
             x_hat = x_hat * norm_factor
-            os.makedirs(os.path.dirname(os.path.join(self.enhanced_audio_path, filename)), exist_ok=True)
-            enhanced_file = os.path.join(self.enhanced_audio_path, filename)
-            write(enhanced_file, x_hat.cpu().numpy(), target_sr)
-            try:
-                audio_enhanced = conservative_enhancement(enhanced_file)
-                write(enhanced_file, audio_enhanced, target_sr)
-            except Exception as e:
-                write(enhanced_file, x_hat.cpu().numpy(), target_sr)
     def _setup_routes(self):
         self.app.get("/status/")(self.get_status)
         self.app.post("/prepare/")(self.prepare)
         self.app.post("/upload-audio/")(self.upload_audio)
         self.app.post("/enhance/")(self.enhance_audio)
         self.app.get("/download-enhanced/")(self.download_enhanced)
     def get_status(self):
         try:
             return {"container_running": True}
         except Exception as e:
             logging.error(f"Error getting status: {e}")
-            raise fastapi.HTTPException(status_code=500, detail="An error occurred while fetching API status.")
     def prepare(self):
         try:
             self._prepare()
-            return {'preparations': True}
         except Exception as e:
             logging.error(f"Error during preparations: {e}")
-            return fastapi.HTTPException(status_code=500, detail="An error occurred while fetching API status.")
     def upload_audio(self, files: List[fastapi.UploadFile] = fastapi.File(...)):
         uploaded_files = []
         for file in files:
-            try:
                 file_path = os.path.join(self.noisy_audio_path, file.filename)
                 with open(file_path, "wb") as f:
-                    while contents := file.file.read(1024*1024):
                         f.write(contents)
-                uploaded_files.append(file.filename)
             except Exception as e:
-                logging.error(f"Error uploading files: {e}")
-                raise fastapi.HTTPException(status_code=500, detail="An error occurred while uploading the noisy files.")
             finally:
                 file.file.close()
         print(f"uploaded files: {uploaded_files}")
         return {"uploaded_files": uploaded_files, "status": True}
     def enhance_audio(self):
@@ -247,35 +215,70 @@ class ModelAPI:
             # Enhance audio
             self._enhance()
             # Obtain list of file paths for enhanced audio
-            wav_files = glob.glob(os.path.join(self.enhanced_audio_path, '*.wav'))
             # Extract just the file names
             enhanced_files = [os.path.basename(file) for file in wav_files]
             return {"status": True}
         except Exception as e:
             print(f"Exception occured during enhancement: {e}")
-            raise fastapi.HTTPException(status_code=500, detail="An error occurred while enhancing the noisy files.")
     def download_enhanced(self):
         try:
             zip_buffer = io.BytesIO()
             with zipfile.ZipFile(zip_buffer, "w") as zip_file:
-                for wav_file in glob.glob(os.path.join(self.enhanced_audio_path, '*.wav')):
                     zip_file.write(wav_file, arcname=os.path.basename(wav_file))
             zip_buffer.seek(0)
             return fastapi.responses.StreamingResponse(
                 iter([zip_buffer.getvalue()]),  # Stream the in-memory content
                 media_type="application/zip",
-                headers={"Content-Disposition": "attachment; filename=enhanced_audio_files.zip"}
             )
         except Exception as e:
             logging.error(f"Error during enhanced files download: {e}")
-            raise fastapi.HTTPException(status_code=500, detail=f"An error occurred while creating the download file: {str(e)}")
     def run(self):
-        uvicorn.run(self.app, host=self.host, port=self.port)

+import fastapi
+import shutil
+import os
+import zipfile
+import io
+import uvicorn
 import threading
 import glob
+from typing import List
 import torch
 import gdown
 from soundfile import write
 from librosa import resample
 import logging
 logging.basicConfig(level=logging.DEBUG)
 from sgmse import ScoreModel
 from sgmse.util.other import pad_spec
 class ModelAPI:
     def __init__(self, host, port):
+        self.host = host
         self.port = port
         self.base_path = os.path.join(os.path.expanduser("~"), ".modelapi")
         self.noisy_audio_path = os.path.join(self.base_path, "noisy_audio")
         self.enhanced_audio_path = os.path.join(self.base_path, "enhanced_audio")
         app_dir = os.path.dirname(os.path.abspath(__file__))
+        self.ckpt_path = glob.glob(os.path.join(app_dir, "*.ckpt"))[0]
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.corrector = "ald"
         self.corrector_steps = 1
         self.snr = 0.5
         for audio_path in [self.noisy_audio_path, self.enhanced_audio_path]:
             if not os.path.exists(audio_path):
                 os.makedirs(audio_path)
             for filename in os.listdir(audio_path):
                 file_path = os.path.join(audio_path, filename)
                 try:
                     if os.path.isfile(file_path) or os.path.islink(file_path):
                         os.unlink(file_path)
                         shutil.rmtree(file_path)
                 except Exception as e:
                     raise e
         self.app = fastapi.FastAPI()
         self._setup_routes()
     def _prepare(self):
+        """Miners should modify this function to fit their fine-tuned models.
+        This function will make any preparations necessary to initialize the
+        speech enhancement model (i.e. downloading checkpoint files, etc.)
+        """
         self.model = ScoreModel.load_from_checkpoint(self.ckpt_path, self.device)
         self.model.t_eps = 0.03
         self.model.eval()
     def _enhance(self):
+        """
+        Miners should modify this function to fit their fine-tuned models.
+        This function will:
+        1. Open each noisy .wav file
+        2. Enhance the audio with the model
+        3. Save the enhanced audio in .wav format to ModelAPI.enhanced_audio_path
+        """
+        if self.model.backbone == "ncsnpp_48k":
             target_sr = 48000
             pad_mode = "reflection"
+        elif self.model.backbone == "ncsnpp_v2":
             target_sr = 16000
             pad_mode = "reflection"
+            print("using ncsnpp_v2")
         else:
             target_sr = 16000
             pad_mode = "zero_pad"
+        noisy_files = sorted(glob.glob(os.path.join(self.noisy_audio_path, "*.wav")))
+        for noisy_file in noisy_files:
             filename = noisy_file.replace(self.noisy_audio_path, "")
             filename = filename[1:] if filename.startswith("/") else filename
             if sr != target_sr:
                 y = torch.tensor(resample(y.numpy(), orig_sr=sr, target_sr=target_sr))
+            T_orig = y.size(1)
             # Normalize
             norm_factor = y.abs().max()
             y = y / norm_factor
             # Prepare DNN input
+            Y = torch.unsqueeze(
+                self.model._forward_transform(self.model._stft(y.to(self.device))), 0
+            )
             Y = pad_spec(Y, mode=pad_mode)
             # Reverse sampling
+            if self.model.sde.__class__.__name__ == "OUVESDE":
+                if self.model.sde.sampler_type == "pc":
+                    sampler = self.model.get_pc_sampler(
+                        "reverse_diffusion",
+                        self.corrector,
+                        Y.to(self.device),
+                        N=self.N,
+                        corrector_steps=self.corrector_steps,
+                        snr=self.snr,
+                    )
+                elif self.model.sde.sampler_type == "ode":
                     sampler = self.model.get_ode_sampler(Y.to(self.device), N=self.N)
                 else:
                     raise ValueError(f"Sampler type {args.sampler_type} not supported")
+            elif self.model.sde.__class__.__name__ == "SBVESDE":
+                sampler_type = (
+                    "ode"
+                    if self.model.sde.sampler_type == "pc"
+                    else self.model.sde.sampler_type
+                )
+                sampler = self.model.get_sb_sampler(
+                    sde=self.model.sde, y=Y.cuda(), sampler_type=sampler_type
+                )
             else:
+                raise ValueError(
+                    f"SDE {self.model.sde.__class__.__name__} not supported"
+                )
             sample, _ = sampler()
             x_hat = self.model.to_audio(sample.squeeze(), T_orig)
             x_hat = x_hat * norm_factor
+            os.makedirs(
+                os.path.dirname(os.path.join(self.enhanced_audio_path, filename)),
+                exist_ok=True,
+            )
+            write(
+                os.path.join(self.enhanced_audio_path, filename),
+                x_hat.cpu().numpy(),
+                target_sr,
+            )
     def _setup_routes(self):
         self.app.get("/status/")(self.get_status)
         self.app.post("/prepare/")(self.prepare)
         self.app.post("/upload-audio/")(self.upload_audio)
         self.app.post("/enhance/")(self.enhance_audio)
         self.app.get("/download-enhanced/")(self.download_enhanced)
+        self.app.post("/reset/")(self.reset)
     def get_status(self):
         try:
             return {"container_running": True}
         except Exception as e:
             logging.error(f"Error getting status: {e}")
+            raise fastapi.HTTPException(
+                status_code=500, detail="An error occurred while fetching API status."
+            )
     def prepare(self):
         try:
             self._prepare()
+            return {"preparations": True}
         except Exception as e:
             logging.error(f"Error during preparations: {e}")
+            return fastapi.HTTPException(
+                status_code=500, detail="An error occurred while fetching API status."
+            )
     def upload_audio(self, files: List[fastapi.UploadFile] = fastapi.File(...)):
         uploaded_files = []
         for file in files:
+            try:
                 file_path = os.path.join(self.noisy_audio_path, file.filename)
                 with open(file_path, "wb") as f:
+                    while contents := file.file.read(1024 * 1024):
                         f.write(contents)
+                uploaded_files.append(file.filename)
             except Exception as e:
+                logging.error(f"Error uploading files: {e}")
+                raise fastapi.HTTPException(
+                    status_code=500,
+                    detail="An error occurred while uploading the noisy files.",
+                )
             finally:
                 file.file.close()
         print(f"uploaded files: {uploaded_files}")
         return {"uploaded_files": uploaded_files, "status": True}
     def enhance_audio(self):
             # Enhance audio
             self._enhance()
             # Obtain list of file paths for enhanced audio
+            wav_files = glob.glob(os.path.join(self.enhanced_audio_path, "*.wav"))
             # Extract just the file names
             enhanced_files = [os.path.basename(file) for file in wav_files]
             return {"status": True}
         except Exception as e:
             print(f"Exception occured during enhancement: {e}")
+            raise fastapi.HTTPException(
+                status_code=500,
+                detail="An error occurred while enhancing the noisy files.",
+            )
     def download_enhanced(self):
         try:
             zip_buffer = io.BytesIO()
             with zipfile.ZipFile(zip_buffer, "w") as zip_file:
+                for wav_file in glob.glob(
+                    os.path.join(self.enhanced_audio_path, "*.wav")
+                ):
                     zip_file.write(wav_file, arcname=os.path.basename(wav_file))
             zip_buffer.seek(0)
             return fastapi.responses.StreamingResponse(
                 iter([zip_buffer.getvalue()]),  # Stream the in-memory content
                 media_type="application/zip",
+                headers={
+                    "Content-Disposition": "attachment; filename=enhanced_audio_files.zip"
+                },
             )
         except Exception as e:
             logging.error(f"Error during enhanced files download: {e}")
+            raise fastapi.HTTPException(
+                status_code=500,
+                detail=f"An error occurred while creating the download file: {str(e)}",
+            )
+    def reset(self):
+        """
+        Removes all audio files in preparation for another batch of enhancement.
+        """
+        for directory in [self.noisy_audio_path, self.enhanced_audio_path]:
+            if not os.path.isdir(directory):
+                continue
+            for filename in os.listdir(directory):
+                filepath = os.path.join(directory, filename)
+                if os.path.isfile(filepath):
+                    try:
+                        os.remove(filepath)
+                    except Exception as e:
+                        print(f"Error removing {filepath}: {e}")
+                        return {
+                            "status": False,
+                            "noisy": os.listdir(self.noisy_audio_path),
+                            "enhanced": os.listdir(self.enhanced_audio_path),
+                        }
+        return {
+            "status": True,
+            "noisy": os.listdir(self.noisy_audio_path),
+            "enhanced": os.listdir(self.enhanced_audio_path),
+        }
     def run(self):
+        uvicorn.run(self.app, host=self.host, port=self.port)