SESA_Fast_Separation

Running

App Files Files Community

ASesYusuf1 committed on May 23, 2025

Commit

5db82ef

verified ·

1 Parent(s): deb8a70

Update audio_separator/separator/separator.py

Browse files

Files changed (1) hide show

audio_separator/separator/separator.py +39 -74

audio_separator/separator/separator.py CHANGED Viewed

@@ -1,27 +1,3 @@
-""" This file contains the Separator class, to facilitate the separation of stems from audio. """
-from importlib import metadata, resources
-import os
-import sys
-import platform
-import subprocess
-import time
-import logging
-import warnings
-import importlib
-import io
-from typing import Optional
-import hashlib
-import json
-import yaml
-import requests
-import torch
-import torch.amp.autocast_mode as autocast_mode
-import onnxruntime as ort
-from tqdm import tqdm
 import os
 import logging
 import requests
@@ -44,7 +20,7 @@ class Separator:
     """
     Optimized Separator class for audio source separation on Hugging Face Zero GPU.
     Supports MDX, VR, Demucs, and MDXC architectures with ONNX Runtime and PyTorch.
-    Optimized for memory efficiency, fast inference, and serverless environments.
     """
     def __init__(
         self,
@@ -167,52 +143,41 @@ class Separator:
             raise RuntimeError(f"Failed to download {url}: {response.status_code}")
     def list_supported_model_files(self):
-        """Fetch supported model files from predefined sources."""
         download_checks_url = "https://raw.githubusercontent.com/TRvlvr/application_data/main/filelists/download_checks.json"
         download_checks_path = os.path.join(self.model_file_dir, "download_checks.json")
         self.download_file_if_not_exists(download_checks_url, download_checks_path)
         model_downloads_list = json.load(open(download_checks_path, encoding="utf-8"))
-        # Mock model scores for simplicity (replace with actual model-scores.json if available)
-        model_scores = {
-            "UVR-MDX-NET-Inst_full_292.onnx": {
-                "median_scores": {
-                    "vocals": {"SDR": 10.6497, "SIR": 20.3786, "SAR": 10.692, "ISR": 14.848},
-                    "instrumental": {"SDR": 15.2149, "SIR": 25.6075, "SAR": 17.1363, "ISR": 17.7893}
-                },
                 "stems": ["vocals", "instrumental"],
-                "target_stem": "vocals"
-            },
-            "htdemucs_ft.yaml": {
-                "median_scores": {
-                    "vocals": {"SDR": 11.2685, "SIR": 21.257, "SAR": 11.0359, "ISR": 19.3753},
-                    "drums": {"SDR": 13.235, "SIR": 23.3053, "SAR": 13.0313, "ISR": 17.2889},
-                    "bass": {"SDR": 9.72743, "SIR": 19.5435, "SAR": 9.20801, "ISR": 13.5037}
-                },
-                "stems": ["vocals", "drums", "bass"],
-                "target_stem": "vocals"
             },
-            "MDX23C-8KFFT-InstVoc_HQ.ckpt": {
-                "median_scores": {
-                    "vocals": {"SDR": 11.9504, "SIR": 23.1166, "SAR": 12.093, "ISR": 15.4782},
-                    "instrumental": {"SDR": 16.3035, "SIR": 26.6161, "SAR": 18.5167, "ISR": 18.3939}
-                },
                 "stems": ["vocals", "instrumental"],
-                "target_stem": "vocals"
-            }
         }
         public_model_repo_url_prefix = "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models"
         audio_separator_models_repo_url_prefix = "https://github.com/nomadkaraoke/python-audio-separator/releases/download/model-configs"
-        # Simplified model list for MDX, VR, Demucs, MDXC
         model_files_grouped_by_type = {
             "MDX": {
                 "MDX-Net Model: UVR-MDX-NET-Inst_full_292": {
                     "filename": "UVR-MDX-NET-Inst_full_292.onnx",
-                    "scores": model_scores.get("UVR-MDX-NET-Inst_full_292.onnx", {}).get("median_scores", {}),
-                    "stems": model_scores.get("UVR-MDX-NET-Inst_full_292.onnx", {}).get("stems", []),
-                    "target_stem": model_scores.get("UVR-MDX-NET-Inst_full_292.onnx", {}).get("target_stem"),
                     "download_files": ["UVR-MDX-NET-Inst_full_292.onnx"]
                 }
             },
@@ -228,27 +193,16 @@ class Separator:
             "Demucs": {
                 "Demucs v4: htdemucs_ft": {
                     "filename": "htdemucs_ft.yaml",
-                    "scores": model_scores.get("htdemucs_ft.yaml", {}).get("median_scores", {}),
-                    "stems": model_scores.get("htdemucs_ft.yaml", {}).get("stems", []),
-                    "target_stem": model_scores.get("htdemucs_ft.yaml", {}).get("target_stem"),
                     "download_files": [
                         f"{public_model_repo_url_prefix}/htdemucs_ft.yaml",
                         "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/f7e0c4bc-ba3fe64a.th"
                     ]
                 }
             },
-            "MDXC": {
-                "MDX23C Model: MDX23C-InstVoc HQ": {
-                    "filename": "MDX23C-8KFFT-InstVoc_HQ.ckpt",
-                    "scores": model_scores.get("MDX23C-8KFFT-InstVoc_HQ.ckpt", {}).get("median_scores", {}),
-                    "stems": model_scores.get("MDX23C-8KFFT-InstVoc_HQ.ckpt", {}).get("stems", []),
-                    "target_stem": model_scores.get("MDX23C-8KFFT-InstVoc_HQ.ckpt", {}).get("target_stem"),
-                    "download_files": [
-                        "MDX23C-8KFFT-InstVoc_HQ.ckpt",
-                        f"{audio_separator_models_repo_url_prefix}/model_2_stem_full_band_8k.yaml"
-                    ]
-                }
-            }
         }
         return model_files_grouped_by_type
@@ -289,6 +243,7 @@ class Separator:
                         if file_to_download.endswith(".yaml"):
                             yaml_config_filename = file_to_download
                     return model_filename, model_type, model_friendly_name, model_path, yaml_config_filename
         raise ValueError(f"Model file {model_filename} not found")
     def load_model_data_from_yaml(self, yaml_config_filename):
@@ -321,6 +276,7 @@ class Separator:
         if model_hash in model_data:
             self.logger.debug(f"Model data loaded for hash {model_hash}")
             return model_data[model_hash]
         raise ValueError(f"No model data for hash {model_hash}")
     def load_model(self, model_filename="UVR-MDX-NET-Inst_full_292.onnx"):
@@ -328,9 +284,14 @@ class Separator:
         self.logger.info(f"Loading model {model_filename}")
         start_time = time.perf_counter()
-        model_filename, model_type, model_friendly_name, model_path, yaml_config_filename = self.download_model_files(model_filename)
-        model_name = model_filename.split(".")[0]
         model_data = self.load_model_data_from_yaml(yaml_config_filename) if yaml_config_filename else self.load_model_data_using_hash(model_path)
         common_params = {
@@ -461,9 +422,13 @@ class Separator:
     def download_model_and_data(self, model_filename):
         """Download model files without loading into memory."""
         self.logger.info(f"Downloading model {model_filename}")
-        model_filename, model_type, model_friendly_name, model_path, yaml_config_filename = self.download_model_files(model_filename)
-        model_data = self.load_model_data_from_yaml(yaml_config_filename) if yaml_config_filename else self.load_model_data_using_hash(model_path)
-        self.logger.info(f"Model downloaded: {model_friendly_name}, type: {model_type}, path: {model_path}, data items: {len(model_data)}")
     def get_simplified_model_list(self, filter_sort_by: Optional[str] = None):
         """Return a simplified list of models."""

 import os
 import logging
 import requests
     """
     Optimized Separator class for audio source separation on Hugging Face Zero GPU.
     Supports MDX, VR, Demucs, and MDXC architectures with ONNX Runtime and PyTorch.
+    Handles MelBand Roformer models and ensures robust model downloading.
     """
     def __init__(
         self,
             raise RuntimeError(f"Failed to download {url}: {response.status_code}")
     def list_supported_model_files(self):
+        """Fetch supported model files, including MelBand Roformer models."""
         download_checks_url = "https://raw.githubusercontent.com/TRvlvr/application_data/main/filelists/download_checks.json"
         download_checks_path = os.path.join(self.model_file_dir, "download_checks.json")
         self.download_file_if_not_exists(download_checks_url, download_checks_path)
         model_downloads_list = json.load(open(download_checks_path, encoding="utf-8"))
+        # Custom model list from the Gradio log
+        roformer_models = {
+            "MelBand Roformer Kim | Inst V1 (E) Plus by Unwa": {
+                "filename": "melband_roformer_inst_v1e_plus.ckpt",
+                "scores": {"vocals": {"SDR": 11.95}, "instrumental": {"SDR": 16.30}},
                 "stems": ["vocals", "instrumental"],
+                "target_stem": "vocals",
+                "download_files": ["melband_roformer_inst_v1e_plus.ckpt", "model_2_stem_full_band_8k.yaml"]
             },
+            "MelBand Roformer Kim | Inst V1 Plus by Unwa": {
+                "filename": "melband_roformer_inst_v1_plus.ckpt",
+                "scores": {"vocals": {"SDR": 11.80}, "instrumental": {"SDR": 16.20}},
                 "stems": ["vocals", "instrumental"],
+                "target_stem": "vocals",
+                "download_files": ["melband_roformer_inst_v1_plus.ckpt", "model_2_stem_full_band_8k.yaml"]
+            },
+            # Add other models from the log as needed
         }
         public_model_repo_url_prefix = "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models"
         audio_separator_models_repo_url_prefix = "https://github.com/nomadkaraoke/python-audio-separator/releases/download/model-configs"
         model_files_grouped_by_type = {
             "MDX": {
                 "MDX-Net Model: UVR-MDX-NET-Inst_full_292": {
                     "filename": "UVR-MDX-NET-Inst_full_292.onnx",
+                    "scores": {"vocals": {"SDR": 10.6497}, "instrumental": {"SDR": 15.2149}},
+                    "stems": ["vocals", "instrumental"],
+                    "target_stem": "vocals",
                     "download_files": ["UVR-MDX-NET-Inst_full_292.onnx"]
                 }
             },
             "Demucs": {
                 "Demucs v4: htdemucs_ft": {
                     "filename": "htdemucs_ft.yaml",
+                    "scores": {"vocals": {"SDR": 11.2685}, "drums": {"SDR": 13.235}, "bass": {"SDR": 9.72743}},
+                    "stems": ["vocals", "drums", "bass"],
+                    "target_stem": "vocals",
                     "download_files": [
                         f"{public_model_repo_url_prefix}/htdemucs_ft.yaml",
                         "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/f7e0c4bc-ba3fe64a.th"
                     ]
                 }
             },
+            "MDXC": roformer_models
         }
         return model_files_grouped_by_type
                         if file_to_download.endswith(".yaml"):
                             yaml_config_filename = file_to_download
                     return model_filename, model_type, model_friendly_name, model_path, yaml_config_filename
+        self.logger.error(f"Model {model_filename} not found in supported models")
         raise ValueError(f"Model file {model_filename} not found")
     def load_model_data_from_yaml(self, yaml_config_filename):
         if model_hash in model_data:
             self.logger.debug(f"Model data loaded for hash {model_hash}")
             return model_data[model_hash]
+        self.logger.error(f"No model data for hash {model_hash}")
         raise ValueError(f"No model data for hash {model_hash}")
     def load_model(self, model_filename="UVR-MDX-NET-Inst_full_292.onnx"):
         self.logger.info(f"Loading model {model_filename}")
         start_time = time.perf_counter()
+        try:
+            model_filename, model_type, model_friendly_name, model_path, yaml_config_filename = self.download_model_files(model_filename)
+        except ValueError as e:
+            self.logger.error(f"Failed to load model: {e}")
+            self.logger.info("Falling back to default model: UVR-MDX-NET-Inst_full_292.onnx")
+            model_filename, model_type, model_friendly_name, model_path, yaml_config_filename = self.download_model_files("UVR-MDX-NET-Inst_full_292.onnx")
+        model_name = model_filename.split(".")[0]
         model_data = self.load_model_data_from_yaml(yaml_config_filename) if yaml_config_filename else self.load_model_data_using_hash(model_path)
         common_params = {
     def download_model_and_data(self, model_filename):
         """Download model files without loading into memory."""
         self.logger.info(f"Downloading model {model_filename}")
+        try:
+            model_filename, model_type, model_friendly_name, model_path, yaml_config_filename = self.download_model_files(model_filename)
+            model_data = self.load_model_data_from_yaml(yaml_config_filename) if yaml_config_filename else self.load_model_data_using_hash(model_path)
+            self.logger.info(f"Model downloaded: {model_friendly_name}, type: {model_type}, path: {model_path}, data items: {len(model_data)}")
+        except ValueError as e:
+            self.logger.error(f"Failed to download model: {e}")
+            raise
     def get_simplified_model_list(self, filter_sort_by: Optional[str] = None):
         """Return a simplified list of models."""