Spaces:

ASesYusuf1
/

SESA_Audio_Separation

Sleeping

App Files Files Community

ASesYusuf1 committed on May 19, 2025

Commit

c419df5

verified ·

1 Parent(s): 6d177f8

Update utils.py

Browse files

Files changed (1) hide show

utils.py +13 -25

utils.py CHANGED Viewed

@@ -13,14 +13,7 @@ from omegaconf import OmegaConf
 from tqdm.auto import tqdm
 from typing import Dict, List, Tuple, Any, Union
 import loralib as lora
-import gc  # For garbage collection to free memory
-# ZeroGPU-specific imports
-try:
-    from spaces import GPU
-    IS_ZEROGPU = True
-except ImportError:
-    IS_ZEROGPU = False
 def load_config(model_type: str, config_path: str) -> Union[ConfigDict, OmegaConf]:
     try:
@@ -37,8 +30,6 @@ def load_config(model_type: str, config_path: str) -> Union[ConfigDict, OmegaCon
 def get_model_from_config(model_type: str, config_path: str) -> Tuple[nn.Module, Any]:
     config = load_config(model_type, config_path)
-    # Initialize model based on type
     model = None
     if model_type == 'mdx23c':
         from models.mdx23c_tfc_tdf_v3 import TFC_TDF_net
@@ -49,7 +40,6 @@ def get_model_from_config(model_type: str, config_path: str) -> Tuple[nn.Module,
     # Add other model types as needed...
     else:
         raise ValueError(f"Unknown model type: {model_type}")
     return model, config
 def read_audio_transposed(path: str, instr: str = None, skip_err: bool = False) -> Tuple[np.ndarray, int]:
@@ -80,7 +70,7 @@ def apply_tta(
     device: str,
     model_type: str
 ) -> Dict[str, torch.Tensor]:
-    track_proc_list = [mix[::-1].clone(), -mix.clone()]  # Use clone to avoid in-place ops
     for i, augmented_mix in enumerate(track_proc_list):
         waveforms = demix(config, model, augmented_mix, device, model_type=model_type, pbar=False)
         for el in waveforms:
@@ -89,7 +79,9 @@ def apply_tta(
             else:
                 waveforms_orig[el] -= waveforms[el]
         del waveforms, augmented_mix
-        gc.collect()  # Free memory after each augmentation
     for el in waveforms_orig:
         waveforms_orig[el] /= (len(track_proc_list) + 1)
     return waveforms_orig
@@ -102,8 +94,6 @@ def _getWindowingArray(window_size: int, fade_size: int) -> torch.Tensor:
     window[:fade_size] = fadein
     return window
-if IS_ZEROGPU:
-    @GPU
 def demix(
     config: ConfigDict,
     model: nn.Module,
@@ -113,9 +103,8 @@ def demix(
     pbar: bool = False
 ) -> Dict[str, np.ndarray]:
     mix = torch.tensor(mix, dtype=torch.float16, device='cpu')  # Start on CPU with FP16
     mode = 'demucs' if model_type == 'htdemucs' else 'generic'
     # Processing parameters
     if mode == 'demucs':
         chunk_size = config.training.samplerate * config.training.segment
@@ -136,10 +125,12 @@ def demix(
     batch_size = getattr(config.inference, 'batch_size', 1)  # Default to 1 for low memory
-    # Use autocast for mixed precision
-    scaler = torch.cuda.amp.GradScaler(enabled=True) if device.startswith('cuda') else None
-    with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16):
-        with torch.no_grad():  # No gradients for inference
             req_shape = (num_instruments,) + mix.shape
             result = torch.zeros(req_shape, dtype=torch.float16, device='cpu')
             counter = torch.zeros(req_shape, dtype=torch.float16, device='cpu')
@@ -212,7 +203,6 @@ def load_not_compatible_weights(model: nn.Module, weights: str, verbose: bool =
         old_model = old_model['state']
     if 'state_dict' in old_model:
         old_model = old_model['state_dict']
     for el in new_model:
         if el in old_model and new_model[el].shape == old_model[el].shape:
             new_model[el] = old_model[el]
@@ -236,7 +226,6 @@ def load_start_checkpoint(args: argparse.Namespace, model: nn.Module, type_='tra
 def bind_lora_to_model(config: Dict[str, Any], model: nn.Module) -> nn.Module:
     if 'lora' not in config:
         raise ValueError("Configuration must contain the 'lora' key with parameters for LoRA.")
     replaced_layers = 0
     for name, module in model.named_modules():
         hierarchy = name.split('.')
@@ -259,7 +248,6 @@ def bind_lora_to_model(config: Dict[str, Any], model: nn.Module) -> nn.Module:
                 replaced_layers += 1
             except Exception as e:
                 print(f"Error replacing layer {name}: {e}")
     print(f"Number of layers replaced with LoRA: {replaced_layers}")
     return model
@@ -276,4 +264,4 @@ def draw_spectrogram(waveform, sample_rate, length, output_file):
     fig.colorbar(img, ax=ax, format="%+2.f dB")
     if output_file:
         plt.savefig(output_file)
-        plt.close()  # Close plot to free memory

 from tqdm.auto import tqdm
 from typing import Dict, List, Tuple, Any, Union
 import loralib as lora
+import gc  # For garbage collection
 def load_config(model_type: str, config_path: str) -> Union[ConfigDict, OmegaConf]:
     try:
 def get_model_from_config(model_type: str, config_path: str) -> Tuple[nn.Module, Any]:
     config = load_config(model_type, config_path)
     model = None
     if model_type == 'mdx23c':
         from models.mdx23c_tfc_tdf_v3 import TFC_TDF_net
     # Add other model types as needed...
     else:
         raise ValueError(f"Unknown model type: {model_type}")
     return model, config
 def read_audio_transposed(path: str, instr: str = None, skip_err: bool = False) -> Tuple[np.ndarray, int]:
     device: str,
     model_type: str
 ) -> Dict[str, torch.Tensor]:
+    track_proc_list = [mix[::-1].clone(), -mix.clone()]
     for i, augmented_mix in enumerate(track_proc_list):
         waveforms = demix(config, model, augmented_mix, device, model_type=model_type, pbar=False)
         for el in waveforms:
             else:
                 waveforms_orig[el] -= waveforms[el]
         del waveforms, augmented_mix
+        gc.collect()
+        if device.startswith('cuda'):
+            torch.cuda.empty_cache()
     for el in waveforms_orig:
         waveforms_orig[el] /= (len(track_proc_list) + 1)
     return waveforms_orig
     window[:fade_size] = fadein
     return window
 def demix(
     config: ConfigDict,
     model: nn.Module,
     pbar: bool = False
 ) -> Dict[str, np.ndarray]:
     mix = torch.tensor(mix, dtype=torch.float16, device='cpu')  # Start on CPU with FP16
     mode = 'demucs' if model_type == 'htdemucs' else 'generic'
     # Processing parameters
     if mode == 'demucs':
         chunk_size = config.training.samplerate * config.training.segment
     batch_size = getattr(config.inference, 'batch_size', 1)  # Default to 1 for low memory
+    # Move model to device
+    model = model.to(device)
+    model.eval()
+    with torch.no_grad():  # No gradients for inference
+        with torch.cuda.amp.autocast(enabled=device.startswith('cuda'), dtype=torch.float16):
             req_shape = (num_instruments,) + mix.shape
             result = torch.zeros(req_shape, dtype=torch.float16, device='cpu')
             counter = torch.zeros(req_shape, dtype=torch.float16, device='cpu')
         old_model = old_model['state']
     if 'state_dict' in old_model:
         old_model = old_model['state_dict']
     for el in new_model:
         if el in old_model and new_model[el].shape == old_model[el].shape:
             new_model[el] = old_model[el]
 def bind_lora_to_model(config: Dict[str, Any], model: nn.Module) -> nn.Module:
     if 'lora' not in config:
         raise ValueError("Configuration must contain the 'lora' key with parameters for LoRA.")
     replaced_layers = 0
     for name, module in model.named_modules():
         hierarchy = name.split('.')
                 replaced_layers += 1
             except Exception as e:
                 print(f"Error replacing layer {name}: {e}")
     print(f"Number of layers replaced with LoRA: {replaced_layers}")
     return model
     fig.colorbar(img, ax=ax, format="%+2.f dB")
     if output_file:
         plt.savefig(output_file)
+        plt.close()