SESA_Fast_Separation

Running

App Files Community

ASesYusuf1 committed on May 16, 2025

Commit

3047c6d

verified ·

1 Parent(s): 50d1ae7

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -42

app.py CHANGED Viewed

@@ -348,7 +348,7 @@ def download_audio(url, cookie_file=None):
             output_path = 'ytdl/gdrive_audio.wav'
             audio = AudioSegment.from_file(temp_output_path)
             audio.export(output_path, format="wav")
-            sample_rate, data = scipy.io.wavfile.read(output_path)  # Fixed: Use scipy.io.wavfile.read
             return output_path, "Download successful", (sample_rate, data)
         else:
             os.makedirs('ytdl', exist_ok=True)
@@ -360,7 +360,7 @@ def download_audio(url, cookie_file=None):
                     file_path = file_path.replace(ext, '.wav')
                 if not os.path.exists(file_path):
                     return None, "Downloaded file not found", None
-                sample_rate, data = scipy.io.wavfile.read(file_path)  # Fixed: Use scipy.io.wavfile.read
                 return file_path, "Download successful", (sample_rate, data)
     except yt_dlp.utils.ExtractorError as e:
         if "Sign in to confirm you’re not a bot" in str(e):
@@ -430,9 +430,10 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
             logger.info("GPU memory cleared")
 @spaces.GPU
-def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str=""):
     temp_audio_path = None
     chunk_paths = []
     try:
         if not audio:
             raise ValueError("No audio file provided.")
@@ -464,13 +465,15 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
             chunks = [audio]
         use_tta = use_tta == "True"
         if os.path.exists(output_dir):
-            shutil.rmtree(output_dir)
         os.makedirs(output_dir, exist_ok=True)
         base_name = os.path.splitext(os.path.basename(audio))[0]
         logger.info(f"Ensemble for {base_name} with {model_keys} on {device}")
         all_stems = []
         model_stems = {}
-        for model_key in model_keys:
             model_stems[model_key] = {"vocals": [], "other": []}
             for category, models in ROFORMER_MODELS.items():
                 if model_key in models:
@@ -480,44 +483,62 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
                 logger.warning(f"Model {model_key} not found, skipping")
                 continue
             for chunk_idx, chunk_path in enumerate(chunks):
-                separator = Separator(
-                    log_level=logging.INFO,
-                    model_file_dir=model_dir,
-                    output_dir=output_dir,
-                    output_format=out_format,
-                    normalization_threshold=norm_thresh,
-                    amplification_threshold=amp_thresh,
-                    use_autocast=use_autocast,
-                    mdxc_params={"segment_size": seg_size, "overlap": overlap, "use_tta": use_tta, "batch_size": batch_size}
-                )
-                logger.info(f"Loading {model_key} for chunk {chunk_idx}")
-                separator.load_model(model_filename=model)
-                logger.info(f"Separating chunk {chunk_idx} with {model_key}")
-                separation = separator.separate(chunk_path)
-                stems = [os.path.join(output_dir, file_name) for file_name in separation]
-                for stem in stems:
-                    if "vocals" in os.path.basename(stem).lower():
-                        model_stems[model_key]["vocals"].append(stem)
-                    elif "other" in os.path.basename(stem).lower():
-                        model_stems[model_key]["other"].append(stem)
-                separator = None
-                gc.collect()
-                if torch.cuda.is_available():
-                    torch.cuda.empty_cache()
-                    logger.info(f"Cleared CUDA cache after {model_key} chunk {chunk_idx}")
         for model_key, stems_dict in model_stems.items():
             for stem_type in ["vocals", "other"]:
                 if stems_dict[stem_type]:
                     combined_path = os.path.join(output_dir, f"{base_name}_{stem_type}_{model_key.replace(' | ', '_').replace(' ', '_')}.wav")
-                    with sf.SoundFile(combined_path, 'w', sr, channels=2 if audio_data.ndim == 2 else 1) as f:
-                        for stem_path in stems_dict[stem_type]:
-                            data, _ = librosa.load(stem_path, sr=sr, mono=False)
-                            f.write(data.T if data.ndim == 2 else data)
-                    logger.info(f"Combined {stem_type} for {model_key}: {combined_path}")
-                    if exclude_stems.strip() and stem_type.lower() in [s.strip().lower() for s in exclude_stems.split(',')]:
-                        logger.info(f"Excluding {stem_type} for {model_key}")
-                        continue
-                    all_stems.append(combined_path)
         all_stems = [stem for stem in all_stems if os.path.exists(stem)]
         if not all_stems:
             raise ValueError("No valid stems found for ensemble.")
@@ -532,10 +553,16 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
             "--weights", *[str(w) for w in weights],
             "--output", output_file
         ]
         logger.info(f"Running ensemble with args: {ensemble_args}")
-        ensemble_files(ensemble_args)
-        logger.info("Ensemble completed")
-        return output_file, f"Ensemble completed with {ensemble_method}, excluded: {exclude_stems if exclude_stems else 'None'}"
     except Exception as e:
         logger.error(f"Ensemble error: {e}")
         raise RuntimeError(f"Ensemble error: {e}")

             output_path = 'ytdl/gdrive_audio.wav'
             audio = AudioSegment.from_file(temp_output_path)
             audio.export(output_path, format="wav")
+            sample_rate, data = scipy.io.wavfile.read(output_path)
             return output_path, "Download successful", (sample_rate, data)
         else:
             os.makedirs('ytdl', exist_ok=True)
                     file_path = file_path.replace(ext, '.wav')
                 if not os.path.exists(file_path):
                     return None, "Downloaded file not found", None
+                sample_rate, data = scipy.io.wavfile.read(file_path)
                 return file_path, "Download successful", (sample_rate, data)
     except yt_dlp.utils.ExtractorError as e:
         if "Sign in to confirm you’re not a bot" in str(e):
             logger.info("GPU memory cleared")
 @spaces.GPU
+def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str="", progress=gr.Progress(track_tqdm=True)):
     temp_audio_path = None
     chunk_paths = []
+    max_retries = 2  # Retry attempts for ZeroGPU session issues
     try:
         if not audio:
             raise ValueError("No audio file provided.")
             chunks = [audio]
         use_tta = use_tta == "True"
         if os.path.exists(output_dir):
+            shutil.rmtree(outputwatermark = True
+            shutil.copyfile(audio, os.path.join(output_dir, os.path.basename(audio)))
         os.makedirs(output_dir, exist_ok=True)
         base_name = os.path.splitext(os.path.basename(audio))[0]
         logger.info(f"Ensemble for {base_name} with {model_keys} on {device}")
         all_stems = []
         model_stems = {}
+        total_models = len(model_keys)
+        for model_idx, model_key in enumerate(model_keys):
             model_stems[model_key] = {"vocals": [], "other": []}
             for category, models in ROFORMER_MODELS.items():
                 if model_key in models:
                 logger.warning(f"Model {model_key} not found, skipping")
                 continue
             for chunk_idx, chunk_path in enumerate(chunks):
+                retry_count = 0
+                while retry_count <= max_retries:
+                    try:
+                        progress((model_idx + 0.1) / total_models, desc=f"Loading {model_key} for chunk {chunk_idx}")
+                        separator = Separator(
+                            log_level=logging.INFO,
+                            model_file_dir=model_dir,
+                            output_dir=output_dir,
+                            output_format=out_format,
+                            normalization_threshold=norm_thresh,
+                            amplification_threshold=amp_thresh,
+                            use_autocast=use_autocast,
+                            mdxc_params={"segment_size": seg_size, "overlap": overlap, "use_tta": use_tta, "batch_size": batch_size}
+                        )
+                        logger.info(f"Loading {model_key} for chunk {chunk_idx}")
+                        separator.load_model(model_filename=model)
+                        progress((model_idx + 0.5) / total_models, desc=f"Separating chunk {chunk_idx} with {model_key}")
+                        logger.info(f"Separating chunk {chunk_idx} with {model_key}")
+                        separation = separator.separate(chunk_path)
+                        stems = [os.path.join(output_dir, file_name) for file_name in separation]
+                        for stem in stems:
+                            if "vocals" in os.path.basename(stem).lower():
+                                model_stems[model_key]["vocals"].append(stem)
+                            elif "other" in os.path.basename(stem).lower() or "instrumental" in os.path.basename(stem).lower():
+                                model_stems[model_key]["other"].append(stem)
+                        break  # Success, exit retry loop
+                    except Exception as e:
+                        retry_count += 1
+                        logger.error(f"Error processing {model_key} chunk {chunk_idx}, attempt {retry_count}/{max_retries}: {e}")
+                        if retry_count > max_retries:
+                            logger.error(f"Max retries reached for {model_key} chunk {chunk_idx}, skipping")
+                            break
+                        time.sleep(2)  # Wait before retrying
+                    finally:
+                        separator = None
+                        gc.collect()
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                            logger.info(f"Cleared CUDA cache after {model_key} chunk {chunk_idx}")
+        progress(0.8, desc="Combining stems...")
         for model_key, stems_dict in model_stems.items():
             for stem_type in ["vocals", "other"]:
                 if stems_dict[stem_type]:
                     combined_path = os.path.join(output_dir, f"{base_name}_{stem_type}_{model_key.replace(' | ', '_').replace(' ', '_')}.wav")
+                    try:
+                        with sf.SoundFile(combined_path, 'w', sr, channels=2 if audio_data.ndim == 2 else 1) as f:
+                            for stem_path in stems_dict[stem_type]:
+                                data, _ = librosa.load(stem_path, sr=sr, mono=False)
+                                f.write(data.T if data.ndim == 2 else data)
+                        logger.info(f"Combined {stem_type} for {model_key}: {combined_path}")
+                        if exclude_stems.strip() and stem_type.lower() in [s.strip().lower() for s in exclude_stems.split(',')]:
+                            logger.info(f"Excluding {stem_type} for {model_key}")
+                            continue
+                        all_stems.append(combined_path)
+                    except Exception as e:
+                        logger.error(f"Error combining {stem_type} for {model_key}: {e}")
         all_stems = [stem for stem in all_stems if os.path.exists(stem)]
         if not all_stems:
             raise ValueError("No valid stems found for ensemble.")
             "--weights", *[str(w) for w in weights],
             "--output", output_file
         ]
+        progress(0.9, desc="Running ensemble...")
         logger.info(f"Running ensemble with args: {ensemble_args}")
+        try:
+            ensemble_files(ensemble_args)
+            logger.info("Ensemble completed")
+            progress(1.0, desc="Ensemble completed")
+            return output_file, f"Ensemble completed with {ensemble_method}, excluded: {exclude_stems if exclude_stems else 'None'}"
+        except Exception as e:
+            logger.error(f"Ensemble processing error: {e}")
+            raise RuntimeError(f"Ensemble processing error: {e}")
     except Exception as e:
         logger.error(f"Ensemble error: {e}")
         raise RuntimeError(f"Ensemble error: {e}")