SESA_Fast_Separation

Running

App Files Files Community

ASesYusuf1 committed on May 26, 2025

Commit

05553c4

verified ·

1 Parent(s): 4f779d2

Update app.py

Browse files

Files changed (1) hide show

app.py +164 -145

app.py CHANGED Viewed

@@ -437,10 +437,11 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
 @spaces.GPU
 def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str="", progress=gr.Progress(track_tqdm=True)):
     temp_audio_path = None
     max_retries = 2
     start_time = time.time()
-    time_budget = 100  # seconds
     max_models = 6
     gpu_lock = Lock()
@@ -453,7 +454,7 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
             logger.warning(f"Selected {len(model_keys)} models, limiting to {max_models}.")
             model_keys = model_keys[:max_models]
-        # Dynamic batch size based on audio duration and model count
         audio_data, sr = librosa.load(audio, sr=None, mono=False)
         duration = librosa.get_duration(y=audio_data, sr=sr)
         logger.info(f"Audio duration: {duration:.2f} seconds")
@@ -466,173 +467,191 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
             scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
             audio = temp_audio_path
         use_tta = use_tta == "True"
-        if os.path.exists(output_dir):
-            shutil.rmtree(output_dir)
-        os.makedirs(output_dir, exist_ok=True)
         base_name = os.path.splitext(os.path.basename(audio))[0]
         logger.info(f"Ensemble for {base_name} with {model_keys} on {device}")
         # Model cache
         model_cache = {}
         all_stems = []
-        model_stems = {model_key: {"vocals": [], "other": []} for model_key in model_keys}
         total_tasks = len(model_keys)
-        def process_model(model_key, model_idx):
-            with torch.no_grad():
-                for attempt in range(max_retries + 1):
-                    try:
-                        # Find model
-                        for category, models in ROFORMER_MODELS.items():
-                            if model_key in models:
-                                model = models[model_key]
-                                break
-                        else:
-                            logger.warning(f"Model {model_key} not found, skipping")
-                            return []
-                        # Check time budget
-                        elapsed = time.time() - start_time
-                        if elapsed > time_budget:
-                            logger.error(f"Time budget ({time_budget}s) exceeded")
-                            raise TimeoutError("Processing took too long")
-                        # Initialize separator
-                        model_path = os.path.join(model_dir, model)
-                        if model_key not in model_cache:
-                            logger.info(f"Loading {model_key} into cache")
-                            separator = Separator(
-                                log_level=logging.INFO,
-                                model_file_dir=model_dir,
-                                output_dir=output_dir,
-                                output_format=out_format,
-                                normalization_threshold=norm_thresh,
-                                amplification_threshold=amp_thresh,
-                                use_autocast=use_autocast,
-                                mdxc_params={
-                                    "segment_size": seg_size,
-                                    "overlap": overlap,
-                                    "use_tta": use_tta,
-                                    "batch_size": dynamic_batch_size
-                                }
-                            )
-                            separator.load_model(model_filename=model)
-                            model_cache[model_key] = separator
-                        else:
-                            separator = model_cache[model_key]
-                        # Process with GPU lock
-                        with gpu_lock:
-                            progress(0.3 + (model_idx / total_tasks) * 0.5, desc=f"Separating with {model_key}")
-                            logger.info(f"Separating with {model_key}")
-                            separation = separator.separate(audio)
-                            stems = [os.path.join(output_dir, file_name) for file_name in separation]
-                            result = []
-                            for stem in stems:
-                                if "vocals" in os.path.basename(stem).lower():
-                                    model_stems[model_key]["vocals"].append(stem)
-                                elif "other" in os.path.basename(stem).lower() or "instrumental" in os.path.basename(stem).lower():
-                                    model_stems[model_key]["other"].append(stem)
-                                    result.append(stem)
-                            return result
-                    except Exception as e:
-                        logger.error(f"Error processing {model_key}, attempt {attempt + 1}/{max_retries + 1}: {e}")
-                        if attempt == max_retries:
-                            logger.error(f"Max retries reached for {model_key}, skipping")
-                            return []
-                        time.sleep(1)
-                    finally:
-                        if torch.cuda.is_available():
-                            torch.cuda.empty_cache()
-                            logger.info(f"Cleared CUDA cache after {model_key}")
-        # Parallel processing
-        progress(0.1, desc="Starting model separations...")
-        with ThreadPoolExecutor(max_workers=min(4, len(model_keys))) as executor:
-            future_to_task = {executor.submit(process_model, model_key, idx): model_key for idx, model_key in enumerate(model_keys)}
-            for future in as_completed(future_to_task):
-                model_key = future_to_task[future]
-                try:
-                    stems = future.result()
-                    if stems:
-                        logger.info(f"Completed {model_key}")
-                    else:
-                        logger.warning(f"No stems produced for {model_key}")
-                except Exception as e:
-                    logger.error(f"Task {model_key} failed: {e}")
-        # Clear model cache
-        model_cache.clear()
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            logger.info("Cleared model cache and GPU memory")
-        # Combine stems
-        progress(0.8, desc="Combining stems...")
-        for model_key, stems_dict in model_stems.items():
-            for stem_type in ["vocals", "other"]:
-                if stems_dict[stem_type]:
-                    combined_path = os.path.join(output_dir, f"{base_name}_{stem_type}_{model_key.replace(' | ', '_').replace(' ', '_')}.wav")
-                    try:
-                        data, _ = librosa.load(stems_dict[stem_type][0], sr=sr, mono=False)
-                        with sf.SoundFile(combined_path, 'w', sr, channels=2 if data.ndim == 2 else 1) as f:
-                            f.write(data.T if data.ndim == 2 else data)
-                        logger.info(f"Combined {stem_type} for {model_key}: {combined_path}")
-                        if exclude_stems.strip() and stem_type.lower() in [s.strip().lower() for s in exclude_stems.split(',')]:
-                            logger.info(f"Excluding {stem_type} for {model_key}")
                             continue
-                        all_stems.append(combined_path)
-                    except Exception as e:
-                        logger.error(f"Error combining {stem_type} for {model_key}: {e}")
-        all_stems = [stem for stem in all_stems if os.path.exists(stem)]
-        if not all_stems:
-            raise ValueError("No valid stems found for ensemble. Try uploading a local WAV file.")
-        # Ensemble
-        weights = [float(w.strip()) for w in weights_str.split(',')] if weights_str.strip() else [1.0] * len(all_stems)
-        if len(weights) != len(all_stems):
-            weights = [1.0] * len(all_stems)
-            logger.info("Weights mismatched, defaulting to 1.0")
-        output_file = os.path.join(output_dir, f"{base_name}_ensemble_{ensemble_method}.{out_format}")
-        ensemble_args = [
-            "--files", *all_stems,
-            "--type", ensemble_method,
-            "--weights", *[str(w) for w in weights],
-            "--output", output_file
-        ]
-        progress(0.9, desc="Running ensemble...")
-        logger.info(f"Running ensemble with args: {ensemble_args}")
-        try:
             result = ensemble_files(ensemble_args)
             if result is None or not os.path.exists(output_file):
                 raise RuntimeError(f"Ensemble failed, output file not created: {output_file}")
-            logger.info(f"Ensemble completed, output: {output_file}")
-            progress(1.0, desc="Ensemble completed")
             elapsed = time.time() - start_time
-            logger.info(f"Total processing time: {elapsed:.2f}s")
-            # Prepare file list for download
-            file_list = [output_file] + all_stems
-            # Create status message with download links
             status = f"Ensemble completed with {ensemble_method}, excluded: {exclude_stems if exclude_stems else 'None'}, {len(model_keys)} models in {elapsed:.2f}s<br>Download files:<ul>"
             for file in file_list:
                 file_name = os.path.basename(file)
                 status += f"<li><a href='file={file}' download>{file_name}</a></li>"
             status += "</ul>"
             return output_file, status, file_list
-        except Exception as e:
-            logger.error(f"Ensemble processing error: {e}")
-            if "numpy" in str(e).lower() or "copy" in str(e).lower():
-                error_msg = f"NumPy compatibility error: {e}. Try installing numpy<2.0.0 or contact support."
-            else:
-                error_msg = f"Ensemble processing error: {e}"
-            raise RuntimeError(error_msg)
     except Exception as e:
         logger.error(f"Ensemble error: {e}")
-        error_msg = f"Processing failed. Try fewer models (max {max_models}), shorter audio, or uploading a local WAV file."
         raise RuntimeError(error_msg)
     finally:
         if temp_audio_path and os.path.exists(temp_audio_path):
             try:
@@ -643,7 +662,7 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             logger.info("GPU memory cleared")
 def update_roformer_models(category):
     """Update Roformer model dropdown based on selected category."""
     choices = list(ROFORMER_MODELS.get(category, {}).keys()) or []

 @spaces.GPU
 def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str="", progress=gr.Progress(track_tqdm=True)):
+    global ensemble_state
     temp_audio_path = None
     max_retries = 2
     start_time = time.time()
+    time_budget = 300  # ZeroGPU için işlem sınırı
     max_models = 6
     gpu_lock = Lock()
             logger.warning(f"Selected {len(model_keys)} models, limiting to {max_models}.")
             model_keys = model_keys[:max_models]
+        # Audio süresine göre dinamik batch size
         audio_data, sr = librosa.load(audio, sr=None, mono=False)
         duration = librosa.get_duration(y=audio_data, sr=sr)
         logger.info(f"Audio duration: {duration:.2f} seconds")
             scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
             audio = temp_audio_path
+        # Aynı ses dosyası mı kontrolü
+        if ensemble_state["current_audio"] != audio:
+            ensemble_state["current_audio"] = audio
+            ensemble_state["current_model_idx"] = 0
+            ensemble_state["processed_stems"] = []
+            ensemble_state["model_outputs"] = {model_key: {"vocals": [], "other": []} for model_key in model_keys}
+            logger.info("New audio file detected, resetting ensemble state.")
         use_tta = use_tta == "True"
         base_name = os.path.splitext(os.path.basename(audio))[0]
         logger.info(f"Ensemble for {base_name} with {model_keys} on {device}")
+        # Kalıcı bir klasör oluştur
+        permanent_output_dir = os.path.join(output_dir, "permanent_stems")
+        os.makedirs(permanent_output_dir, exist_ok=True)
         # Model cache
         model_cache = {}
         all_stems = []
         total_tasks = len(model_keys)
+        # Şu anki modeli işle
+        current_idx = ensemble_state["current_model_idx"]
+        if current_idx >= len(model_keys):
+            # Tüm modeller işlendiyse ensemble işlemini yap
+            logger.info("All models processed, running ensemble...")
+            progress(0.9, desc="Running ensemble...")
+            # "Exclude Stems" listesindeki stem'leri belirle
+            excluded_stems_list = [s.strip().lower() for s in exclude_stems.split(',')] if exclude_stems.strip() else []
+            # Tüm stem’leri topla, ama "Exclude Stems" ile belirtilenleri hariç tut
+            for model_key, stems_dict in ensemble_state["model_outputs"].items():
+                for stem_type in ["vocals", "other"]:
+                    if stems_dict[stem_type]:
+                        # Stem tipini kontrol et, excluded listesinde varsa atla
+                        if stem_type.lower() in excluded_stems_list:
+                            logger.info(f"Excluding {stem_type} for {model_key} from ensemble")
                             continue
+                        all_stems.extend(stems_dict[stem_type])
+            all_stems = [stem for stem in all_stems if os.path.exists(stem)]
+            if not all_stems:
+                raise ValueError("No valid stems found for ensemble after excluding specified stems.")
+            # Ensemble işlemi
+            weights = [float(w.strip()) for w in weights_str.split(',')] if weights_str.strip() else [1.0] * len(all_stems)
+            if len(weights) != len(all_stems):
+                weights = [1.0] * len(all_stems)
+                logger.info("Weights mismatched, defaulting to 1.0")
+            output_file = os.path.join(output_dir, f"{base_name}_ensemble_{ensemble_method}.{out_format}")
+            ensemble_args = [
+                "--files", *all_stems,
+                "--type", ensemble_method,
+                "--weights", *[str(w) for w in weights],
+                "--output", output_file
+            ]
+            logger.info(f"Running ensemble with args: {ensemble_args}")
             result = ensemble_files(ensemble_args)
             if result is None or not os.path.exists(output_file):
                 raise RuntimeError(f"Ensemble failed, output file not created: {output_file}")
+            # Durumu sıfırla
+            ensemble_state["current_model_idx"] = 0
+            ensemble_state["current_audio"] = None
+            ensemble_state["processed_stems"] = []
+            ensemble_state["model_outputs"] = {}
             elapsed = time.time() - start_time
+            logger.info(f"Ensemble completed, output: {output_file}, took {elapsed:.2f}s")
+            progress(1.0, desc="Ensemble completed")
             status = f"Ensemble completed with {ensemble_method}, excluded: {exclude_stems if exclude_stems else 'None'}, {len(model_keys)} models in {elapsed:.2f}s<br>Download files:<ul>"
+            file_list = [output_file] + all_stems
             for file in file_list:
                 file_name = os.path.basename(file)
                 status += f"<li><a href='file={file}' download>{file_name}</a></li>"
             status += "</ul>"
             return output_file, status, file_list
+        # Şu anki modeli işle
+        model_key = model_keys[current_idx]
+        logger.info(f"Processing model {current_idx + 1}/{len(model_keys)}: {model_key}")
+        progress(0.1, desc=f"Processing model {model_key}...")
+        with torch.no_grad():
+            for attempt in range(max_retries + 1):
+                try:
+                    # Modeli bul
+                    for category, models in ROFORMER_MODELS.items():
+                        if model_key in models:
+                            model = models[model_key]
+                            break
+                    else:
+                        logger.warning(f"Model {model_key} not found, skipping")
+                        ensemble_state["current_model_idx"] += 1
+                        return None, f"Model {model_key} not found, proceeding to next model.", []
+                    # Zaman kontrolü
+                    elapsed = time.time() - start_time
+                    if elapsed > time_budget:
+                        logger.error(f"Time budget ({time_budget}s) exceeded")
+                        raise TimeoutError("Processing took too long")
+                    # Separator oluştur
+                    if model_key not in model_cache:
+                        logger.info(f"Loading {model_key} into cache")
+                        separator = Separator(
+                            log_level=logging.INFO,
+                            model_file_dir=model_dir,
+                            output_dir=output_dir,
+                            output_format=out_format,
+                            normalization_threshold=norm_thresh,
+                            amplification_threshold=amp_thresh,
+                            use_autocast=use_autocast,
+                            mdxc_params={
+                                "segment_size": seg_size,
+                                "overlap": overlap,
+                                "use_tta": use_tta,
+                                "batch_size": dynamic_batch_size
+                            }
+                        )
+                        separator.load_model(model_filename=model)
+                        model_cache[model_key] = separator
+                    else:
+                        separator = model_cache[model_key]
+                    # GPU ile işlem
+                    with gpu_lock:
+                        progress(0.3, desc=f"Separating with {model_key}")
+                        logger.info(f"Separating with {model_key}")
+                        separation = separator.separate(audio)
+                        stems = [os.path.join(output_dir, file_name) for file_name in separation]
+                        result = []
+                        # Stem’leri kalıcı klasöre taşı
+                        for stem in stems:
+                            stem_type = "vocals" if "vocals" in os.path.basename(stem).lower() else "other"
+                            permanent_stem_path = os.path.join(permanent_output_dir, f"{base_name}_{stem_type}_{model_key.replace(' | ', '_').replace(' ', '_')}.{out_format}")
+                            shutil.copy(stem, permanent_stem_path)
+                            ensemble_state["model_outputs"][model_key][stem_type].append(permanent_stem_path)
+                            if stem_type not in exclude_stems.lower():
+                                result.append(permanent_stem_path)
+                        ensemble_state["processed_stems"].extend(result)
+                        break
+                except Exception as e:
+                    logger.error(f"Error processing {model_key}, attempt {attempt + 1}/{max_retries + 1}: {e}")
+                    if attempt == max_retries:
+                        logger.error(f"Max retries reached for {model_key}, skipping")
+                        ensemble_state["current_model_idx"] += 1
+                        return None, f"Failed to process {model_key} after {max_retries} attempts.", []
+                    time.sleep(1)
+                finally:
+                    if torch.cuda.is_available():
+                        torch.cuda.empty_cache()
+                        logger.info(f"Cleared CUDA cache after {model_key}")
+        # Model cache temizliği
+        model_cache.clear()
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            logger.info("Cleared model cache and GPU memory")
+        # Bir sonraki modele geç
+        ensemble_state["current_model_idx"] += 1
+        elapsed = time.time() - start_time
+        logger.info(f"Model {model_key} completed in {elapsed:.2f}s")
+        # Çıktılar
+        file_list = ensemble_state["processed_stems"]
+        status = f"Model {model_key} (Model {current_idx + 1}/{len(model_keys)}) completed in {elapsed:.2f}s<br>Click 'Run Ensemble!' to process the next model.<br>Processed stems:<ul>"
+        for file in file_list:
+            file_name = os.path.basename(file)
+            status += f"<li><a href='file={file}' download>{file_name}</a></li>"
+        status += "</ul>"
+        return file_list[0] if file_list else None, status, file_list
     except Exception as e:
         logger.error(f"Ensemble error: {e}")
+        error_msg = f"Processing failed: {e}. Try fewer models (max {max_models}) or uploading a local WAV file."
         raise RuntimeError(error_msg)
     finally:
         if temp_audio_path and os.path.exists(temp_audio_path):
             try:
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             logger.info("GPU memory cleared")
 def update_roformer_models(category):
     """Update Roformer model dropdown based on selected category."""
     choices = list(ROFORMER_MODELS.get(category, {}).keys()) or []