SESA_Fast_Separation

Running

App Files Files Community

ASesYusuf1 committed on May 26, 2025

Commit

1d35b52

verified ·

1 Parent(s): 4bcaa31

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -34

app.py CHANGED Viewed

@@ -24,14 +24,6 @@ from threading import Lock
 import scipy.io.wavfile
 import spaces
-# Global state definition
-ensemble_state = {
-    "current_model_idx": 0,
-    "current_audio": None,
-    "processed_stems": [],
-    "model_outputs": {}  # Her modelin stem'lerini saklamak için
-}
 # Logging setup
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -450,7 +442,7 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
             logger.info("GPU memory cleared")
 @spaces.GPU
-def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str="", progress=gr.Progress(track_tqdm=True)):
     temp_audio_path = None
     start_time = time.time()
     try:
@@ -475,12 +467,21 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
             scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
             audio = temp_audio_path
-        # State yönetimini kontrol et ve sıfırlama
-        if ensemble_state["current_audio"] != audio or ensemble_state["current_model_idx"] >= len(model_keys):
-            ensemble_state["current_audio"] = audio
-            ensemble_state["current_model_idx"] = 0
-            ensemble_state["processed_stems"] = []
-            ensemble_state["model_outputs"] = {model_key: {"vocals": [], "other": []} for model_key in model_keys}
             logger.info("New audio or completed cycle detected, resetting ensemble state.")
         use_tta = use_tta == "True"
@@ -497,7 +498,7 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
         total_tasks = len(model_keys)
         # Şu anki modeli işle
-        current_idx = ensemble_state["current_model_idx"]
         if current_idx >= len(model_keys):
             # Tüm modeller işlendiyse ensemble işlemini yap
             logger.info("All models processed, running ensemble...")
@@ -507,7 +508,7 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
             excluded_stems_list = [s.strip().lower() for s in exclude_stems.split(',')] if exclude_stems.strip() else []
             # Tüm stem’leri topla, ama "Exclude Stems" ile belirtilenleri hariç tut
-            for model_key, stems_dict in ensemble_state["model_outputs"].items():
                 for stem_type in ["vocals", "other"]:
                     if stems_dict[stem_type]:
                         if stem_type.lower() in excluded_stems_list:
@@ -537,10 +538,10 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
                 raise RuntimeError(f"Ensemble failed, output file not created: {output_file}")
             # Durumu sıfırla
-            ensemble_state["current_model_idx"] = 0
-            ensemble_state["current_audio"] = None
-            ensemble_state["processed_stems"] = []
-            ensemble_state["model_outputs"] = {}
             elapsed = time.time() - start_time
             logger.info(f"Ensemble completed, output: {output_file}, took {elapsed:.2f}s")
@@ -551,7 +552,7 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
                 file_name = os.path.basename(file)
                 status += f"<li><a href='file={file}' download>{file_name}</a></li>"
             status += "</ul>"
-            return output_file, status, file_list
         # Şu anki modeli işle
         model_key = model_keys[current_idx]
@@ -568,8 +569,8 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
                             break
                     else:
                         logger.warning(f"Model {model_key} not found, skipping")
-                        ensemble_state["current_model_idx"] += 1
-                        return None, f"Model {model_key} not found, proceeding to next model.", []
                     # Zaman kontrolü
                     elapsed = time.time() - start_time
@@ -613,19 +614,19 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
                             stem_type = "vocals" if "vocals" in os.path.basename(stem).lower() else "other"
                             permanent_stem_path = os.path.join(permanent_output_dir, f"{base_name}_{stem_type}_{model_key.replace(' | ', '_').replace(' ', '_')}.{out_format}")
                             shutil.copy(stem, permanent_stem_path)
-                            ensemble_state["model_outputs"][model_key][stem_type].append(permanent_stem_path)
                             if stem_type not in exclude_stems.lower():
                                 result.append(permanent_stem_path)
-                        ensemble_state["processed_stems"].extend(result)
                         break
                 except Exception as e:
                     logger.error(f"Error processing {model_key}, attempt {attempt + 1}/{max_retries + 1}: {e}")
                     if attempt == max_retries:
                         logger.error(f"Max retries reached for {model_key}, skipping")
-                        ensemble_state["current_model_idx"] += 1
-                        return None, f"Failed to process {model_key} after {max_retries} attempts.", []
                     time.sleep(1)
                 finally:
@@ -641,18 +642,18 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
             logger.info("Cleared model cache and GPU memory")
         # Bir sonraki modele geç
-        ensemble_state["current_model_idx"] += 1
         elapsed = time.time() - start_time
         logger.info(f"Model {model_key} completed in {elapsed:.2f}s")
         # Çıktılar
-        file_list = ensemble_state["processed_stems"]
         status = f"Model {model_key} (Model {current_idx + 1}/{len(model_keys)}) completed in {elapsed:.2f}s<br>Click 'Run Ensemble!' to process the next model.<br>Processed stems:<ul>"
         for file in file_list:
             file_name = os.path.basename(file)
             status += f"<li><a href='file={file}' download>{file_name}</a></li>"
         status += "</ul>"
-        return file_list[0] if file_list else None, status, file_list
     except Exception as e:
         logger.error(f"Ensemble error: {e}")
@@ -668,7 +669,7 @@ def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_forma
                 logger.warning(f"Failed to delete temporary file {temp_audio_path}: {e}")
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-            logger.info("GPU memory cleared")
 def update_roformer_models(category):
     """Update Roformer model dropdown based on selected category."""
@@ -691,6 +692,13 @@ def create_interface():
         gr.Markdown("<h1 class='header-text'>🎵 SESA Fast Separation 🎵</h1>")
         gr.Markdown("**Note**: If YouTube downloads fail, upload a valid cookies file or a local WAV file. [Cookie Instructions](https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies)")
         gr.Markdown("**Tip**: For best results, use audio shorter than 15 minutes or fewer models (up to 6) to ensure smooth processing.")
         with gr.Tabs():
             with gr.Tab("⚙️ Settings"):
                 with gr.Group(elem_classes="dubbing-theme"):
@@ -774,12 +782,12 @@ def create_interface():
         ensemble_button.click(
             fn=auto_ensemble_process,
             inputs=[
-                ensemble_audio, ensemble_models, ensemble_seg_size, ensemble_overlap,
                 output_format, ensemble_use_tta, model_file_dir, output_dir,
                 norm_threshold, amp_threshold, batch_size, ensemble_method,
                 ensemble_exclude_stems, ensemble_weights
             ],
-            outputs=[ensemble_output, ensemble_status, ensemble_files]
         )
     return app

 import scipy.io.wavfile
 import spaces
 # Logging setup
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
             logger.info("GPU memory cleared")
 @spaces.GPU
+def auto_ensemble_process(audio, model_keys, state, seg_size=64, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str="", progress=gr.Progress(track_tqdm=True)):
     temp_audio_path = None
     start_time = time.time()
     try:
             scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
             audio = temp_audio_path
+        # State kontrolü
+        if not state:
+            state = {
+                "current_audio": None,
+                "current_model_idx": 0,
+                "processed_stems": [],
+                "model_outputs": {}
+            }
+        # Yeni audio dosyası kontrolü
+        if state["current_audio"] != audio or state["current_model_idx"] >= len(model_keys):
+            state["current_audio"] = audio
+            state["current_model_idx"] = 0
+            state["processed_stems"] = []
+            state["model_outputs"] = {model_key: {"vocals": [], "other": []} for model_key in model_keys}
             logger.info("New audio or completed cycle detected, resetting ensemble state.")
         use_tta = use_tta == "True"
         total_tasks = len(model_keys)
         # Şu anki modeli işle
+        current_idx = state["current_model_idx"]
         if current_idx >= len(model_keys):
             # Tüm modeller işlendiyse ensemble işlemini yap
             logger.info("All models processed, running ensemble...")
             excluded_stems_list = [s.strip().lower() for s in exclude_stems.split(',')] if exclude_stems.strip() else []
             # Tüm stem’leri topla, ama "Exclude Stems" ile belirtilenleri hariç tut
+            for model_key, stems_dict in state["model_outputs"].items():
                 for stem_type in ["vocals", "other"]:
                     if stems_dict[stem_type]:
                         if stem_type.lower() in excluded_stems_list:
                 raise RuntimeError(f"Ensemble failed, output file not created: {output_file}")
             # Durumu sıfırla
+            state["current_model_idx"] = 0
+            state["current_audio"] = None
+            state["processed_stems"] = []
+            state["model_outputs"] = {}
             elapsed = time.time() - start_time
             logger.info(f"Ensemble completed, output: {output_file}, took {elapsed:.2f}s")
                 file_name = os.path.basename(file)
                 status += f"<li><a href='file={file}' download>{file_name}</a></li>"
             status += "</ul>"
+            return output_file, status, file_list, state
         # Şu anki modeli işle
         model_key = model_keys[current_idx]
                             break
                     else:
                         logger.warning(f"Model {model_key} not found, skipping")
+                        state["current_model_idx"] += 1
+                        return None, f"Model {model_key} not found, proceeding to next model.", [], state
                     # Zaman kontrolü
                     elapsed = time.time() - start_time
                             stem_type = "vocals" if "vocals" in os.path.basename(stem).lower() else "other"
                             permanent_stem_path = os.path.join(permanent_output_dir, f"{base_name}_{stem_type}_{model_key.replace(' | ', '_').replace(' ', '_')}.{out_format}")
                             shutil.copy(stem, permanent_stem_path)
+                            state["model_outputs"][model_key][stem_type].append(permanent_stem_path)
                             if stem_type not in exclude_stems.lower():
                                 result.append(permanent_stem_path)
+                        state["processed_stems"].extend(result)
                         break
                 except Exception as e:
                     logger.error(f"Error processing {model_key}, attempt {attempt + 1}/{max_retries + 1}: {e}")
                     if attempt == max_retries:
                         logger.error(f"Max retries reached for {model_key}, skipping")
+                        state["current_model_idx"] += 1
+                        return None, f"Failed to process {model_key} after {max_retries} attempts.", [], state
                     time.sleep(1)
                 finally:
             logger.info("Cleared model cache and GPU memory")
         # Bir sonraki modele geç
+        state["current_model_idx"] += 1
         elapsed = time.time() - start_time
         logger.info(f"Model {model_key} completed in {elapsed:.2f}s")
         # Çıktılar
+        file_list = state["processed_stems"]
         status = f"Model {model_key} (Model {current_idx + 1}/{len(model_keys)}) completed in {elapsed:.2f}s<br>Click 'Run Ensemble!' to process the next model.<br>Processed stems:<ul>"
         for file in file_list:
             file_name = os.path.basename(file)
             status += f"<li><a href='file={file}' download>{file_name}</a></li>"
         status += "</ul>"
+        return file_list[0] if file_list else None, status, file_list, state
     except Exception as e:
         logger.error(f"Ensemble error: {e}")
                 logger.warning(f"Failed to delete temporary file {temp_audio_path}: {e}")
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
+            logger.info("GPU memory cleared")
 def update_roformer_models(category):
     """Update Roformer model dropdown based on selected category."""
         gr.Markdown("<h1 class='header-text'>🎵 SESA Fast Separation 🎵</h1>")
         gr.Markdown("**Note**: If YouTube downloads fail, upload a valid cookies file or a local WAV file. [Cookie Instructions](https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies)")
         gr.Markdown("**Tip**: For best results, use audio shorter than 15 minutes or fewer models (up to 6) to ensure smooth processing.")
+        # Gradio State bileşeni
+        ensemble_state = gr.State(value={
+            "current_audio": None,
+            "current_model_idx": 0,
+            "processed_stems": [],
+            "model_outputs": {}
+        })
         with gr.Tabs():
             with gr.Tab("⚙️ Settings"):
                 with gr.Group(elem_classes="dubbing-theme"):
         ensemble_button.click(
             fn=auto_ensemble_process,
             inputs=[
+                ensemble_audio, ensemble_models, ensemble_state, ensemble_seg_size, ensemble_overlap,
                 output_format, ensemble_use_tta, model_file_dir, output_dir,
                 norm_threshold, amp_threshold, batch_size, ensemble_method,
                 ensemble_exclude_stems, ensemble_weights
             ],
+            outputs=[ensemble_output, ensemble_status, ensemble_files, ensemble_state]
         )
     return app