Spaces:

ASesYusuf1
/

SESA_Fast_Separation

Running

App Files Community

ASesYusuf1 committed on Jun 11, 2025

Commit

01781d2

verified ·

1 Parent(s): 1d87edf

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -28

app.py CHANGED Viewed

@@ -485,6 +485,7 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
 def auto_ensemble_process(audio, model_keys, state, seg_size=64, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str="", progress=gr.Progress(track_tqdm=True)):
     temp_audio_path = None
     extracted_audio_path = None
     start_time = time.time()
     try:
         if not audio:
@@ -505,7 +506,7 @@ def auto_ensemble_process(audio, model_keys, state, seg_size=64, overlap=0.1, ou
             extracted_audio_path = os.path.join("/tmp", f"extracted_audio_{os.path.basename(audio)}.wav")
             logger.info(f"Extracting audio from video file: {audio}")
             ffmpeg_command = [
-                "ffmpeg", "-i", audio, "-vn", "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "2",
                 extracted_audio_path, "-y"
             ]
             try:
@@ -521,9 +522,21 @@ def auto_ensemble_process(audio, model_keys, state, seg_size=64, overlap=0.1, ou
                 else:
                     raise RuntimeError(f"Failed to extract audio from video: {error_message}")
         audio_data, sr = librosa.load(audio_to_process, sr=None, mono=False)
         duration = librosa.get_duration(y=audio_data, sr=sr)
-        logger.info(f"Audio duration: {duration:.2f} seconds")
         dynamic_batch_size = max(1, min(4, 1 + int(900 / (duration + 1)) - len(model_keys) // 2))
         logger.info(f"Using batch size: {dynamic_batch_size} for {len(model_keys)} models, duration {duration:.2f}s")
@@ -555,13 +568,17 @@ def auto_ensemble_process(audio, model_keys, state, seg_size=64, overlap=0.1, ou
         permanent_output_dir = os.path.join(output_dir, "permanent_stems")
         os.makedirs(permanent_output_dir, exist_ok=True)
-        # Check if all models have been processed
-        if state["current_model_idx"] >= len(model_keys):
             logger.info("All models processed, running ensemble...")
             progress(0.9, desc="Running ensemble...")
             excluded_stems_list = [s.strip().lower() for s in exclude_stems.split(',')] if exclude_stems.strip() else []
-            all_stems = []
             for model_key, stems_dict in state["model_outputs"].items():
                 for stem_type in ["vocals", "other"]:
                     if stems_dict[stem_type]:
@@ -590,7 +607,6 @@ def auto_ensemble_process(audio, model_keys, state, seg_size=64, overlap=0.1, ou
             if result is None or not os.path.exists(output_file):
                 raise RuntimeError(f"Ensemble failed, output file not created: {output_file}")
-            # Reset state after ensemble
             state["current_model_idx"] = 0
             state["current_audio"] = None
             state["processed_stems"] = []
@@ -607,12 +623,10 @@ def auto_ensemble_process(audio, model_keys, state, seg_size=64, overlap=0.1, ou
             status += "</ul>"
             return output_file, status, file_list, state
-        # Process the next model
-        model_key = model_keys[state["current_model_idx"]]
-        logger.info(f"Processing model {state['current_model_idx'] + 1}/{len(model_keys)}: {model_key}")
         progress(0.1, desc=f"Processing model {model_key}...")
-        model_cache = {}
         with torch.no_grad():
             for attempt in range(max_retries + 1):
                 try:
@@ -691,13 +705,12 @@ def auto_ensemble_process(audio, model_keys, state, seg_size=64, overlap=0.1, ou
         elapsed = time.time() - start_time
         logger.info(f"Model {model_key} completed in {elapsed:.2f}s")
-        file_list = state["processed_stems"]
-        status = f"Model {model_key} (Model {state['current_model_idx']}/{len(model_keys)}) completed in {elapsed:.2f}s<br>"
         if state["current_model_idx"] >= len(model_keys):
-            status += "All models processed. Click 'Run Ensemble!' to combine the stems.<br>"
-        else:
-            status += "Click 'Run Ensemble!' to process the next model.<br>"
-        status += "Processed stems:<ul>"
         for file in file_list:
             file_name = os.path.basename(file)
             status += f"<li><a href='file={file}' download>{file_name}</a></li>"
@@ -710,18 +723,13 @@ def auto_ensemble_process(audio, model_keys, state, seg_size=64, overlap=0.1, ou
         raise RuntimeError(error_msg)
     finally:
-        if temp_audio_path and os.path.exists(temp_audio_path):
-            try:
-                os.remove(temp_audio_path)
-                logger.info(f"Temporary file deleted: {temp_audio_path}")
-            except Exception as e:
-                logger.warning(f"Failed to delete temporary file {temp_audio_path}: {e}")
-        if extracted_audio_path and os.path.exists(extracted_audio_path):
-            try:
-                os.remove(extracted_audio_path)
-                logger.info(f"Extracted audio file deleted: {extracted_audio_path}")
-            except Exception as e:
-                logger.warning(f"Failed to delete extracted audio file {extracted_audio_path}: {e}")
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             logger.info("GPU memory cleared")

 def auto_ensemble_process(audio, model_keys, state, seg_size=64, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str="", progress=gr.Progress(track_tqdm=True)):
     temp_audio_path = None
     extracted_audio_path = None
+    resampled_audio_path = None
     start_time = time.time()
     try:
         if not audio:
             extracted_audio_path = os.path.join("/tmp", f"extracted_audio_{os.path.basename(audio)}.wav")
             logger.info(f"Extracting audio from video file: {audio}")
             ffmpeg_command = [
+                "ffmpeg", "-i", audio, "-vn", "-acodec", "pcm_s16le", "-ar", "48000", "-ac", "2",
                 extracted_audio_path, "-y"
             ]
             try:
                 else:
                     raise RuntimeError(f"Failed to extract audio from video: {error_message}")
+        # Load audio and resample to 48 kHz
         audio_data, sr = librosa.load(audio_to_process, sr=None, mono=False)
+        logger.info(f"Original sample rate: {sr} Hz, Audio duration: {librosa.get_duration(y=audio_data, sr=sr):.2f} seconds")
+        if sr != 48000:
+            logger.info(f"Resampling audio from {sr} Hz to 48000 Hz")
+            resampled_audio_path = os.path.join("/tmp", f"resampled_audio_{os.path.basename(audio)}.wav")
+            waveform, _ = torchaudio.load(audio_to_process)
+            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=48000)
+            resampled_waveform = resampler(waveform)
+            torchaudio.save(resampled_audio_path, resampled_waveform, 48000)
+            audio_to_process = resampled_audio_path
+            audio_data, sr = librosa.load(audio_to_process, sr=None, mono=False)
+            logger.info(f"Resampled audio saved to: {resampled_audio_path}, new sample rate: {sr} Hz")
         duration = librosa.get_duration(y=audio_data, sr=sr)
         dynamic_batch_size = max(1, min(4, 1 + int(900 / (duration + 1)) - len(model_keys) // 2))
         logger.info(f"Using batch size: {dynamic_batch_size} for {len(model_keys)} models, duration {duration:.2f}s")
         permanent_output_dir = os.path.join(output_dir, "permanent_stems")
         os.makedirs(permanent_output_dir, exist_ok=True)
+        model_cache = {}
+        all_stems = []
+        total_tasks = len(model_keys)
+        current_idx = state["current_model_idx"]
+        logger.info(f"Current model index: {current_idx}, total models: {len(model_keys)}")
+        if current_idx >= len(model_keys):
             logger.info("All models processed, running ensemble...")
             progress(0.9, desc="Running ensemble...")
             excluded_stems_list = [s.strip().lower() for s in exclude_stems.split(',')] if exclude_stems.strip() else []
             for model_key, stems_dict in state["model_outputs"].items():
                 for stem_type in ["vocals", "other"]:
                     if stems_dict[stem_type]:
             if result is None or not os.path.exists(output_file):
                 raise RuntimeError(f"Ensemble failed, output file not created: {output_file}")
             state["current_model_idx"] = 0
             state["current_audio"] = None
             state["processed_stems"] = []
             status += "</ul>"
             return output_file, status, file_list, state
+        model_key = model_keys[current_idx]
+        logger.info(f"Processing model {current_idx + 1}/{len(model_keys)}: {model_key}")
         progress(0.1, desc=f"Processing model {model_key}...")
         with torch.no_grad():
             for attempt in range(max_retries + 1):
                 try:
         elapsed = time.time() - start_time
         logger.info(f"Model {model_key} completed in {elapsed:.2f}s")
         if state["current_model_idx"] >= len(model_keys):
+            logger.info("Last model processed, running ensemble immediately...")
+            return auto_ensemble_process(audio, model_keys, state, seg_size, overlap, out_format, use_tta, model_dir, output_dir, norm_thresh, amp_thresh, batch_size, ensemble_method, exclude_stems, weights_str, progress)
+        file_list = state["processed_stems"]
+        status = f"Model {model_key} (Model {current_idx + 1}/{len(model_keys)}) completed in {elapsed:.2f}s<br>Click 'Run Ensemble!' to process the next model.<br>Processed stems:<ul>"
         for file in file_list:
             file_name = os.path.basename(file)
             status += f"<li><a href='file={file}' download>{file_name}</a></li>"
         raise RuntimeError(error_msg)
     finally:
+        for temp_file in [temp_audio_path, extracted_audio_path, resampled_audio_path]:
+            if temp_file and os.path.exists(temp_file):
+                try:
+                    os.remove(temp_file)
+                    logger.info(f"Temporary file deleted: {temp_file}")
+                except Exception as e:
+                    logger.warning(f"Failed to delete temporary file {temp_file}: {e}")
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             logger.info("GPU memory cleared")