ASesYusuf1 committed on
Commit
ddbc8fa
·
verified ·
1 Parent(s): 2297009

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -86
app.py CHANGED
@@ -338,16 +338,16 @@ def download_audio(url, cookie_file=None):
338
  temp_output_path = 'ytdl/gdrive_temp_audio'
339
  gdown.download(download_url, temp_output_path, quiet=False)
340
  if not os.path.exists(temp_output_path):
341
- return None, "Δ°ndirilen dosya bulunamadΔ±", None
342
  from mimetypes import guess_type
343
  mime_type, _ = guess_type(temp_output_path)
344
  if not mime_type or not mime_type.startswith('audio'):
345
- return None, "İndirilen dosya bir ses dosyası değil", None
346
  output_path = 'ytdl/gdrive_audio.wav'
347
  audio = AudioSegment.from_file(temp_output_path)
348
  audio.export(output_path, format="wav")
349
  sample_rate, data = wavfile.read(output_path)
350
- return output_path, "İndirme başarılı", (sample_rate, data)
351
  else:
352
  os.makedirs('ytdl', exist_ok=True)
353
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
@@ -357,24 +357,24 @@ def download_audio(url, cookie_file=None):
357
  for ext in ['.webm', '.m4a', '.opus', '.aac']:
358
  file_path = file_path.replace(ext, '.wav')
359
  if not os.path.exists(file_path):
360
- return None, "Δ°ndirilen dosya bulunamadΔ±", None
361
  sample_rate, data = wavfile.read(file_path)
362
- return file_path, "İndirme başarılı", (sample_rate, data)
363
  except yt_dlp.utils.ExtractorError as e:
364
  if "Sign in to confirm you’re not a bot" in str(e):
365
- return None, "Kimlik doğrulama hatası. Lütfen geçerli YouTube çerezleri yükleyin: https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies", None
366
- return None, f"Δ°ndirme hatasΔ±: {str(e)}", None
367
  except Exception as e:
368
- return None, f"Beklenmeyen hata: {str(e)}", None
369
  finally:
370
  if temp_output_path and os.path.exists(temp_output_path):
371
  os.remove(temp_output_path)
372
- logger.info(f"GeΓ§ici dosya silindi: {temp_output_path}")
373
 
374
  @spaces.GPU
375
  def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, output_dir, out_format, norm_thresh, amp_thresh, batch_size, exclude_stems="", progress=gr.Progress(track_tqdm=True)):
376
  if not audio:
377
- raise ValueError("Ses dosyası sağlanmadı.")
378
  temp_audio_path = None
379
  try:
380
  if isinstance(audio, tuple):
@@ -383,7 +383,7 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
383
  scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
384
  audio = temp_audio_path
385
  if seg_size > 512:
386
- logger.warning(f"Segment boyutu {seg_size} bΓΌyΓΌk, bu ZeroGPU'da Γ§ΓΆkmelere neden olabilir.")
387
  override_seg_size = override_seg_size == "True"
388
  if os.path.exists(output_dir):
389
  shutil.rmtree(output_dir)
@@ -394,8 +394,8 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
394
  model = models[model_key]
395
  break
396
  else:
397
- raise ValueError(f"Model '{model_key}' bulunamadΔ±.")
398
- logger.info(f"{base_name} ayrıştırılıyor, model: {model_key}, cihaz: {device}")
399
  separator = Separator(
400
  log_level=logging.INFO,
401
  model_file_dir=model_dir,
@@ -406,9 +406,9 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
406
  use_autocast=use_autocast,
407
  mdxc_params={"segment_size": seg_size, "override_model_segment_size": override_seg_size, "batch_size": batch_size, "overlap": overlap, "pitch_shift": pitch_shift}
408
  )
409
- progress(0.2, desc="Model yΓΌkleniyor...")
410
  separator.load_model(model_filename=model)
411
- progress(0.7, desc="Ses ayrıştırılıyor...")
412
  separation = separator.separate(audio)
413
  stems = [os.path.join(output_dir, file_name) for file_name in separation]
414
  if exclude_stems.strip():
@@ -417,15 +417,15 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
417
  return filtered_stems[0] if filtered_stems else None, filtered_stems[1] if len(filtered_stems) > 1 else None
418
  return stems[0], stems[1] if len(stems) > 1 else None
419
  except Exception as e:
420
- logger.error(f"Ayrıştırma hatası: {e}")
421
- raise RuntimeError(f"Ayrıştırma hatası: {e}")
422
  finally:
423
  if temp_audio_path and os.path.exists(temp_audio_path):
424
  os.remove(temp_audio_path)
425
- logger.info(f"GeΓ§ici dosya silindi: {temp_audio_path}")
426
  if torch.cuda.is_available():
427
  torch.cuda.empty_cache()
428
- logger.info("GPU belleği temizlendi")
429
 
430
  @spaces.GPU
431
  def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str=""):
@@ -433,9 +433,9 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
433
  chunk_paths = []
434
  try:
435
  if not audio:
436
- raise ValueError("Ses dosyası sağlanmadı.")
437
  if not model_keys:
438
- raise ValueError("Model seΓ§ilmedi.")
439
  if isinstance(audio, tuple):
440
  sample_rate, data = audio
441
  temp_audio_path = os.path.join("/tmp", "temp_audio.wav")
@@ -443,11 +443,11 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
443
  audio = temp_audio_path
444
  audio_data, sr = librosa.load(audio, sr=None, mono=False)
445
  duration = librosa.get_duration(y=audio_data, sr=sr)
446
- logger.info(f"Ses sΓΌresi: {duration:.2f} saniye")
447
  chunk_duration = 300
448
  chunks = []
449
  if duration > 900:
450
- logger.info(f"Ses 15 dakikadan uzun, {chunk_duration}-saniyelik parΓ§alara bΓΆlΓΌnΓΌyor")
451
  num_chunks = int(np.ceil(duration / chunk_duration))
452
  for i in range(num_chunks):
453
  start = i * chunk_duration * sr
@@ -457,7 +457,7 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
457
  sf.write(chunk_path, chunk_data.T if audio_data.ndim == 2 else chunk_data, sr)
458
  chunks.append(chunk_path)
459
  chunk_paths.append(chunk_path)
460
- logger.info(f"Parça {i} oluşturuldu: {chunk_path}")
461
  else:
462
  chunks = [audio]
463
  use_tta = use_tta == "True"
@@ -465,7 +465,7 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
465
  shutil.rmtree(output_dir)
466
  os.makedirs(output_dir, exist_ok=True)
467
  base_name = os.path.splitext(os.path.basename(audio))[0]
468
- logger.info(f"{base_name} için birleştirme, modeller: {model_keys}, cihaz: {device}")
469
  all_stems = []
470
  model_stems = {}
471
  for model_key in model_keys:
@@ -475,7 +475,7 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
475
  model = models[model_key]
476
  break
477
  else:
478
- logger.warning(f"Model {model_key} bulunamadΔ±, atlanΔ±yor")
479
  continue
480
  for chunk_idx, chunk_path in enumerate(chunks):
481
  separator = Separator(
@@ -488,9 +488,9 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
488
  use_autocast=use_autocast,
489
  mdxc_params={"segment_size": seg_size, "overlap": overlap, "use_tta": use_tta, "batch_size": batch_size}
490
  )
491
- logger.info(f"Chunk {chunk_idx} iΓ§in {model_key} yΓΌkleniyor")
492
  separator.load_model(model_filename=model)
493
- logger.info(f"Chunk {chunk_idx} {model_key} ile ayrıştırılıyor")
494
  separation = separator.separate(chunk_path)
495
  stems = [os.path.join(output_dir, file_name) for file_name in separation]
496
  for stem in stems:
@@ -502,7 +502,7 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
502
  gc.collect()
503
  if torch.cuda.is_available():
504
  torch.cuda.empty_cache()
505
- logger.info(f"{model_key} chunk {chunk_idx} sonrası CUDA belleği temizlendi")
506
  for model_key, stems_dict in model_stems.items():
507
  for stem_type in ["vocals", "other"]:
508
  if stems_dict[stem_type]:
@@ -511,18 +511,18 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
511
  for stem_path in stems_dict[stem_type]:
512
  data, _ = librosa.load(stem_path, sr=sr, mono=False)
513
  f.write(data.T if data.ndim == 2 else data)
514
- logger.info(f"{model_key} için {stem_type} birleştirildi: {combined_path}")
515
  if exclude_stems.strip() and stem_type.lower() in [s.strip().lower() for s in exclude_stems.split(',')]:
516
- logger.info(f"{model_key} iΓ§in {stem_type} hariΓ§ tutuldu")
517
  continue
518
  all_stems.append(combined_path)
519
  all_stems = [stem for stem in all_stems if os.path.exists(stem)]
520
  if not all_stems:
521
- raise ValueError("Birleştirme için geçerli stem dosyası bulunamadı.")
522
  weights = [float(w.strip()) for w in weights_str.split(',')] if weights_str.strip() else [1.0] * len(all_stems)
523
  if len(weights) != len(all_stems):
524
  weights = [1.0] * len(all_stems)
525
- logger.info("Ağırlıklar eşleşmedi, varsayılan 1.0 kullanıldı")
526
  output_file = os.path.join(output_dir, f"{base_name}_ensemble_{ensemble_method}.{out_format}")
527
  ensemble_args = [
528
  "--files", *all_stems,
@@ -530,26 +530,36 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
530
  "--weights", *[str(w) for w in weights],
531
  "--output", output_file
532
  ]
533
- logger.info(f"Birleştirme argümanları: {ensemble_args}")
534
  ensemble_files(ensemble_args)
535
- logger.info("Birleştirme tamamlandı")
536
- return output_file, f"Birleştirme {ensemble_method} ile tamamlandı, hariç tutulan: {exclude_stems if exclude_stems else 'Yok'}"
537
  except Exception as e:
538
- logger.error(f"Birleştirme hatası: {e}")
539
- raise RuntimeError(f"Birleştirme hatası: {e}")
540
  finally:
541
  for path in chunk_paths + ([temp_audio_path] if temp_audio_path and os.path.exists(temp_audio_path) else []):
542
  try:
543
  if os.path.exists(path):
544
  os.remove(path)
545
- logger.info(f"GeΓ§ici dosya silindi: {path}")
546
  except Exception as e:
547
- logger.warning(f"GeΓ§ici dosya silinemedi {path}: {e}")
548
  if torch.cuda.is_available():
549
  torch.cuda.empty_cache()
550
- logger.info("GPU belleği temizlendi")
551
 
552
- # Mevcut update_roformer_models ve update_ensemble_models (değişmedi)
 
 
 
 
 
 
 
 
 
 
553
 
554
  def download_audio_wrapper(url, cookie_file):
555
  file_path, status, audio_data = download_audio(url, cookie_file)
@@ -558,65 +568,65 @@ def download_audio_wrapper(url, cookie_file):
558
  def create_interface():
559
  with gr.Blocks(title="🎡 SESA Fast Separation 🎡", css=CSS, elem_id="app-container") as app:
560
  gr.Markdown("<h1 class='header-text'>🎡 SESA Fast Separation 🎡</h1>")
561
- gr.Markdown("**Not**: YouTube indirmeleri başarΔ±sΔ±z olursa, doğrudan bir ses dosyasΔ± yΓΌkleyin veya geΓ§erli bir Γ§erez dosyasΔ± kullanΔ±n. [Γ‡erez TalimatlarΔ±](https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies)")
562
- gr.Markdown("**Uyarı**: 15 dakikadan uzun ses dosyaları otomatik olarak parçalara bâlünür, bu işlem daha fazla zaman ve kaynak gerektirebilir.")
563
  with gr.Tabs():
564
  with gr.Tab("βš™οΈ Settings"):
565
  with gr.Group(elem_classes="dubbing-theme"):
566
- gr.Markdown("### Genel Ayarlar")
567
- model_file_dir = gr.Textbox(value="/tmp/audio-separator-models/", label="πŸ“‚ Model Γ–nbelleği", placeholder="Model dizini yolu", interactive=True)
568
- output_dir = gr.Textbox(value="output", label="πŸ“€ Γ‡Δ±kış Dizini", placeholder="SonuΓ§larΔ±n kaydedileceği yer", interactive=True)
569
- output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMATS, label="🎢 Γ‡Δ±kış FormatΔ±", interactive=True)
570
- norm_threshold = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="πŸ”Š Normalizasyon Eşiği", interactive=True)
571
- amp_threshold = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="πŸ“ˆ Amplifikasyon Eşiği", interactive=True)
572
- batch_size = gr.Slider(1, 16, value=1, step=1, label="⚑ Batch Boyutu", interactive=True)
573
  with gr.Tab("🎀 Roformer"):
574
  with gr.Group(elem_classes="dubbing-theme"):
575
- gr.Markdown("### Ses Ayrıştırma")
576
  with gr.Row():
577
- roformer_audio = gr.Audio(label="🎧 Ses Yükle", type="filepath", interactive=True)
578
- url_ro = gr.Textbox(label="πŸ”— Veya URL YapıştΔ±r", placeholder="YouTube veya ses URL'si", interactive=True)
579
- cookies_ro = gr.File(label="πŸͺ Γ‡erez DosyasΔ±", file_types=[".txt"], interactive=True)
580
- download_roformer = gr.Button("⬇️ Δ°ndir", variant="secondary")
581
- roformer_download_status = gr.Textbox(label="πŸ“’ Δ°ndirme Durumu", interactive=False)
582
- roformer_exclude_stems = gr.Textbox(label="🚫 Stem'leri Hariç Tut", placeholder="ârn: vocals, drums (virgülle ayrılmış)", interactive=True)
583
  with gr.Row():
584
- roformer_category = gr.Dropdown(label="πŸ“š Kategori", choices=list(ROFORMER_MODELS.keys()), value="General Purpose", interactive=True)
585
  roformer_model = gr.Dropdown(label="πŸ› οΈ Model", choices=list(ROFORMER_MODELS["General Purpose"].keys()), interactive=True, allow_custom_value=True)
586
  with gr.Row():
587
- roformer_seg_size = gr.Slider(32, 4000, value=256, step=32, label="πŸ“ Segment Boyutu", interactive=True)
588
- roformer_overlap = gr.Slider(2, 10, value=8, step=1, label="πŸ”„ Γ–rtüşme", interactive=True)
589
  with gr.Row():
590
- roformer_pitch_shift = gr.Slider(-12, 12, value=0, step=1, label="🎡 Perde Kaydırma", interactive=True)
591
- roformer_override_seg_size = gr.Dropdown(choices=["True", "False"], value="False", label="πŸ”§ Segment Boyutunu GeΓ§ersiz KΔ±l", interactive=True)
592
- roformer_button = gr.Button("βœ‚οΈ Şimdi AyΔ±r!", variant="primary")
593
  with gr.Row():
594
  roformer_stem1 = gr.Audio(label="🎸 Stem 1", type="filepath", interactive=False)
595
  roformer_stem2 = gr.Audio(label="πŸ₯ Stem 2", type="filepath", interactive=False)
596
  with gr.Tab("🎚️ Auto Ensemble"):
597
  with gr.Group(elem_classes="dubbing-theme"):
598
- gr.Markdown("### Birleştirme İşlemi")
599
- gr.Markdown("Not: Ağırlıklar belirtilmezse, tüm modellere eşit ağırlık (1.0) uygulanır.")
600
  with gr.Row():
601
- ensemble_audio = gr.Audio(label="🎧 Ses Yükle", type="filepath", interactive=True)
602
- url_ensemble = gr.Textbox(label="πŸ”— Veya URL YapıştΔ±r", placeholder="YouTube veya ses URL'si", interactive=True)
603
- cookies_ensemble = gr.File(label="πŸͺ Γ‡erez DosyasΔ±", file_types=[".txt"], interactive=True)
604
- download_ensemble = gr.Button("⬇️ Δ°ndir", variant="secondary")
605
- ensemble_download_status = gr.Textbox(label="πŸ“’ Δ°ndirme Durumu", interactive=False)
606
- ensemble_exclude_stems = gr.Textbox(label="🚫 Stem'leri Hariç Tut", placeholder="ârn: vocals, drums (virgülle ayrılmış)", interactive=True)
607
  with gr.Row():
608
- ensemble_category = gr.Dropdown(label="πŸ“š Kategori", choices=list(ROFORMER_MODELS.keys()), value="Instrumentals", interactive=True)
609
- ensemble_models = gr.Dropdown(label="πŸ› οΈ Modeller", choices=list(ROFORMER_MODELS["Instrumentals"].keys()), multiselect=True, interactive=True, allow_custom_value=True)
610
  with gr.Row():
611
- ensemble_seg_size = gr.Slider(32, 4000, value=256, step=32, label="πŸ“ Segment Boyutu", interactive=True)
612
- ensemble_overlap = gr.Slider(2, 10, value=8, step=1, label="πŸ”„ Γ–rtüşme", interactive=True)
613
- ensemble_use_tta = gr.Dropdown(choices=["True", "False"], value="False", label="πŸ” TTA Kullan", interactive=True)
614
- ensemble_method = gr.Dropdown(label="βš™οΈ Birleştirme YΓΆntemi", choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave', 'avg_fft', 'median_fft', 'max_fft', 'min_fft'], value='avg_wave', interactive=True)
615
- ensemble_weights = gr.Textbox(label="βš–οΈ AğırlΔ±klar", placeholder="ΓΆrn: 1.0, 1.0 (virgΓΌlle ayrΔ±lmış)", interactive=True)
616
- ensemble_button = gr.Button("πŸŽ›οΈ Birleştirme Γ‡alıştΔ±r!", variant="primary")
617
- ensemble_output = gr.Audio(label="🎢 Birleştirme Sonucu", type="filepath", interactive=False)
618
- ensemble_status = gr.Textbox(label="πŸ“’ Durum", interactive=False)
619
- gr.HTML("<div class='footer'>Audio-Separator ile Güçlendirildi 🌟🎢 | ❀️ ile yapıldı</div>")
620
  roformer_category.change(update_roformer_models, inputs=[roformer_category], outputs=[roformer_model])
621
  download_roformer.click(
622
  fn=download_audio_wrapper,
@@ -651,14 +661,14 @@ def create_interface():
651
  return app
652
 
653
  if __name__ == "__main__":
654
- parser = argparse.ArgumentParser(description="Müzik Kaynak Ayrıştırma Web Arayüzü")
655
- parser.add_argument("--port", type=int, default=7860, help="Arayüzün çalışacağı port")
656
  args = parser.parse_args()
657
  app = create_interface()
658
  try:
659
  app.launch(server_name="0.0.0.0", server_port=args.port, share=True)
660
  except Exception as e:
661
- logger.error(f"Arayüz başlatılamadı: {e}")
662
  raise
663
  finally:
664
  app.close()
 
338
  temp_output_path = 'ytdl/gdrive_temp_audio'
339
  gdown.download(download_url, temp_output_path, quiet=False)
340
  if not os.path.exists(temp_output_path):
341
+ return None, "Downloaded file not found", None
342
  from mimetypes import guess_type
343
  mime_type, _ = guess_type(temp_output_path)
344
  if not mime_type or not mime_type.startswith('audio'):
345
+ return None, "Downloaded file is not an audio file", None
346
  output_path = 'ytdl/gdrive_audio.wav'
347
  audio = AudioSegment.from_file(temp_output_path)
348
  audio.export(output_path, format="wav")
349
  sample_rate, data = wavfile.read(output_path)
350
+ return output_path, "Download successful", (sample_rate, data)
351
  else:
352
  os.makedirs('ytdl', exist_ok=True)
353
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
 
357
  for ext in ['.webm', '.m4a', '.opus', '.aac']:
358
  file_path = file_path.replace(ext, '.wav')
359
  if not os.path.exists(file_path):
360
+ return None, "Downloaded file not found", None
361
  sample_rate, data = wavfile.read(file_path)
362
+ return file_path, "Download successful", (sample_rate, data)
363
  except yt_dlp.utils.ExtractorError as e:
364
  if "Sign in to confirm you’re not a bot" in str(e):
365
+ return None, "Authentication error. Please upload valid YouTube cookies: https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies", None
366
+ return None, f"Download error: {str(e)}", None
367
  except Exception as e:
368
+ return None, f"Unexpected error: {str(e)}", None
369
  finally:
370
  if temp_output_path and os.path.exists(temp_output_path):
371
  os.remove(temp_output_path)
372
+ logger.info(f"Temporary file deleted: {temp_output_path}")
373
 
374
  @spaces.GPU
375
  def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, output_dir, out_format, norm_thresh, amp_thresh, batch_size, exclude_stems="", progress=gr.Progress(track_tqdm=True)):
376
  if not audio:
377
+ raise ValueError("No audio file provided.")
378
  temp_audio_path = None
379
  try:
380
  if isinstance(audio, tuple):
 
383
  scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
384
  audio = temp_audio_path
385
  if seg_size > 512:
386
+ logger.warning(f"Segment size {seg_size} is large, this may cause crashes on ZeroGPU.")
387
  override_seg_size = override_seg_size == "True"
388
  if os.path.exists(output_dir):
389
  shutil.rmtree(output_dir)
 
394
  model = models[model_key]
395
  break
396
  else:
397
+ raise ValueError(f"Model '{model_key}' not found.")
398
+ logger.info(f"Separating {base_name} with {model_key} on {device}")
399
  separator = Separator(
400
  log_level=logging.INFO,
401
  model_file_dir=model_dir,
 
406
  use_autocast=use_autocast,
407
  mdxc_params={"segment_size": seg_size, "override_model_segment_size": override_seg_size, "batch_size": batch_size, "overlap": overlap, "pitch_shift": pitch_shift}
408
  )
409
+ progress(0.2, desc="Loading model...")
410
  separator.load_model(model_filename=model)
411
+ progress(0.7, desc="Separating audio...")
412
  separation = separator.separate(audio)
413
  stems = [os.path.join(output_dir, file_name) for file_name in separation]
414
  if exclude_stems.strip():
 
417
  return filtered_stems[0] if filtered_stems else None, filtered_stems[1] if len(filtered_stems) > 1 else None
418
  return stems[0], stems[1] if len(stems) > 1 else None
419
  except Exception as e:
420
+ logger.error(f"Separation error: {e}")
421
+ raise RuntimeError(f"Separation error: {e}")
422
  finally:
423
  if temp_audio_path and os.path.exists(temp_audio_path):
424
  os.remove(temp_audio_path)
425
+ logger.info(f"Temporary file deleted: {temp_audio_path}")
426
  if torch.cuda.is_available():
427
  torch.cuda.empty_cache()
428
+ logger.info("GPU memory cleared")
429
 
430
  @spaces.GPU
431
  def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str=""):
 
433
  chunk_paths = []
434
  try:
435
  if not audio:
436
+ raise ValueError("No audio file provided.")
437
  if not model_keys:
438
+ raise ValueError("No models selected.")
439
  if isinstance(audio, tuple):
440
  sample_rate, data = audio
441
  temp_audio_path = os.path.join("/tmp", "temp_audio.wav")
 
443
  audio = temp_audio_path
444
  audio_data, sr = librosa.load(audio, sr=None, mono=False)
445
  duration = librosa.get_duration(y=audio_data, sr=sr)
446
+ logger.info(f"Audio duration: {duration:.2f} seconds")
447
  chunk_duration = 300
448
  chunks = []
449
  if duration > 900:
450
+ logger.info(f"Audio exceeds 15 minutes, splitting into {chunk_duration}-second chunks")
451
  num_chunks = int(np.ceil(duration / chunk_duration))
452
  for i in range(num_chunks):
453
  start = i * chunk_duration * sr
 
457
  sf.write(chunk_path, chunk_data.T if audio_data.ndim == 2 else chunk_data, sr)
458
  chunks.append(chunk_path)
459
  chunk_paths.append(chunk_path)
460
+ logger.info(f"Created chunk {i}: {chunk_path}")
461
  else:
462
  chunks = [audio]
463
  use_tta = use_tta == "True"
 
465
  shutil.rmtree(output_dir)
466
  os.makedirs(output_dir, exist_ok=True)
467
  base_name = os.path.splitext(os.path.basename(audio))[0]
468
+ logger.info(f"Ensemble for {base_name} with {model_keys} on {device}")
469
  all_stems = []
470
  model_stems = {}
471
  for model_key in model_keys:
 
475
  model = models[model_key]
476
  break
477
  else:
478
+ logger.warning(f"Model {model_key} not found, skipping")
479
  continue
480
  for chunk_idx, chunk_path in enumerate(chunks):
481
  separator = Separator(
 
488
  use_autocast=use_autocast,
489
  mdxc_params={"segment_size": seg_size, "overlap": overlap, "use_tta": use_tta, "batch_size": batch_size}
490
  )
491
+ logger.info(f"Loading {model_key} for chunk {chunk_idx}")
492
  separator.load_model(model_filename=model)
493
+ logger.info(f"Separating chunk {chunk_idx} with {model_key}")
494
  separation = separator.separate(chunk_path)
495
  stems = [os.path.join(output_dir, file_name) for file_name in separation]
496
  for stem in stems:
 
502
  gc.collect()
503
  if torch.cuda.is_available():
504
  torch.cuda.empty_cache()
505
+ logger.info(f"Cleared CUDA cache after {model_key} chunk {chunk_idx}")
506
  for model_key, stems_dict in model_stems.items():
507
  for stem_type in ["vocals", "other"]:
508
  if stems_dict[stem_type]:
 
511
  for stem_path in stems_dict[stem_type]:
512
  data, _ = librosa.load(stem_path, sr=sr, mono=False)
513
  f.write(data.T if data.ndim == 2 else data)
514
+ logger.info(f"Combined {stem_type} for {model_key}: {combined_path}")
515
  if exclude_stems.strip() and stem_type.lower() in [s.strip().lower() for s in exclude_stems.split(',')]:
516
+ logger.info(f"Excluding {stem_type} for {model_key}")
517
  continue
518
  all_stems.append(combined_path)
519
  all_stems = [stem for stem in all_stems if os.path.exists(stem)]
520
  if not all_stems:
521
+ raise ValueError("No valid stems found for ensemble.")
522
  weights = [float(w.strip()) for w in weights_str.split(',')] if weights_str.strip() else [1.0] * len(all_stems)
523
  if len(weights) != len(all_stems):
524
  weights = [1.0] * len(all_stems)
525
+ logger.info("Weights mismatched, defaulting to 1.0")
526
  output_file = os.path.join(output_dir, f"{base_name}_ensemble_{ensemble_method}.{out_format}")
527
  ensemble_args = [
528
  "--files", *all_stems,
 
530
  "--weights", *[str(w) for w in weights],
531
  "--output", output_file
532
  ]
533
+ logger.info(f"Running ensemble with args: {ensemble_args}")
534
  ensemble_files(ensemble_args)
535
+ logger.info("Ensemble completed")
536
+ return output_file, f"Ensemble completed with {ensemble_method}, excluded: {exclude_stems if exclude_stems else 'None'}"
537
  except Exception as e:
538
+ logger.error(f"Ensemble error: {e}")
539
+ raise RuntimeError(f"Ensemble error: {e}")
540
  finally:
541
  for path in chunk_paths + ([temp_audio_path] if temp_audio_path and os.path.exists(temp_audio_path) else []):
542
  try:
543
  if os.path.exists(path):
544
  os.remove(path)
545
+ logger.info(f"Temporary file deleted: {path}")
546
  except Exception as e:
547
+ logger.warning(f"Failed to delete temporary file {path}: {e}")
548
  if torch.cuda.is_available():
549
  torch.cuda.empty_cache()
550
+ logger.info("GPU memory cleared")
551
 
552
+ def update_roformer_models(category):
553
+ """Update Roformer model dropdown based on selected category."""
554
+ choices = list(ROFORMER_MODELS.get(category, {}).keys()) or []
555
+ logger.debug(f"Updating roformer models for category {category}: {choices}")
556
+ return gr.update(choices=choices, value=choices[0] if choices else None)
557
+
558
+ def update_ensemble_models(category):
559
+ """Update ensemble model dropdown based on selected category."""
560
+ choices = list(ROFORMER_MODELS.get(category, {}).keys()) or []
561
+ logger.debug(f"Updating ensemble models for category {category}: {choices}")
562
+ return gr.update(choices=choices, value=[])
563
 
564
  def download_audio_wrapper(url, cookie_file):
565
  file_path, status, audio_data = download_audio(url, cookie_file)
 
568
  def create_interface():
569
  with gr.Blocks(title="🎡 SESA Fast Separation 🎡", css=CSS, elem_id="app-container") as app:
570
  gr.Markdown("<h1 class='header-text'>🎡 SESA Fast Separation 🎡</h1>")
571
+ gr.Markdown("**Note**: If YouTube downloads fail, upload an audio file directly or use a valid cookies file. [Cookie Instructions](https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies)")
572
+ gr.Markdown("**Warning**: Audio files longer than 15 minutes are automatically split into chunks, which may require more time and resources.")
573
  with gr.Tabs():
574
  with gr.Tab("βš™οΈ Settings"):
575
  with gr.Group(elem_classes="dubbing-theme"):
576
+ gr.Markdown("### General Settings")
577
+ model_file_dir = gr.Textbox(value="/tmp/audio-separator-models/", label="πŸ“‚ Model Cache", placeholder="Path to model directory", interactive=True)
578
+ output_dir = gr.Textbox(value="output", label="πŸ“€ Output Directory", placeholder="Where to save results", interactive=True)
579
+ output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMATS, label="🎢 Output Format", interactive=True)
580
+ norm_threshold = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="πŸ”Š Normalization Threshold", interactive=True)
581
+ amp_threshold = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="πŸ“ˆ Amplification Threshold", interactive=True)
582
+ batch_size = gr.Slider(1, 16, value=1, step=1, label="⚑ Batch Size", interactive=True)
583
  with gr.Tab("🎀 Roformer"):
584
  with gr.Group(elem_classes="dubbing-theme"):
585
+ gr.Markdown("### Audio Separation")
586
  with gr.Row():
587
+ roformer_audio = gr.Audio(label="🎧 Upload Audio", type="filepath", interactive=True)
588
+ url_ro = gr.Textbox(label="πŸ”— Or Paste URL", placeholder="YouTube or audio URL", interactive=True)
589
+ cookies_ro = gr.File(label="πŸͺ Cookies File", file_types=[".txt"], interactive=True)
590
+ download_roformer = gr.Button("⬇️ Download", variant="secondary")
591
+ roformer_download_status = gr.Textbox(label="πŸ“’ Download Status", interactive=False)
592
+ roformer_exclude_stems = gr.Textbox(label="🚫 Exclude Stems", placeholder="e.g., vocals, drums (comma-separated)", interactive=True)
593
  with gr.Row():
594
+ roformer_category = gr.Dropdown(label="πŸ“š Category", choices=list(ROFORMER_MODELS.keys()), value="General Purpose", interactive=True)
595
  roformer_model = gr.Dropdown(label="πŸ› οΈ Model", choices=list(ROFORMER_MODELS["General Purpose"].keys()), interactive=True, allow_custom_value=True)
596
  with gr.Row():
597
+ roformer_seg_size = gr.Slider(32, 4000, value=256, step=32, label="πŸ“ Segment Size", interactive=True)
598
+ roformer_overlap = gr.Slider(2, 10, value=8, step=1, label="πŸ”„ Overlap", interactive=True)
599
  with gr.Row():
600
+ roformer_pitch_shift = gr.Slider(-12, 12, value=0, step=1, label="🎡 Pitch Shift", interactive=True)
601
+ roformer_override_seg_size = gr.Dropdown(choices=["True", "False"], value="False", label="πŸ”§ Override Segment Size", interactive=True)
602
+ roformer_button = gr.Button("βœ‚οΈ Separate Now!", variant="primary")
603
  with gr.Row():
604
  roformer_stem1 = gr.Audio(label="🎸 Stem 1", type="filepath", interactive=False)
605
  roformer_stem2 = gr.Audio(label="πŸ₯ Stem 2", type="filepath", interactive=False)
606
  with gr.Tab("🎚️ Auto Ensemble"):
607
  with gr.Group(elem_classes="dubbing-theme"):
608
+ gr.Markdown("### Ensemble Processing")
609
+ gr.Markdown("Note: If weights are not specified, equal weights (1.0) are applied to all models.")
610
  with gr.Row():
611
+ ensemble_audio = gr.Audio(label="🎧 Upload Audio", type="filepath", interactive=True)
612
+ url_ensemble = gr.Textbox(label="πŸ”— Or Paste URL", placeholder="YouTube or audio URL", interactive=True)
613
+ cookies_ensemble = gr.File(label="πŸͺ Cookies File", file_types=[".txt"], interactive=True)
614
+ download_ensemble = gr.Button("⬇️ Download", variant="secondary")
615
+ ensemble_download_status = gr.Textbox(label="πŸ“’ Download Status", interactive=False)
616
+ ensemble_exclude_stems = gr.Textbox(label="🚫 Exclude Stems", placeholder="e.g., vocals, drums (comma-separated)", interactive=True)
617
  with gr.Row():
618
+ ensemble_category = gr.Dropdown(label="πŸ“š Category", choices=list(ROFORMER_MODELS.keys()), value="Instrumentals", interactive=True)
619
+ ensemble_models = gr.Dropdown(label="πŸ› οΈ Models", choices=list(ROFORMER_MODELS["Instrumentals"].keys()), multiselect=True, interactive=True, allow_custom_value=True)
620
  with gr.Row():
621
+ ensemble_seg_size = gr.Slider(32, 4000, value=256, step=32, label="πŸ“ Segment Size", interactive=True)
622
+ ensemble_overlap = gr.Slider(2, 10, value=8, step=1, label="πŸ”„ Overlap", interactive=True)
623
+ ensemble_use_tta = gr.Dropdown(choices=["True", "False"], value="False", label="πŸ” Use TTA", interactive=True)
624
+ ensemble_method = gr.Dropdown(label="βš™οΈ Ensemble Method", choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave', 'avg_fft', 'median_fft', 'max_fft', 'min_fft'], value='avg_wave', interactive=True)
625
+ ensemble_weights = gr.Textbox(label="βš–οΈ Weights", placeholder="e.g., 1.0, 1.0 (comma-separated)", interactive=True)
626
+ ensemble_button = gr.Button("πŸŽ›οΈ Run Ensemble!", variant="primary")
627
+ ensemble_output = gr.Audio(label="🎢 Ensemble Result", type="filepath", interactive=False)
628
+ ensemble_status = gr.Textbox(label="πŸ“’ Status", interactive=False)
629
+ gr.HTML("<div class='footer'>Powered by Audio-Separator 🌟🎢 | Made with ❀️</div>")
630
  roformer_category.change(update_roformer_models, inputs=[roformer_category], outputs=[roformer_model])
631
  download_roformer.click(
632
  fn=download_audio_wrapper,
 
661
  return app
662
 
663
  if __name__ == "__main__":
664
+ parser = argparse.ArgumentParser(description="Music Source Separation Web UI")
665
+ parser.add_argument("--port", type=int, default=7860, help="Port to run the UI on")
666
  args = parser.parse_args()
667
  app = create_interface()
668
  try:
669
  app.launch(server_name="0.0.0.0", server_port=args.port, share=True)
670
  except Exception as e:
671
+ logger.error(f"Failed to launch UI: {e}")
672
  raise
673
  finally:
674
  app.close()