ASesYusuf1 committed on
Commit
26e2b18
·
verified ·
1 Parent(s): af61d8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -222
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import sys
3
  import torch
@@ -12,65 +13,41 @@ import soundfile as sf
12
  from ensemble import ensemble_files
13
  import shutil
14
  import gradio_client.utils as client_utils
15
- import validators
16
  import matchering as mg
17
- from typing import Any, Optional
18
- import spaces # Added for @spaces.GPU
19
- import gdown
20
-
21
- # Add this import if not already present
22
- import yt_dlp
23
- import scipy.io.wavfile # For audio processing
24
 
25
  # Logging setup
26
  logging.basicConfig(level=logging.INFO)
27
  logger = logging.getLogger(__name__)
28
 
29
- # Configuration directory
30
- CONFIG_DIR = "/tmp/SESA-Config"
31
-
32
- def ensure_config_dir():
33
- """Ensure the configuration directory exists and is writable."""
34
- try:
35
- os.makedirs(CONFIG_DIR, exist_ok=True)
36
- logger.info(f"Configuration directory ensured: {CONFIG_DIR}")
37
- except PermissionError as e:
38
- logger.error(f"Failed to create config directory {CONFIG_DIR}: {e}")
39
- raise RuntimeError(f"Cannot create config directory: {e}")
40
- except Exception as e:
41
- logger.error(f"Unexpected error creating config directory {CONFIG_DIR}: {e}")
42
- raise
43
-
44
- # Call early in the script
45
- ensure_config_dir()
46
 
47
- # Patch gradio_client.utils._json_schema_to_python_type
48
  original_json_schema_to_python_type = client_utils._json_schema_to_python_type
49
-
50
  def patched_json_schema_to_python_type(schema: Any, defs: Optional[dict] = None) -> str:
51
  logger.debug(f"Parsing schema: {schema}")
52
  if isinstance(schema, bool):
53
- logger.info("Found boolean schema, returning 'boolean'")
54
  return "boolean"
55
  if not isinstance(schema, dict):
56
- logger.warning(f"Unexpected schema type: {type(schema)}, returning 'Any'")
57
  return "Any"
58
  if "enum" in schema and schema.get("type") == "string":
59
- logger.info(f"Handling enum schema: {schema['enum']}")
60
  return f"Literal[{', '.join(repr(e) for e in schema['enum'])}]"
61
  try:
62
  return original_json_schema_to_python_type(schema, defs)
63
- except client_utils.APIInfoParseError as e:
64
- logger.error(f"Failed to parse schema {schema}: {e}")
65
  return "str"
66
-
67
  client_utils._json_schema_to_python_type = patched_json_schema_to_python_type
68
 
69
- # Device and autocast setup
70
  device = "cuda" if torch.cuda.is_available() else "cpu"
71
  use_autocast = device == "cuda"
72
  logger.info(f"Using device: {device}")
73
 
 
74
  # ROFORMER_MODELS and OUTPUT_FORMATS
75
  ROFORMER_MODELS = {
76
  "Vocals": {
@@ -327,12 +304,6 @@ button:hover {
327
  }
328
  """
329
 
330
- import os
331
- import yt_dlp
332
- import gdown
333
- from scipy.io import wavfile
334
- from pydub import AudioSegment
335
-
336
  def download_audio(url, cookie_file=None):
337
  ydl_opts = {
338
  'format': 'bestaudio[ext=webm]/bestaudio[ext=m4a]/bestaudio[ext=opus]/bestaudio[ext=aac]/bestaudio -video',
@@ -353,92 +324,73 @@ def download_audio(url, cookie_file=None):
353
  'no_check_certificate': True,
354
  'verbose': True,
355
  }
356
-
357
- # Check if it's a Google Drive link
358
- if 'drive.google.com' in url or 'https://drive.google.com' in url:
359
- try:
360
- # Create the 'ytdl' directory if it doesn't exist
361
  os.makedirs('ytdl', exist_ok=True)
362
-
363
- # Extract file ID from the URL
364
  file_id = url.split('/d/')[1].split('/')[0]
365
  download_url = f'https://drive.google.com/uc?id={file_id}'
366
- temp_output_path = 'ytdl/gdrive_temp_audio' # Temporary file for raw download
367
  gdown.download(download_url, temp_output_path, quiet=False)
368
-
369
  if not os.path.exists(temp_output_path):
370
- return None, "Downloaded file not found", None
371
-
372
- # Convert the downloaded file to WAV using pydub
 
 
373
  output_path = 'ytdl/gdrive_audio.wav'
374
  audio = AudioSegment.from_file(temp_output_path)
375
  audio.export(output_path, format="wav")
376
-
377
- # Read the converted WAV file
378
  sample_rate, data = wavfile.read(output_path)
379
- audio_data = (sample_rate, data)
380
-
381
- # Clean up the temporary file
382
- os.remove(temp_output_path)
383
-
384
- return output_path, "Download successful", audio_data
385
- except Exception as e:
386
- return None, f"Google Drive download failed: {str(e)}", None
387
-
388
- # Handle YouTube link
389
- else:
390
- os.makedirs('ytdl', exist_ok=True)
391
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
392
- try:
393
  info_dict = ydl.extract_info(url, download=True)
394
  base_file_path = ydl.prepare_filename(info_dict)
395
  file_path = base_file_path
396
  for ext in ['.webm', '.m4a', '.opus', '.aac']:
397
  file_path = file_path.replace(ext, '.wav')
398
  if not os.path.exists(file_path):
399
- return None, "Downloaded file not found", None
400
  sample_rate, data = wavfile.read(file_path)
401
- audio_data = (sample_rate, data)
402
- return file_path, "Download successful", audio_data
403
- except yt_dlp.utils.ExtractorError as e:
404
- if "Sign in to confirm you’re not a bot" in str(e) or "The provided YouTube account cookies are no longer valid" in str(e):
405
- return None, "Authentication failed. Please upload updated cookies from a logged-in browser session in the respective tab. See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for instructions.", None
406
- elif "HTTP Error 403: Forbidden" in str(e):
407
- return None, "Download failed: HTTP Error 403. This format requires a GVS PO Token, or the cookies are invalid. Please upload fresh cookies. See https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide for advanced troubleshooting.", None
408
- return None, f"Download failed: {str(e)}", None
409
- except Exception as e:
410
- return None, f"Unexpected error: {str(e)}", None
411
-
 
412
  @spaces.GPU
413
  def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, output_dir, out_format, norm_thresh, amp_thresh, batch_size, exclude_stems="", progress=gr.Progress(track_tqdm=True)):
414
- """Separate audio into stems using a Roformer model."""
415
  if not audio:
416
- raise ValueError("No audio file provided.")
417
-
418
- temp_audio_path = None # Initialize to None to avoid undefined variable in finally
419
  try:
420
- # If audio is a tuple (sample_rate, data), save it as a temporary file
421
  if isinstance(audio, tuple):
422
  sample_rate, data = audio
423
  temp_audio_path = os.path.join("/tmp", "temp_audio.wav")
424
  scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
425
  audio = temp_audio_path
426
-
 
427
  override_seg_size = override_seg_size == "True"
428
-
429
  if os.path.exists(output_dir):
430
  shutil.rmtree(output_dir)
431
  os.makedirs(output_dir, exist_ok=True)
432
-
433
  base_name = os.path.splitext(os.path.basename(audio))[0]
434
  for category, models in ROFORMER_MODELS.items():
435
  if model_key in models:
436
  model = models[model_key]
437
  break
438
  else:
439
- raise ValueError(f"Model '{model_key}' not found.")
440
-
441
- logger.info(f"Separating {base_name} with {model_key} on {device}")
442
  separator = Separator(
443
  log_level=logging.INFO,
444
  model_file_dir=model_dir,
@@ -449,54 +401,48 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
449
  use_autocast=use_autocast,
450
  mdxc_params={"segment_size": seg_size, "override_model_segment_size": override_seg_size, "batch_size": batch_size, "overlap": overlap, "pitch_shift": pitch_shift}
451
  )
452
- progress(0.2, desc="Loading model...")
453
  separator.load_model(model_filename=model)
454
- progress(0.7, desc="Separating audio...")
455
  separation = separator.separate(audio)
456
  stems = [os.path.join(output_dir, file_name) for file_name in separation]
457
-
458
  if exclude_stems.strip():
459
  excluded = [s.strip().lower() for s in exclude_stems.split(',')]
460
  filtered_stems = [stem for stem in stems if not any(ex in os.path.basename(stem).lower() for ex in excluded)]
461
  return filtered_stems[0] if filtered_stems else None, filtered_stems[1] if len(filtered_stems) > 1 else None
462
  return stems[0], stems[1] if len(stems) > 1 else None
463
  except Exception as e:
464
- logger.error(f"Separation failed: {e}")
465
- raise RuntimeError(f"Separation failed: {e}")
466
  finally:
467
- # Clean up temporary file if it was created
468
  if temp_audio_path and os.path.exists(temp_audio_path):
469
- try:
470
- os.remove(temp_audio_path)
471
- logger.info(f"Cleaned up temporary file: {temp_audio_path}")
472
- except Exception as e:
473
- logger.warning(f"Failed to clean up temporary file {temp_audio_path}: {e}")
474
 
475
  @spaces.GPU
476
  def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str=""):
477
  temp_audio_path = None
478
  chunk_paths = []
479
  try:
480
- if not audio or not model_keys:
481
- raise ValueError("Audio or models missing.")
482
-
483
- # Handle tuple input (sample_rate, data)
484
  if isinstance(audio, tuple):
485
  sample_rate, data = audio
486
  temp_audio_path = os.path.join("/tmp", "temp_audio.wav")
487
  scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
488
  audio = temp_audio_path
489
-
490
- # Load audio to check duration
491
  audio_data, sr = librosa.load(audio, sr=None, mono=False)
492
  duration = librosa.get_duration(y=audio_data, sr=sr)
493
- logger.info(f"Audio duration: {duration:.2f} seconds")
494
-
495
- # Split audio if longer than 15 minutes (900 seconds)
496
- chunk_duration = 300 # 5 minutes in seconds
497
  chunks = []
498
  if duration > 900:
499
- logger.info(f"Audio exceeds 15 minutes, splitting into {chunk_duration}-second chunks")
500
  num_chunks = int(np.ceil(duration / chunk_duration))
501
  for i in range(num_chunks):
502
  start = i * chunk_duration * sr
@@ -506,23 +452,17 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
506
  sf.write(chunk_path, chunk_data.T if audio_data.ndim == 2 else chunk_data, sr)
507
  chunks.append(chunk_path)
508
  chunk_paths.append(chunk_path)
509
- logger.info(f"Created chunk {i}: {chunk_path}")
510
  else:
511
  chunks = [audio]
512
-
513
  use_tta = use_tta == "True"
514
-
515
- # Create output directory
516
  if os.path.exists(output_dir):
517
  shutil.rmtree(output_dir)
518
  os.makedirs(output_dir, exist_ok=True)
519
-
520
  base_name = os.path.splitext(os.path.basename(audio))[0]
521
- logger.info(f"Ensemble for {base_name} with {model_keys} on {device}")
522
-
523
  all_stems = []
524
- model_stems = {} # Store stems per model for concatenation
525
-
526
  for model_key in model_keys:
527
  model_stems[model_key] = {"vocals": [], "other": []}
528
  for category, models in ROFORMER_MODELS.items():
@@ -530,9 +470,8 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
530
  model = models[model_key]
531
  break
532
  else:
533
- logger.warning(f"Model {model_key} not found, skipping")
534
  continue
535
-
536
  for chunk_idx, chunk_path in enumerate(chunks):
537
  separator = Separator(
538
  log_level=logging.INFO,
@@ -544,51 +483,41 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
544
  use_autocast=use_autocast,
545
  mdxc_params={"segment_size": seg_size, "overlap": overlap, "use_tta": use_tta, "batch_size": batch_size}
546
  )
547
- logger.info(f"Loading {model_key} for chunk {chunk_idx}")
548
  separator.load_model(model_filename=model)
549
- logger.info(f"Separating chunk {chunk_idx} with {model_key}")
550
  separation = separator.separate(chunk_path)
551
  stems = [os.path.join(output_dir, file_name) for file_name in separation]
552
-
553
- # Store stems for this chunk
554
  for stem in stems:
555
  if "vocals" in os.path.basename(stem).lower():
556
  model_stems[model_key]["vocals"].append(stem)
557
  elif "other" in os.path.basename(stem).lower():
558
  model_stems[model_key]["other"].append(stem)
559
-
560
- # Clean up memory
561
  separator = None
562
  gc.collect()
563
  if torch.cuda.is_available():
564
  torch.cuda.empty_cache()
565
- logger.info(f"Cleared CUDA cache after {model_key} chunk {chunk_idx}")
566
-
567
- # Combine stems for each model
568
  for model_key, stems_dict in model_stems.items():
569
  for stem_type in ["vocals", "other"]:
570
  if stems_dict[stem_type]:
571
  combined_path = os.path.join(output_dir, f"{base_name}_{stem_type}_{model_key.replace(' | ', '_').replace(' ', '_')}.wav")
572
- combined_data = []
573
- for stem_path in stems_dict[stem_type]:
574
- data, _ = librosa.load(stem_path, sr=sr, mono=False)
575
- combined_data.append(data)
576
- combined_data = np.concatenate(combined_data, axis=-1) if combined_data[0].ndim == 2 else np.concatenate(combined_data)
577
- sf.write(combined_path, combined_data.T if combined_data.ndim == 2 else combined_data, sr)
578
- logger.info(f"Combined {stem_type} for {model_key}: {combined_path}")
579
  if exclude_stems.strip() and stem_type.lower() in [s.strip().lower() for s in exclude_stems.split(',')]:
580
- logger.info(f"Excluding {stem_type} for {model_key}")
581
  continue
582
  all_stems.append(combined_path)
583
-
584
  if not all_stems:
585
- raise ValueError("No valid stems for ensemble after exclusion.")
586
-
587
- # Ensemble the combined stems
588
  weights = [float(w.strip()) for w in weights_str.split(',')] if weights_str.strip() else [1.0] * len(all_stems)
589
  if len(weights) != len(all_stems):
590
  weights = [1.0] * len(all_stems)
591
-
592
  output_file = os.path.join(output_dir, f"{base_name}_ensemble_{ensemble_method}.{out_format}")
593
  ensemble_args = [
594
  "--files", *all_stems,
@@ -596,114 +525,98 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
596
  "--weights", *[str(w) for w in weights],
597
  "--output", output_file
598
  ]
599
- logger.info(f"Running ensemble with args: {ensemble_args}")
600
  ensemble_files(ensemble_args)
601
-
602
- logger.info("Ensemble complete")
603
- return output_file, f"Ensemble completed with {ensemble_method}, excluded: {exclude_stems if exclude_stems else 'None'}"
604
  except Exception as e:
605
- logger.error(f"Ensemble failed: {e}")
606
- raise RuntimeError(f"Ensemble failed: {e}")
607
  finally:
608
- # Clean up temporary files
609
  for path in chunk_paths + ([temp_audio_path] if temp_audio_path and os.path.exists(temp_audio_path) else []):
610
  try:
611
  if os.path.exists(path):
612
  os.remove(path)
613
- logger.info(f"Successfully cleaned up {path}")
614
  except Exception as e:
615
- logger.error(f"Failed to clean up {path}: {e}")
616
-
617
def update_roformer_models(category):
    """Refresh the Roformer model dropdown to list the chosen category's models."""
    available = list(ROFORMER_MODELS.get(category, {}))
    logger.debug(f"Updating roformer models for category {category}: {available}")
    # Pre-select the first model when the category has any; clear otherwise.
    default = available[0] if available else None
    return gr.update(choices=available, value=default)
622
 
623
def update_ensemble_models(category):
    """Refresh the ensemble multi-select dropdown for the chosen category."""
    available = list(ROFORMER_MODELS.get(category, {}))
    logger.debug(f"Updating ensemble models for category {category}: {available}")
    # Multi-select starts empty; the user picks the models to ensemble.
    return gr.update(choices=available, value=[])
628
 
629
def download_audio_wrapper(url, cookie_file):
    """Adapt download_audio() for the UI: audio samples for gr.Audio, message for gr.Textbox."""
    _, message, samples = download_audio(url, cookie_file)
    return samples, message
632
 
633
  def create_interface():
634
  with gr.Blocks(title="🎵 SESA Fast Separation 🎵", css=CSS, elem_id="app-container") as app:
635
  gr.Markdown("<h1 class='header-text'>🎵 SESA Fast Separation 🎵</h1>")
636
- gr.Markdown("**Note**: If YouTube downloads fail, try uploading an audio file directly or use a valid cookies file.")
637
-
638
  with gr.Tabs():
639
- # Settings Tab
640
  with gr.Tab("⚙️ Settings"):
641
  with gr.Group(elem_classes="dubbing-theme"):
642
- gr.Markdown("### General Settings")
643
- model_file_dir = gr.Textbox(value="/tmp/audio-separator-models/", label="📂 Model Cache", placeholder="Path to model directory", interactive=True)
644
- output_dir = gr.Textbox(value="output", label="📤 Output Directory", placeholder="Where to save results", interactive=True)
645
- output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMATS, label="🎶 Output Format", interactive=True)
646
- norm_threshold = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="🔊 Normalization Threshold", interactive=True)
647
- amp_threshold = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="📈 Amplification Threshold", interactive=True)
648
- batch_size = gr.Slider(1, 16, value=1, step=1, label="⚡ Batch Size", interactive=True)
649
-
650
- # Roformer Tab
651
  with gr.Tab("🎤 Roformer"):
652
  with gr.Group(elem_classes="dubbing-theme"):
653
- gr.Markdown("### Audio Separation")
654
  with gr.Row():
655
- roformer_audio = gr.Audio(label="🎧 Upload Audio", type="filepath", interactive=True)
656
- url_ro = gr.Textbox(label="🔗 Or Paste URL", placeholder="YouTube or audio URL", interactive=True)
657
- cookies_ro = gr.File(label="🍪 Cookies File", file_types=[".txt"], interactive=True)
658
- download_roformer = gr.Button("⬇️ Download", variant="secondary")
659
- roformer_download_status = gr.Textbox(label="📢 Download Status", interactive=False) # Added status output
660
- roformer_exclude_stems = gr.Textbox(label="🚫 Exclude Stems", placeholder="e.g., vocals, drums (comma-separated)", interactive=True)
661
  with gr.Row():
662
- roformer_category = gr.Dropdown(label="📚 Category", choices=list(ROFORMER_MODELS.keys()), value="General Purpose", interactive=True)
663
  roformer_model = gr.Dropdown(label="🛠️ Model", choices=list(ROFORMER_MODELS["General Purpose"].keys()), interactive=True, allow_custom_value=True)
664
  with gr.Row():
665
- roformer_seg_size = gr.Slider(32, 4000, value=256, step=32, label="📏 Segment Size", interactive=True)
666
- roformer_overlap = gr.Slider(2, 10, value=8, step=1, label="🔄 Overlap", interactive=True)
667
  with gr.Row():
668
- roformer_pitch_shift = gr.Slider(-12, 12, value=0, step=1, label="🎵 Pitch Shift", interactive=True)
669
- roformer_override_seg_size = gr.Dropdown(choices=["True", "False"], value="False", label="🔧 Override Segment Size", interactive=True)
670
- roformer_button = gr.Button("✂️ Separate Now!", variant="primary")
671
  with gr.Row():
672
  roformer_stem1 = gr.Audio(label="🎸 Stem 1", type="filepath", interactive=False)
673
  roformer_stem2 = gr.Audio(label="🥁 Stem 2", type="filepath", interactive=False)
674
-
675
- # Auto Ensemble Tab
676
  with gr.Tab("🎚️ Auto Ensemble"):
677
  with gr.Group(elem_classes="dubbing-theme"):
678
- gr.Markdown("### Ensemble Processing")
 
679
  with gr.Row():
680
- ensemble_audio = gr.Audio(label="🎧 Upload Audio", type="filepath", interactive=True)
681
- url_ensemble = gr.Textbox(label="🔗 Or Paste URL", placeholder="YouTube or audio URL", interactive=True)
682
- cookies_ensemble = gr.File(label="🍪 Cookies File", file_types=[".txt"], interactive=True)
683
- download_ensemble = gr.Button("⬇️ Download", variant="secondary")
684
- ensemble_download_status = gr.Textbox(label="📢 Download Status", interactive=False) # Added status output
685
- ensemble_exclude_stems = gr.Textbox(label="🚫 Exclude Stems", placeholder="e.g., vocals, drums (comma-separated)", interactive=True)
686
  with gr.Row():
687
- ensemble_category = gr.Dropdown(label="📚 Category", choices=list(ROFORMER_MODELS.keys()), value="Instrumentals", interactive=True)
688
- ensemble_models = gr.Dropdown(label="🛠️ Models", choices=list(ROFORMER_MODELS["Instrumentals"].keys()), multiselect=True, interactive=True, allow_custom_value=True)
689
  with gr.Row():
690
- ensemble_seg_size = gr.Slider(32, 4000, value=256, step=32, label="📏 Segment Size", interactive=True)
691
- ensemble_overlap = gr.Slider(2, 10, value=8, step=1, label="🔄 Overlap", interactive=True)
692
- ensemble_use_tta = gr.Dropdown(choices=["True", "False"], value="False", label="🔍 Use TTA", interactive=True)
693
- ensemble_method = gr.Dropdown(label="⚙️ Ensemble Method", choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave', 'avg_fft', 'median_fft', 'max_fft', 'min_fft'], value='avg_wave', interactive=True)
694
- ensemble_weights = gr.Textbox(label="⚖️ Weights", placeholder="e.g., 1.0, 1.0 (comma-separated)", interactive=True)
695
- ensemble_button = gr.Button("🎛️ Run Ensemble!", variant="primary")
696
- ensemble_output = gr.Audio(label="🎶 Ensemble Result", type="filepath", interactive=False)
697
- ensemble_status = gr.Textbox(label="📢 Status", interactive=False)
698
-
699
- gr.HTML("<div class='footer'>Powered by Audio-Separator 🌟🎶 | Made with ❤️</div>")
700
-
701
- # Event Handlers
702
  roformer_category.change(update_roformer_models, inputs=[roformer_category], outputs=[roformer_model])
703
  download_roformer.click(
704
  fn=download_audio_wrapper,
705
  inputs=[url_ro, cookies_ro],
706
- outputs=[roformer_audio, roformer_download_status] # Updated to use wrapper and output status
707
  )
708
  roformer_button.click(
709
  fn=roformer_separator,
@@ -718,7 +631,7 @@ def create_interface():
718
  download_ensemble.click(
719
  fn=download_audio_wrapper,
720
  inputs=[url_ensemble, cookies_ensemble],
721
- outputs=[ensemble_audio, ensemble_download_status] # Updated to use wrapper and output status
722
  )
723
  ensemble_button.click(
724
  fn=auto_ensemble_process,
@@ -730,19 +643,17 @@ def create_interface():
730
  ],
731
  outputs=[ensemble_output, ensemble_status]
732
  )
733
-
734
  return app
735
 
736
if __name__ == "__main__":
    # CLI entry point. NOTE: the parser must be constructed here — the updated
    # revision dropped these two lines while still calling parser.parse_args(),
    # which raises NameError on startup.
    parser = argparse.ArgumentParser(description="Music Source Separation Web UI")
    parser.add_argument("--port", type=int, default=7860, help="Port to run the UI on")
    args = parser.parse_args()

    app = create_interface()
    try:
        # Bind on all interfaces; share=True additionally requests a public Gradio link.
        app.launch(server_name="0.0.0.0", server_port=args.port, share=True)
    except Exception as e:
        logger.error(f"Failed to launch app: {e}")
        raise
    finally:
        # Always release Gradio resources, even when launch fails.
        app.close()
 
1
+ # Existing imports
2
  import os
3
  import sys
4
  import torch
 
13
  from ensemble import ensemble_files
14
  import shutil
15
  import gradio_client.utils as client_utils
 
16
  import matchering as mg
17
+ import spaces
18
+ import gdown
19
+ import scipy.io.wavfile
20
+ from pydub import AudioSegment
21
+ import gc
 
 
22
 
23
  # Logging setup
24
  logging.basicConfig(level=logging.INFO)
25
  logger = logging.getLogger(__name__)
26
 
27
+ # Removed the previous CONFIG_DIR constant and ensure_config_dir() (unused)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
# Patch gradio_client's schema parser so boolean / enum / malformed schemas do
# not crash API introspection.
# NOTE: the updated revision removed `from typing import Any, Optional` from the
# file header while the signature below still uses both names, which raises
# NameError at import time — re-import them here so this block is self-contained.
from typing import Any, Optional

original_json_schema_to_python_type = client_utils._json_schema_to_python_type

def patched_json_schema_to_python_type(schema: Any, defs: Optional[dict] = None) -> str:
    """Return a Python type string for *schema*, tolerating odd inputs.

    Handles bare boolean schemas (valid JSON Schema), non-dict schemas, and
    string enums; falls back to "str" when the stock parser raises
    APIInfoParseError instead of propagating the error.
    """
    logger.debug(f"Parsing schema: {schema}")
    if isinstance(schema, bool):
        # JSON Schema permits `true`/`false` as a whole schema.
        return "boolean"
    if not isinstance(schema, dict):
        return "Any"
    if "enum" in schema and schema.get("type") == "string":
        return f"Literal[{', '.join(repr(e) for e in schema['enum'])}]"
    try:
        return original_json_schema_to_python_type(schema, defs)
    except client_utils.APIInfoParseError:
        return "str"

client_utils._json_schema_to_python_type = patched_json_schema_to_python_type
44
 
45
+ # Device setup
46
  device = "cuda" if torch.cuda.is_available() else "cpu"
47
  use_autocast = device == "cuda"
48
  logger.info(f"Using device: {device}")
49
 
50
+
51
  # ROFORMER_MODELS and OUTPUT_FORMATS
52
  ROFORMER_MODELS = {
53
  "Vocals": {
 
304
  }
305
  """
306
 
 
 
 
 
 
 
307
  def download_audio(url, cookie_file=None):
308
  ydl_opts = {
309
  'format': 'bestaudio[ext=webm]/bestaudio[ext=m4a]/bestaudio[ext=opus]/bestaudio[ext=aac]/bestaudio -video',
 
324
  'no_check_certificate': True,
325
  'verbose': True,
326
  }
327
+ temp_output_path = None
328
+ try:
329
+ if 'drive.google.com' in url:
 
 
330
  os.makedirs('ytdl', exist_ok=True)
 
 
331
  file_id = url.split('/d/')[1].split('/')[0]
332
  download_url = f'https://drive.google.com/uc?id={file_id}'
333
+ temp_output_path = 'ytdl/gdrive_temp_audio'
334
  gdown.download(download_url, temp_output_path, quiet=False)
 
335
  if not os.path.exists(temp_output_path):
336
+ return None, "İndirilen dosya bulunamadı", None
337
+ from mimetypes import guess_type
338
+ mime_type, _ = guess_type(temp_output_path)
339
+ if not mime_type or not mime_type.startswith('audio'):
340
+ return None, "İndirilen dosya bir ses dosyası değil", None
341
  output_path = 'ytdl/gdrive_audio.wav'
342
  audio = AudioSegment.from_file(temp_output_path)
343
  audio.export(output_path, format="wav")
 
 
344
  sample_rate, data = wavfile.read(output_path)
345
+ return output_path, "İndirme başarılı", (sample_rate, data)
346
+ else:
347
+ os.makedirs('ytdl', exist_ok=True)
348
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
 
 
 
 
 
 
 
 
 
 
349
  info_dict = ydl.extract_info(url, download=True)
350
  base_file_path = ydl.prepare_filename(info_dict)
351
  file_path = base_file_path
352
  for ext in ['.webm', '.m4a', '.opus', '.aac']:
353
  file_path = file_path.replace(ext, '.wav')
354
  if not os.path.exists(file_path):
355
+ return None, "İndirilen dosya bulunamadı", None
356
  sample_rate, data = wavfile.read(file_path)
357
+ return file_path, "İndirme başarılı", (sample_rate, data)
358
+ except yt_dlp.utils.ExtractorError as e:
359
+ if "Sign in to confirm you’re not a bot" in str(e):
360
+ return None, "Kimlik doğrulama hatası. Lütfen geçerli YouTube çerezleri yükleyin: https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies", None
361
+ return None, f"İndirme hatası: {str(e)}", None
362
+ except Exception as e:
363
+ return None, f"Beklenmeyen hata: {str(e)}", None
364
+ finally:
365
+ if temp_output_path and os.path.exists(temp_output_path):
366
+ os.remove(temp_output_path)
367
+ logger.info(f"Geçici dosya silindi: {temp_output_path}")
368
+
369
  @spaces.GPU
370
  def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, output_dir, out_format, norm_thresh, amp_thresh, batch_size, exclude_stems="", progress=gr.Progress(track_tqdm=True)):
 
371
  if not audio:
372
+ raise ValueError("Ses dosyası sağlanmadı.")
373
+ temp_audio_path = None
 
374
  try:
 
375
  if isinstance(audio, tuple):
376
  sample_rate, data = audio
377
  temp_audio_path = os.path.join("/tmp", "temp_audio.wav")
378
  scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
379
  audio = temp_audio_path
380
+ if seg_size > 512:
381
+ logger.warning(f"Segment boyutu {seg_size} büyük, bu ZeroGPU'da çökmelere neden olabilir.")
382
  override_seg_size = override_seg_size == "True"
 
383
  if os.path.exists(output_dir):
384
  shutil.rmtree(output_dir)
385
  os.makedirs(output_dir, exist_ok=True)
 
386
  base_name = os.path.splitext(os.path.basename(audio))[0]
387
  for category, models in ROFORMER_MODELS.items():
388
  if model_key in models:
389
  model = models[model_key]
390
  break
391
  else:
392
+ raise ValueError(f"Model '{model_key}' bulunamadı.")
393
+ logger.info(f"{base_name} ayrıştırılıyor, model: {model_key}, cihaz: {device}")
 
394
  separator = Separator(
395
  log_level=logging.INFO,
396
  model_file_dir=model_dir,
 
401
  use_autocast=use_autocast,
402
  mdxc_params={"segment_size": seg_size, "override_model_segment_size": override_seg_size, "batch_size": batch_size, "overlap": overlap, "pitch_shift": pitch_shift}
403
  )
404
+ progress(0.2, desc="Model yükleniyor...")
405
  separator.load_model(model_filename=model)
406
+ progress(0.7, desc="Ses ayrıştırılıyor...")
407
  separation = separator.separate(audio)
408
  stems = [os.path.join(output_dir, file_name) for file_name in separation]
 
409
  if exclude_stems.strip():
410
  excluded = [s.strip().lower() for s in exclude_stems.split(',')]
411
  filtered_stems = [stem for stem in stems if not any(ex in os.path.basename(stem).lower() for ex in excluded)]
412
  return filtered_stems[0] if filtered_stems else None, filtered_stems[1] if len(filtered_stems) > 1 else None
413
  return stems[0], stems[1] if len(stems) > 1 else None
414
  except Exception as e:
415
+ logger.error(f"Ayrıştırma hatası: {e}")
416
+ raise RuntimeError(f"Ayrıştırma hatası: {e}")
417
  finally:
 
418
  if temp_audio_path and os.path.exists(temp_audio_path):
419
+ os.remove(temp_audio_path)
420
+ logger.info(f"Geçici dosya silindi: {temp_audio_path}")
421
+ if torch.cuda.is_available():
422
+ torch.cuda.empty_cache()
423
+ logger.info("GPU belleği temizlendi")
424
 
425
  @spaces.GPU
426
  def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str=""):
427
  temp_audio_path = None
428
  chunk_paths = []
429
  try:
430
+ if not audio:
431
+ raise ValueError("Ses dosyası sağlanmadı.")
432
+ if not model_keys:
433
+ raise ValueError("Model seçilmedi.")
434
  if isinstance(audio, tuple):
435
  sample_rate, data = audio
436
  temp_audio_path = os.path.join("/tmp", "temp_audio.wav")
437
  scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
438
  audio = temp_audio_path
 
 
439
  audio_data, sr = librosa.load(audio, sr=None, mono=False)
440
  duration = librosa.get_duration(y=audio_data, sr=sr)
441
+ logger.info(f"Ses süresi: {duration:.2f} saniye")
442
+ chunk_duration = 300
 
 
443
  chunks = []
444
  if duration > 900:
445
+ logger.info(f"Ses 15 dakikadan uzun, {chunk_duration}-saniyelik parçalara bölünüyor")
446
  num_chunks = int(np.ceil(duration / chunk_duration))
447
  for i in range(num_chunks):
448
  start = i * chunk_duration * sr
 
452
  sf.write(chunk_path, chunk_data.T if audio_data.ndim == 2 else chunk_data, sr)
453
  chunks.append(chunk_path)
454
  chunk_paths.append(chunk_path)
455
+ logger.info(f"Parça {i} oluşturuldu: {chunk_path}")
456
  else:
457
  chunks = [audio]
 
458
  use_tta = use_tta == "True"
 
 
459
  if os.path.exists(output_dir):
460
  shutil.rmtree(output_dir)
461
  os.makedirs(output_dir, exist_ok=True)
 
462
  base_name = os.path.splitext(os.path.basename(audio))[0]
463
+ logger.info(f"{base_name} için birleştirme, modeller: {model_keys}, cihaz: {device}")
 
464
  all_stems = []
465
+ model_stems = {}
 
466
  for model_key in model_keys:
467
  model_stems[model_key] = {"vocals": [], "other": []}
468
  for category, models in ROFORMER_MODELS.items():
 
470
  model = models[model_key]
471
  break
472
  else:
473
+ logger.warning(f"Model {model_key} bulunamadı, atlanıyor")
474
  continue
 
475
  for chunk_idx, chunk_path in enumerate(chunks):
476
  separator = Separator(
477
  log_level=logging.INFO,
 
483
  use_autocast=use_autocast,
484
  mdxc_params={"segment_size": seg_size, "overlap": overlap, "use_tta": use_tta, "batch_size": batch_size}
485
  )
486
+ logger.info(f"Chunk {chunk_idx} için {model_key} yükleniyor")
487
  separator.load_model(model_filename=model)
488
+ logger.info(f"Chunk {chunk_idx} {model_key} ile ayrıştırılıyor")
489
  separation = separator.separate(chunk_path)
490
  stems = [os.path.join(output_dir, file_name) for file_name in separation]
 
 
491
  for stem in stems:
492
  if "vocals" in os.path.basename(stem).lower():
493
  model_stems[model_key]["vocals"].append(stem)
494
  elif "other" in os.path.basename(stem).lower():
495
  model_stems[model_key]["other"].append(stem)
 
 
496
  separator = None
497
  gc.collect()
498
  if torch.cuda.is_available():
499
  torch.cuda.empty_cache()
500
+ logger.info(f"{model_key} chunk {chunk_idx} sonrası CUDA belleği temizlendi")
 
 
501
  for model_key, stems_dict in model_stems.items():
502
  for stem_type in ["vocals", "other"]:
503
  if stems_dict[stem_type]:
504
  combined_path = os.path.join(output_dir, f"{base_name}_{stem_type}_{model_key.replace(' | ', '_').replace(' ', '_')}.wav")
505
+ with sf.SoundFile(combined_path, 'w', sr, channels=2 if audio_data.ndim == 2 else 1) as f:
506
+ for stem_path in stems_dict[stem_type]:
507
+ data, _ = librosa.load(stem_path, sr=sr, mono=False)
508
+ f.write(data.T if data.ndim == 2 else data)
509
+ logger.info(f"{model_key} için {stem_type} birleştirildi: {combined_path}")
 
 
510
  if exclude_stems.strip() and stem_type.lower() in [s.strip().lower() for s in exclude_stems.split(',')]:
511
+ logger.info(f"{model_key} için {stem_type} hariç tutuldu")
512
  continue
513
  all_stems.append(combined_path)
514
+ all_stems = [stem for stem in all_stems if os.path.exists(stem)]
515
  if not all_stems:
516
+ raise ValueError("Birleştirme için geçerli stem dosyası bulunamadı.")
 
 
517
  weights = [float(w.strip()) for w in weights_str.split(',')] if weights_str.strip() else [1.0] * len(all_stems)
518
  if len(weights) != len(all_stems):
519
  weights = [1.0] * len(all_stems)
520
+ logger.info("Ağırlıklar eşleşmedi, varsayılan 1.0 kullanıldı")
521
  output_file = os.path.join(output_dir, f"{base_name}_ensemble_{ensemble_method}.{out_format}")
522
  ensemble_args = [
523
  "--files", *all_stems,
 
525
  "--weights", *[str(w) for w in weights],
526
  "--output", output_file
527
  ]
528
+ logger.info(f"Birleştirme argümanları: {ensemble_args}")
529
  ensemble_files(ensemble_args)
530
+ logger.info("Birleştirme tamamlandı")
531
+ return output_file, f"Birleştirme {ensemble_method} ile tamamlandı, hariç tutulan: {exclude_stems if exclude_stems else 'Yok'}"
 
532
  except Exception as e:
533
+ logger.error(f"Birleştirme hatası: {e}")
534
+ raise RuntimeError(f"Birleştirme hatası: {e}")
535
  finally:
 
536
  for path in chunk_paths + ([temp_audio_path] if temp_audio_path and os.path.exists(temp_audio_path) else []):
537
  try:
538
  if os.path.exists(path):
539
  os.remove(path)
540
+ logger.info(f"Geçici dosya silindi: {path}")
541
  except Exception as e:
542
+ logger.warning(f"Geçici dosya silinemedi {path}: {e}")
543
+ if torch.cuda.is_available():
544
+ torch.cuda.empty_cache()
545
+ logger.info("GPU belleği temizlendi")
 
 
 
546
 
547
+ # Existing update_roformer_models and update_ensemble_models (unchanged)
 
 
 
 
548
 
549
def download_audio_wrapper(url, cookie_file):
    """Adapter for the Gradio download buttons.

    Fetches audio via ``download_audio`` and returns only what the UI
    components consume: the audio payload (for the gr.Audio widget) and
    a human-readable status string (for the status gr.Textbox).
    """
    # download_audio yields (file_path, status, audio_data); the local file
    # path is not surfaced to the interface, so it is deliberately discarded.
    _, status_message, audio_payload = download_audio(url, cookie_file)
    return audio_payload, status_message
552
 
553
  def create_interface():
554
  with gr.Blocks(title="🎵 SESA Fast Separation 🎵", css=CSS, elem_id="app-container") as app:
555
  gr.Markdown("<h1 class='header-text'>🎵 SESA Fast Separation 🎵</h1>")
556
+ gr.Markdown("**Not**: YouTube indirmeleri başarısız olursa, doğrudan bir ses dosyası yükleyin veya geçerli bir çerez dosyası kullanın. [Çerez Talimatları](https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies)")
557
+ gr.Markdown("**Uyarı**: 15 dakikadan uzun ses dosyaları otomatik olarak parçalara bölünür, bu işlem daha fazla zaman ve kaynak gerektirebilir.")
558
  with gr.Tabs():
 
559
  with gr.Tab("⚙️ Settings"):
560
  with gr.Group(elem_classes="dubbing-theme"):
561
+ gr.Markdown("### Genel Ayarlar")
562
+ model_file_dir = gr.Textbox(value="/tmp/audio-separator-models/", label="📂 Model Önbelleği", placeholder="Model dizini yolu", interactive=True)
563
+ output_dir = gr.Textbox(value="output", label="📤 Çıkış Dizini", placeholder="Sonuçların kaydedileceği yer", interactive=True)
564
+ output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMATS, label="🎶 Çıkış Formatı", interactive=True)
565
+ norm_threshold = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="🔊 Normalizasyon Eşiği", interactive=True)
566
+ amp_threshold = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="📈 Amplifikasyon Eşiği", interactive=True)
567
+ batch_size = gr.Slider(1, 16, value=1, step=1, label="⚡ Batch Boyutu", interactive=True)
 
 
568
  with gr.Tab("🎤 Roformer"):
569
  with gr.Group(elem_classes="dubbing-theme"):
570
+ gr.Markdown("### Ses Ayrıştırma")
571
  with gr.Row():
572
+ roformer_audio = gr.Audio(label="🎧 Ses Yükle", type="filepath", interactive=True)
573
+ url_ro = gr.Textbox(label="🔗 Veya URL Yapıştır", placeholder="YouTube veya ses URL'si", interactive=True)
574
+ cookies_ro = gr.File(label="🍪 Çerez Dosyası", file_types=[".txt"], interactive=True)
575
+ download_roformer = gr.Button("⬇️ İndir", variant="secondary")
576
+ roformer_download_status = gr.Textbox(label="📢 İndirme Durumu", interactive=False)
577
+ roformer_exclude_stems = gr.Textbox(label="🚫 Stem'leri Hariç Tut", placeholder="örn: vocals, drums (virgülle ayrılmış)", interactive=True)
578
  with gr.Row():
579
+ roformer_category = gr.Dropdown(label="📚 Kategori", choices=list(ROFORMER_MODELS.keys()), value="General Purpose", interactive=True)
580
  roformer_model = gr.Dropdown(label="🛠️ Model", choices=list(ROFORMER_MODELS["General Purpose"].keys()), interactive=True, allow_custom_value=True)
581
  with gr.Row():
582
+ roformer_seg_size = gr.Slider(32, 4000, value=256, step=32, label="📏 Segment Boyutu", interactive=True)
583
+ roformer_overlap = gr.Slider(2, 10, value=8, step=1, label="🔄 Örtüşme", interactive=True)
584
  with gr.Row():
585
+ roformer_pitch_shift = gr.Slider(-12, 12, value=0, step=1, label="🎵 Perde Kaydırma", interactive=True)
586
+ roformer_override_seg_size = gr.Dropdown(choices=["True", "False"], value="False", label="🔧 Segment Boyutunu Geçersiz Kıl", interactive=True)
587
+ roformer_button = gr.Button("✂️ Şimdi Ayır!", variant="primary")
588
  with gr.Row():
589
  roformer_stem1 = gr.Audio(label="🎸 Stem 1", type="filepath", interactive=False)
590
  roformer_stem2 = gr.Audio(label="🥁 Stem 2", type="filepath", interactive=False)
 
 
591
  with gr.Tab("🎚️ Auto Ensemble"):
592
  with gr.Group(elem_classes="dubbing-theme"):
593
+ gr.Markdown("### Birleştirme İşlemi")
594
+ gr.Markdown("Not: Ağırlıklar belirtilmezse, tüm modellere eşit ağırlık (1.0) uygulanır.")
595
  with gr.Row():
596
+ ensemble_audio = gr.Audio(label="🎧 Ses Yükle", type="filepath", interactive=True)
597
+ url_ensemble = gr.Textbox(label="🔗 Veya URL Yapıştır", placeholder="YouTube veya ses URL'si", interactive=True)
598
+ cookies_ensemble = gr.File(label="🍪 Çerez Dosyası", file_types=[".txt"], interactive=True)
599
+ download_ensemble = gr.Button("⬇️ İndir", variant="secondary")
600
+ ensemble_download_status = gr.Textbox(label="📢 İndirme Durumu", interactive=False)
601
+ ensemble_exclude_stems = gr.Textbox(label="🚫 Stem'leri Hariç Tut", placeholder="örn: vocals, drums (virgülle ayrılmış)", interactive=True)
602
  with gr.Row():
603
+ ensemble_category = gr.Dropdown(label="📚 Kategori", choices=list(ROFORMER_MODELS.keys()), value="Instrumentals", interactive=True)
604
+ ensemble_models = gr.Dropdown(label="🛠️ Modeller", choices=list(ROFORMER_MODELS["Instrumentals"].keys()), multiselect=True, interactive=True, allow_custom_value=True)
605
  with gr.Row():
606
+ ensemble_seg_size = gr.Slider(32, 4000, value=256, step=32, label="📏 Segment Boyutu", interactive=True)
607
+ ensemble_overlap = gr.Slider(2, 10, value=8, step=1, label="🔄 Örtüşme", interactive=True)
608
+ ensemble_use_tta = gr.Dropdown(choices=["True", "False"], value="False", label="🔍 TTA Kullan", interactive=True)
609
+ ensemble_method = gr.Dropdown(label="⚙️ Birleştirme Yöntemi", choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave', 'avg_fft', 'median_fft', 'max_fft', 'min_fft'], value='avg_wave', interactive=True)
610
+ ensemble_weights = gr.Textbox(label="⚖️ Ağırlıklar", placeholder="örn: 1.0, 1.0 (virgülle ayrılmış)", interactive=True)
611
+ ensemble_button = gr.Button("🎛️ Birleştirme Çalıştır!", variant="primary")
612
+ ensemble_output = gr.Audio(label="🎶 Birleştirme Sonucu", type="filepath", interactive=False)
613
+ ensemble_status = gr.Textbox(label="📢 Durum", interactive=False)
614
+ gr.HTML("<div class='footer'>Audio-Separator ile Güçlendirildi 🌟🎶 | ❤️ ile yapıldı</div>")
 
 
 
615
  roformer_category.change(update_roformer_models, inputs=[roformer_category], outputs=[roformer_model])
616
  download_roformer.click(
617
  fn=download_audio_wrapper,
618
  inputs=[url_ro, cookies_ro],
619
+ outputs=[roformer_audio, roformer_download_status]
620
  )
621
  roformer_button.click(
622
  fn=roformer_separator,
 
631
  download_ensemble.click(
632
  fn=download_audio_wrapper,
633
  inputs=[url_ensemble, cookies_ensemble],
634
+ outputs=[ensemble_audio, ensemble_download_status]
635
  )
636
  ensemble_button.click(
637
  fn=auto_ensemble_process,
 
643
  ],
644
  outputs=[ensemble_output, ensemble_status]
645
  )
 
646
  return app
647
 
648
if __name__ == "__main__":
    # CLI entry point: parse the serving port, build the Gradio app, launch it,
    # and make sure server resources are released on exit.
    parser = argparse.ArgumentParser(description="Müzik Kaynak Ayrıştırma Web Arayüzü")
    parser.add_argument("--port", type=int, default=7860, help="Arayüzün çalışacağı port")
    args = parser.parse_args()

    app = create_interface()
    try:
        # server_name="0.0.0.0" binds on all interfaces; share=True additionally
        # requests a temporary public Gradio share link.
        app.launch(server_name="0.0.0.0", server_port=args.port, share=True)
    except Exception as e:
        # Log the launch failure ("Interface could not be started"), then
        # re-raise so the process exits with a non-zero status.
        logger.error(f"Arayüz başlatılamadı: {e}")
        raise
    finally:
        # Always close the Blocks app, whether launch succeeded or failed.
        app.close()