from typing import Optional, Any
import os
import sys
import torch
import logging
import yt_dlp
from yt_dlp import YoutubeDL
import gradio as gr
import argparse
from audio_separator.separator import Separator
import numpy as np
import librosa
import soundfile as sf
from ensemble import ensemble_files
import shutil
import gradio_client.utils as client_utils
import matchering as mg
import spaces
import gdown
from pydub import AudioSegment
import gc
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
import scipy.io.wavfile
# Logging setup: configure the root logger at INFO level and create the
# module-level logger used by every function in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Gradio JSON schema patch: keep a handle on the stock converter so the
# replacement defined below can delegate to it.
original_json_schema_to_python_type = client_utils._json_schema_to_python_type


def patched_json_schema_to_python_type(schema: Any, defs: Optional[dict] = None) -> str:
    """Render a JSON schema as a Python type string, tolerating unusual schemas.

    Args:
        schema: The schema node to convert. Booleans and other non-dict
            values are handled here instead of being passed on.
        defs: Optional definitions mapping forwarded to the original converter.

    Returns:
        "boolean" for a boolean schema, "Any" for any other non-dict value,
        a ``Literal[...]`` string for string enums, the original converter's
        result otherwise, or "str" when the original converter raises
        ``APIInfoParseError``.
    """
    logger.debug(f"Parsing schema: {schema}")

    if isinstance(schema, bool):
        logger.info("Found boolean schema, returning 'boolean'")
        return "boolean"

    if not isinstance(schema, dict):
        logger.warning(f"Unexpected schema type: {type(schema)}, returning 'Any'")
        return "Any"

    is_string_enum = schema.get("type") == "string" and "enum" in schema
    if is_string_enum:
        enum_values = schema["enum"]
        logger.info(f"Handling enum schema: {enum_values}")
        rendered = ", ".join([repr(value) for value in enum_values])
        return f"Literal[{rendered}]"

    try:
        type_name = original_json_schema_to_python_type(schema, defs)
    except client_utils.APIInfoParseError as e:
        # Fall back to a plain string type rather than failing API generation.
        logger.error(f"Failed to parse schema {schema}: {e}")
        type_name = "str"
    return type_name


# Install the tolerant converter in place of the stock one.
client_utils._json_schema_to_python_type = patched_json_schema_to_python_type
# Device setup: run on CUDA when torch reports it as available, otherwise on
# the CPU. Autocast is only requested for the CUDA case.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
use_autocast = (device == "cuda")
logger.info(f"Using device: {device}")
# ROFORMER_MODELS maps a category name to a dict of
# {model display name: checkpoint filename}. The display name is the key the
# separation functions receive; the filename is what they pass to
# Separator.load_model(model_filename=...).
ROFORMER_MODELS = {
# Vocal extraction models.
"Vocals": {
'MelBand Roformer | Big Beta 6X by unwa': 'melband_roformer_big_beta6x.ckpt',
'MelBand Roformer Kim | Big Beta 4 FT by unwa': 'melband_roformer_big_beta4.ckpt',
'MelBand Roformer Kim | Big Beta 5e FT by unwa': 'melband_roformer_big_beta5e.ckpt',
'MelBand Roformer | Big Beta 6 by unwa': 'melband_roformer_big_beta6.ckpt',
'MelBand Roformer | Vocals by Kimberley Jensen': 'vocals_mel_band_roformer.ckpt',
'MelBand Roformer Kim | FT 3 by unwa': 'mel_band_roformer_kim_ft3_unwa.ckpt',
'MelBand Roformer Kim | FT by unwa': 'mel_band_roformer_kim_ft_unwa.ckpt',
'MelBand Roformer Kim | FT 2 by unwa': 'mel_band_roformer_kim_ft2_unwa.ckpt',
'MelBand Roformer Kim | FT 2 Bleedless by unwa': 'mel_band_roformer_kim_ft2_bleedless_unwa.ckpt',
'MelBand Roformer | Vocals by becruily': 'mel_band_roformer_vocals_becruily.ckpt',
'MelBand Roformer | Vocals Fullness by Aname': 'mel_band_roformer_vocal_fullness_aname.ckpt',
'BS Roformer | Vocals by Gabox': 'bs_roformer_vocals_gabox.ckpt',
'MelBand Roformer | Vocals by Gabox': 'mel_band_roformer_vocals_gabox.ckpt',
'MelBand Roformer | Vocals FV1 by Gabox': 'mel_band_roformer_vocals_fv1_gabox.ckpt',
'MelBand Roformer | Vocals FV2 by Gabox': 'mel_band_roformer_vocals_fv2_gabox.ckpt',
'MelBand Roformer | Vocals FV3 by Gabox': 'mel_band_roformer_vocals_fv3_gabox.ckpt',
'MelBand Roformer | Vocals FV4 by Gabox': 'mel_band_roformer_vocals_fv4_gabox.ckpt',
'BS Roformer | Chorus Male-Female by Sucial': 'model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt',
'BS Roformer | Male-Female by aufr33': 'bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt',
},
# Instrumental extraction models.
"Instrumentals": {
'MelBand Roformer | FVX by Gabox': 'mel_band_roformer_instrumental_fvx_gabox.ckpt',
'MelBand Roformer | INSTV8N by Gabox': 'mel_band_roformer_instrumental_instv8n_gabox.ckpt',
'MelBand Roformer | INSTV8 by Gabox': 'mel_band_roformer_instrumental_instv8_gabox.ckpt',
'MelBand Roformer | INSTV7N by Gabox': 'mel_band_roformer_instrumental_instv7n_gabox.ckpt',
'MelBand Roformer | Instrumental Bleedless V3 by Gabox': 'mel_band_roformer_instrumental_bleedless_v3_gabox.ckpt',
'MelBand Roformer Kim | Inst V1 (E) Plus by Unwa': 'melband_roformer_inst_v1e_plus.ckpt',
'MelBand Roformer Kim | Inst V1 Plus by Unwa': 'melband_roformer_inst_v1_plus.ckpt',
'MelBand Roformer Kim | Inst V1 by Unwa': 'melband_roformer_inst_v1.ckpt',
'MelBand Roformer Kim | Inst V1 (E) by Unwa': 'melband_roformer_inst_v1e.ckpt',
'MelBand Roformer Kim | Inst V2 by Unwa': 'melband_roformer_inst_v2.ckpt',
'MelBand Roformer | Instrumental by becruily': 'mel_band_roformer_instrumental_becruily.ckpt',
'MelBand Roformer | Instrumental by Gabox': 'mel_band_roformer_instrumental_gabox.ckpt',
'MelBand Roformer | Instrumental 2 by Gabox': 'mel_band_roformer_instrumental_2_gabox.ckpt',
'MelBand Roformer | Instrumental 3 by Gabox': 'mel_band_roformer_instrumental_3_gabox.ckpt',
'MelBand Roformer | Instrumental Bleedless V1 by Gabox': 'mel_band_roformer_instrumental_bleedless_v1_gabox.ckpt',
'MelBand Roformer | Instrumental Bleedless V2 by Gabox': 'mel_band_roformer_instrumental_bleedless_v2_gabox.ckpt',
'MelBand Roformer | Instrumental Fullness V1 by Gabox': 'mel_band_roformer_instrumental_fullness_v1_gabox.ckpt',
'MelBand Roformer | Instrumental Fullness V2 by Gabox': 'mel_band_roformer_instrumental_fullness_v2_gabox.ckpt',
'MelBand Roformer | Instrumental Fullness V3 by Gabox': 'mel_band_roformer_instrumental_fullness_v3_gabox.ckpt',
'MelBand Roformer | Instrumental Fullness Noisy V4 by Gabox': 'mel_band_roformer_instrumental_fullness_noise_v4_gabox.ckpt',
'MelBand Roformer | INSTV5 by Gabox': 'mel_band_roformer_instrumental_instv5_gabox.ckpt',
'MelBand Roformer | INSTV5N by Gabox': 'mel_band_roformer_instrumental_instv5n_gabox.ckpt',
'MelBand Roformer | INSTV6 by Gabox': 'mel_band_roformer_instrumental_instv6_gabox.ckpt',
'MelBand Roformer | INSTV6N by Gabox': 'mel_band_roformer_instrumental_instv6n_gabox.ckpt',
'MelBand Roformer | INSTV7 by Gabox': 'mel_band_roformer_instrumental_instv7_gabox.ckpt',
},
# NOTE(review): the V2 filename below is spelled "instvox" while V1 uses
# "instvoc" - confirm against the published checkpoint name before changing.
"InstVoc Duality": {
'MelBand Roformer Kim | InstVoc Duality V1 by Unwa': 'melband_roformer_instvoc_duality_v1.ckpt',
'MelBand Roformer Kim | InstVoc Duality V2 by Unwa': 'melband_roformer_instvox_duality_v2.ckpt',
},
# Reverb / echo removal models.
"De-Reverb": {
'BS-Roformer-De-Reverb': 'deverb_bs_roformer_8_384dim_10depth.ckpt',
'MelBand Roformer | De-Reverb by anvuew': 'dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt',
'MelBand Roformer | De-Reverb Less Aggressive by anvuew': 'dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt',
'MelBand Roformer | De-Reverb Mono by anvuew': 'dereverb_mel_band_roformer_mono_anvuew.ckpt',
'MelBand Roformer | De-Reverb Big by Sucial': 'dereverb_big_mbr_ep_362.ckpt',
'MelBand Roformer | De-Reverb Super Big by Sucial': 'dereverb_super_big_mbr_ep_346.ckpt',
'MelBand Roformer | De-Reverb-Echo by Sucial': 'dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt',
'MelBand Roformer | De-Reverb-Echo V2 by Sucial': 'dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt',
'MelBand Roformer | De-Reverb-Echo Fused by Sucial': 'dereverb_echo_mbr_fused.ckpt',
},
# Noise and bleed suppression models.
"Denoise": {
'Mel-Roformer-Denoise-Aufr33': 'denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt',
'Mel-Roformer-Denoise-Aufr33-Aggr': 'denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt',
'MelBand Roformer | Denoise-Debleed by Gabox': 'mel_band_roformer_denoise_debleed_gabox.ckpt',
'MelBand Roformer | Bleed Suppressor V1 by unwa-97chris': 'mel_band_roformer_bleed_suppressor_v1.ckpt',
},
# Karaoke models.
"Karaoke": {
'Mel-Roformer-Karaoke-Aufr33-Viperx': 'mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt',
'MelBand Roformer | Karaoke by Gabox': 'mel_band_roformer_karaoke_gabox.ckpt',
'MelBand Roformer | Karaoke by becruily': 'mel_band_roformer_karaoke_becruily.ckpt',
},
# Everything that does not fit one of the categories above.
"General Purpose": {
'BS-Roformer-Viperx-1297': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
'BS-Roformer-Viperx-1296': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
'BS-Roformer-Viperx-1053': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
'Mel-Roformer-Viperx-1143': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt',
'Mel-Roformer-Crowd-Aufr33-Viperx': 'mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt',
'MelBand Roformer Kim | SYHFT by SYH99999': 'MelBandRoformerSYHFT.ckpt',
'MelBand Roformer Kim | SYHFT V2 by SYH99999': 'MelBandRoformerSYHFTV2.ckpt',
'MelBand Roformer Kim | SYHFT V2.5 by SYH99999': 'MelBandRoformerSYHFTV2.5.ckpt',
'MelBand Roformer Kim | SYHFT V3 by SYH99999': 'MelBandRoformerSYHFTV3Epsilon.ckpt',
'MelBand Roformer Kim | Big SYHFT V1 by SYH99999': 'MelBandRoformerBigSYHFTV1.ckpt',
'MelBand Roformer | Aspiration by Sucial': 'aspiration_mel_band_roformer_sdr_18.9845.ckpt',
'MelBand Roformer | Aspiration Less Aggressive by Sucial': 'aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt',
}
}
# Output container formats offered in the "Output Format" dropdown.
OUTPUT_FORMATS = ['wav', 'flac', 'mp3', 'ogg', 'opus', 'm4a', 'aiff', 'ac3']
# Custom stylesheet handed to gr.Blocks(css=CSS) when the interface is built.
# The string content is runtime data and must be kept verbatim.
CSS = """
body {
background: linear-gradient(to bottom, rgba(45, 11, 11, 0.9), rgba(0, 0, 0, 0.8)), url('/content/logo.jpg') no-repeat center center fixed;
background-size: cover;
min-height: 100vh;
margin: 0;
padding: 1rem;
font-family: 'Poppins', sans-serif;
color: #C0C0C0;
overflow-x: hidden;
}
.header-text {
text-align: center;
padding: 100px 20px 20px;
color: #ff4040;
font-size: 3rem;
font-weight: 900;
text-shadow: 0 0 10px rgba(255, 64, 64, 0.5);
z-index: 1500;
animation: text-glow 2s infinite;
}
.header-subtitle {
text-align: center;
color: #C0C0C0;
font-size: 1.2rem;
font-weight: 300;
margin-top: -10px;
text-shadow: 0 0 5px rgba(255, 64, 64, 0.3);
}
.gr-tab {
background: rgba(128, 0, 0, 0.5) !important;
border-radius: 12px 12px 0 0 !important;
margin: 0 5px !important;
color: #C0C0C0 !important;
border: 1px solid #ff4040 !important;
z-index: 1500;
transition: background 0.3s ease, color 0.3s ease;
padding: 10px 20px !important;
font-size: 1.1rem !important;
}
button {
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
background: #800000 !important;
border: 1px solid #ff4040 !important;
color: #C0C0C0 !important;
border-radius: 8px !important;
padding: 8px 16px !important;
box-shadow: 0 2px 10px rgba(255, 64, 64, 0.3);
}
button:hover {
transform: scale(1.05) !important;
box-shadow: 0 10px 40px rgba(255, 64, 64, 0.7) !important;
background: #ff4040 !important;
}
.compact-upload.horizontal {
display: inline-flex !important;
align-items: center !important;
gap: 8px !important;
max-width: 400px !important;
height: 40px !important;
padding: 0 12px !important;
border: 1px solid #ff4040 !important;
background: rgba(128, 0, 0, 0.5) !important;
border-radius: 8px !important;
}
.compact-dropdown {
padding: 8px 12px !important;
border-radius: 8px !important;
border: 2px solid #ff6b6b !important;
background: rgba(46, 26, 71, 0.7) !important;
color: #e0e0e0 !important;
width: 100%;
font-size: 1rem !important;
transition: border-color 0.3s ease, box-shadow 0.3s ease !important;
position: relative;
z-index: 100;
}
.compact-dropdown:hover {
border-color: #ff8787 !important;
box-shadow: 0 2px 8px rgba(255, 107, 107, 0.4) !important;
}
.compact-dropdown select, .compact-dropdown .gr-dropdown {
background: transparent !important;
color: #e0e0e0 !important;
border: none !important;
width: 100% !important;
padding: 8px !important;
font-size: 1rem !important;
appearance: none !important;
-webkit-appearance: none !important;
-moz-appearance: none !important;
}
.compact-dropdown .gr-dropdown-menu {
background: rgba(46, 26, 71, 0.95) !important;
border: 2px solid #ff6b6b !important;
border-radius: 8px !important;
color: #e0e0e0 !important;
max-height: 300px !important;
overflow-y: auto !important;
z-index: 300 !important;
width: 100% !important;
opacity: 1 !important;
visibility: visible !important;
position: absolute !important;
top: 100% !important;
left: 0 !important;
pointer-events: auto !important;
}
.compact-dropdown:hover .gr-dropdown-menu {
display: block !important;
}
.compact-dropdown .gr-dropdown-menu option {
padding: 8px !important;
color: #e0e0e0 !important;
background: transparent !important;
}
.compact-dropdown .gr-dropdown-menu option:hover {
background: rgba(255, 107, 107, 0.3) !important;
}
#custom-progress {
margin-top: 10px;
padding: 10px;
background: rgba(128, 0, 0, 0.3);
border-radius: 8px;
border: 1px solid #ff4040;
}
#progress-bar {
height: 20px;
background: linear-gradient(to right, #6e8efb, #ff4040);
border-radius: 5px;
transition: width 0.5s ease-in-out;
max-width: 100% !important;
}
.gr-accordion {
background: rgba(128, 0, 0, 0.5) !important;
border-radius: 10px !important;
border: 1px solid #ff4040 !important;
}
.footer {
text-align: center;
padding: 20px;
color: #ff4040;
font-size: 14px;
margin-top: 40px;
background: rgba(128, 0, 0, 0.3);
border-top: 1px solid #ff4040;
}
#log-accordion {
max-height: 400px;
overflow-y: auto;
background: rgba(0, 0, 0, 0.7) !important;
padding: 10px;
border-radius: 8px;
}
@keyframes text-glow {
0% { text-shadow: 0 0 5px rgba(192, 192, 192, 0); }
50% { text-shadow: 0 0 15px rgba(192, 192, 192, 1); }
100% { text-shadow: 0 0 5px rgba(192, 192, 192, 0); }
}
"""
def _extract_gdrive_file_id(url):
    """Return the file id embedded in a Google Drive URL, or None if absent.

    Recognises both ``.../d/<id>/...`` style links and ``...?id=<id>`` links.
    """
    import re  # local import: ``re`` is not imported at file level

    for pattern in (r"/d/([\w-]+)", r"[?&]id=([\w-]+)"):
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None


def download_audio(url, cookie_file=None):
    """Download audio from a Google Drive link or any yt-dlp supported URL.

    The audio is stored as a WAV file under ``ytdl/`` and read back into
    memory.

    Args:
        url: Source URL. URLs containing ``drive.google.com`` are fetched
            with gdown and converted with pydub; everything else goes
            through yt-dlp.
        cookie_file: Optional cookies file for yt-dlp. May be an object with
            a ``name`` attribute holding the path, or the path itself.

    Returns:
        A ``(file_path, status_message, audio)`` tuple where ``audio`` is
        ``(sample_rate, data)`` as returned by ``scipy.io.wavfile.read``.
        On failure ``file_path`` and ``audio`` are None and
        ``status_message`` describes the problem. Errors are reported
        through the status message rather than raised.
    """
    if not url or not url.strip():
        return None, "No URL provided", None
    url = url.strip()

    temp_output_path = None
    try:
        if 'drive.google.com' in url:
            file_id = _extract_gdrive_file_id(url)
            if file_id is None:
                return None, "Invalid Google Drive URL: could not find a file id", None
            os.makedirs('ytdl', exist_ok=True)
            download_url = f'https://drive.google.com/uc?id={file_id}'
            temp_output_path = 'ytdl/gdrive_temp_audio'
            gdown.download(download_url, temp_output_path, quiet=False)
            if not os.path.exists(temp_output_path):
                return None, "Downloaded file not found", None
            output_path = 'ytdl/gdrive_audio.wav'
            try:
                audio = AudioSegment.from_file(temp_output_path)
                audio.export(output_path, format="wav")
            except Exception as e:
                return None, f"Failed to process Google Drive file as audio: {str(e)}. Ensure the file contains audio (e.g., MP3, WAV, or video with audio track).", None
            sample_rate, data = scipy.io.wavfile.read(output_path)
            return output_path, "Download and audio conversion successful", (sample_rate, data)

        # Accept either an uploaded-file object exposing ``.name`` or a path.
        cookie_path = getattr(cookie_file, 'name', cookie_file) if cookie_file else None
        ydl_opts = {
            'format': 'bestaudio[ext=webm]/bestaudio[ext=m4a]/bestaudio[ext=opus]/bestaudio[ext=aac]/bestaudio -video',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'wav',
                'preferredquality': '192',
            }],
            'outtmpl': 'ytdl/%(title)s.%(ext)s',
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'geo_bypass': True,
            'force_ipv4': True,
            'referer': 'https://www.youtube.com/',
            'noplaylist': True,
            'cookiefile': cookie_path,
            'extractor_retries': 5,
            'ignoreerrors': False,
            'no_check_certificate': True,
            'verbose': True,
        }
        os.makedirs('ytdl', exist_ok=True)
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            base_file_path = ydl.prepare_filename(info_dict)
        # The post-processor writes a .wav next to the download whatever the
        # source extension was, so swap only the final extension.
        file_path = os.path.splitext(base_file_path)[0] + '.wav'
        if not os.path.exists(file_path):
            return None, "Downloaded file not found", None
        sample_rate, data = scipy.io.wavfile.read(file_path)
        return file_path, "Download successful", (sample_rate, data)
    except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as e:
        # extract_info() surfaces extractor failures as DownloadError. Match
        # on the apostrophe-free prefix so the check does not depend on how
        # the typographic apostrophe in the message is encoded.
        if "Sign in to confirm" in str(e):
            return None, "Authentication error. Please upload valid YouTube cookies: https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies", None
        return None, f"Download error: {str(e)}", None
    except Exception as e:
        return None, f"Unexpected error: {str(e)}", None
    finally:
        # Remove the raw Google Drive download once it has been converted.
        if temp_output_path and os.path.exists(temp_output_path):
            os.remove(temp_output_path)
            logger.info(f"Temporary file deleted: {temp_output_path}")
@spaces.GPU
def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, output_dir, out_format, norm_thresh, amp_thresh, batch_size, exclude_stems="", progress=gr.Progress(track_tqdm=True)):
    """Separate one audio input into stems with a single Roformer model.

    Args:
        audio: Path to an audio file, or a ``(sample_rate, data)`` tuple that
            is first written to a temporary WAV file.
        model_key: Display name of a model listed in ``ROFORMER_MODELS``.
        seg_size, overlap, pitch_shift, batch_size: Forwarded to the
            separator through ``mdxc_params``.
        override_seg_size: ``"True"``/``"False"`` (or a bool) controlling
            ``override_model_segment_size``.
        model_dir: Directory handed to the separator as ``model_file_dir``.
        output_dir: Output directory. It is deleted and recreated on every
            call.
        out_format: Output format passed to the separator.
        norm_thresh, amp_thresh: Normalization / amplification thresholds
            passed to the separator.
        exclude_stems: Comma-separated substrings; stems whose file name
            contains any of them (case-insensitive) are dropped.
        progress: Gradio progress reporter.

    Returns:
        ``(stem1, stem2, file_list)``: the first two remaining stem paths
        (None when missing) and the list of all remaining stem paths.

    Raises:
        ValueError: If no audio is provided.
        RuntimeError: If anything fails during separation, including an
            unknown ``model_key``.
    """
    if not audio:
        raise ValueError("No audio file provided.")
    temp_audio_path = None
    try:
        if isinstance(audio, tuple):
            sample_rate, data = audio
            temp_audio_path = os.path.join("/tmp", "temp_audio.wav")
            scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
            audio = temp_audio_path
        if seg_size > 512:
            logger.warning(f"Segment size {seg_size} is large, this may cause issues.")
        # Accept both the string sent by the UI and a real boolean.
        override_seg_size = str(override_seg_size) == "True"
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir, exist_ok=True)
        base_name = os.path.splitext(os.path.basename(audio))[0]
        for category, models in ROFORMER_MODELS.items():
            if model_key in models:
                model = models[model_key]
                break
        else:
            raise ValueError(f"Model '{model_key}' not found.")
        logger.info(f"Separating {base_name} with {model_key} on {device}")
        separator = Separator(
            log_level=logging.INFO,
            model_file_dir=model_dir,
            output_dir=output_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            mdxc_params={"segment_size": seg_size, "override_model_segment_size": override_seg_size, "batch_size": batch_size, "overlap": overlap, "pitch_shift": pitch_shift}
        )
        progress(0.2, desc="Loading model...")
        separator.load_model(model_filename=model)
        progress(0.7, desc="Separating audio...")
        separation = separator.separate(audio)
        stems = [os.path.join(output_dir, file_name) for file_name in separation]

        # Drop empty tokens (e.g. from a trailing comma): the empty string is
        # a substring of every file name and would exclude every stem.
        excluded = [s.strip().lower() for s in (exclude_stems or "").split(',') if s.strip()]
        if excluded:
            file_list = [
                stem for stem in stems
                if not any(ex in os.path.basename(stem).lower() for ex in excluded)
            ]
        else:
            file_list = stems
        stem1 = file_list[0] if file_list else None
        stem2 = file_list[1] if len(file_list) > 1 else None
        return stem1, stem2, file_list
    except Exception as e:
        logger.error(f"Separation error: {e}")
        raise RuntimeError(f"Separation error: {e}") from e
    finally:
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
            logger.info(f"Temporary file deleted: {temp_audio_path}")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            logger.info("GPU memory cleared")
@spaces.GPU
def auto_ensemble_process(audio, model_keys, seg_size=64, overlap=0.1, out_format="wav", use_tta="False", model_dir="/tmp/audio-separator-models/", output_dir="output", norm_thresh=0.9, amp_thresh=0.9, batch_size=1, ensemble_method="avg_wave", exclude_stems="", weights_str="", progress=gr.Progress(track_tqdm=True)):
    """Run several Roformer models on one input and ensemble their stems.

    Each selected model is run in a worker thread (separation itself is
    serialised with a lock). The first "vocals" and first
    "other"/"instrumental" stem of every model are rewritten as WAV files
    and passed to ``ensemble_files``.

    Args:
        audio: Path to an audio file, or a ``(sample_rate, data)`` tuple that
            is first written to a temporary WAV file.
        model_keys: Display names from ``ROFORMER_MODELS``; only the first 6
            are used.
        seg_size, overlap: Forwarded to each separator via ``mdxc_params``.
        out_format: Output format of the separators and the ensemble file.
        use_tta: ``"True"``/``"False"`` (or a bool) forwarded as ``use_tta``.
        model_dir: Directory handed to the separators as ``model_file_dir``.
        output_dir: Output directory. It is deleted and recreated on every
            call.
        norm_thresh, amp_thresh: Thresholds passed to each separator.
        batch_size: Accepted for interface compatibility; the batch size
            actually used is derived from audio duration and model count.
        ensemble_method: Value passed to ``ensemble_files`` as ``--type``.
        exclude_stems: Comma-separated stem types ("vocals", "other") to
            leave out of the ensemble.
        weights_str: Comma-separated weights, one per ensembled stem. Falls
            back to all 1.0 when empty, malformed, or of the wrong length.
        progress: Gradio progress reporter.

    Returns:
        ``(output_file, status_html, file_list)`` where ``file_list`` is the
        ensemble output followed by the per-model stems that were ensembled.

    Raises:
        RuntimeError: On any failure; the message is a generic hint and the
            original exception is attached as the cause.
    """
    temp_audio_path = None
    max_retries = 2
    start_time = time.time()
    time_budget = 100  # seconds
    max_models = 6
    gpu_lock = Lock()
    try:
        if not audio:
            raise ValueError("No audio file provided.")
        if not model_keys:
            raise ValueError("No models selected.")
        if len(model_keys) > max_models:
            logger.warning(f"Selected {len(model_keys)} models, limiting to {max_models}.")
            model_keys = model_keys[:max_models]

        # Materialise in-memory audio before anything tries to load it from
        # disk; librosa.load below needs a path.
        if isinstance(audio, tuple):
            sample_rate, data = audio
            temp_audio_path = os.path.join("/tmp", "temp_audio.wav")
            scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
            audio = temp_audio_path

        # Dynamic batch size based on audio duration and model count
        audio_data, sr = librosa.load(audio, sr=None, mono=False)
        duration = librosa.get_duration(y=audio_data, sr=sr)
        del audio_data  # only the duration and sample rate are needed later
        logger.info(f"Audio duration: {duration:.2f} seconds")
        dynamic_batch_size = max(1, min(4, 1 + int(900 / (duration + 1)) - len(model_keys) // 2))
        logger.info(f"Using batch size: {dynamic_batch_size} for {len(model_keys)} models, duration {duration:.2f}s")

        # Accept both the string sent by the UI and a real boolean.
        use_tta = str(use_tta) == "True"
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir, exist_ok=True)
        base_name = os.path.splitext(os.path.basename(audio))[0]
        logger.info(f"Ensemble for {base_name} with {model_keys} on {device}")

        # Model cache
        model_cache = {}
        all_stems = []
        model_stems = {model_key: {"vocals": [], "other": []} for model_key in model_keys}
        total_tasks = len(model_keys)
        excluded_types = {s.strip().lower() for s in (exclude_stems or "").split(',') if s.strip()}

        def process_model(model_key, model_idx):
            """Separate ``audio`` with one model; return its stem paths or []."""
            with torch.no_grad():
                # Find model
                for category, models in ROFORMER_MODELS.items():
                    if model_key in models:
                        model = models[model_key]
                        break
                else:
                    logger.warning(f"Model {model_key} not found, skipping")
                    return []

                for attempt in range(max_retries + 1):
                    try:
                        # Check time budget. An overrun cannot be cured by
                        # retrying, so give up on this model right away.
                        elapsed = time.time() - start_time
                        if elapsed > time_budget:
                            logger.error(f"Time budget ({time_budget}s) exceeded")
                            return []

                        # Initialize separator
                        if model_key not in model_cache:
                            logger.info(f"Loading {model_key} into cache")
                            separator = Separator(
                                log_level=logging.INFO,
                                model_file_dir=model_dir,
                                output_dir=output_dir,
                                output_format=out_format,
                                normalization_threshold=norm_thresh,
                                amplification_threshold=amp_thresh,
                                use_autocast=use_autocast,
                                mdxc_params={
                                    "segment_size": seg_size,
                                    "overlap": overlap,
                                    "use_tta": use_tta,
                                    "batch_size": dynamic_batch_size
                                }
                            )
                            separator.load_model(model_filename=model)
                            model_cache[model_key] = separator
                        else:
                            separator = model_cache[model_key]

                        # Process with GPU lock
                        with gpu_lock:
                            progress(0.3 + (model_idx / total_tasks) * 0.5, desc=f"Separating with {model_key}")
                            logger.info(f"Separating with {model_key}")
                            separation = separator.separate(audio)

                        stems = [os.path.join(output_dir, file_name) for file_name in separation]
                        for stem in stems:
                            stem_name = os.path.basename(stem).lower()
                            if "vocals" in stem_name:
                                model_stems[model_key]["vocals"].append(stem)
                            elif "other" in stem_name or "instrumental" in stem_name:
                                model_stems[model_key]["other"].append(stem)
                        return stems
                    except Exception as e:
                        logger.error(f"Error processing {model_key}, attempt {attempt + 1}/{max_retries + 1}: {e}")
                        if attempt == max_retries:
                            logger.error(f"Max retries reached for {model_key}, skipping")
                            return []
                        time.sleep(1)
                    finally:
                        if torch.cuda.is_available():
                            torch.cuda.empty_cache()
                            logger.info(f"Cleared CUDA cache after {model_key}")

        # Parallel processing
        progress(0.1, desc="Starting model separations...")
        with ThreadPoolExecutor(max_workers=min(4, len(model_keys))) as executor:
            future_to_task = {executor.submit(process_model, model_key, idx): model_key for idx, model_key in enumerate(model_keys)}
            for future in as_completed(future_to_task):
                model_key = future_to_task[future]
                try:
                    stems = future.result()
                    if stems:
                        logger.info(f"Completed {model_key}")
                    else:
                        logger.warning(f"No stems produced for {model_key}")
                except Exception as e:
                    logger.error(f"Task {model_key} failed: {e}")

        # Clear model cache
        model_cache.clear()
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            logger.info("Cleared model cache and GPU memory")

        # Combine stems
        progress(0.8, desc="Combining stems...")
        for model_key, stems_dict in model_stems.items():
            for stem_type in ["vocals", "other"]:
                if not stems_dict[stem_type]:
                    continue
                # Skip excluded stem types before doing any audio I/O.
                if stem_type in excluded_types:
                    logger.info(f"Excluding {stem_type} for {model_key}")
                    continue
                combined_path = os.path.join(output_dir, f"{base_name}_{stem_type}_{model_key.replace(' | ', '_').replace(' ', '_')}.wav")
                try:
                    data, _ = librosa.load(stems_dict[stem_type][0], sr=sr, mono=False)
                    # With mono=False a 2-D result is (channels, samples).
                    channels = data.shape[0] if data.ndim == 2 else 1
                    with sf.SoundFile(combined_path, 'w', sr, channels=channels) as f:
                        f.write(data.T if data.ndim == 2 else data)
                    logger.info(f"Combined {stem_type} for {model_key}: {combined_path}")
                    all_stems.append(combined_path)
                except Exception as e:
                    logger.error(f"Error combining {stem_type} for {model_key}: {e}")

        all_stems = [stem for stem in all_stems if os.path.exists(stem)]
        if not all_stems:
            raise ValueError("No valid stems found for ensemble. Try uploading a local WAV file.")

        # Ensemble
        weights = None
        if weights_str and weights_str.strip():
            try:
                weights = [float(w.strip()) for w in weights_str.split(',')]
            except ValueError:
                logger.warning(f"Could not parse weights '{weights_str}', defaulting to 1.0")
        if weights is None:
            weights = [1.0] * len(all_stems)
        elif len(weights) != len(all_stems):
            weights = [1.0] * len(all_stems)
            logger.info("Weights mismatched, defaulting to 1.0")

        output_file = os.path.join(output_dir, f"{base_name}_ensemble_{ensemble_method}.{out_format}")
        ensemble_args = [
            "--files", *all_stems,
            "--type", ensemble_method,
            "--weights", *[str(w) for w in weights],
            "--output", output_file
        ]
        progress(0.9, desc="Running ensemble...")
        logger.info(f"Running ensemble with args: {ensemble_args}")
        try:
            result = ensemble_files(ensemble_args)
            if result is None or not os.path.exists(output_file):
                raise RuntimeError(f"Ensemble failed, output file not created: {output_file}")
            logger.info(f"Ensemble completed, output: {output_file}")
            progress(1.0, desc="Ensemble completed")
            elapsed = time.time() - start_time
            logger.info(f"Total processing time: {elapsed:.2f}s")
            # Prepare file list for download
            file_list = [output_file] + all_stems
            # Create status message with download links
            status = f"Ensemble completed with {ensemble_method}, excluded: {exclude_stems if exclude_stems else 'None'}, {len(model_keys)} models in {elapsed:.2f}s<br>Download files:<ul>"
            for file in file_list:
                file_name = os.path.basename(file)
                status += f"<li><a href='file={file}' download>{file_name}</a></li>"
            status += "</ul>"
            return output_file, status, file_list
        except Exception as e:
            logger.error(f"Ensemble processing error: {e}")
            if "numpy" in str(e).lower() or "copy" in str(e).lower():
                error_msg = f"NumPy compatibility error: {e}. Try installing numpy<2.0.0 or contact support."
            else:
                error_msg = f"Ensemble processing error: {e}"
            raise RuntimeError(error_msg) from e
    except Exception as e:
        logger.error(f"Ensemble error: {e}")
        error_msg = f"Processing failed. Try fewer models (max {max_models}), shorter audio, or uploading a local WAV file."
        # Chain the cause so the specific failure is kept in the traceback.
        raise RuntimeError(error_msg) from e
    finally:
        if temp_audio_path and os.path.exists(temp_audio_path):
            try:
                os.remove(temp_audio_path)
                logger.info(f"Temporary file deleted: {temp_audio_path}")
            except Exception as e:
                logger.warning(f"Failed to delete temporary file {temp_audio_path}: {e}")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            logger.info("GPU memory cleared")
def update_roformer_models(category):
    """Refresh the Roformer model dropdown for the chosen category.

    Returns a ``gr.update`` whose choices are the model names registered
    under ``category`` in ``ROFORMER_MODELS``. The first name is selected,
    or None when the category is unknown or empty.
    """
    model_names = list(ROFORMER_MODELS.get(category, {}))
    logger.debug(f"Updating roformer models for category {category}: {model_names}")
    if model_names:
        selected = model_names[0]
    else:
        selected = None
    return gr.update(choices=model_names, value=selected)
def update_ensemble_models(category):
    """Refresh the ensemble model dropdown for the chosen category.

    Returns a ``gr.update`` whose choices are the model names registered
    under ``category`` in ``ROFORMER_MODELS``, with the selection reset to
    an empty list.
    """
    category_models = ROFORMER_MODELS.get(category, {})
    model_names = list(category_models)
    logger.debug(f"Updating ensemble models for category {category}: {model_names}")
    return gr.update(choices=model_names, value=[])
def download_audio_wrapper(url, cookie_file):
    """Adapt ``download_audio`` to the two outputs the UI binds to.

    ``download_audio`` returns a 3-tuple; the first element (the file path) is
    not needed by the interface and is discarded.

    Returns:
        ``(audio_data, status)`` in that order.
    """
    result = download_audio(url, cookie_file)
    _, status, audio_data = result
    return audio_data, status
def create_interface():
    """Build the Gradio Blocks UI and wire its event handlers.

    The interface has three tabs:

    * Settings: options shared by both processing tabs (model cache
      directory, output directory/format, thresholds, batch size).
    * Roformer: single-model separation handled by ``roformer_separator``.
    * Auto Ensemble: multi-model ensembling handled by
      ``auto_ensemble_process``.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost; which components sit inside each gr.Row() should
    # be confirmed against the intended layout.
    # NOTE(review): several labels look like emoji that were mis-decoded
    # (mojibake). The strings are reproduced unchanged here; confirm the
    # file's encoding against the original source.
    with gr.Blocks(title="π΅ SESA Fast Separation π΅", css=CSS, elem_id="app-container") as app:
        gr.Markdown("<h1 class='header-text'>π΅ SESA Fast Separation π΅</h1>")
        gr.Markdown("**Note**: If YouTube downloads fail, upload a valid cookies file or a local WAV file. [Cookie Instructions](https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies)")
        gr.Markdown("**Tip**: For best results, use audio shorter than 15 minutes or fewer models (up to 6) to ensure smooth processing.")

        with gr.Tabs():
            with gr.Tab("βοΈ Settings"):
                with gr.Group(elem_classes="dubbing-theme"):
                    gr.Markdown("### General Settings")
                    model_file_dir = gr.Textbox(value="/tmp/audio-separator-models/", label="π Model Cache", placeholder="Path to model directory", interactive=True)
                    output_dir = gr.Textbox(value="output", label="π€ Output Directory", placeholder="Where to save results", interactive=True)
                    output_format = gr.Dropdown(value="wav", choices=OUTPUT_FORMATS, label="πΆ Output Format", interactive=True)
                    norm_threshold = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="π Normalization Threshold", interactive=True)
                    amp_threshold = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="π Amplification Threshold", interactive=True)
                    batch_size = gr.Slider(1, 8, value=1, step=1, label="β‘ Batch Size", interactive=True)

            with gr.Tab("π€ Roformer"):
                with gr.Group(elem_classes="dubbing-theme"):
                    gr.Markdown("### Audio Separation")
                    with gr.Row():
                        roformer_audio = gr.Audio(label="π§ Upload Audio", type="filepath", interactive=True)
                        url_ro = gr.Textbox(label="π Or Paste URL", placeholder="YouTube or audio URL", interactive=True)
                        cookies_ro = gr.File(label="πͺ Cookies File", file_types=[".txt"], interactive=True)
                        download_roformer = gr.Button("β¬οΈ Download", variant="secondary")
                    roformer_download_status = gr.Textbox(label="π’ Download Status", interactive=False)
                    roformer_exclude_stems = gr.Textbox(label="π« Exclude Stems", placeholder="e.g., vocals, drums (comma-separated)", interactive=True)
                    with gr.Row():
                        roformer_category = gr.Dropdown(label="π Category", choices=list(ROFORMER_MODELS.keys()), value="General Purpose", interactive=True)
                        roformer_model = gr.Dropdown(label="π οΈ Model", choices=list(ROFORMER_MODELS["General Purpose"].keys()), interactive=True, allow_custom_value=True)
                    with gr.Row():
                        roformer_seg_size = gr.Slider(32, 512, value=64, step=32, label="π Segment Size", interactive=True)
                        roformer_overlap = gr.Slider(2, 10, value=8, step=1, label="π Overlap", interactive=True)
                    with gr.Row():
                        roformer_pitch_shift = gr.Slider(-12, 12, value=0, step=1, label="π΅ Pitch Shift", interactive=True)
                        roformer_override_seg_size = gr.Dropdown(choices=["True", "False"], value="False", label="π§ Override Segment Size", interactive=True)
                    roformer_button = gr.Button("βοΈ Separate Now!", variant="primary")
                    with gr.Row():
                        roformer_stem1 = gr.Audio(label="πΈ Stem 1", type="filepath", interactive=False)
                        roformer_stem2 = gr.Audio(label="π₯ Stem 2", type="filepath", interactive=False)
                    roformer_files = gr.File(label="π₯ Download Stems", interactive=False)

            with gr.Tab("ποΈ Auto Ensemble"):
                with gr.Group(elem_classes="dubbing-theme"):
                    gr.Markdown("### Ensemble Processing")
                    gr.Markdown("Note: If weights are not specified, equal weights (1.0) are applied. Use up to 6 models for best results.")
                    with gr.Row():
                        ensemble_audio = gr.Audio(label="π§ Upload Audio", type="filepath", interactive=True)
                        url_ensemble = gr.Textbox(label="π Or Paste URL", placeholder="YouTube or audio URL", interactive=True)
                        cookies_ensemble = gr.File(label="πͺ Cookies File", file_types=[".txt"], interactive=True)
                        download_ensemble = gr.Button("β¬οΈ Download", variant="secondary")
                    ensemble_download_status = gr.Textbox(label="π’ Download Status", interactive=False)
                    ensemble_exclude_stems = gr.Textbox(label="π« Exclude Stems", placeholder="e.g., vocals, drums (comma-separated)", interactive=True)
                    with gr.Row():
                        ensemble_category = gr.Dropdown(label="π Category", choices=list(ROFORMER_MODELS.keys()), value="Instrumentals", interactive=True)
                        ensemble_models = gr.Dropdown(label="π οΈ Models (Max 6)", choices=list(ROFORMER_MODELS["Instrumentals"].keys()), multiselect=True, interactive=True, allow_custom_value=True)
                    with gr.Row():
                        ensemble_seg_size = gr.Slider(32, 512, value=64, step=32, label="π Segment Size", interactive=True)
                        ensemble_overlap = gr.Slider(2, 10, value=8, step=1, label="π Overlap", interactive=True)
                        ensemble_use_tta = gr.Dropdown(choices=["True", "False"], value="False", label="π Use TTA", interactive=True)
                    ensemble_method = gr.Dropdown(label="βοΈ Ensemble Method", choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave', 'avg_fft', 'median_fft', 'max_fft', 'min_fft'], value='avg_wave', interactive=True)
                    ensemble_weights = gr.Textbox(label="βοΈ Weights", placeholder="e.g., 1.0, 1.0, 1.0 (comma-separated)", interactive=True)
                    ensemble_button = gr.Button("ποΈ Run Ensemble!", variant="primary")
                    ensemble_output = gr.Audio(label="πΆ Ensemble Result", type="filepath", interactive=False)
                    ensemble_status = gr.HTML(label="π’ Status")
                    ensemble_files = gr.File(label="π₯ Download Ensemble and Stems", interactive=False)

        gr.HTML("<div class='footer'>Powered by Audio-Separator ππΆ | Made with β€οΈ</div>")

        # --- Roformer tab events -------------------------------------------
        roformer_category.change(update_roformer_models, inputs=[roformer_category], outputs=[roformer_model])
        download_roformer.click(
            fn=download_audio_wrapper,
            inputs=[url_ro, cookies_ro],
            outputs=[roformer_audio, roformer_download_status],
        )
        # The model cache directory comes from the Settings tab textbox
        # (`model_file_dir`); the handler inputs must reference that component.
        roformer_button.click(
            fn=roformer_separator,
            inputs=[
                roformer_audio, roformer_model, roformer_seg_size, roformer_override_seg_size,
                roformer_overlap, roformer_pitch_shift, model_file_dir, output_dir,
                output_format, norm_threshold, amp_threshold, batch_size, roformer_exclude_stems,
            ],
            outputs=[roformer_stem1, roformer_stem2, roformer_files],
        )

        # --- Auto Ensemble tab events --------------------------------------
        ensemble_category.change(update_ensemble_models, inputs=[ensemble_category], outputs=[ensemble_models])
        download_ensemble.click(
            fn=download_audio_wrapper,
            inputs=[url_ensemble, cookies_ensemble],
            outputs=[ensemble_audio, ensemble_download_status],
        )
        ensemble_button.click(
            fn=auto_ensemble_process,
            inputs=[
                ensemble_audio, ensemble_models, ensemble_seg_size, ensemble_overlap,
                output_format, ensemble_use_tta, model_file_dir, output_dir,
                norm_threshold, amp_threshold, batch_size, ensemble_method,
                ensemble_exclude_stems, ensemble_weights,
            ],
            outputs=[ensemble_output, ensemble_status, ensemble_files],
        )
    return app
if __name__ == "__main__":
    # Script entry point: parse the port, build the UI, and serve it.
    parser = argparse.ArgumentParser(description="Music Source Separation Web UI")
    parser.add_argument("--port", type=int, default=7860, help="Port to run the UI on")
    args = parser.parse_args()

    app = create_interface()
    try:
        # Binds on all interfaces and requests a public share link.
        app.launch(server_name="0.0.0.0", server_port=args.port, share=True)
    except Exception as e:
        logger.error(f"Failed to launch UI: {e}")
        raise
    finally:
        # Close the app whether launch returned normally or raised.
        app.close()