lainlives commited on
Commit
6c52bb8
·
1 Parent(s): 15afd8d
Files changed (1) hide show
  1. app.py +157 -1067
app.py CHANGED
@@ -1,124 +1,142 @@
1
  from __future__ import annotations
2
 
3
- import logging
4
- import operator
5
  import os
6
- import shutil
7
  import sys
8
- from collections.abc import Sequence
9
- from contextlib import suppress
10
  from enum import IntEnum, StrEnum, auto
11
- from functools import cache, cached_property, partial, reduce
12
  from pathlib import Path
13
  from typing import TYPE_CHECKING, Annotated, Any, TypedDict
14
- from urllib.parse import parse_qs, urlparse
15
 
16
- import ffmpeg
17
  import gradio as gr
18
- import lazy_loader as lazy
19
- import pedalboard
20
- import soundfile as sf
21
- import spaces
22
- import static_ffmpeg
23
- import static_sox
24
  import typer
25
- from audio_separator.separator import Separator
26
  from huggingface_hub import snapshot_download
27
- from pydantic import BaseModel, ValidationError
28
- from rich import print as rprint
29
- from ultimate_rvc.common import (AUDIO_DIR, MODELS_DIR, NODE_PATH,
30
- SEPARATOR_MODELS_DIR, TEMP_DIR)
31
- from ultimate_rvc.core.common import (INTERMEDIATE_AUDIO_BASE_DIR,
32
- OUTPUT_AUDIO_DIR, copy_file_safe,
33
- display_progress, get_file_hash,
34
- get_hash, json_dump, json_dumps,
35
- json_load, validate_audio_dir_exists,
36
- validate_audio_file_exists,
37
- validate_model, validate_url)
38
- from ultimate_rvc.core.exceptions import (Entity, InvalidLocationError,
39
- Location, NotFoundError,
40
- NotProvidedError, UIMessage,
41
- YoutubeUrlError)
42
- from ultimate_rvc.core.generate.common import (convert, get_unique_base_path,
43
- mix_audio,
44
- validate_audio_dir_exists,
45
- validate_audio_file_exists,
46
- wavify)
47
- from ultimate_rvc.core.generate.song_cover import (get_named_song_dirs,
48
- get_song_cover_name,
49
- mix_song, pitch_shift,
50
- postprocess, retrieve_song,
51
- run_pipeline)
52
  from ultimate_rvc.core.generate.speech import get_edge_tts_voice_names
53
- from ultimate_rvc.core.generate.typing_extra import (AudioExtInternal,
54
- EffectedVocalsMetaData,
55
- FileMetaData,
56
- MixedAudioMetaData,
57
- MixedAudioType,
58
- PitchShiftMetaData,
59
- RVCAudioMetaData,
60
- SeparatedAudioMetaData,
61
- SongSourceType,
62
- StagedAudioMetaData,
63
- WaveifiedAudioMetaData)
64
- from ultimate_rvc.core.manage.audio import (get_audio_datasets,
65
- get_named_audio_datasets,
66
- get_saved_output_audio,
67
- get_saved_speech_audio)
68
  from ultimate_rvc.core.manage.config import get_config_names, load_config
69
- from ultimate_rvc.core.manage.models import (get_custom_embedder_model_names,
70
- get_custom_pretrained_model_names,
71
- get_training_model_names,
72
- get_voice_model_names)
73
- # NOTE the only reason this module is imported here is so we can
74
- # annotate the return value of the _get_voice_converter function.
75
- from ultimate_rvc.rvc.infer.infer import VoiceConverter
76
- from ultimate_rvc.typing_extra import (AudioExt, EmbedderModel, F0Method, Json,
77
- RVCContentType, SegmentSize,
78
- SeparationModel, StrPath)
79
- from ultimate_rvc.web.common import (PROGRESS_BAR, exception_harness,
80
- initialize_dropdowns,
81
- render_transfer_component,
82
- setup_transfer_event,
83
- toggle_intermediate_audio,
84
- toggle_visibility,
85
- toggle_visible_component,
86
- update_dropdowns, update_output_name,
87
- update_value)
88
- from ultimate_rvc.web.config.component import (AnyComponentConfig, AudioConfig,
89
- CheckboxConfig, ComponentConfig,
90
- DropdownConfig, NumberConfig,
91
- RadioConfig, SliderConfig,
92
- TextboxConfig)
93
- from ultimate_rvc.web.config.tab import (SongGenerationConfig,
94
- SpeechGenerationConfig,
95
- TrainingConfig)
96
- from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import \
97
- render as render_speech_multi_step_tab
98
- from ultimate_rvc.web.tabs.generate.speech.one_click_generation import \
99
- render as render_speech_one_click_tab
 
 
 
 
 
 
 
 
 
100
  from ultimate_rvc.web.tabs.manage.audio import render as render_audio_tab
101
  from ultimate_rvc.web.tabs.manage.models import render as render_models_tab
102
  from ultimate_rvc.web.tabs.manage.settings import render as render_settings_tab
103
- from ultimate_rvc.web.typing_extra import ConcurrencyId
104
-
105
- # NOTE the only reason this is imported here is so we can annotate
106
- # the return type of the _get_audio_separator function
107
 
 
 
 
108
 
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  type StrPath = str | PathLike[str]
112
 
113
- static_sox = lazy.load("static_sox")
114
- yt_dlp = lazy.load("yt_dlp")
115
- pedalboard = lazy.load("pedalboard")
116
- sf = lazy.load("soundfile")
117
-
118
- logger = logging.getLogger(__name__)
119
-
120
  type Json = Mapping[str, Json] | Sequence[Json] | str | int | float | bool | None
121
- zgpuduration = 10
122
 
123
 
124
  class SegmentSize(IntEnum):
@@ -358,6 +376,7 @@ class EmbedderModel(StrEnum):
358
  """Enumeration of audio embedding models."""
359
 
360
  CONTENTVEC = "contentvec"
 
361
  CUSTOM = "custom"
362
 
363
 
@@ -380,13 +399,8 @@ class SeparationModel(StrEnum):
380
  kuielab_b_bass = "kuielab_b_bass.onnx"
381
  kuielab_a_other = "kuielab_a_other.onnx"
382
  kuielab_b_other = "kuielab_b_other.onnx"
383
- UVR_DeEcho_Aggressive = "UVR-De-Echo-Aggressive.pth"
384
- UVR_BVE_4B_SN = "UVR-BVE-4B_SN-44100-1.pth"
385
- MDX23C_D1581 = "MDX23C_D1581.ckpt"
386
  MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"
387
- MDX23C_8KFFT_InstVoc_HQ = "MDX23C-8KFFT-InstVoc_HQ.ckpt"
388
  UVR_DeNoise = "UVR-DeNoise.pth"
389
- UVR_DeNoise_Lite = "UVR-DeNoise-Lite.pth"
390
  UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
391
 
392
 
@@ -1766,7 +1780,7 @@ class SongGenerationConfig(GenerationConfig):
1766
  )
1767
  source: TextboxConfig = TextboxConfig(
1768
  label="Source",
1769
- info="Local (to the server) file",
1770
  value=None,
1771
  exclude_value=True,
1772
  )
@@ -2483,86 +2497,6 @@ def _render_step_4(tab_config: MultiStepSongGenerationConfig) -> None:
2483
  )
2484
 
2485
 
2486
- def separate_audio(
2487
- audio_track: StrPath,
2488
- song_dir: StrPath,
2489
- model_name: SeparationModel,
2490
- segment_size: int,
2491
- ) -> tuple[Path, Path]:
2492
- """
2493
- Separate an audio track into a primary stem and a secondary stem.
2494
-
2495
- Parameters
2496
- ----------
2497
- audio_track : StrPath
2498
- The path to the audio track to separate.
2499
- song_dir : StrPath
2500
- The path to the song directory where the separated primary stem
2501
- and secondary stem will be saved.
2502
- model_name : str
2503
- The name of the model to use for audio separation.
2504
- segment_size : int
2505
- The segment size to use for audio separation.
2506
-
2507
- Returns
2508
- -------
2509
- primary_path : Path
2510
- The path to the separated primary stem.
2511
- secondary_path : Path
2512
- The path to the separated secondary stem.
2513
-
2514
- """
2515
- audio_path = validate_audio_file_exists(audio_track, Entity.AUDIO_TRACK)
2516
- song_dir_path = validate_audio_dir_exists(song_dir, Entity.SONG_DIR)
2517
-
2518
- args_dict = SeparatedAudioMetaData(
2519
- audio_track=FileMetaData(
2520
- name=audio_path.name,
2521
- hash_id=get_file_hash(audio_path),
2522
- ),
2523
- model_name=model_name,
2524
- segment_size=segment_size,
2525
- ).model_dump()
2526
-
2527
- paths = [
2528
- get_unique_base_path(
2529
- song_dir_path,
2530
- prefix,
2531
- args_dict,
2532
- ).with_suffix(suffix)
2533
- for prefix in ["11_Stem_Primary", "11_Stem_Secondary"]
2534
- for suffix in [".wav", ".json"]
2535
- ]
2536
-
2537
- (
2538
- primary_path,
2539
- primary_json_path,
2540
- secondary_path,
2541
- secondary_json_path,
2542
- ) = paths
2543
-
2544
- if not all(path.exists() for path in paths):
2545
- audio_separator = _get_audio_separator(
2546
- output_dir=song_dir_path,
2547
- segment_size=segment_size,
2548
- )
2549
- audio_separator.load_model(model_name)
2550
- audio_separator.separate(
2551
- str(audio_path),
2552
- custom_output_names={
2553
- audio_separator.model_instance.primary_stem_name: str(
2554
- primary_path.with_suffix(""),
2555
- ),
2556
- audio_separator.model_instance.secondary_stem_name: str(
2557
- secondary_path.with_suffix(""),
2558
- ),
2559
- },
2560
- )
2561
- json_dump(args_dict, primary_json_path)
2562
- json_dump(args_dict, secondary_json_path)
2563
-
2564
- return primary_path, secondary_path
2565
-
2566
  def _render_step_5(
2567
  total_config: TotalConfig,
2568
  tab_config: MultiStepSongGenerationConfig,
@@ -2725,870 +2659,7 @@ def _pair_audio_tracks_and_gain(
2725
  ]
2726
 
2727
 
2728
- @cache
2729
- def _get_audio_separator(
2730
- output_dir: StrPath = INTERMEDIATE_AUDIO_BASE_DIR,
2731
- output_format: str = AudioExt.WAV,
2732
- segment_size: int = SegmentSize.SEG_256,
2733
- sample_rate: int = 44100,
2734
- ) -> Separator:
2735
- static_ffmpeg.add_paths(weak=True)
2736
- from audio_separator.separator import Separator # noqa: PLC0415
2737
-
2738
- """
2739
- Get an audio separator.
2740
-
2741
- Parameters
2742
- ----------
2743
- output_dir : StrPath, default=INTERMEDIATE_AUDIO_BASE_DIR
2744
- The directory to save the separated audio to.
2745
- output_format : str, default=AudioExt.WAV
2746
- The format to save the separated audio in.
2747
- segment_size : int, default=SegmentSize.SEG_256
2748
- The segment size to use for separation.
2749
- sample_rate : int, default=44100
2750
- The sample rate to use for separation.
2751
-
2752
- Returns
2753
- -------
2754
- Separator
2755
- An audio separator.
2756
-
2757
- """
2758
- return Separator(
2759
- model_file_dir=SEPARATOR_MODELS_DIR,
2760
- use_autocast=False,
2761
- output_dir=output_dir,
2762
- output_format=output_format,
2763
- sample_rate=sample_rate,
2764
- mdx_params={
2765
- "hop_length": 1024,
2766
- "segment_size": segment_size,
2767
- "overlap": 0.25,
2768
- "batch_size": 1,
2769
- "enable_denoise": True,
2770
- },
2771
- )
2772
-
2773
-
2774
- def initialize_audio_separator() -> None:
2775
- """
2776
- Initialize the audio separator by downloading the models it
2777
- uses.
2778
-
2779
- """
2780
- audio_separator = _get_audio_separator()
2781
- for i, separator_model in enumerate(SeparationModel):
2782
- if not Path(SEPARATOR_MODELS_DIR / separator_model).is_file():
2783
- display_progress(
2784
- f"Downloading {separator_model}...",
2785
- i / len(SeparationModel),
2786
- )
2787
- audio_separator.download_model_files(separator_model)
2788
-
2789
-
2790
- def _get_input_audio_path(directory: StrPath) -> Path | None:
2791
- """
2792
- Get the path to the input audio file in the provided directory, if
2793
- it exists.
2794
-
2795
- The provided directory must be located in the root of the
2796
- intermediate audio base directory.
2797
-
2798
- Parameters
2799
- ----------
2800
- directory : StrPath
2801
- The path to a directory.
2802
-
2803
- Returns
2804
- -------
2805
- Path | None
2806
- The path to the input audio file in the provided directory, if
2807
- it exists.
2808
-
2809
- Raises
2810
- ------
2811
- NotFoundError
2812
- If the provided path does not point to an existing directory.
2813
- InvalidLocationError
2814
- If the provided path is not located in the root of the
2815
- intermediate audio base directory"
2816
-
2817
- """
2818
- dir_path = Path(directory)
2819
-
2820
- if not dir_path.is_dir():
2821
- raise NotFoundError(entity=Entity.DIRECTORY, location=dir_path)
2822
-
2823
- if dir_path.parent != INTERMEDIATE_AUDIO_BASE_DIR:
2824
- raise InvalidLocationError(
2825
- entity=Entity.DIRECTORY,
2826
- location=Location.INTERMEDIATE_AUDIO_ROOT,
2827
- path=dir_path,
2828
- )
2829
- # NOTE directory should never contain more than one element which
2830
- # matches the pattern "00_*"
2831
- return next(dir_path.glob("00_*"), None)
2832
-
2833
-
2834
- def _get_input_audio_paths() -> list[Path]:
2835
- """
2836
- Get the paths to all input audio files in the intermediate audio
2837
- base directory.
2838
-
2839
- Returns
2840
- -------
2841
- list[Path]
2842
- The paths to all input audio files in the intermediate audio
2843
- base directory.
2844
-
2845
- """
2846
- # NOTE if we later add .json file for input then
2847
- # we need to exclude those here
2848
- return list(INTERMEDIATE_AUDIO_BASE_DIR.glob("*/00_*"))
2849
-
2850
-
2851
- def get_named_song_dirs() -> list[tuple[str, str]]:
2852
- """
2853
- Get the names of all saved songs and the paths to the
2854
- directories where they are stored.
2855
-
2856
- Returns
2857
- -------
2858
- list[tuple[str, Path]]
2859
- A list of tuples containing the name of each saved song
2860
- and the path to the directory where it is stored.
2861
-
2862
- """
2863
- return sorted(
2864
- [
2865
- (
2866
- path.stem.removeprefix("00_"),
2867
- str(path.parent),
2868
- )
2869
- for path in _get_input_audio_paths()
2870
- ],
2871
- key=operator.itemgetter(0),
2872
- )
2873
-
2874
-
2875
- def _get_model_name(
2876
- effected_vocals_track: StrPath | None = None,
2877
- song_dir: StrPath | None = None,
2878
- ) -> str:
2879
- """
2880
- Infer the name of the voice model used for vocal conversion from a
2881
- an effected vocals track in a given song directory.
2882
-
2883
- If a voice model name cannot be inferred, "Unknown" is returned.
2884
-
2885
- Parameters
2886
- ----------
2887
- effected_vocals_track : StrPath, optional
2888
- The path to an effected vocals track.
2889
- song_dir : StrPath, optional
2890
- The path to a song directory.
2891
-
2892
- Returns
2893
- -------
2894
- str
2895
- The name of the voice model used for vocal conversion.
2896
-
2897
- """
2898
- model_name = "Unknown"
2899
- if not (effected_vocals_track and song_dir):
2900
- return model_name
2901
- effected_vocals_path = Path(effected_vocals_track)
2902
- song_dir_path = Path(song_dir)
2903
- effected_vocals_json_path = song_dir_path / f"{effected_vocals_path.stem}.json"
2904
- if not effected_vocals_json_path.is_file():
2905
- return model_name
2906
- effected_vocals_dict = json_load(effected_vocals_json_path)
2907
- try:
2908
- effected_vocals_metadata = EffectedVocalsMetaData.model_validate(
2909
- effected_vocals_dict,
2910
- )
2911
- except ValidationError:
2912
- return model_name
2913
- converted_vocals_track_name = effected_vocals_metadata.vocals_track.name
2914
- converted_vocals_json_path = song_dir_path / Path(
2915
- converted_vocals_track_name,
2916
- ).with_suffix(
2917
- ".json",
2918
- )
2919
- if not converted_vocals_json_path.is_file():
2920
- return model_name
2921
- converted_vocals_dict = json_load(converted_vocals_json_path)
2922
- try:
2923
- converted_vocals_metadata = RVCAudioMetaData.model_validate(
2924
- converted_vocals_dict,
2925
- )
2926
- except ValidationError:
2927
- return model_name
2928
- return converted_vocals_metadata.model_name
2929
-
2930
-
2931
- def get_song_cover_name(
2932
- effected_vocals_track: StrPath | None = None,
2933
- song_dir: StrPath | None = None,
2934
- model_name: str | None = None,
2935
- ) -> str:
2936
- """
2937
- Generate a suitable name for a cover of a song based on the name
2938
- of that song and the voice model used for vocal conversion.
2939
-
2940
- If the path of an existing song directory is provided, the name
2941
- of the song is inferred from that directory. If a voice model is not
2942
- provided but the path of an existing song directory and the path of
2943
- an effected vocals track in that directory are provided, then the
2944
- voice model is inferred from the effected vocals track.
2945
-
2946
- Parameters
2947
- ----------
2948
- effected_vocals_track : StrPath, optional
2949
- The path to an effected vocals track.
2950
- song_dir : StrPath, optional
2951
- The path to a song directory.
2952
- model_name : str, optional
2953
- The name of a voice model.
2954
-
2955
- Returns
2956
- -------
2957
- str
2958
- The song cover name
2959
-
2960
- """
2961
- song_name = "Unknown"
2962
- if song_dir and (song_path := _get_input_audio_path(song_dir)):
2963
- song_name = song_path.stem.removeprefix("00_")
2964
- model_name = model_name or _get_model_name(effected_vocals_track, song_dir)
2965
-
2966
- return f"{song_name} ({model_name} Ver)"
2967
-
2968
-
2969
- def _get_youtube_id(url: str, ignore_playlist: bool = True) -> str:
2970
- """
2971
- Get the id of a YouTube video or playlist.
2972
-
2973
- Parameters
2974
- ----------
2975
- url : str
2976
- URL which points to a YouTube video or playlist.
2977
- ignore_playlist : bool, default=True
2978
- Whether to get the id of the first video in a playlist or the
2979
- playlist id itself.
2980
-
2981
- Returns
2982
- -------
2983
- str
2984
- The id of a YouTube video or playlist.
2985
-
2986
- Raises
2987
- ------
2988
- YoutubeUrlError
2989
- If the provided URL does not point to a YouTube video
2990
- or playlist.
2991
-
2992
- """
2993
- yt_id = None
2994
- validate_url(url)
2995
- query = urlparse(url)
2996
- if query.hostname == "youtu.be":
2997
- yt_id = query.query[2:] if query.path[1:] == "watch" else query.path[1:]
2998
-
2999
- elif query.hostname in {"www.youtube.com", "youtube.com", "music.youtube.com"}:
3000
- if not ignore_playlist:
3001
- with suppress(KeyError):
3002
- yt_id = parse_qs(query.query)["list"][0]
3003
- elif query.path == "/watch":
3004
- yt_id = parse_qs(query.query)["v"][0]
3005
- elif query.path[:7] == "/watch/":
3006
- yt_id = query.path.split("/")[1]
3007
- elif query.path[:7] == "/embed/" or query.path[:3] == "/v/":
3008
- yt_id = query.path.split("/")[2]
3009
- if yt_id is None:
3010
- raise YoutubeUrlError(url=url, playlist=True)
3011
-
3012
- return yt_id
3013
-
3014
-
3015
- def init_song_dir(source: str) -> tuple[Path, SongSourceType]:
3016
- """
3017
- Initialize a directory for a song provided by a given source.
3018
-
3019
-
3020
- The song directory is initialized as follows:
3021
-
3022
- * If the source is a YouTube URL, the id of the video which
3023
- that URL points to is extracted. A new song directory with the name
3024
- of that id is then created, if it does not already exist.
3025
- * If the source is a path to a local audio file, the hash of
3026
- that audio file is extracted. A new song directory with the name of
3027
- that hash is then created, if it does not already exist.
3028
- * if the source is a path to an existing song directory, then
3029
- that song directory is used as is.
3030
-
3031
- Parameters
3032
- ----------
3033
- source : str
3034
- The source providing the song to initialize a directory for.
3035
-
3036
- Returns
3037
- -------
3038
- song_dir : Path
3039
- The path to the initialized song directory.
3040
- source_type : SongSourceType
3041
- The type of source provided.
3042
-
3043
- Raises
3044
- ------
3045
- NotProvidedError
3046
- If no source is provided.
3047
- InvalidLocationError
3048
- If a provided path points to a directory that is not located in
3049
- the root of the intermediate audio base directory.
3050
- NotFoundError
3051
- If the provided source is a path to a file that does not exist.
3052
-
3053
- """
3054
- if not source:
3055
- raise NotProvidedError(entity=Entity.SOURCE, ui_msg=UIMessage.NO_AUDIO_SOURCE)
3056
- source_path = Path(source)
3057
-
3058
- # if source is a path to an existing song directory
3059
- if source_path.is_dir():
3060
- if source_path.parent != INTERMEDIATE_AUDIO_BASE_DIR:
3061
- raise InvalidLocationError(
3062
- entity=Entity.DIRECTORY,
3063
- location=Location.INTERMEDIATE_AUDIO_ROOT,
3064
- path=source_path,
3065
- )
3066
- source_type = SongSourceType.SONG_DIR
3067
- return source_path, source_type
3068
-
3069
- # if source is a URL
3070
- if urlparse(source).scheme == "https":
3071
- source_type = SongSourceType.URL
3072
- song_id = _get_youtube_id(source)
3073
-
3074
- # if source is a path to a local audio file
3075
- elif source_path.is_file():
3076
- source_type = SongSourceType.FILE
3077
- song_id = get_file_hash(source_path)
3078
- else:
3079
- raise NotFoundError(entity=Entity.FILE, location=source_path)
3080
-
3081
- song_dir_path = INTERMEDIATE_AUDIO_BASE_DIR / song_id
3082
-
3083
- song_dir_path.mkdir(parents=True, exist_ok=True)
3084
-
3085
- return song_dir_path, source_type
3086
-
3087
-
3088
- def _get_youtube_audio(
3089
- url: str,
3090
- directory: StrPath,
3091
- cookiefile: StrPath | None = None,
3092
- ) -> Path:
3093
- """
3094
- Download audio from a YouTube video.
3095
-
3096
- Parameters
3097
- ----------
3098
- url : str
3099
- URL which points to a YouTube video.
3100
- directory : StrPath
3101
- The directory to save the downloaded audio file to.
3102
- cookiefile : StrPath
3103
- The path to a file containing cookies to use when downloading
3104
- audio from Youtube.
3105
-
3106
- Returns
3107
- -------
3108
- Path
3109
- The path to the downloaded audio file.
3110
-
3111
- Raises
3112
- ------
3113
- YoutubeUrlError
3114
- If the provided URL does not point to a YouTube video.
3115
-
3116
- """
3117
- static_ffmpeg.add_paths(weak=True)
3118
- validate_url(url)
3119
- outtmpl = str(Path(directory, "00_%(title)s.%(ext)s"))
3120
- ydl_opts = {
3121
- "quiet": True,
3122
- "format": "bestaudio/best",
3123
- "cookiefile": cookiefile,
3124
- "outtmpl": outtmpl,
3125
- "postprocessors": [
3126
- {
3127
- "key": "FFmpegExtractAudio",
3128
- "preferredcodec": "wav",
3129
- "preferredquality": 0,
3130
- },
3131
- ],
3132
- "js_runtimes": {
3133
- "node": {"path": str(NODE_PATH)},
3134
- },
3135
- }
3136
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
3137
- result = ydl.extract_info(url, download=True)
3138
- if not result:
3139
- raise YoutubeUrlError(url, playlist=False)
3140
- file = ydl.prepare_filename(result)
3141
-
3142
- return Path(file).with_suffix(".wav")
3143
-
3144
-
3145
- def retrieve_song(source: str, cookiefile: StrPath | None = None) -> tuple[Path, Path]:
3146
- """
3147
- Retrieve a song from a source that can either be a YouTube URL, a
3148
- local audio file or a song directory.
3149
-
3150
- Parameters
3151
- ----------
3152
- source : str
3153
- A Youtube URL, the path to a local audio file or the path to a
3154
- song directory.
3155
- cookiefile: StrPath, optional
3156
- The path to a file containing cookies to use when downloading
3157
- audio from Youtube.
3158
-
3159
- Returns
3160
- -------
3161
- song : Path
3162
- The path to the retrieved song.
3163
- song_dir : Path
3164
- The path to the song directory containing the retrieved song.
3165
-
3166
- Raises
3167
- ------
3168
- NotProvidedError
3169
- If no source is provided.
3170
-
3171
- """
3172
- if not source:
3173
- raise NotProvidedError(entity=Entity.SOURCE, ui_msg=UIMessage.NO_AUDIO_SOURCE)
3174
-
3175
- song_dir_path, source_type = init_song_dir(source)
3176
- song_path = _get_input_audio_path(song_dir_path)
3177
-
3178
- if not song_path:
3179
- if source_type == SongSourceType.URL:
3180
- song_url = source.split("&", maxsplit=1)[0]
3181
- song_path = _get_youtube_audio(song_url, song_dir_path, cookiefile)
3182
-
3183
- else:
3184
- source_path = Path(source)
3185
- song_name = f"00_{source_path.name}"
3186
- song_path = song_dir_path / song_name
3187
- shutil.copyfile(source_path, song_path)
3188
-
3189
- return song_path, song_dir_path
3190
-
3191
-
3192
- def _add_effects(
3193
- audio_track: StrPath,
3194
- output_file: StrPath,
3195
- room_size: float = 0.15,
3196
- wet_level: float = 0.2,
3197
- dry_level: float = 0.8,
3198
- damping: float = 0.7,
3199
- ) -> None:
3200
- """
3201
- Add high-pass filter, compressor and reverb effects to an audio
3202
- track.
3203
-
3204
- Parameters
3205
- ----------
3206
- audio_track : StrPath
3207
- The path to the audio track to add effects to.
3208
- output_file : StrPath
3209
- The path to the file to save the effected audio track to.
3210
- room_size : float, default=0.15
3211
- The room size of the reverb effect.
3212
- wet_level : float, default=0.2
3213
- The wetness level of the reverb effect.
3214
- dry_level : float, default=0.8
3215
- The dryness level of the reverb effect.
3216
- damping : float, default=0.7
3217
- The damping of the reverb effect.
3218
-
3219
- """
3220
- board = pedalboard.Pedalboard(
3221
- [
3222
- pedalboard.HighpassFilter(),
3223
- pedalboard.Compressor(ratio=4, threshold_db=-15),
3224
- pedalboard.Reverb(
3225
- room_size=room_size,
3226
- dry_level=dry_level,
3227
- wet_level=wet_level,
3228
- damping=damping,
3229
- ),
3230
- ],
3231
- )
3232
-
3233
- with (
3234
- pedalboard.io.AudioFile(str(audio_track)) as f,
3235
- pedalboard.io.AudioFile(
3236
- str(output_file),
3237
- "w",
3238
- f.samplerate,
3239
- f.num_channels,
3240
- ) as o,
3241
- ):
3242
- # Read one second of audio at a time, until the file is empty:
3243
- while f.tell() < f.frames:
3244
- chunk = f.read(int(f.samplerate))
3245
- effected = board(chunk, f.samplerate, reset=False)
3246
- o.write(effected)
3247
-
3248
-
3249
-
3250
- def postprocess(
3251
- vocals_track: StrPath,
3252
- song_dir: StrPath,
3253
- room_size: float = 0.15,
3254
- wet_level: float = 0.2,
3255
- dry_level: float = 0.8,
3256
- damping: float = 0.7,
3257
- ) -> Path:
3258
- """
3259
- Apply high-pass filter, compressor and reverb effects to a vocals
3260
- track.
3261
-
3262
- Parameters
3263
- ----------
3264
- vocals_track : StrPath
3265
- The path to the vocals track to add effects to.
3266
- song_dir : StrPath
3267
- The path to the song directory where the effected vocals track
3268
- will be saved.
3269
- room_size : float, default=0.15
3270
- The room size of the reverb effect.
3271
- wet_level : float, default=0.2
3272
- The wetness level of the reverb effect.
3273
- dry_level : float, default=0.8
3274
- The dryness level of the reverb effect.
3275
- damping : float, default=0.7
3276
- The damping of the reverb effect.
3277
-
3278
- Returns
3279
- -------
3280
- Path
3281
- The path to the effected vocals track.
3282
-
3283
- """
3284
- vocals_path = validate_audio_file_exists(vocals_track, Entity.VOCALS_TRACK)
3285
- song_dir_path = validate_audio_dir_exists(song_dir, Entity.SONG_DIR)
3286
-
3287
- vocals_path = wavify(
3288
- vocals_path,
3289
- song_dir_path,
3290
- "30_Input",
3291
- accepted_formats={AudioExt.M4A, AudioExt.AAC},
3292
- )
3293
-
3294
- args_dict = EffectedVocalsMetaData(
3295
- vocals_track=FileMetaData(
3296
- name=vocals_path.name,
3297
- hash_id=get_file_hash(vocals_path),
3298
- ),
3299
- room_size=room_size,
3300
- wet_level=wet_level,
3301
- dry_level=dry_level,
3302
- damping=damping,
3303
- ).model_dump()
3304
-
3305
- paths = [
3306
- get_unique_base_path(
3307
- song_dir_path,
3308
- "31_Vocals_Effected",
3309
- args_dict,
3310
- ).with_suffix(suffix)
3311
- for suffix in [".wav", ".json"]
3312
- ]
3313
-
3314
- effected_vocals_path, effected_vocals_json_path = paths
3315
-
3316
- if not all(path.exists() for path in paths):
3317
- _add_effects(
3318
- vocals_path,
3319
- effected_vocals_path,
3320
- room_size,
3321
- wet_level,
3322
- dry_level,
3323
- damping,
3324
- )
3325
- json_dump(args_dict, effected_vocals_json_path)
3326
- return effected_vocals_path
3327
-
3328
-
3329
- def _pitch_shift(audio_track: StrPath, output_file: StrPath, n_semi_tones: int) -> None:
3330
- """
3331
- Pitch-shift an audio track.
3332
-
3333
- Parameters
3334
- ----------
3335
- audio_track : StrPath
3336
- The path to the audio track to pitch-shift.
3337
- output_file : StrPath
3338
- The path to the file to save the pitch-shifted audio track to.
3339
- n_semi_tones : int
3340
- The number of semi-tones to pitch-shift the audio track by.
3341
-
3342
- """
3343
- static_sox.add_paths(weak=True)
3344
- # NOTE The lazy_import function does not work with sox
3345
- # so we import it here manually
3346
- import sox # noqa: PLC0415
3347
-
3348
- y, sr = sf.read(audio_track)
3349
- tfm = sox.Transformer()
3350
- tfm.pitch(n_semi_tones)
3351
- y_shifted = tfm.build_array(input_array=y, sample_rate_in=sr)
3352
- sf.write(output_file, y_shifted, sr)
3353
-
3354
-
3355
- def pitch_shift(audio_track: StrPath, song_dir: StrPath, n_semitones: int) -> Path:
3356
- """
3357
- Pitch shift an audio track by a given number of semi-tones.
3358
-
3359
- Parameters
3360
- ----------
3361
- audio_track : StrPath
3362
- The path to the audio track to pitch shift.
3363
- song_dir : StrPath
3364
- The path to the song directory where the pitch-shifted audio
3365
- track will be saved.
3366
- n_semitones : int
3367
- The number of semi-tones to pitch-shift the audio track by.
3368
-
3369
- Returns
3370
- -------
3371
- Path
3372
- The path to the pitch-shifted audio track.
3373
-
3374
- """
3375
- audio_path = validate_audio_file_exists(audio_track, Entity.AUDIO_TRACK)
3376
- song_dir_path = validate_audio_dir_exists(song_dir, Entity.SONG_DIR)
3377
-
3378
- audio_path = wavify(
3379
- audio_path,
3380
- song_dir_path,
3381
- "40_Input",
3382
- accepted_formats={AudioExt.M4A, AudioExt.AAC},
3383
- )
3384
-
3385
- shifted_audio_path = audio_path
3386
-
3387
- if n_semitones != 0:
3388
- args_dict = PitchShiftMetaData(
3389
- audio_track=FileMetaData(
3390
- name=audio_path.name,
3391
- hash_id=get_file_hash(audio_path),
3392
- ),
3393
- n_semitones=n_semitones,
3394
- ).model_dump()
3395
-
3396
- paths = [
3397
- get_unique_base_path(
3398
- song_dir_path,
3399
- "41_Audio_Shifted",
3400
- args_dict,
3401
- ).with_suffix(suffix)
3402
- for suffix in [".wav", ".json"]
3403
- ]
3404
-
3405
- shifted_audio_path, shifted_audio_json_path = paths
3406
-
3407
- if not all(path.exists() for path in paths):
3408
- _pitch_shift(audio_path, shifted_audio_path, n_semitones)
3409
- json_dump(args_dict, shifted_audio_json_path)
3410
-
3411
- return shifted_audio_path
3412
-
3413
-
3414
- def mix_song(
3415
- audio_track_gain_pairs: Sequence[tuple[StrPath, int]],
3416
- song_dir: StrPath,
3417
- output_sr: int = 44100,
3418
- output_format: AudioExt = AudioExt.MP3,
3419
- output_name: str | None = None,
3420
- ) -> Path:
3421
- """
3422
- Mix multiple audio tracks to create a song.
3423
-
3424
- Parameters
3425
- ----------
3426
- audio_track_gain_pairs : Sequence[tuple[StrPath, int]]
3427
- A sequence of pairs each containing the path to an audio track
3428
- and the gain to apply to it.
3429
- song_dir : StrPath
3430
- The path to the song directory where the song will be saved.
3431
- output_sr : int, default=44100
3432
- The sample rate of the mixed song.
3433
- output_format : AudioExt, default=AudioExt.MP3
3434
- The audio format of the mixed song.
3435
- output_name : str, optional
3436
- The name of the mixed song.
3437
-
3438
- Returns
3439
- -------
3440
- Path
3441
- The path to the song cover.
3442
-
3443
- """
3444
- mix_path = mix_audio(
3445
- audio_track_gain_pairs,
3446
- song_dir,
3447
- output_sr,
3448
- output_format,
3449
- content_type=MixedAudioType.SONG,
3450
- )
3451
- output_name = output_name or get_song_cover_name(
3452
- audio_track_gain_pairs[0][0],
3453
- song_dir,
3454
- None,
3455
- )
3456
- song_path = OUTPUT_AUDIO_DIR / f"{output_name}.{output_format}"
3457
- return copy_file_safe(mix_path, song_path)
3458
-
3459
- def run_pipeline2(
3460
- source: str,
3461
- model_name: str,
3462
- n_octaves: int = 0,
3463
- n_semitones: int = 0,
3464
- f0_methods: Sequence[F0Method] | None = None,
3465
- index_rate: float = 0.3,
3466
- rms_mix_rate: float = 1.0,
3467
- protect_rate: float = 0.33,
3468
- hop_length: int = 128,
3469
- split_vocals: bool = False,
3470
- autotune_vocals: bool = False,
3471
- autotune_strength: float = 1.0,
3472
- clean_vocals: bool = False,
3473
- clean_strength: float = 0.7,
3474
- embedder_model: EmbedderModel = EmbedderModel.CONTENTVEC,
3475
- custom_embedder_model: str | None = None,
3476
- sid: int = 0,
3477
- room_size: float = 0.15,
3478
- wet_level: float = 0.2,
3479
- dry_level: float = 0.8,
3480
- damping: float = 0.7,
3481
- main_gain: int = 0,
3482
- inst_gain: int = 0,
3483
- backup_gain: int = 0,
3484
- output_sr: int = 44100,
3485
- output_format: AudioExt = AudioExt.MP3,
3486
- output_name: str | None = None,
3487
- cookiefile: StrPath | None = None,
3488
- progress_bar: gr.Progress | None = None,
3489
- ) -> tuple[Path, ...]:
3490
- validate_model(model_name, Entity.VOICE_MODEL)
3491
- if embedder_model == EmbedderModel.CUSTOM:
3492
- validate_model(custom_embedder_model, Entity.CUSTOM_EMBEDDER_MODEL)
3493
- display_progress("[~] Retrieving song...", 0 / 9, progress_bar)
3494
- song, song_dir = retrieve_song(source, cookiefile=cookiefile)
3495
- display_progress("[~] Separating vocals from instrumentals...", 1 / 9, progress_bar)
3496
- vocals_track, instrumentals_track = separate_audio(
3497
- song,
3498
- song_dir,
3499
- SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
3500
- SegmentSize.SEG_2048,
3501
- )
3502
- display_progress(
3503
- "[~] Separating main vocals from backup vocals...",
3504
- 2 / 9,
3505
- progress_bar,
3506
- )
3507
- backup_vocals_track, main_vocals_track = separate_audio(
3508
- vocals_track,
3509
- song_dir,
3510
- SeparationModel.UVR_MDX_NET_KARA_2,
3511
- SegmentSize.SEG_2048,
3512
- )
3513
- display_progress("[~] De-reverbing vocals...", 3 / 9, progress_bar)
3514
- reverb_track, vocals_dereverb_track = separate_audio(
3515
- main_vocals_track,
3516
- song_dir,
3517
- SeparationModel.UVR_DeEcho_DeReverb,
3518
- SegmentSize.SEG_2048,
3519
- )
3520
- display_progress("[~] De-noising vocals...", 4 / 9, progress_bar)
3521
- noise_track, clean_track = separate_audio(
3522
- vocals_dereverb_track,
3523
- song_dir,
3524
- SegmentSize.SEG_2048,
3525
- )
3526
- display_progress("[~] Pitch-shifting vocals...", 5 / 9, progress_bar)
3527
- shifted_vocals_track = pitch_shift(
3528
- clean_track,
3529
- song_dir,
3530
- n_semitones,
3531
- )
3532
- display_progress("[~] Converting vocals...", 6 / 9, progress_bar)
3533
- converted_vocals_track = convert(
3534
- audio_track=shifted_vocals_track,
3535
- directory=song_dir,
3536
- model_name=model_name,
3537
- n_octaves=n_octaves,
3538
- n_semitones=0,
3539
- f0_methods=f0_methods,
3540
- index_rate=index_rate,
3541
- rms_mix_rate=rms_mix_rate,
3542
- protect_rate=protect_rate,
3543
- hop_length=hop_length,
3544
- split_audio=split_vocals,
3545
- autotune_audio=autotune_vocals,
3546
- autotune_strength=autotune_strength,
3547
- clean_audio=clean_vocals,
3548
- clean_strength=clean_strength,
3549
- embedder_model=embedder_model,
3550
- custom_embedder_model=custom_embedder_model,
3551
- sid=sid,
3552
- content_type=RVCContentType.VOCALS,
3553
- )
3554
- display_progress("[~] Post-processing vocals...", 7 / 9, progress_bar)
3555
- effected_vocals_track = postprocess(
3556
- converted_vocals_track,
3557
- song_dir,
3558
- room_size,
3559
- wet_level,
3560
- dry_level,
3561
- damping,
3562
- )
3563
-
3564
- song_cover = mix_song(
3565
- [
3566
- (effected_vocals_track, main_gain),
3567
- (instrumentals_track, inst_gain),
3568
- (backup_vocals_track, backup_gain),
3569
- ],
3570
- song_dir,
3571
- output_sr,
3572
- output_format,
3573
- output_name,
3574
- )
3575
- return (
3576
- song_cover,
3577
- song,
3578
- vocals_track,
3579
- instrumentals_track,
3580
- main_vocals_track,
3581
- backup_vocals_track,
3582
- vocals_dereverb_track,
3583
- reverb_track,
3584
- converted_vocals_track,
3585
- effected_vocals_track,
3586
- instrumentals_track,
3587
- backup_vocals_track,
3588
- )
3589
-
3590
-
3591
- def run_pipeline(
3592
  source: str,
3593
  model_name: str,
3594
  n_octaves: int = 0,
@@ -3709,15 +2780,17 @@ def run_pipeline(
3709
  validate_model(custom_embedder_model, Entity.CUSTOM_EMBEDDER_MODEL)
3710
  display_progress("[~] Retrieving song...", 0 / 9, progress_bar)
3711
  song, song_dir = retrieve_song(source, cookiefile=cookiefile)
3712
- display_progress("[~] Separating vocals from instrumentals...", 1 / 9, progress_bar)
 
 
3713
  vocals_track, instrumentals_track = separate_audio(
3714
  song,
3715
  song_dir,
3716
  SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
3717
- SegmentSize.SEG_1024,
3718
  )
3719
  display_progress(
3720
- "[~] Separating main vocals from backup vocals...",
3721
  2 / 9,
3722
  progress_bar,
3723
  )
@@ -3727,26 +2800,28 @@ def run_pipeline(
3727
  SeparationModel.UVR_MDX_NET_KARA_2,
3728
  SegmentSize.SEG_2048,
3729
  )
3730
- display_progress("[~] De-reverbing vocals...", 3 / 9, progress_bar)
 
 
 
 
 
 
 
3731
  reverb_track, vocals_dereverb_track = separate_audio(
3732
  main_vocals_track,
3733
  song_dir,
3734
  SeparationModel.UVR_DeEcho_DeReverb,
3735
  SegmentSize.SEG_2048,
3736
  )
3737
- display_progress("[~] Pitch-shifting vocals...", 4 / 9, progress_bar)
3738
- shifted_instrumentals_track = pitch_shift(
3739
- vocals_dereverb_track,
3740
- song_dir,
3741
- n_semitones,
3742
- )
3743
- display_progress("[~] Converting vocals...", 4 / 9, progress_bar)
3744
  converted_vocals_track = convert(
3745
- audio_track=shifted_instrumentals_track,
3746
  directory=song_dir,
3747
  model_name=model_name,
3748
  n_octaves=n_octaves,
3749
- n_semitones=0,
3750
  f0_methods=f0_methods,
3751
  index_rate=index_rate,
3752
  rms_mix_rate=rms_mix_rate,
@@ -3762,7 +2837,7 @@ def run_pipeline(
3762
  sid=sid,
3763
  content_type=RVCContentType.VOCALS,
3764
  )
3765
- display_progress("[~] Post-processing vocals...", 5 / 9, progress_bar)
3766
  effected_vocals_track = postprocess(
3767
  converted_vocals_track,
3768
  song_dir,
@@ -3771,13 +2846,28 @@ def run_pipeline(
3771
  dry_level,
3772
  damping,
3773
  )
3774
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3775
 
3776
  song_cover = mix_song(
3777
  [
3778
  (effected_vocals_track, main_gain),
3779
- (instrumentals_track, inst_gain),
3780
- (backup_vocals_track, backup_gain),
3781
  ],
3782
  song_dir,
3783
  output_sr,
@@ -3795,8 +2885,8 @@ def run_pipeline(
3795
  reverb_track,
3796
  converted_vocals_track,
3797
  effected_vocals_track,
3798
- instrumentals_track,
3799
- backup_vocals_track,
3800
  )
3801
 
3802
 
 
1
  from __future__ import annotations
2
 
 
 
3
  import os
 
4
  import sys
 
 
5
  from enum import IntEnum, StrEnum, auto
6
+ from functools import cached_property
7
  from pathlib import Path
8
  from typing import TYPE_CHECKING, Annotated, Any, TypedDict
 
9
 
 
10
  import gradio as gr
 
 
 
 
 
 
11
  import typer
 
12
  from huggingface_hub import snapshot_download
13
+ from pydantic import BaseModel
14
+ from ultimate_rvc.common import AUDIO_DIR, MODELS_DIR, TEMP_DIR
15
+ from ultimate_rvc.core.generate.song_cover import get_named_song_dirs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  from ultimate_rvc.core.generate.speech import get_edge_tts_voice_names
17
+ from ultimate_rvc.core.manage.audio import (
18
+ get_audio_datasets,
19
+ get_named_audio_datasets,
20
+ get_saved_output_audio,
21
+ get_saved_speech_audio,
22
+ )
 
 
 
 
 
 
 
 
 
23
  from ultimate_rvc.core.manage.config import get_config_names, load_config
24
+ from ultimate_rvc.core.manage.models import (
25
+ get_custom_embedder_model_names,
26
+ get_custom_pretrained_model_names,
27
+ get_training_model_names,
28
+ get_voice_model_names,
29
+ )
30
+ from ultimate_rvc.web.common import (
31
+ initialize_dropdowns,
32
+ exception_harness,
33
+ render_transfer_component,
34
+ setup_transfer_event,
35
+ toggle_visibility,
36
+ toggle_visible_component,
37
+ update_dropdowns,
38
+ update_output_name,
39
+ update_value,
40
+ )
41
+
42
+ from ultimate_rvc.web.config.component import (
43
+ AnyComponentConfig,
44
+ AudioConfig,
45
+ CheckboxConfig,
46
+ ComponentConfig,
47
+ DropdownConfig,
48
+ NumberConfig,
49
+ RadioConfig,
50
+ SliderConfig,
51
+ TextboxConfig,
52
+ )
53
+ from ultimate_rvc.web.config.tab import (
54
+ SongGenerationConfig,
55
+ SpeechGenerationConfig,
56
+ TrainingConfig,
57
+ )
58
+ from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import (
59
+ render as render_speech_multi_step_tab,
60
+ )
61
+ from ultimate_rvc.web.tabs.generate.speech.one_click_generation import (
62
+ render as render_speech_one_click_tab,
63
+ )
64
  from ultimate_rvc.web.tabs.manage.audio import render as render_audio_tab
65
  from ultimate_rvc.web.tabs.manage.models import render as render_models_tab
66
  from ultimate_rvc.web.tabs.manage.settings import render as render_settings_tab
 
 
 
 
67
 
68
+ if TYPE_CHECKING:
69
+ import gradio as gr
70
+ from typing import TYPE_CHECKING
71
 
72
+ from functools import partial
73
 
74
+ import gradio as gr
75
+ from ultimate_rvc.core.common import (
76
+ INTERMEDIATE_AUDIO_BASE_DIR,
77
+ OUTPUT_AUDIO_DIR,
78
+ copy_file_safe,
79
+ display_progress,
80
+ get_file_hash,
81
+ json_dump,
82
+ json_load,
83
+ validate_model,
84
+ validate_url,
85
+ )
86
+ from ultimate_rvc.core.exceptions import (
87
+ Entity,
88
+ InvalidLocationError,
89
+ Location,
90
+ NotFoundError,
91
+ NotProvidedError,
92
+ UIMessage,
93
+ YoutubeUrlError,
94
+ )
95
+ from ultimate_rvc.core.generate.common import (
96
+ convert,
97
+ get_unique_base_path,
98
+ mix_audio,
99
+ validate_audio_dir_exists,
100
+ validate_audio_file_exists,
101
+ wavify,
102
+ )
103
+ from ultimate_rvc.core.generate.song_cover import (
104
+ get_named_song_dirs,
105
+ get_song_cover_name,
106
+ mix_song,
107
+ pitch_shift,
108
+ postprocess,
109
+ retrieve_song,
110
+ separate_audio,
111
+ get_named_song_dirs,
112
+ get_song_cover_name,
113
+ run_pipeline,
114
+ )
115
+ from ultimate_rvc.core.generate.typing_extra import (
116
+ EffectedVocalsMetaData,
117
+ FileMetaData,
118
+ MixedAudioType,
119
+ PitchShiftMetaData,
120
+ RVCAudioMetaData,
121
+ SeparatedAudioMetaData,
122
+ )
123
+ from ultimate_rvc.core.manage.audio import get_saved_output_audio
124
+ from ultimate_rvc.typing_extra import EmbedderModel
125
+ from ultimate_rvc.web.common import (
126
+ PROGRESS_BAR,
127
+ exception_harness,
128
+ toggle_intermediate_audio,
129
+ toggle_visibility,
130
+ toggle_visible_component,
131
+ update_dropdowns,
132
+ update_output_name,
133
+ update_value,
134
+ )
135
+ from ultimate_rvc.web.typing_extra import ConcurrencyId
136
 
137
  type StrPath = str | PathLike[str]
138
 
 
 
 
 
 
 
 
139
  type Json = Mapping[str, Json] | Sequence[Json] | str | int | float | bool | None
 
140
 
141
 
142
  class SegmentSize(IntEnum):
 
376
  """Enumeration of audio embedding models."""
377
 
378
  CONTENTVEC = "contentvec"
379
+ CRUSTY = "Crusty"
380
  CUSTOM = "custom"
381
 
382
 
 
399
  kuielab_b_bass = "kuielab_b_bass.onnx"
400
  kuielab_a_other = "kuielab_a_other.onnx"
401
  kuielab_b_other = "kuielab_b_other.onnx"
 
 
 
402
  MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"
 
403
  UVR_DeNoise = "UVR-DeNoise.pth"
 
404
  UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
405
 
406
 
 
1780
  )
1781
  source: TextboxConfig = TextboxConfig(
1782
  label="Source",
1783
+ info="Local (to the server) filepath or http link. Youtube probably wont work but most other sites still do.",
1784
  value=None,
1785
  exclude_value=True,
1786
  )
 
2497
  )
2498
 
2499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2500
  def _render_step_5(
2501
  total_config: TotalConfig,
2502
  tab_config: MultiStepSongGenerationConfig,
 
2659
  ]
2660
 
2661
 
2662
+ def run_newpipeline(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2663
  source: str,
2664
  model_name: str,
2665
  n_octaves: int = 0,
 
2780
  validate_model(custom_embedder_model, Entity.CUSTOM_EMBEDDER_MODEL)
2781
  display_progress("[~] Retrieving song...", 0 / 9, progress_bar)
2782
  song, song_dir = retrieve_song(source, cookiefile=cookiefile)
2783
+ display_progress(
2784
+ "[~] newpipeline: Separating vocals from instrumentals...", 1 / 9, progress_bar
2785
+ )
2786
  vocals_track, instrumentals_track = separate_audio(
2787
  song,
2788
  song_dir,
2789
  SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
2790
+ SegmentSize.SEG_2048,
2791
  )
2792
  display_progress(
2793
+ "[~] newpipeline: Separating main vocals from backup vocals...",
2794
  2 / 9,
2795
  progress_bar,
2796
  )
 
2800
  SeparationModel.UVR_MDX_NET_KARA_2,
2801
  SegmentSize.SEG_2048,
2802
  )
2803
+ display_progress("[~] newpipeline: De-noising vocals...", 3 / 9, progress_bar)
2804
+ noise_track, clean_track = separate_audio(
2805
+ clean_track,
2806
+ song_dir,
2807
+ SeparationModel.UVR_DeNoise,
2808
+ SegmentSize.SEG_2048,
2809
+ )
2810
+ display_progress("[~] newpipeline: De-reverbing vocals...", 4 / 9, progress_bar)
2811
  reverb_track, vocals_dereverb_track = separate_audio(
2812
  main_vocals_track,
2813
  song_dir,
2814
  SeparationModel.UVR_DeEcho_DeReverb,
2815
  SegmentSize.SEG_2048,
2816
  )
2817
+
2818
+ display_progress("[~] newpipeline: Converting vocals...", 5 / 9, progress_bar)
 
 
 
 
 
2819
  converted_vocals_track = convert(
2820
+ audio_track=vocals_dereverb_track,
2821
  directory=song_dir,
2822
  model_name=model_name,
2823
  n_octaves=n_octaves,
2824
+ n_semitones=n_semitones,
2825
  f0_methods=f0_methods,
2826
  index_rate=index_rate,
2827
  rms_mix_rate=rms_mix_rate,
 
2837
  sid=sid,
2838
  content_type=RVCContentType.VOCALS,
2839
  )
2840
+ display_progress("[~] newpipeline: Post-processing vocals...", 6 / 9, progress_bar)
2841
  effected_vocals_track = postprocess(
2842
  converted_vocals_track,
2843
  song_dir,
 
2846
  dry_level,
2847
  damping,
2848
  )
2849
+ display_progress(
2850
+ "[~] newpipeline: Pitch-shifting instrumentals...", 7 / 9, progress_bar
2851
+ )
2852
+ shifted_instrumentals_track = pitch_shift(
2853
+ instrumentals_track,
2854
+ song_dir,
2855
+ n_semitones,
2856
+ )
2857
+ display_progress(
2858
+ "[~] newpipeline: Pitch-shifting backup vocals...", 8 / 9, progress_bar
2859
+ )
2860
+ shifted_backup_vocals_track = pitch_shift(
2861
+ backup_vocals_track,
2862
+ song_dir,
2863
+ n_semitones,
2864
+ )
2865
 
2866
  song_cover = mix_song(
2867
  [
2868
  (effected_vocals_track, main_gain),
2869
+ (shifted_instrumentals_track, inst_gain),
2870
+ (shifted_backup_vocals_track, backup_gain),
2871
  ],
2872
  song_dir,
2873
  output_sr,
 
2885
  reverb_track,
2886
  converted_vocals_track,
2887
  effected_vocals_track,
2888
+ shifted_instrumentals_track,
2889
+ shifted_backup_vocals_track,
2890
  )
2891
 
2892