Spaces:
Running
Running
app.py
CHANGED
|
@@ -6,6 +6,7 @@ from enum import IntEnum, StrEnum, auto
|
|
| 6 |
from functools import cached_property
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import TYPE_CHECKING, Annotated, Any, TypedDict
|
|
|
|
| 9 |
import gradio as gr
|
| 10 |
import typer
|
| 11 |
from huggingface_hub import snapshot_download
|
|
@@ -135,7 +136,6 @@ from ultimate_rvc.web.typing_extra import ConcurrencyId
|
|
| 135 |
|
| 136 |
type StrPath = str | PathLike[str]
|
| 137 |
|
| 138 |
-
|
| 139 |
type Json = Mapping[str, Json] | Sequence[Json] | str | int | float | bool | None
|
| 140 |
|
| 141 |
|
|
@@ -303,14 +303,12 @@ class SpeechTransferOption(StrEnum):
|
|
| 303 |
class ComponentVisibilityKwArgs(TypedDict, total=False):
|
| 304 |
"""
|
| 305 |
Keyword arguments for setting component visibility.
|
| 306 |
-
|
| 307 |
Attributes
|
| 308 |
----------
|
| 309 |
visible : bool
|
| 310 |
Whether the component should be visible.
|
| 311 |
value : Any
|
| 312 |
The value of the component.
|
| 313 |
-
|
| 314 |
"""
|
| 315 |
|
| 316 |
visible: bool
|
|
@@ -320,14 +318,12 @@ class ComponentVisibilityKwArgs(TypedDict, total=False):
|
|
| 320 |
class UpdateDropdownKwArgs(TypedDict, total=False):
|
| 321 |
"""
|
| 322 |
Keyword arguments for updating a dropdown component.
|
| 323 |
-
|
| 324 |
Attributes
|
| 325 |
----------
|
| 326 |
choices : DropdownChoices
|
| 327 |
The updated choices for the dropdown component.
|
| 328 |
value : DropdownValue
|
| 329 |
The updated value for the dropdown component.
|
| 330 |
-
|
| 331 |
"""
|
| 332 |
|
| 333 |
choices: DropdownChoices
|
|
@@ -337,14 +333,12 @@ class UpdateDropdownKwArgs(TypedDict, total=False):
|
|
| 337 |
class TextBoxKwArgs(TypedDict, total=False):
|
| 338 |
"""
|
| 339 |
Keyword arguments for updating a textbox component.
|
| 340 |
-
|
| 341 |
Attributes
|
| 342 |
----------
|
| 343 |
value : str | None
|
| 344 |
The updated value for the textbox component.
|
| 345 |
placeholder : str | None
|
| 346 |
The updated placeholder for the textbox component.
|
| 347 |
-
|
| 348 |
"""
|
| 349 |
|
| 350 |
value: str | None
|
|
@@ -354,12 +348,10 @@ class TextBoxKwArgs(TypedDict, total=False):
|
|
| 354 |
class UpdateAudioKwArgs(TypedDict, total=False):
|
| 355 |
"""
|
| 356 |
Keyword arguments for updating an audio component.
|
| 357 |
-
|
| 358 |
Attributes
|
| 359 |
----------
|
| 360 |
value : str | None
|
| 361 |
The updated value for the audio component.
|
| 362 |
-
|
| 363 |
"""
|
| 364 |
|
| 365 |
value: str | None
|
|
@@ -434,7 +426,6 @@ class SongIntermediateAudioConfig(BaseModel):
|
|
| 434 |
"""
|
| 435 |
Configuration settings for intermediate audio components in the
|
| 436 |
one-click song generation tab.
|
| 437 |
-
|
| 438 |
Attributes
|
| 439 |
----------
|
| 440 |
song : AudioConfig
|
|
@@ -467,7 +458,6 @@ class SongIntermediateAudioConfig(BaseModel):
|
|
| 467 |
component.
|
| 468 |
all : list[gr.Audio]
|
| 469 |
List of instances of all intermediate audio components.
|
| 470 |
-
|
| 471 |
"""
|
| 472 |
|
| 473 |
song: AudioConfig = AudioConfig.intermediate(label="Song")
|
|
@@ -505,13 +495,11 @@ class SongIntermediateAudioConfig(BaseModel):
|
|
| 505 |
"""
|
| 506 |
Retrieve instances of all intermediate audio components
|
| 507 |
in the one-click song generation tab.
|
| 508 |
-
|
| 509 |
Returns
|
| 510 |
-------
|
| 511 |
list[gr.Audio]
|
| 512 |
List of instances of all intermediate audio components in
|
| 513 |
the one-click song generation tab.
|
| 514 |
-
|
| 515 |
"""
|
| 516 |
# NOTE we are using self.__annotations__ to get the fields in
|
| 517 |
# the order they are defined in the class
|
|
@@ -521,7 +509,6 @@ class SongIntermediateAudioConfig(BaseModel):
|
|
| 521 |
class OneClickSongGenerationConfig(SongGenerationConfig):
|
| 522 |
"""
|
| 523 |
Configuration settings for the one-click song generation tab.
|
| 524 |
-
|
| 525 |
Attributes
|
| 526 |
----------
|
| 527 |
n_octaves : SliderConfig
|
|
@@ -535,13 +522,11 @@ class OneClickSongGenerationConfig(SongGenerationConfig):
|
|
| 535 |
component.
|
| 536 |
intermediate_audio : SongIntermediateAudioConfig
|
| 537 |
Configuration settings for intermediate audio components.
|
| 538 |
-
|
| 539 |
See Also
|
| 540 |
--------
|
| 541 |
SongGenerationConfig
|
| 542 |
Parent model defining common component configuration settings
|
| 543 |
for song generation tabs.
|
| 544 |
-
|
| 545 |
"""
|
| 546 |
|
| 547 |
n_octaves: SliderConfig = SliderConfig.octave_shift(
|
|
@@ -572,7 +557,6 @@ class SongInputAudioConfig(BaseModel):
|
|
| 572 |
"""
|
| 573 |
Configuration settings for input audio components in the multi-step
|
| 574 |
song generation tab.
|
| 575 |
-
|
| 576 |
Attributes
|
| 577 |
----------
|
| 578 |
audio : AudioConfig
|
|
@@ -600,7 +584,6 @@ class SongInputAudioConfig(BaseModel):
|
|
| 600 |
all : list[AudioConfig]
|
| 601 |
List of configuration settings for all input audio
|
| 602 |
components in the multi-step song generation tab.
|
| 603 |
-
|
| 604 |
"""
|
| 605 |
|
| 606 |
audio: AudioConfig = AudioConfig.input(label="Audio")
|
|
@@ -617,13 +600,11 @@ class SongInputAudioConfig(BaseModel):
|
|
| 617 |
"""
|
| 618 |
Retrieve configuration settings for all input audio components
|
| 619 |
in the multi-step song generation tab.
|
| 620 |
-
|
| 621 |
Returns
|
| 622 |
-------
|
| 623 |
list[AudioConfig]
|
| 624 |
List of configuration settings for all input audio
|
| 625 |
components in the multi-step song generation tab.
|
| 626 |
-
|
| 627 |
"""
|
| 628 |
return [getattr(self, field) for field in self.__annotations__]
|
| 629 |
|
|
@@ -632,7 +613,6 @@ class SongDirsConfig(BaseModel):
|
|
| 632 |
"""
|
| 633 |
Configuration settings for song directory components in the
|
| 634 |
multi-step song generation tab.
|
| 635 |
-
|
| 636 |
Attributes
|
| 637 |
----------
|
| 638 |
separate_audio : DropdownConfig
|
|
@@ -653,7 +633,6 @@ class SongDirsConfig(BaseModel):
|
|
| 653 |
all : list[gr.Dropdown]
|
| 654 |
List of instances of all song directory components in the
|
| 655 |
multi-step song generation tab.
|
| 656 |
-
|
| 657 |
"""
|
| 658 |
|
| 659 |
separate_audio: DropdownConfig = DropdownConfig.song_dir()
|
|
@@ -667,13 +646,11 @@ class SongDirsConfig(BaseModel):
|
|
| 667 |
"""
|
| 668 |
Retrieve instances of all song directory components in the
|
| 669 |
multi-step song generation tab.
|
| 670 |
-
|
| 671 |
Returns
|
| 672 |
-------
|
| 673 |
list[gr.Dropdown]
|
| 674 |
List of instances of all song directory components in
|
| 675 |
the multi-step song generation tab.
|
| 676 |
-
|
| 677 |
"""
|
| 678 |
return [getattr(self, field).instance for field in self.__annotations__]
|
| 679 |
|
|
@@ -681,7 +658,6 @@ class SongDirsConfig(BaseModel):
|
|
| 681 |
class MultiStepSongGenerationConfig(SongGenerationConfig):
|
| 682 |
"""
|
| 683 |
Configuration settings for multi-step song generation tab.
|
| 684 |
-
|
| 685 |
Attributes
|
| 686 |
----------
|
| 687 |
separation_model : DropdownConfig
|
|
@@ -705,13 +681,11 @@ class MultiStepSongGenerationConfig(SongGenerationConfig):
|
|
| 705 |
Configuration settings for input audio components.
|
| 706 |
song_dirs : SongDirsConfig
|
| 707 |
Configuration settings for song directory components.
|
| 708 |
-
|
| 709 |
See Also
|
| 710 |
--------
|
| 711 |
SongGenerationConfig
|
| 712 |
Parent model defining common component configuration settings
|
| 713 |
for song generation tabs.
|
| 714 |
-
|
| 715 |
"""
|
| 716 |
|
| 717 |
separation_model: DropdownConfig = DropdownConfig(
|
|
@@ -759,7 +733,6 @@ class SpeechIntermediateAudioConfig(BaseModel):
|
|
| 759 |
"""
|
| 760 |
Configuration settings for intermediate audio components in the
|
| 761 |
one-click speech generation tab.
|
| 762 |
-
|
| 763 |
Attributes
|
| 764 |
----------
|
| 765 |
speech : AudioConfig
|
|
@@ -769,7 +742,6 @@ class SpeechIntermediateAudioConfig(BaseModel):
|
|
| 769 |
all : list[gr.Audio]
|
| 770 |
List of instances of all intermediate audio components in the
|
| 771 |
speech generation tab.
|
| 772 |
-
|
| 773 |
"""
|
| 774 |
|
| 775 |
speech: AudioConfig = AudioConfig.intermediate(label="Speech")
|
|
@@ -780,13 +752,11 @@ class SpeechIntermediateAudioConfig(BaseModel):
|
|
| 780 |
"""
|
| 781 |
Retrieve instances of all intermediate audio components in the
|
| 782 |
speech generation tab.
|
| 783 |
-
|
| 784 |
Returns
|
| 785 |
-------
|
| 786 |
list[gr.Audio]
|
| 787 |
List of instances of all intermediate audio components in
|
| 788 |
the speech generation tab.
|
| 789 |
-
|
| 790 |
"""
|
| 791 |
return [getattr(self, field).instance for field in self.__annotations__]
|
| 792 |
|
|
@@ -794,7 +764,6 @@ class SpeechIntermediateAudioConfig(BaseModel):
|
|
| 794 |
class OneClickSpeechGenerationConfig(SpeechGenerationConfig):
|
| 795 |
"""
|
| 796 |
Configuration settings for one-click speech generation tab.
|
| 797 |
-
|
| 798 |
Attributes
|
| 799 |
----------
|
| 800 |
intermediate_audio : SpeechIntermediateAudioConfig
|
|
@@ -802,13 +771,11 @@ class OneClickSpeechGenerationConfig(SpeechGenerationConfig):
|
|
| 802 |
show_intermediate_audio : CheckboxConfig
|
| 803 |
Configuration settings for a show intermediate audio checkbox
|
| 804 |
component.
|
| 805 |
-
|
| 806 |
See Also
|
| 807 |
--------
|
| 808 |
SpeechGenerationConfig
|
| 809 |
Parent model defining common component configuration settings
|
| 810 |
for speech generation tabs.
|
| 811 |
-
|
| 812 |
"""
|
| 813 |
|
| 814 |
intermediate_audio: SpeechIntermediateAudioConfig = SpeechIntermediateAudioConfig()
|
|
@@ -825,18 +792,15 @@ class SpeechInputAudioConfig(BaseModel):
|
|
| 825 |
"""
|
| 826 |
Configuration settings for input audio components in the multi-step
|
| 827 |
speech generation tab.
|
| 828 |
-
|
| 829 |
Attributes
|
| 830 |
----------
|
| 831 |
speech : AudioConfig
|
| 832 |
Configuration settings for the input speech audio component.
|
| 833 |
converted_speech : AudioConfig
|
| 834 |
Configuration settings for the converted speech audio component.
|
| 835 |
-
|
| 836 |
all : list[AudioConfig]
|
| 837 |
List of configuration settings for all input audio components in
|
| 838 |
the multi-step speech generation tab.
|
| 839 |
-
|
| 840 |
"""
|
| 841 |
|
| 842 |
speech: AudioConfig = AudioConfig.input("Speech")
|
|
@@ -847,13 +811,11 @@ class SpeechInputAudioConfig(BaseModel):
|
|
| 847 |
"""
|
| 848 |
Retrieve configuration settings for all input audio components
|
| 849 |
in the multi-step speech generation tab.
|
| 850 |
-
|
| 851 |
Returns
|
| 852 |
-------
|
| 853 |
list[AudioConfig]
|
| 854 |
List of configuration settings for all input audio
|
| 855 |
components in the multi-step speech generation tab.
|
| 856 |
-
|
| 857 |
"""
|
| 858 |
return [getattr(self, field) for field in self.__annotations__]
|
| 859 |
|
|
@@ -861,18 +823,15 @@ class SpeechInputAudioConfig(BaseModel):
|
|
| 861 |
class MultiStepSpeechGenerationConfig(SpeechGenerationConfig):
|
| 862 |
"""
|
| 863 |
Configuration settings for the multi-step speech generation tab.
|
| 864 |
-
|
| 865 |
Attributes
|
| 866 |
----------
|
| 867 |
input_audio : SpeechInputAudioConfig
|
| 868 |
Configuration settings for input audio components.
|
| 869 |
-
|
| 870 |
See Also
|
| 871 |
--------
|
| 872 |
SpeechGenerationConfig
|
| 873 |
Parent model defining common component configuration settings
|
| 874 |
for speech generation tabs.
|
| 875 |
-
|
| 876 |
"""
|
| 877 |
|
| 878 |
input_audio: SpeechInputAudioConfig = SpeechInputAudioConfig()
|
|
@@ -884,9 +843,7 @@ class MultiStepTrainingConfig(TrainingConfig):
|
|
| 884 |
|
| 885 |
class ModelManagementConfig(BaseModel):
|
| 886 |
"""
|
| 887 |
-
|
| 888 |
Configuration settings for model management tab.
|
| 889 |
-
|
| 890 |
Attributes
|
| 891 |
----------
|
| 892 |
voices : DropdownConfig
|
|
@@ -903,7 +860,6 @@ class ModelManagementConfig(BaseModel):
|
|
| 903 |
component.
|
| 904 |
dummy_checkbox : CheckboxConfig
|
| 905 |
Configuration settings for a dummy checkbox component.
|
| 906 |
-
|
| 907 |
"""
|
| 908 |
|
| 909 |
voices: DropdownConfig = DropdownConfig.multi_delete(
|
|
@@ -933,7 +889,6 @@ class ModelManagementConfig(BaseModel):
|
|
| 933 |
class AudioManagementConfig(BaseModel):
|
| 934 |
"""
|
| 935 |
Configuration settings for audio management tab.
|
| 936 |
-
|
| 937 |
Attributes
|
| 938 |
----------
|
| 939 |
intermediate : DropdownConfig
|
|
@@ -950,7 +905,6 @@ class AudioManagementConfig(BaseModel):
|
|
| 950 |
component.
|
| 951 |
dummy_checkbox : CheckboxConfig
|
| 952 |
Configuration settings for a dummy checkbox component.
|
| 953 |
-
|
| 954 |
"""
|
| 955 |
|
| 956 |
intermediate: DropdownConfig = DropdownConfig.multi_delete(
|
|
@@ -983,12 +937,10 @@ class AudioManagementConfig(BaseModel):
|
|
| 983 |
class SettingsManagementConfig(BaseModel):
|
| 984 |
"""
|
| 985 |
Configuration settings for settings management tab.
|
| 986 |
-
|
| 987 |
Attributes
|
| 988 |
----------
|
| 989 |
dummy_checkbox : CheckboxConfig
|
| 990 |
Configuration settings for a dummy checkbox component.
|
| 991 |
-
|
| 992 |
"""
|
| 993 |
|
| 994 |
load_config_name: DropdownConfig = DropdownConfig(
|
|
@@ -1012,14 +964,12 @@ class SettingsManagementConfig(BaseModel):
|
|
| 1012 |
class TotalSongGenerationConfig(BaseModel):
|
| 1013 |
"""
|
| 1014 |
All configuration settings for song generation tabs.
|
| 1015 |
-
|
| 1016 |
Attributes
|
| 1017 |
----------
|
| 1018 |
one_click : OneClickSongGenerationConfig
|
| 1019 |
Configuration settings for the one-click song generation tab.
|
| 1020 |
multi_step : MultiStepSongGenerationConfig
|
| 1021 |
Configuration settings for the multi-step song generation tab.
|
| 1022 |
-
|
| 1023 |
"""
|
| 1024 |
|
| 1025 |
one_click: OneClickSongGenerationConfig = OneClickSongGenerationConfig()
|
|
@@ -1029,14 +979,12 @@ class TotalSongGenerationConfig(BaseModel):
|
|
| 1029 |
class TotalSpeechGenerationConfig(BaseModel):
|
| 1030 |
"""
|
| 1031 |
All configuration settings for speech generation tabs.
|
| 1032 |
-
|
| 1033 |
Attributes
|
| 1034 |
----------
|
| 1035 |
one_click : OneClickSpeechGenerationConfig
|
| 1036 |
Configuration settings for the one-click speech generation tab.
|
| 1037 |
multi_step : MultiStepSpeechGenerationConfig
|
| 1038 |
Configuration settings for the multi-step speech generation tab.
|
| 1039 |
-
|
| 1040 |
"""
|
| 1041 |
|
| 1042 |
one_click: OneClickSpeechGenerationConfig = OneClickSpeechGenerationConfig()
|
|
@@ -1046,12 +994,10 @@ class TotalSpeechGenerationConfig(BaseModel):
|
|
| 1046 |
class TotalTrainingConfig(BaseModel):
|
| 1047 |
"""
|
| 1048 |
All configuration settings for training tabs.
|
| 1049 |
-
|
| 1050 |
Attributes
|
| 1051 |
----------
|
| 1052 |
multi_step : MultiStepTrainingConfig
|
| 1053 |
Configuration settings for the multi-step training tab.
|
| 1054 |
-
|
| 1055 |
"""
|
| 1056 |
|
| 1057 |
multi_step: MultiStepTrainingConfig = MultiStepTrainingConfig()
|
|
@@ -1060,7 +1006,6 @@ class TotalTrainingConfig(BaseModel):
|
|
| 1060 |
class TotalManagementConfig(BaseModel):
|
| 1061 |
"""
|
| 1062 |
All configuration settings for management tabs.
|
| 1063 |
-
|
| 1064 |
Attributes
|
| 1065 |
----------
|
| 1066 |
model : ModelManagementConfig
|
|
@@ -1069,7 +1014,6 @@ class TotalManagementConfig(BaseModel):
|
|
| 1069 |
Configuration settings for the audio management tab.
|
| 1070 |
settings : SettingsManagementConfig
|
| 1071 |
Configuration settings for the settings management tab.
|
| 1072 |
-
|
| 1073 |
"""
|
| 1074 |
|
| 1075 |
model: ModelManagementConfig = ModelManagementConfig()
|
|
@@ -1080,7 +1024,6 @@ class TotalManagementConfig(BaseModel):
|
|
| 1080 |
class TotalConfig(BaseModel):
|
| 1081 |
"""
|
| 1082 |
All configuration settings for the Ultimate RVC app.
|
| 1083 |
-
|
| 1084 |
Attributes
|
| 1085 |
----------
|
| 1086 |
song : TotalSongGenerationConfig
|
|
@@ -1091,7 +1034,6 @@ class TotalConfig(BaseModel):
|
|
| 1091 |
Configuration settings for training tabs.
|
| 1092 |
management : TotalManagementConfig
|
| 1093 |
Configuration settings for management tabs.
|
| 1094 |
-
|
| 1095 |
"""
|
| 1096 |
|
| 1097 |
song: TotalSongGenerationConfig = TotalSongGenerationConfig()
|
|
@@ -1105,14 +1047,12 @@ class TotalConfig(BaseModel):
|
|
| 1105 |
Recursively collect those component configuration models nested
|
| 1106 |
within the current model instance, which have values that are
|
| 1107 |
not excluded.
|
| 1108 |
-
|
| 1109 |
Returns
|
| 1110 |
-------
|
| 1111 |
list[AnyComponentConfig]
|
| 1112 |
A list of component configuration models found within the
|
| 1113 |
current model instance, which have values that are not
|
| 1114 |
excluded.
|
| 1115 |
-
|
| 1116 |
"""
|
| 1117 |
|
| 1118 |
def _collect(model: BaseModel) -> list[AnyComponentConfig]:
|
|
@@ -1132,7 +1072,6 @@ class BaseTabConfig(BaseModel):
|
|
| 1132 |
"""
|
| 1133 |
Base model defining common component configuration settings for
|
| 1134 |
UI tabs.
|
| 1135 |
-
|
| 1136 |
Attributes
|
| 1137 |
----------
|
| 1138 |
embedder_model : DropdownConfig
|
|
@@ -1140,7 +1079,6 @@ class BaseTabConfig(BaseModel):
|
|
| 1140 |
custom_embedder_model : DropdownConfig
|
| 1141 |
Configuration settings for a custom embedder model dropdown
|
| 1142 |
component.
|
| 1143 |
-
|
| 1144 |
"""
|
| 1145 |
|
| 1146 |
embedder_model: DropdownConfig = DropdownConfig(
|
|
@@ -1163,7 +1101,6 @@ class BaseTabConfig(BaseModel):
|
|
| 1163 |
class TrainingConfig(BaseTabConfig):
|
| 1164 |
"""
|
| 1165 |
Common component configuration settings for training tabs.
|
| 1166 |
-
|
| 1167 |
Attributes
|
| 1168 |
----------
|
| 1169 |
dataset_type : DropdownConfig
|
|
@@ -1262,13 +1199,11 @@ class TrainingConfig(BaseTabConfig):
|
|
| 1262 |
reduce_memory_usage : CheckboxConfig
|
| 1263 |
Configuration settings for a reduce-memory-usage checkbox
|
| 1264 |
component.
|
| 1265 |
-
|
| 1266 |
See Also
|
| 1267 |
--------
|
| 1268 |
BaseTabConfig
|
| 1269 |
Parent model defining common component configuration settings
|
| 1270 |
for UI tabs.
|
| 1271 |
-
|
| 1272 |
"""
|
| 1273 |
|
| 1274 |
dataset_type: DropdownConfig = DropdownConfig(
|
|
@@ -1386,7 +1321,7 @@ class TrainingConfig(BaseTabConfig):
|
|
| 1386 |
exclude_value=True,
|
| 1387 |
)
|
| 1388 |
|
| 1389 |
-
hop_length: SliderConfig = SliderConfig(
|
| 1390 |
label="Hop length",
|
| 1391 |
info="The hop length to use for extracting pitch features.<br><br>",
|
| 1392 |
visible=False,
|
|
@@ -1578,7 +1513,6 @@ class TrainingConfig(BaseTabConfig):
|
|
| 1578 |
class GenerationConfig(BaseTabConfig):
|
| 1579 |
"""
|
| 1580 |
Common component configuration settings for generation tabs.
|
| 1581 |
-
|
| 1582 |
voice_model : DropdownConfig
|
| 1583 |
Configuration settings for a voice model dropdown component.
|
| 1584 |
f0_methods : DropdownConfig
|
|
@@ -1607,13 +1541,11 @@ class GenerationConfig(BaseTabConfig):
|
|
| 1607 |
component.
|
| 1608 |
output_name : TextboxConfig
|
| 1609 |
Configuration settings for an output name textbox component.
|
| 1610 |
-
|
| 1611 |
See Also
|
| 1612 |
--------
|
| 1613 |
BaseTabConfig
|
| 1614 |
Parent model defining common component configuration settings
|
| 1615 |
for UI tabs.
|
| 1616 |
-
|
| 1617 |
"""
|
| 1618 |
|
| 1619 |
voice_model: DropdownConfig = DropdownConfig(
|
|
@@ -1732,7 +1664,6 @@ class GenerationConfig(BaseTabConfig):
|
|
| 1732 |
class SongGenerationConfig(GenerationConfig):
|
| 1733 |
"""
|
| 1734 |
Common component configuration settings for song generation tabs.
|
| 1735 |
-
|
| 1736 |
Attributes
|
| 1737 |
----------
|
| 1738 |
source_type : DropdownConfig
|
|
@@ -1761,13 +1692,11 @@ class SongGenerationConfig(GenerationConfig):
|
|
| 1761 |
backup_gain : SliderConfig
|
| 1762 |
Configuration settings for a backup vocals gain slider
|
| 1763 |
component.
|
| 1764 |
-
|
| 1765 |
See Also
|
| 1766 |
--------
|
| 1767 |
GenerationConfig
|
| 1768 |
Parent model defining common component configuration settings
|
| 1769 |
for generation tabs.
|
| 1770 |
-
|
| 1771 |
"""
|
| 1772 |
|
| 1773 |
source_type: DropdownConfig = DropdownConfig(
|
|
@@ -1850,7 +1779,6 @@ class SongGenerationConfig(GenerationConfig):
|
|
| 1850 |
class SpeechGenerationConfig(GenerationConfig):
|
| 1851 |
"""
|
| 1852 |
Common component configuration settings for speech generation tabs.
|
| 1853 |
-
|
| 1854 |
Attributes
|
| 1855 |
----------
|
| 1856 |
source_type : DropdownConfig
|
|
@@ -1883,13 +1811,11 @@ class SpeechGenerationConfig(GenerationConfig):
|
|
| 1883 |
component.
|
| 1884 |
output_gain : GainSliderConfig
|
| 1885 |
Configuration settings for an output gain slider component.
|
| 1886 |
-
|
| 1887 |
See Also
|
| 1888 |
--------
|
| 1889 |
GenerationConfig
|
| 1890 |
Parent model defining common component configuration settings
|
| 1891 |
for generation tabs.
|
| 1892 |
-
|
| 1893 |
"""
|
| 1894 |
|
| 1895 |
source_type: DropdownConfig = DropdownConfig(
|
|
@@ -1975,7 +1901,6 @@ def render_song_cover_multi_step_tab(
|
|
| 1975 |
) -> None:
|
| 1976 |
"""
|
| 1977 |
Render "Generate song cover - multi-step generation" tab.
|
| 1978 |
-
|
| 1979 |
Parameters
|
| 1980 |
----------
|
| 1981 |
total_config : TotalConfig
|
|
@@ -1984,7 +1909,6 @@ def render_song_cover_multi_step_tab(
|
|
| 1984 |
cookiefile : str, optional
|
| 1985 |
The path to a file containing cookies to use when downloading
|
| 1986 |
audio from YouTube.
|
| 1987 |
-
|
| 1988 |
"""
|
| 1989 |
tab_config = total_config.song.multi_step
|
| 1990 |
for input_track in tab_config.input_audio.all:
|
|
@@ -2618,7 +2542,6 @@ def _pair_audio_tracks_and_gain(
|
|
| 2618 |
"""
|
| 2619 |
Pair audio tracks and gain levels stored in separate gradio
|
| 2620 |
components.
|
| 2621 |
-
|
| 2622 |
This function is meant to first be partially applied to the sequence
|
| 2623 |
of audio components and the sequence of slider components containing
|
| 2624 |
the values that should be combined. The resulting function can then
|
|
@@ -2626,7 +2549,6 @@ def _pair_audio_tracks_and_gain(
|
|
| 2626 |
those audio and slider components. The `data` parameter in that case
|
| 2627 |
will contain a mapping from each of those components to the value
|
| 2628 |
that the component stores.
|
| 2629 |
-
|
| 2630 |
Parameters
|
| 2631 |
----------
|
| 2632 |
audio_components : Sequence[gr.Audio]
|
|
@@ -2635,17 +2557,14 @@ def _pair_audio_tracks_and_gain(
|
|
| 2635 |
Gain level components to pair with audio tracks.
|
| 2636 |
data : dict[gr.Audio | gr.Slider, Any]
|
| 2637 |
Data from the audio and gain components.
|
| 2638 |
-
|
| 2639 |
Returns
|
| 2640 |
-------
|
| 2641 |
list[tuple[str, int]]
|
| 2642 |
Paired audio tracks and gain levels.
|
| 2643 |
-
|
| 2644 |
Raises
|
| 2645 |
------
|
| 2646 |
ValueError
|
| 2647 |
If the number of audio tracks and gain levels are not the same.
|
| 2648 |
-
|
| 2649 |
"""
|
| 2650 |
audio_tracks = [data[component] for component in audio_components]
|
| 2651 |
gain_levels = [data[component] for component in gain_components]
|
|
@@ -2659,19 +2578,244 @@ def _pair_audio_tracks_and_gain(
|
|
| 2659 |
]
|
| 2660 |
|
| 2661 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2662 |
def render_app() -> gr.Blocks:
|
| 2663 |
"""
|
| 2664 |
Render the Ultimate RVC web application.
|
| 2665 |
-
|
| 2666 |
Returns
|
| 2667 |
-------
|
| 2668 |
gr.Blocks
|
| 2669 |
The rendered web application.
|
| 2670 |
-
|
| 2671 |
"""
|
| 2672 |
css = """
|
| 2673 |
h1 { text-align: center; margin-top: 20px; margin-bottom: 20px; }
|
| 2674 |
-
|
| 2675 |
#generate-tab-button { font-weight: bold !important;}
|
| 2676 |
#manage-tab-button { font-weight: bold !important;}
|
| 2677 |
#audio-tab-button { font-weight: bold !important;}
|
|
@@ -2783,13 +2927,11 @@ def _init_dropdowns() -> list[gr.Dropdown]:
|
|
| 2783 |
"""
|
| 2784 |
Initialize the Ultimate RVC web application by updating the choices
|
| 2785 |
and default values of non-static dropdown components.
|
| 2786 |
-
|
| 2787 |
Returns
|
| 2788 |
-------
|
| 2789 |
list[gr.Dropdown]
|
| 2790 |
A list of gr.Dropdown components with updated choices and
|
| 2791 |
default values.
|
| 2792 |
-
|
| 2793 |
"""
|
| 2794 |
# Initialize model dropdowns
|
| 2795 |
edge_tts_models = initialize_dropdowns(
|
|
@@ -2848,7 +2990,6 @@ def render_song_cover_one_click_tab(
|
|
| 2848 |
) -> None:
|
| 2849 |
"""
|
| 2850 |
Render "Generate song covers - One-click generation" tab.
|
| 2851 |
-
|
| 2852 |
Parameters
|
| 2853 |
----------
|
| 2854 |
total_config : TotalConfig
|
|
@@ -2857,7 +2998,6 @@ def render_song_cover_one_click_tab(
|
|
| 2857 |
cookiefile : str, optional
|
| 2858 |
The path to a file containing cookies to use when downloading
|
| 2859 |
audio from YouTube.
|
| 2860 |
-
|
| 2861 |
"""
|
| 2862 |
with gr.Tab("One-click"):
|
| 2863 |
tab_config = total_config.song.one_click
|
|
@@ -3246,7 +3386,7 @@ def start_app(
|
|
| 3246 |
app.launch(
|
| 3247 |
server_name=listen_host,
|
| 3248 |
server_port=listen_port,
|
| 3249 |
-
ssr_mode=
|
| 3250 |
)
|
| 3251 |
|
| 3252 |
|
|
|
|
| 6 |
from functools import cached_property
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import TYPE_CHECKING, Annotated, Any, TypedDict
|
| 9 |
+
|
| 10 |
import gradio as gr
|
| 11 |
import typer
|
| 12 |
from huggingface_hub import snapshot_download
|
|
|
|
| 136 |
|
| 137 |
type StrPath = str | PathLike[str]
|
| 138 |
|
|
|
|
| 139 |
type Json = Mapping[str, Json] | Sequence[Json] | str | int | float | bool | None
|
| 140 |
|
| 141 |
|
|
|
|
| 303 |
class ComponentVisibilityKwArgs(TypedDict, total=False):
|
| 304 |
"""
|
| 305 |
Keyword arguments for setting component visibility.
|
|
|
|
| 306 |
Attributes
|
| 307 |
----------
|
| 308 |
visible : bool
|
| 309 |
Whether the component should be visible.
|
| 310 |
value : Any
|
| 311 |
The value of the component.
|
|
|
|
| 312 |
"""
|
| 313 |
|
| 314 |
visible: bool
|
|
|
|
| 318 |
class UpdateDropdownKwArgs(TypedDict, total=False):
|
| 319 |
"""
|
| 320 |
Keyword arguments for updating a dropdown component.
|
|
|
|
| 321 |
Attributes
|
| 322 |
----------
|
| 323 |
choices : DropdownChoices
|
| 324 |
The updated choices for the dropdown component.
|
| 325 |
value : DropdownValue
|
| 326 |
The updated value for the dropdown component.
|
|
|
|
| 327 |
"""
|
| 328 |
|
| 329 |
choices: DropdownChoices
|
|
|
|
| 333 |
class TextBoxKwArgs(TypedDict, total=False):
|
| 334 |
"""
|
| 335 |
Keyword arguments for updating a textbox component.
|
|
|
|
| 336 |
Attributes
|
| 337 |
----------
|
| 338 |
value : str | None
|
| 339 |
The updated value for the textbox component.
|
| 340 |
placeholder : str | None
|
| 341 |
The updated placeholder for the textbox component.
|
|
|
|
| 342 |
"""
|
| 343 |
|
| 344 |
value: str | None
|
|
|
|
| 348 |
class UpdateAudioKwArgs(TypedDict, total=False):
|
| 349 |
"""
|
| 350 |
Keyword arguments for updating an audio component.
|
|
|
|
| 351 |
Attributes
|
| 352 |
----------
|
| 353 |
value : str | None
|
| 354 |
The updated value for the audio component.
|
|
|
|
| 355 |
"""
|
| 356 |
|
| 357 |
value: str | None
|
|
|
|
| 426 |
"""
|
| 427 |
Configuration settings for intermediate audio components in the
|
| 428 |
one-click song generation tab.
|
|
|
|
| 429 |
Attributes
|
| 430 |
----------
|
| 431 |
song : AudioConfig
|
|
|
|
| 458 |
component.
|
| 459 |
all : list[gr.Audio]
|
| 460 |
List of instances of all intermediate audio components.
|
|
|
|
| 461 |
"""
|
| 462 |
|
| 463 |
song: AudioConfig = AudioConfig.intermediate(label="Song")
|
|
|
|
| 495 |
"""
|
| 496 |
Retrieve instances of all intermediate audio components
|
| 497 |
in the one-click song generation tab.
|
|
|
|
| 498 |
Returns
|
| 499 |
-------
|
| 500 |
list[gr.Audio]
|
| 501 |
List of instances of all intermediate audio components in
|
| 502 |
the one-click song generation tab.
|
|
|
|
| 503 |
"""
|
| 504 |
# NOTE we are using self.__annotations__ to get the fields in
|
| 505 |
# the order they are defined in the class
|
|
|
|
| 509 |
class OneClickSongGenerationConfig(SongGenerationConfig):
|
| 510 |
"""
|
| 511 |
Configuration settings for the one-click song generation tab.
|
|
|
|
| 512 |
Attributes
|
| 513 |
----------
|
| 514 |
n_octaves : SliderConfig
|
|
|
|
| 522 |
component.
|
| 523 |
intermediate_audio : SongIntermediateAudioConfig
|
| 524 |
Configuration settings for intermediate audio components.
|
|
|
|
| 525 |
See Also
|
| 526 |
--------
|
| 527 |
SongGenerationConfig
|
| 528 |
Parent model defining common component configuration settings
|
| 529 |
for song generation tabs.
|
|
|
|
| 530 |
"""
|
| 531 |
|
| 532 |
n_octaves: SliderConfig = SliderConfig.octave_shift(
|
|
|
|
| 557 |
"""
|
| 558 |
Configuration settings for input audio components in the multi-step
|
| 559 |
song generation tab.
|
|
|
|
| 560 |
Attributes
|
| 561 |
----------
|
| 562 |
audio : AudioConfig
|
|
|
|
| 584 |
all : list[AudioConfig]
|
| 585 |
List of configuration settings for all input audio
|
| 586 |
components in the multi-step song generation tab.
|
|
|
|
| 587 |
"""
|
| 588 |
|
| 589 |
audio: AudioConfig = AudioConfig.input(label="Audio")
|
|
|
|
| 600 |
"""
|
| 601 |
Retrieve configuration settings for all input audio components
|
| 602 |
in the multi-step song generation tab.
|
|
|
|
| 603 |
Returns
|
| 604 |
-------
|
| 605 |
list[AudioConfig]
|
| 606 |
List of configuration settings for all input audio
|
| 607 |
components in the multi-step song generation tab.
|
|
|
|
| 608 |
"""
|
| 609 |
return [getattr(self, field) for field in self.__annotations__]
|
| 610 |
|
|
|
|
| 613 |
"""
|
| 614 |
Configuration settings for song directory components in the
|
| 615 |
multi-step song generation tab.
|
|
|
|
| 616 |
Attributes
|
| 617 |
----------
|
| 618 |
separate_audio : DropdownConfig
|
|
|
|
| 633 |
all : list[gr.Dropdown]
|
| 634 |
List of instances of all song directory components in the
|
| 635 |
multi-step song generation tab.
|
|
|
|
| 636 |
"""
|
| 637 |
|
| 638 |
separate_audio: DropdownConfig = DropdownConfig.song_dir()
|
|
|
|
| 646 |
"""
|
| 647 |
Retrieve instances of all song directory components in the
|
| 648 |
multi-step song generation tab.
|
|
|
|
| 649 |
Returns
|
| 650 |
-------
|
| 651 |
list[gr.Dropdown]
|
| 652 |
List of instances of all song directory components in
|
| 653 |
the multi-step song generation tab.
|
|
|
|
| 654 |
"""
|
| 655 |
return [getattr(self, field).instance for field in self.__annotations__]
|
| 656 |
|
|
|
|
| 658 |
class MultiStepSongGenerationConfig(SongGenerationConfig):
|
| 659 |
"""
|
| 660 |
Configuration settings for multi-step song generation tab.
|
|
|
|
| 661 |
Attributes
|
| 662 |
----------
|
| 663 |
separation_model : DropdownConfig
|
|
|
|
| 681 |
Configuration settings for input audio components.
|
| 682 |
song_dirs : SongDirsConfig
|
| 683 |
Configuration settings for song directory components.
|
|
|
|
| 684 |
See Also
|
| 685 |
--------
|
| 686 |
SongGenerationConfig
|
| 687 |
Parent model defining common component configuration settings
|
| 688 |
for song generation tabs.
|
|
|
|
| 689 |
"""
|
| 690 |
|
| 691 |
separation_model: DropdownConfig = DropdownConfig(
|
|
|
|
| 733 |
"""
|
| 734 |
Configuration settings for intermediate audio components in the
|
| 735 |
one-click speech generation tab.
|
|
|
|
| 736 |
Attributes
|
| 737 |
----------
|
| 738 |
speech : AudioConfig
|
|
|
|
| 742 |
all : list[gr.Audio]
|
| 743 |
List of instances of all intermediate audio components in the
|
| 744 |
speech generation tab.
|
|
|
|
| 745 |
"""
|
| 746 |
|
| 747 |
speech: AudioConfig = AudioConfig.intermediate(label="Speech")
|
|
|
|
| 752 |
"""
|
| 753 |
Retrieve instances of all intermediate audio components in the
|
| 754 |
speech generation tab.
|
|
|
|
| 755 |
Returns
|
| 756 |
-------
|
| 757 |
list[gr.Audio]
|
| 758 |
List of instances of all intermediate audio components in
|
| 759 |
the speech generation tab.
|
|
|
|
| 760 |
"""
|
| 761 |
return [getattr(self, field).instance for field in self.__annotations__]
|
| 762 |
|
|
|
|
| 764 |
class OneClickSpeechGenerationConfig(SpeechGenerationConfig):
|
| 765 |
"""
|
| 766 |
Configuration settings for one-click speech generation tab.
|
|
|
|
| 767 |
Attributes
|
| 768 |
----------
|
| 769 |
intermediate_audio : SpeechIntermediateAudioConfig
|
|
|
|
| 771 |
show_intermediate_audio : CheckboxConfig
|
| 772 |
Configuration settings for a show intermediate audio checkbox
|
| 773 |
component.
|
|
|
|
| 774 |
See Also
|
| 775 |
--------
|
| 776 |
SpeechGenerationConfig
|
| 777 |
Parent model defining common component configuration settings
|
| 778 |
for speech generation tabs.
|
|
|
|
| 779 |
"""
|
| 780 |
|
| 781 |
intermediate_audio: SpeechIntermediateAudioConfig = SpeechIntermediateAudioConfig()
|
|
|
|
| 792 |
"""
|
| 793 |
Configuration settings for input audio components in the multi-step
|
| 794 |
speech generation tab.
|
|
|
|
| 795 |
Attributes
|
| 796 |
----------
|
| 797 |
speech : AudioConfig
|
| 798 |
Configuration settings for the input speech audio component.
|
| 799 |
converted_speech : AudioConfig
|
| 800 |
Configuration settings for the converted speech audio component.
|
|
|
|
| 801 |
all : list[AudioConfig]
|
| 802 |
List of configuration settings for all input audio components in
|
| 803 |
the multi-step speech generation tab.
|
|
|
|
| 804 |
"""
|
| 805 |
|
| 806 |
speech: AudioConfig = AudioConfig.input("Speech")
|
|
|
|
| 811 |
"""
|
| 812 |
Retrieve configuration settings for all input audio components
|
| 813 |
in the multi-step speech generation tab.
|
|
|
|
| 814 |
Returns
|
| 815 |
-------
|
| 816 |
list[AudioConfig]
|
| 817 |
List of configuration settings for all input audio
|
| 818 |
components in the multi-step speech generation tab.
|
|
|
|
| 819 |
"""
|
| 820 |
return [getattr(self, field) for field in self.__annotations__]
|
| 821 |
|
|
|
|
| 823 |
class MultiStepSpeechGenerationConfig(SpeechGenerationConfig):
|
| 824 |
"""
|
| 825 |
Configuration settings for the multi-step speech generation tab.
|
|
|
|
| 826 |
Attributes
|
| 827 |
----------
|
| 828 |
input_audio : SpeechInputAudioConfig
|
| 829 |
Configuration settings for input audio components.
|
|
|
|
| 830 |
See Also
|
| 831 |
--------
|
| 832 |
SpeechGenerationConfig
|
| 833 |
Parent model defining common component configuration settings
|
| 834 |
for speech generation tabs.
|
|
|
|
| 835 |
"""
|
| 836 |
|
| 837 |
input_audio: SpeechInputAudioConfig = SpeechInputAudioConfig()
|
|
|
|
| 843 |
|
| 844 |
class ModelManagementConfig(BaseModel):
|
| 845 |
"""
|
|
|
|
| 846 |
Configuration settings for model management tab.
|
|
|
|
| 847 |
Attributes
|
| 848 |
----------
|
| 849 |
voices : DropdownConfig
|
|
|
|
| 860 |
component.
|
| 861 |
dummy_checkbox : CheckboxConfig
|
| 862 |
Configuration settings for a dummy checkbox component.
|
|
|
|
| 863 |
"""
|
| 864 |
|
| 865 |
voices: DropdownConfig = DropdownConfig.multi_delete(
|
|
|
|
| 889 |
class AudioManagementConfig(BaseModel):
|
| 890 |
"""
|
| 891 |
Configuration settings for audio management tab.
|
|
|
|
| 892 |
Attributes
|
| 893 |
----------
|
| 894 |
intermediate : DropdownConfig
|
|
|
|
| 905 |
component.
|
| 906 |
dummy_checkbox : CheckboxConfig
|
| 907 |
Configuration settings for a dummy checkbox component.
|
|
|
|
| 908 |
"""
|
| 909 |
|
| 910 |
intermediate: DropdownConfig = DropdownConfig.multi_delete(
|
|
|
|
| 937 |
class SettingsManagementConfig(BaseModel):
|
| 938 |
"""
|
| 939 |
Configuration settings for settings management tab.
|
|
|
|
| 940 |
Attributes
|
| 941 |
----------
|
| 942 |
dummy_checkbox : CheckboxConfig
|
| 943 |
Configuration settings for a dummy checkbox component.
|
|
|
|
| 944 |
"""
|
| 945 |
|
| 946 |
load_config_name: DropdownConfig = DropdownConfig(
|
|
|
|
| 964 |
class TotalSongGenerationConfig(BaseModel):
|
| 965 |
"""
|
| 966 |
All configuration settings for song generation tabs.
|
|
|
|
| 967 |
Attributes
|
| 968 |
----------
|
| 969 |
one_click : OneClickSongGenerationConfig
|
| 970 |
Configuration settings for the one-click song generation tab.
|
| 971 |
multi_step : MultiStepSongGenerationConfig
|
| 972 |
Configuration settings for the multi-step song generation tab.
|
|
|
|
| 973 |
"""
|
| 974 |
|
| 975 |
one_click: OneClickSongGenerationConfig = OneClickSongGenerationConfig()
|
|
|
|
| 979 |
class TotalSpeechGenerationConfig(BaseModel):
|
| 980 |
"""
|
| 981 |
All configuration settings for speech generation tabs.
|
|
|
|
| 982 |
Attributes
|
| 983 |
----------
|
| 984 |
one_click : OneClickSpeechGenerationConfig
|
| 985 |
Configuration settings for the one-click speech generation tab.
|
| 986 |
multi_step : MultiStepSpeechGenerationConfig
|
| 987 |
Configuration settings for the multi-step speech generation tab.
|
|
|
|
| 988 |
"""
|
| 989 |
|
| 990 |
one_click: OneClickSpeechGenerationConfig = OneClickSpeechGenerationConfig()
|
|
|
|
| 994 |
class TotalTrainingConfig(BaseModel):
|
| 995 |
"""
|
| 996 |
All configuration settings for training tabs.
|
|
|
|
| 997 |
Attributes
|
| 998 |
----------
|
| 999 |
multi_step : MultiStepTrainingConfig
|
| 1000 |
Configuration settings for the multi-step training tab.
|
|
|
|
| 1001 |
"""
|
| 1002 |
|
| 1003 |
multi_step: MultiStepTrainingConfig = MultiStepTrainingConfig()
|
|
|
|
| 1006 |
class TotalManagementConfig(BaseModel):
|
| 1007 |
"""
|
| 1008 |
All configuration settings for management tabs.
|
|
|
|
| 1009 |
Attributes
|
| 1010 |
----------
|
| 1011 |
model : ModelManagementConfig
|
|
|
|
| 1014 |
Configuration settings for the audio management tab.
|
| 1015 |
settings : SettingsManagementConfig
|
| 1016 |
Configuration settings for the settings management tab.
|
|
|
|
| 1017 |
"""
|
| 1018 |
|
| 1019 |
model: ModelManagementConfig = ModelManagementConfig()
|
|
|
|
| 1024 |
class TotalConfig(BaseModel):
|
| 1025 |
"""
|
| 1026 |
All configuration settings for the Ultimate RVC app.
|
|
|
|
| 1027 |
Attributes
|
| 1028 |
----------
|
| 1029 |
song : TotalSongGenerationConfig
|
|
|
|
| 1034 |
Configuration settings for training tabs.
|
| 1035 |
management : TotalManagementConfig
|
| 1036 |
Configuration settings for management tabs.
|
|
|
|
| 1037 |
"""
|
| 1038 |
|
| 1039 |
song: TotalSongGenerationConfig = TotalSongGenerationConfig()
|
|
|
|
| 1047 |
Recursively collect those component configuration models nested
|
| 1048 |
within the current model instance, which have values that are
|
| 1049 |
not excluded.
|
|
|
|
| 1050 |
Returns
|
| 1051 |
-------
|
| 1052 |
list[AnyComponentConfig]
|
| 1053 |
A list of component configuration models found within the
|
| 1054 |
current model instance, which have values that are not
|
| 1055 |
excluded.
|
|
|
|
| 1056 |
"""
|
| 1057 |
|
| 1058 |
def _collect(model: BaseModel) -> list[AnyComponentConfig]:
|
|
|
|
| 1072 |
"""
|
| 1073 |
Base model defining common component configuration settings for
|
| 1074 |
UI tabs.
|
|
|
|
| 1075 |
Attributes
|
| 1076 |
----------
|
| 1077 |
embedder_model : DropdownConfig
|
|
|
|
| 1079 |
custom_embedder_model : DropdownConfig
|
| 1080 |
Configuration settings for a custom embedder model dropdown
|
| 1081 |
component.
|
|
|
|
| 1082 |
"""
|
| 1083 |
|
| 1084 |
embedder_model: DropdownConfig = DropdownConfig(
|
|
|
|
| 1101 |
class TrainingConfig(BaseTabConfig):
|
| 1102 |
"""
|
| 1103 |
Common component configuration settings for training tabs.
|
|
|
|
| 1104 |
Attributes
|
| 1105 |
----------
|
| 1106 |
dataset_type : DropdownConfig
|
|
|
|
| 1199 |
reduce_memory_usage : CheckboxConfig
|
| 1200 |
Configuration settings for a reduce-memory-usage checkbox
|
| 1201 |
component.
|
|
|
|
| 1202 |
See Also
|
| 1203 |
--------
|
| 1204 |
BaseTabConfig
|
| 1205 |
Parent model defining common component configuration settings
|
| 1206 |
for UI tabs.
|
|
|
|
| 1207 |
"""
|
| 1208 |
|
| 1209 |
dataset_type: DropdownConfig = DropdownConfig(
|
|
|
|
| 1321 |
exclude_value=True,
|
| 1322 |
)
|
| 1323 |
|
| 1324 |
+
hop_length: SliderConfig = SliderConfig.hop_length(
|
| 1325 |
label="Hop length",
|
| 1326 |
info="The hop length to use for extracting pitch features.<br><br>",
|
| 1327 |
visible=False,
|
|
|
|
| 1513 |
class GenerationConfig(BaseTabConfig):
|
| 1514 |
"""
|
| 1515 |
Common component configuration settings for generation tabs.
|
|
|
|
| 1516 |
voice_model : DropdownConfig
|
| 1517 |
Configuration settings for a voice model dropdown component.
|
| 1518 |
f0_methods : DropdownConfig
|
|
|
|
| 1541 |
component.
|
| 1542 |
output_name : TextboxConfig
|
| 1543 |
Configuration settings for an output name textbox component.
|
|
|
|
| 1544 |
See Also
|
| 1545 |
--------
|
| 1546 |
BaseTabConfig
|
| 1547 |
Parent model defining common component configuration settings
|
| 1548 |
for UI tabs.
|
|
|
|
| 1549 |
"""
|
| 1550 |
|
| 1551 |
voice_model: DropdownConfig = DropdownConfig(
|
|
|
|
| 1664 |
class SongGenerationConfig(GenerationConfig):
|
| 1665 |
"""
|
| 1666 |
Common component configuration settings for song generation tabs.
|
|
|
|
| 1667 |
Attributes
|
| 1668 |
----------
|
| 1669 |
source_type : DropdownConfig
|
|
|
|
| 1692 |
backup_gain : SliderConfig
|
| 1693 |
Configuration settings for a backup vocals gain slider
|
| 1694 |
component.
|
|
|
|
| 1695 |
See Also
|
| 1696 |
--------
|
| 1697 |
GenerationConfig
|
| 1698 |
Parent model defining common component configuration settings
|
| 1699 |
for generation tabs.
|
|
|
|
| 1700 |
"""
|
| 1701 |
|
| 1702 |
source_type: DropdownConfig = DropdownConfig(
|
|
|
|
| 1779 |
class SpeechGenerationConfig(GenerationConfig):
|
| 1780 |
"""
|
| 1781 |
Common component configuration settings for speech generation tabs.
|
|
|
|
| 1782 |
Attributes
|
| 1783 |
----------
|
| 1784 |
source_type : DropdownConfig
|
|
|
|
| 1811 |
component.
|
| 1812 |
output_gain : GainSliderConfig
|
| 1813 |
Configuration settings for an output gain slider component.
|
|
|
|
| 1814 |
See Also
|
| 1815 |
--------
|
| 1816 |
GenerationConfig
|
| 1817 |
Parent model defining common component configuration settings
|
| 1818 |
for generation tabs.
|
|
|
|
| 1819 |
"""
|
| 1820 |
|
| 1821 |
source_type: DropdownConfig = DropdownConfig(
|
|
|
|
| 1901 |
) -> None:
|
| 1902 |
"""
|
| 1903 |
Render "Generate song cover - multi-step generation" tab.
|
|
|
|
| 1904 |
Parameters
|
| 1905 |
----------
|
| 1906 |
total_config : TotalConfig
|
|
|
|
| 1909 |
cookiefile : str, optional
|
| 1910 |
The path to a file containing cookies to use when downloading
|
| 1911 |
audio from Youtube.
|
|
|
|
| 1912 |
"""
|
| 1913 |
tab_config = total_config.song.multi_step
|
| 1914 |
for input_track in tab_config.input_audio.all:
|
|
|
|
| 2542 |
"""
|
| 2543 |
Pair audio tracks and gain levels stored in separate gradio
|
| 2544 |
components.
|
|
|
|
| 2545 |
This function is meant to first be partially applied to the sequence
|
| 2546 |
of audio components and the sequence of slider components containing
|
| 2547 |
the values that should be combined. The resulting function can then
|
|
|
|
| 2549 |
those audio and slider components. The `data` parameter in that case
|
| 2550 |
will contain a mapping from each of those components to the value
|
| 2551 |
that the component stores.
|
|
|
|
| 2552 |
Parameters
|
| 2553 |
----------
|
| 2554 |
audio_components : Sequence[gr.Audio]
|
|
|
|
| 2557 |
Gain level components to pair with audio tracks.
|
| 2558 |
data : dict[gr.Audio | gr.Slider, Any]
|
| 2559 |
Data from the audio and gain components.
|
|
|
|
| 2560 |
Returns
|
| 2561 |
-------
|
| 2562 |
list[tuple[str, int]]
|
| 2563 |
Paired audio tracks and gain levels.
|
|
|
|
| 2564 |
Raises
|
| 2565 |
------
|
| 2566 |
ValueError
|
| 2567 |
If the number of audio tracks and gain levels are not the same.
|
|
|
|
| 2568 |
"""
|
| 2569 |
audio_tracks = [data[component] for component in audio_components]
|
| 2570 |
gain_levels = [data[component] for component in gain_components]
|
|
|
|
| 2578 |
]
|
| 2579 |
|
| 2580 |
|
| 2581 |
+
def run_newpipeline(
    source: str,
    model_name: str,
    n_octaves: int = 0,
    n_semitones: int = 0,
    f0_methods: Sequence[F0Method] | None = None,
    index_rate: float = 0.3,
    rms_mix_rate: float = 1.0,
    protect_rate: float = 0.33,
    hop_length: int = 128,
    split_vocals: bool = False,
    autotune_vocals: bool = False,
    autotune_strength: float = 1.0,
    clean_vocals: bool = False,
    clean_strength: float = 0.7,
    embedder_model: EmbedderModel = EmbedderModel.CONTENTVEC,
    custom_embedder_model: str | None = None,
    sid: int = 0,
    room_size: float = 0.15,
    wet_level: float = 0.2,
    dry_level: float = 0.8,
    damping: float = 0.7,
    main_gain: int = 0,
    inst_gain: int = 0,
    backup_gain: int = 0,
    output_sr: int = 44100,
    output_format: AudioExt = AudioExt.MP3,
    output_name: str | None = None,
    cookiefile: StrPath | None = None,
    progress_bar: gr.Progress | None = None,
) -> tuple[Path, ...]:
    """
    Run the song cover generation pipeline.

    The pipeline retrieves the song, separates vocals from
    instrumentals, separates main vocals from backup vocals, de-noises
    and de-reverbs the main vocals, converts them with the given voice
    model, post-processes the converted vocals, pitch-shifts the
    instrumentals and backup vocals and finally mixes everything into
    a song cover.

    Parameters
    ----------
    source : str
        A Youtube URL, the path to a local audio file or the path to a
        song directory.
    model_name : str
        The name of the voice model to use for vocal conversion.
    n_octaves : int, default=0
        The number of octaves to pitch-shift the converted vocals by.
    n_semitones : int, default=0
        The number of semi-tones to pitch-shift the converted vocals,
        instrumentals, and backup vocals by.
    f0_methods : Sequence[F0Method], optional
        The methods to use for pitch extraction during vocal
        conversion. If None, the method used is rmvpe.
    index_rate : float, default=0.3
        The influence of the index file on the vocal conversion.
    rms_mix_rate : float, default=1.0
        The blending rate of the volume envelope of the converted
        vocals.
    protect_rate : float, default=0.33
        The protect rate for consonants and breathing sounds during
        vocal conversion.
    hop_length : int, default=128
        The hop length to use for crepe-based pitch detection.
    split_vocals : bool, default=False
        Whether to perform audio splitting before converting the main
        vocals.
    autotune_vocals : bool, default=False
        Whether to apply autotune to the converted vocals.
    autotune_strength : float, default=1.0
        The strength of the autotune to apply to the converted vocals.
    clean_vocals : bool, default=False
        Whether to clean the converted vocals.
    clean_strength : float, default=0.7
        The intensity of the cleaning to apply to the converted vocals.
    embedder_model : EmbedderModel, default=EmbedderModel.CONTENTVEC
        The model to use for generating speaker embeddings during vocal
        conversion.
    custom_embedder_model : str, optional
        The name of a custom embedder model to use for generating
        speaker embeddings during vocal conversion.
    sid : int, default=0
        The speaker id to use for multi-speaker models during vocal
        conversion.
    room_size : float, default=0.15
        The room size of the reverb effect to apply to the converted
        vocals.
    wet_level : float, default=0.2
        The wetness level of the reverb effect to apply to the converted
        vocals.
    dry_level : float, default=0.8
        The dryness level of the reverb effect to apply to the converted
        vocals.
    damping : float, default=0.7
        The damping of the reverb effect to apply to the converted
        vocals.
    main_gain : int, default=0
        The gain to apply to the post-processed vocals.
    inst_gain : int, default=0
        The gain to apply to the pitch-shifted instrumentals.
    backup_gain : int, default=0
        The gain to apply to the pitch-shifted backup vocals.
    output_sr : int, default=44100
        The sample rate of the song cover.
    output_format : AudioExt, default=AudioExt.MP3
        The audio format of the song cover.
    output_name : str, optional
        The name of the song cover.
    cookiefile : StrPath, optional
        The path to a file containing cookies to use when downloading
        audio from Youtube.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.

    Returns
    -------
    tuple[Path,...]
        The path to the generated song cover and the paths to any
        intermediate audio files that were generated.

    """
    validate_model(model_name, Entity.VOICE_MODEL)
    if embedder_model == EmbedderModel.CUSTOM:
        validate_model(custom_embedder_model, Entity.CUSTOM_EMBEDDER_MODEL)

    display_progress("[~] Retrieving song...", 0 / 9, progress_bar)
    song, song_dir = retrieve_song(source, cookiefile=cookiefile)

    display_progress(
        "[~] newpipeline: Separating vocals from instrumentals...",
        1 / 9,
        progress_bar,
    )
    vocals_track, instrumentals_track = separate_audio(
        song,
        song_dir,
        SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
        SegmentSize.SEG_2048,
    )

    display_progress(
        "[~] newpipeline: Separating main vocals from backup vocals...",
        2 / 9,
        progress_bar,
    )
    backup_vocals_track, main_vocals_track = separate_audio(
        vocals_track,
        song_dir,
        SeparationModel.UVR_MDX_NET_KARA_2,
        SegmentSize.SEG_2048,
    )

    display_progress("[~] newpipeline: De-noising vocals...", 3 / 9, progress_bar)
    # The de-noiser must be fed the main vocals. Previously it was fed
    # `clean_track`, i.e. the very variable this statement assigns, which
    # raised UnboundLocalError on every call. The noise stem is not
    # used further.
    _noise_track, clean_track = separate_audio(
        main_vocals_track,
        song_dir,
        SeparationModel.UVR_DeNoise,
        SegmentSize.SEG_2048,
    )

    display_progress("[~] newpipeline: De-reverbing vocals...", 4 / 9, progress_bar)
    # Chain on the de-noised vocals so that the previous step actually
    # contributes to the final result.
    reverb_track, vocals_dereverb_track = separate_audio(
        clean_track,
        song_dir,
        SeparationModel.UVR_DeEcho_DeReverb,
        SegmentSize.SEG_2048,
    )

    display_progress("[~] newpipeline: Converting vocals...", 5 / 9, progress_bar)
    converted_vocals_track = convert(
        audio_track=vocals_dereverb_track,
        directory=song_dir,
        model_name=model_name,
        n_octaves=n_octaves,
        n_semitones=n_semitones,
        f0_methods=f0_methods,
        index_rate=index_rate,
        rms_mix_rate=rms_mix_rate,
        protect_rate=protect_rate,
        hop_length=hop_length,
        split_audio=split_vocals,
        autotune_audio=autotune_vocals,
        autotune_strength=autotune_strength,
        clean_audio=clean_vocals,
        clean_strength=clean_strength,
        embedder_model=embedder_model,
        custom_embedder_model=custom_embedder_model,
        sid=sid,
        content_type=RVCContentType.VOCALS,
    )

    display_progress("[~] newpipeline: Post-processing vocals...", 6 / 9, progress_bar)
    effected_vocals_track = postprocess(
        converted_vocals_track,
        song_dir,
        room_size,
        wet_level,
        dry_level,
        damping,
    )

    display_progress(
        "[~] newpipeline: Pitch-shifting instrumentals...",
        7 / 9,
        progress_bar,
    )
    shifted_instrumentals_track = pitch_shift(
        instrumentals_track,
        song_dir,
        n_semitones,
    )

    display_progress(
        "[~] newpipeline: Pitch-shifting backup vocals...",
        8 / 9,
        progress_bar,
    )
    shifted_backup_vocals_track = pitch_shift(
        backup_vocals_track,
        song_dir,
        n_semitones,
    )

    song_cover = mix_song(
        [
            (effected_vocals_track, main_gain),
            (shifted_instrumentals_track, inst_gain),
            (shifted_backup_vocals_track, backup_gain),
        ],
        song_dir,
        output_sr,
        output_format,
        output_name,
    )
    # The shape of the returned tuple is kept exactly as before so that
    # callers unpacking it are unaffected.
    return (
        song_cover,
        song,
        vocals_track,
        instrumentals_track,
        main_vocals_track,
        backup_vocals_track,
        vocals_dereverb_track,
        reverb_track,
        converted_vocals_track,
        effected_vocals_track,
        shifted_instrumentals_track,
        shifted_backup_vocals_track,
    )
|
| 2807 |
+
|
| 2808 |
+
|
| 2809 |
def render_app() -> gr.Blocks:
|
| 2810 |
"""
|
| 2811 |
Render the Ultimate RVC web application.
|
|
|
|
| 2812 |
Returns
|
| 2813 |
-------
|
| 2814 |
gr.Blocks
|
| 2815 |
The rendered web application.
|
|
|
|
| 2816 |
"""
|
| 2817 |
css = """
|
| 2818 |
h1 { text-align: center; margin-top: 20px; margin-bottom: 20px; }
|
|
|
|
| 2819 |
#generate-tab-button { font-weight: bold !important;}
|
| 2820 |
#manage-tab-button { font-weight: bold !important;}
|
| 2821 |
#audio-tab-button { font-weight: bold !important;}
|
|
|
|
| 2927 |
"""
|
| 2928 |
Initialize the Ultimate RVC web application by updating the choices
|
| 2929 |
and default values of non-static dropdown components.
|
|
|
|
| 2930 |
Returns
|
| 2931 |
-------
|
| 2932 |
tuple[gr.Dropdown, ...]
|
| 2933 |
A tuple of gr.Dropdown components with updated choices and
|
| 2934 |
default values.
|
|
|
|
| 2935 |
"""
|
| 2936 |
# Initialize model dropdowns
|
| 2937 |
edge_tts_models = initialize_dropdowns(
|
|
|
|
| 2990 |
) -> None:
|
| 2991 |
"""
|
| 2992 |
Render "Generate song covers - One-click generation" tab.
|
|
|
|
| 2993 |
Parameters
|
| 2994 |
----------
|
| 2995 |
total_config : TotalConfig
|
|
|
|
| 2998 |
cookiefile : str, optional
|
| 2999 |
The path to a file containing cookies to use when downloading
|
| 3000 |
audio from Youtube.
|
|
|
|
| 3001 |
"""
|
| 3002 |
with gr.Tab("One-click"):
|
| 3003 |
tab_config = total_config.song.one_click
|
|
|
|
| 3386 |
app.launch(
    server_name=listen_host,
    server_port=listen_port,
    # Python's boolean literal is `False`; the lowercase `false` used
    # previously is an undefined name and raised NameError at launch.
    ssr_mode=False,
)
|
| 3391 |
|
| 3392 |
|