Spaces:
Running
Running
Update main/app/app.py
Browse files- main/app/app.py +211 -211
main/app/app.py
CHANGED
|
@@ -830,6 +830,217 @@ with gr.Blocks(title=" Ultimate RVC Maker ⚡", theme=theme) as app:
|
|
| 830 |
api_name="upload_pretrain_d"
|
| 831 |
)
|
| 832 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
with gr.TabItem(translations["audio_editing"], visible=configs.get("audioldm2", True)):
|
| 834 |
gr.Markdown(translations["audio_editing_info"])
|
| 835 |
with gr.Row():
|
|
@@ -1151,217 +1362,6 @@ with gr.Blocks(title=" Ultimate RVC Maker ⚡", theme=theme) as app:
|
|
| 1151 |
api_name="create_dataset"
|
| 1152 |
)
|
| 1153 |
|
| 1154 |
-
with gr.TabItem(translations["training_model"], visible=configs.get("training_tab", True)):
|
| 1155 |
-
gr.Markdown(f"## {translations['training_model']}")
|
| 1156 |
-
with gr.Row():
|
| 1157 |
-
gr.Markdown(translations["training_markdown"])
|
| 1158 |
-
with gr.Row():
|
| 1159 |
-
with gr.Column():
|
| 1160 |
-
with gr.Row():
|
| 1161 |
-
with gr.Column():
|
| 1162 |
-
training_name = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
|
| 1163 |
-
training_sr = gr.Radio(label=translations["sample_rate"], info=translations["sample_rate_info"], choices=["32k", "40k", "48k"], value="48k", interactive=True)
|
| 1164 |
-
training_ver = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
|
| 1165 |
-
with gr.Row():
|
| 1166 |
-
clean_dataset = gr.Checkbox(label=translations["clear_dataset"], value=False, interactive=True)
|
| 1167 |
-
preprocess_cut = gr.Checkbox(label=translations["split_audio"], value=True, interactive=True)
|
| 1168 |
-
process_effects = gr.Checkbox(label=translations["preprocess_effect"], value=False, interactive=True)
|
| 1169 |
-
checkpointing1 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
| 1170 |
-
training_f0 = gr.Checkbox(label=translations["training_pitch"], value=True, interactive=True)
|
| 1171 |
-
upload = gr.Checkbox(label=translations["upload_dataset"], value=False, interactive=True)
|
| 1172 |
-
with gr.Row():
|
| 1173 |
-
clean_dataset_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.7, step=0.1, interactive=True, visible=clean_dataset.value)
|
| 1174 |
-
with gr.Column():
|
| 1175 |
-
preprocess_button = gr.Button(translations["preprocess_button"], scale=2)
|
| 1176 |
-
upload_dataset = gr.Files(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"], visible=upload.value)
|
| 1177 |
-
preprocess_info = gr.Textbox(label=translations["preprocess_info"], value="", interactive=False)
|
| 1178 |
-
with gr.Column():
|
| 1179 |
-
with gr.Row():
|
| 1180 |
-
with gr.Column():
|
| 1181 |
-
with gr.Accordion(label=translations["f0_method"], open=False):
|
| 1182 |
-
with gr.Group():
|
| 1183 |
-
with gr.Row():
|
| 1184 |
-
onnx_f0_mode2 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 1185 |
-
unlock_full_method4 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 1186 |
-
extract_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 1187 |
-
extract_hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
|
| 1188 |
-
with gr.Accordion(label=translations["hubert_model"], open=False):
|
| 1189 |
-
with gr.Group():
|
| 1190 |
-
embed_mode2 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 1191 |
-
extract_embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 1192 |
-
with gr.Row():
|
| 1193 |
-
extract_embedders_custom = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=extract_embedders.value == "custom")
|
| 1194 |
-
with gr.Column():
|
| 1195 |
-
extract_button = gr.Button(translations["extract_button"], scale=2)
|
| 1196 |
-
extract_info = gr.Textbox(label=translations["extract_info"], value="", interactive=False)
|
| 1197 |
-
with gr.Column():
|
| 1198 |
-
with gr.Row():
|
| 1199 |
-
with gr.Column():
|
| 1200 |
-
total_epochs = gr.Slider(label=translations["total_epoch"], info=translations["total_epoch_info"], minimum=1, maximum=10000, value=300, step=1, interactive=True)
|
| 1201 |
-
save_epochs = gr.Slider(label=translations["save_epoch"], info=translations["save_epoch_info"], minimum=1, maximum=10000, value=50, step=1, interactive=True)
|
| 1202 |
-
with gr.Column():
|
| 1203 |
-
with gr.Row():
|
| 1204 |
-
index_button = gr.Button(f"3. {translations['create_index']}", variant="primary", scale=2)
|
| 1205 |
-
training_button = gr.Button(f"4. {translations['training_model']}", variant="primary", scale=2)
|
| 1206 |
-
with gr.Row():
|
| 1207 |
-
with gr.Accordion(label=translations["setting"], open=False):
|
| 1208 |
-
with gr.Row():
|
| 1209 |
-
index_algorithm = gr.Radio(label=translations["index_algorithm"], info=translations["index_algorithm_info"], choices=["Auto", "Faiss", "KMeans"], value="Auto", interactive=True)
|
| 1210 |
-
with gr.Row():
|
| 1211 |
-
custom_dataset = gr.Checkbox(label=translations["custom_dataset"], info=translations["custom_dataset_info"], value=False, interactive=True)
|
| 1212 |
-
overtraining_detector = gr.Checkbox(label=translations["overtraining_detector"], info=translations["overtraining_detector_info"], value=False, interactive=True)
|
| 1213 |
-
clean_up = gr.Checkbox(label=translations["cleanup_training"], info=translations["cleanup_training_info"], value=False, interactive=True)
|
| 1214 |
-
cache_in_gpu = gr.Checkbox(label=translations["cache_in_gpu"], info=translations["cache_in_gpu_info"], value=False, interactive=True)
|
| 1215 |
-
with gr.Column():
|
| 1216 |
-
dataset_path = gr.Textbox(label=translations["dataset_folder"], value="dataset", interactive=True, visible=custom_dataset.value)
|
| 1217 |
-
with gr.Column():
|
| 1218 |
-
threshold = gr.Slider(minimum=1, maximum=100, value=50, step=1, label=translations["threshold"], interactive=True, visible=overtraining_detector.value)
|
| 1219 |
-
with gr.Accordion(translations["setting_cpu_gpu"], open=False):
|
| 1220 |
-
with gr.Column():
|
| 1221 |
-
gpu_number = gr.Textbox(label=translations["gpu_number"], value=str("-".join(map(str, range(torch.cuda.device_count()))) if torch.cuda.is_available() else "-"), info=translations["gpu_number_info"], interactive=True)
|
| 1222 |
-
gpu_info = gr.Textbox(label=translations["gpu_info"], value=get_gpu_info(), info=translations["gpu_info_2"], interactive=False)
|
| 1223 |
-
cpu_core = gr.Slider(label=translations["cpu_core"], info=translations["cpu_core_info"], minimum=0, maximum=cpu_count(), value=cpu_count(), step=1, interactive=True)
|
| 1224 |
-
train_batch_size = gr.Slider(label=translations["batch_size"], info=translations["batch_size_info"], minimum=1, maximum=64, value=8, step=1, interactive=True)
|
| 1225 |
-
with gr.Row():
|
| 1226 |
-
save_only_latest = gr.Checkbox(label=translations["save_only_latest"], info=translations["save_only_latest_info"], value=True, interactive=True)
|
| 1227 |
-
save_every_weights = gr.Checkbox(label=translations["save_every_weights"], info=translations["save_every_weights_info"], value=True, interactive=True)
|
| 1228 |
-
not_use_pretrain = gr.Checkbox(label=translations["not_use_pretrain_2"], info=translations["not_use_pretrain_info"], value=False, interactive=True)
|
| 1229 |
-
custom_pretrain = gr.Checkbox(label=translations["custom_pretrain"], info=translations["custom_pretrain_info"], value=False, interactive=True)
|
| 1230 |
-
with gr.Row():
|
| 1231 |
-
vocoders = gr.Radio(label=translations["vocoder"], info=translations["vocoder_info"], choices=["Default", "MRF-HiFi-GAN", "RefineGAN"], value="Default", interactive=True)
|
| 1232 |
-
with gr.Row():
|
| 1233 |
-
deterministic = gr.Checkbox(label=translations["deterministic"], info=translations["deterministic_info"], value=False, interactive=True)
|
| 1234 |
-
benchmark = gr.Checkbox(label=translations["benchmark"], info=translations["benchmark_info"], value=False, interactive=True)
|
| 1235 |
-
with gr.Row():
|
| 1236 |
-
model_author = gr.Textbox(label=translations["training_author"], info=translations["training_author_info"], value="", placeholder=translations["training_author"], interactive=True)
|
| 1237 |
-
with gr.Row():
|
| 1238 |
-
with gr.Column():
|
| 1239 |
-
with gr.Accordion(translations["custom_pretrain_info"], open=False, visible=custom_pretrain.value and not not_use_pretrain.value) as pretrain_setting:
|
| 1240 |
-
pretrained_D = gr.Dropdown(label=translations["pretrain_file"].format(dg="D"), choices=pretrainedD, value=pretrainedD[0] if len(pretrainedD) > 0 else '', interactive=True, allow_custom_value=True)
|
| 1241 |
-
pretrained_G = gr.Dropdown(label=translations["pretrain_file"].format(dg="G"), choices=pretrainedG, value=pretrainedG[0] if len(pretrainedG) > 0 else '', interactive=True, allow_custom_value=True)
|
| 1242 |
-
refesh_pretrain = gr.Button(translations["refesh"], scale=2)
|
| 1243 |
-
with gr.Row():
|
| 1244 |
-
training_info = gr.Textbox(label=translations["train_info"], value="", interactive=False)
|
| 1245 |
-
with gr.Row():
|
| 1246 |
-
with gr.Column():
|
| 1247 |
-
with gr.Accordion(translations["export_model"], open=False):
|
| 1248 |
-
with gr.Row():
|
| 1249 |
-
model_file= gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 1250 |
-
index_file = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 1251 |
-
with gr.Row():
|
| 1252 |
-
refesh_file = gr.Button(f"1. {translations['refesh']}", scale=2)
|
| 1253 |
-
zip_model = gr.Button(translations["zip_model"], variant="primary", scale=2)
|
| 1254 |
-
with gr.Row():
|
| 1255 |
-
zip_output = gr.File(label=translations["output_zip"], file_types=[".zip"], interactive=False, visible=False)
|
| 1256 |
-
with gr.Row():
|
| 1257 |
-
vocoders.change(fn=pitch_guidance_lock, inputs=[vocoders], outputs=[training_f0])
|
| 1258 |
-
training_f0.change(fn=vocoders_lock, inputs=[training_f0, vocoders], outputs=[vocoders])
|
| 1259 |
-
unlock_full_method4.change(fn=unlock_f0, inputs=[unlock_full_method4], outputs=[extract_method])
|
| 1260 |
-
with gr.Row():
|
| 1261 |
-
refesh_file.click(fn=change_models_choices, inputs=[], outputs=[model_file, index_file])
|
| 1262 |
-
zip_model.click(fn=zip_file, inputs=[training_name, model_file, index_file], outputs=[zip_output])
|
| 1263 |
-
dataset_path.change(fn=lambda folder: os.makedirs(folder, exist_ok=True), inputs=[dataset_path], outputs=[])
|
| 1264 |
-
with gr.Row():
|
| 1265 |
-
upload.change(fn=visible, inputs=[upload], outputs=[upload_dataset])
|
| 1266 |
-
overtraining_detector.change(fn=visible, inputs=[overtraining_detector], outputs=[threshold])
|
| 1267 |
-
clean_dataset.change(fn=visible, inputs=[clean_dataset], outputs=[clean_dataset_strength])
|
| 1268 |
-
with gr.Row():
|
| 1269 |
-
custom_dataset.change(fn=lambda custom_dataset: [visible(custom_dataset), "dataset"],inputs=[custom_dataset], outputs=[dataset_path, dataset_path])
|
| 1270 |
-
training_ver.change(fn=unlock_vocoder, inputs=[training_ver, vocoders], outputs=[vocoders])
|
| 1271 |
-
vocoders.change(fn=unlock_ver, inputs=[training_ver, vocoders], outputs=[training_ver])
|
| 1272 |
-
upload_dataset.upload(
|
| 1273 |
-
fn=lambda files, folder: [shutil.move(f.name, os.path.join(folder, os.path.split(f.name)[1])) for f in files] if folder != "" else gr_warning(translations["dataset_folder1"]),
|
| 1274 |
-
inputs=[upload_dataset, dataset_path],
|
| 1275 |
-
outputs=[],
|
| 1276 |
-
api_name="upload_dataset"
|
| 1277 |
-
)
|
| 1278 |
-
with gr.Row():
|
| 1279 |
-
not_use_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
| 1280 |
-
custom_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
| 1281 |
-
refesh_pretrain.click(fn=change_pretrained_choices, inputs=[], outputs=[pretrained_D, pretrained_G])
|
| 1282 |
-
with gr.Row():
|
| 1283 |
-
preprocess_button.click(
|
| 1284 |
-
fn=preprocess,
|
| 1285 |
-
inputs=[
|
| 1286 |
-
training_name,
|
| 1287 |
-
training_sr,
|
| 1288 |
-
cpu_core,
|
| 1289 |
-
preprocess_cut,
|
| 1290 |
-
process_effects,
|
| 1291 |
-
dataset_path,
|
| 1292 |
-
clean_dataset,
|
| 1293 |
-
clean_dataset_strength
|
| 1294 |
-
],
|
| 1295 |
-
outputs=[preprocess_info],
|
| 1296 |
-
api_name="preprocess"
|
| 1297 |
-
)
|
| 1298 |
-
with gr.Row():
|
| 1299 |
-
embed_mode2.change(fn=visible_embedders, inputs=[embed_mode2], outputs=[extract_embedders])
|
| 1300 |
-
extract_method.change(fn=hoplength_show, inputs=[extract_method], outputs=[extract_hop_length])
|
| 1301 |
-
extract_embedders.change(fn=lambda extract_embedders: visible(extract_embedders == "custom"), inputs=[extract_embedders], outputs=[extract_embedders_custom])
|
| 1302 |
-
with gr.Row():
|
| 1303 |
-
extract_button.click(
|
| 1304 |
-
fn=extract,
|
| 1305 |
-
inputs=[
|
| 1306 |
-
training_name,
|
| 1307 |
-
training_ver,
|
| 1308 |
-
extract_method,
|
| 1309 |
-
training_f0,
|
| 1310 |
-
extract_hop_length,
|
| 1311 |
-
cpu_core,
|
| 1312 |
-
gpu_number,
|
| 1313 |
-
training_sr,
|
| 1314 |
-
extract_embedders,
|
| 1315 |
-
extract_embedders_custom,
|
| 1316 |
-
onnx_f0_mode2,
|
| 1317 |
-
embed_mode2
|
| 1318 |
-
],
|
| 1319 |
-
outputs=[extract_info],
|
| 1320 |
-
api_name="extract"
|
| 1321 |
-
)
|
| 1322 |
-
with gr.Row():
|
| 1323 |
-
index_button.click(
|
| 1324 |
-
fn=create_index,
|
| 1325 |
-
inputs=[
|
| 1326 |
-
training_name,
|
| 1327 |
-
training_ver,
|
| 1328 |
-
index_algorithm
|
| 1329 |
-
],
|
| 1330 |
-
outputs=[training_info],
|
| 1331 |
-
api_name="create_index"
|
| 1332 |
-
)
|
| 1333 |
-
with gr.Row():
|
| 1334 |
-
training_button.click(
|
| 1335 |
-
fn=training,
|
| 1336 |
-
inputs=[
|
| 1337 |
-
training_name,
|
| 1338 |
-
training_ver,
|
| 1339 |
-
save_epochs,
|
| 1340 |
-
save_only_latest,
|
| 1341 |
-
save_every_weights,
|
| 1342 |
-
total_epochs,
|
| 1343 |
-
training_sr,
|
| 1344 |
-
train_batch_size,
|
| 1345 |
-
gpu_number,
|
| 1346 |
-
training_f0,
|
| 1347 |
-
not_use_pretrain,
|
| 1348 |
-
custom_pretrain,
|
| 1349 |
-
pretrained_G,
|
| 1350 |
-
pretrained_D,
|
| 1351 |
-
overtraining_detector,
|
| 1352 |
-
threshold,
|
| 1353 |
-
clean_up,
|
| 1354 |
-
cache_in_gpu,
|
| 1355 |
-
model_author,
|
| 1356 |
-
vocoders,
|
| 1357 |
-
checkpointing1,
|
| 1358 |
-
deterministic,
|
| 1359 |
-
benchmark
|
| 1360 |
-
],
|
| 1361 |
-
outputs=[training_info],
|
| 1362 |
-
api_name="training_model"
|
| 1363 |
-
)
|
| 1364 |
-
|
| 1365 |
with gr.TabItem(translations["fushion"], visible=configs.get("fushion_tab", True)):
|
| 1366 |
gr.Markdown(translations["fushion_markdown"])
|
| 1367 |
with gr.Row():
|
|
|
|
| 830 |
api_name="upload_pretrain_d"
|
| 831 |
)
|
| 832 |
|
| 833 |
+
with gr.TabItem(translations["training_model"], visible=configs.get("training_tab", True)):
|
| 834 |
+
gr.Markdown(f"## {translations['training_model']}")
|
| 835 |
+
with gr.Row():
|
| 836 |
+
gr.Markdown(translations["training_markdown"])
|
| 837 |
+
with gr.Row():
|
| 838 |
+
with gr.Column():
|
| 839 |
+
with gr.Row():
|
| 840 |
+
with gr.Column():
|
| 841 |
+
training_name = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
|
| 842 |
+
training_sr = gr.Radio(label=translations["sample_rate"], info=translations["sample_rate_info"], choices=["32k", "40k", "48k"], value="48k", interactive=True)
|
| 843 |
+
training_ver = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
|
| 844 |
+
with gr.Row():
|
| 845 |
+
clean_dataset = gr.Checkbox(label=translations["clear_dataset"], value=False, interactive=True)
|
| 846 |
+
preprocess_cut = gr.Checkbox(label=translations["split_audio"], value=True, interactive=True)
|
| 847 |
+
process_effects = gr.Checkbox(label=translations["preprocess_effect"], value=False, interactive=True)
|
| 848 |
+
checkpointing1 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
| 849 |
+
training_f0 = gr.Checkbox(label=translations["training_pitch"], value=True, interactive=True)
|
| 850 |
+
upload = gr.Checkbox(label=translations["upload_dataset"], value=False, interactive=True)
|
| 851 |
+
with gr.Row():
|
| 852 |
+
clean_dataset_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.7, step=0.1, interactive=True, visible=clean_dataset.value)
|
| 853 |
+
with gr.Column():
|
| 854 |
+
preprocess_button = gr.Button(translations["preprocess_button"], scale=2)
|
| 855 |
+
upload_dataset = gr.Files(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"], visible=upload.value)
|
| 856 |
+
preprocess_info = gr.Textbox(label=translations["preprocess_info"], value="", interactive=False)
|
| 857 |
+
with gr.Column():
|
| 858 |
+
with gr.Row():
|
| 859 |
+
with gr.Column():
|
| 860 |
+
with gr.Accordion(label=translations["f0_method"], open=False):
|
| 861 |
+
with gr.Group():
|
| 862 |
+
with gr.Row():
|
| 863 |
+
onnx_f0_mode2 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 864 |
+
unlock_full_method4 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 865 |
+
extract_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 866 |
+
extract_hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
|
| 867 |
+
with gr.Accordion(label=translations["hubert_model"], open=False):
|
| 868 |
+
with gr.Group():
|
| 869 |
+
embed_mode2 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 870 |
+
extract_embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 871 |
+
with gr.Row():
|
| 872 |
+
extract_embedders_custom = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=extract_embedders.value == "custom")
|
| 873 |
+
with gr.Column():
|
| 874 |
+
extract_button = gr.Button(translations["extract_button"], scale=2)
|
| 875 |
+
extract_info = gr.Textbox(label=translations["extract_info"], value="", interactive=False)
|
| 876 |
+
with gr.Column():
|
| 877 |
+
with gr.Row():
|
| 878 |
+
with gr.Column():
|
| 879 |
+
total_epochs = gr.Slider(label=translations["total_epoch"], info=translations["total_epoch_info"], minimum=1, maximum=10000, value=300, step=1, interactive=True)
|
| 880 |
+
save_epochs = gr.Slider(label=translations["save_epoch"], info=translations["save_epoch_info"], minimum=1, maximum=10000, value=50, step=1, interactive=True)
|
| 881 |
+
with gr.Column():
|
| 882 |
+
with gr.Row():
|
| 883 |
+
index_button = gr.Button(f"3. {translations['create_index']}", variant="primary", scale=2)
|
| 884 |
+
training_button = gr.Button(f"4. {translations['training_model']}", variant="primary", scale=2)
|
| 885 |
+
with gr.Row():
|
| 886 |
+
with gr.Accordion(label=translations["setting"], open=False):
|
| 887 |
+
with gr.Row():
|
| 888 |
+
index_algorithm = gr.Radio(label=translations["index_algorithm"], info=translations["index_algorithm_info"], choices=["Auto", "Faiss", "KMeans"], value="Auto", interactive=True)
|
| 889 |
+
with gr.Row():
|
| 890 |
+
custom_dataset = gr.Checkbox(label=translations["custom_dataset"], info=translations["custom_dataset_info"], value=False, interactive=True)
|
| 891 |
+
overtraining_detector = gr.Checkbox(label=translations["overtraining_detector"], info=translations["overtraining_detector_info"], value=False, interactive=True)
|
| 892 |
+
clean_up = gr.Checkbox(label=translations["cleanup_training"], info=translations["cleanup_training_info"], value=False, interactive=True)
|
| 893 |
+
cache_in_gpu = gr.Checkbox(label=translations["cache_in_gpu"], info=translations["cache_in_gpu_info"], value=False, interactive=True)
|
| 894 |
+
with gr.Column():
|
| 895 |
+
dataset_path = gr.Textbox(label=translations["dataset_folder"], value="dataset", interactive=True, visible=custom_dataset.value)
|
| 896 |
+
with gr.Column():
|
| 897 |
+
threshold = gr.Slider(minimum=1, maximum=100, value=50, step=1, label=translations["threshold"], interactive=True, visible=overtraining_detector.value)
|
| 898 |
+
with gr.Accordion(translations["setting_cpu_gpu"], open=False):
|
| 899 |
+
with gr.Column():
|
| 900 |
+
gpu_number = gr.Textbox(label=translations["gpu_number"], value=str("-".join(map(str, range(torch.cuda.device_count()))) if torch.cuda.is_available() else "-"), info=translations["gpu_number_info"], interactive=True)
|
| 901 |
+
gpu_info = gr.Textbox(label=translations["gpu_info"], value=get_gpu_info(), info=translations["gpu_info_2"], interactive=False)
|
| 902 |
+
cpu_core = gr.Slider(label=translations["cpu_core"], info=translations["cpu_core_info"], minimum=0, maximum=cpu_count(), value=cpu_count(), step=1, interactive=True)
|
| 903 |
+
train_batch_size = gr.Slider(label=translations["batch_size"], info=translations["batch_size_info"], minimum=1, maximum=64, value=8, step=1, interactive=True)
|
| 904 |
+
with gr.Row():
|
| 905 |
+
save_only_latest = gr.Checkbox(label=translations["save_only_latest"], info=translations["save_only_latest_info"], value=True, interactive=True)
|
| 906 |
+
save_every_weights = gr.Checkbox(label=translations["save_every_weights"], info=translations["save_every_weights_info"], value=True, interactive=True)
|
| 907 |
+
not_use_pretrain = gr.Checkbox(label=translations["not_use_pretrain_2"], info=translations["not_use_pretrain_info"], value=False, interactive=True)
|
| 908 |
+
custom_pretrain = gr.Checkbox(label=translations["custom_pretrain"], info=translations["custom_pretrain_info"], value=False, interactive=True)
|
| 909 |
+
with gr.Row():
|
| 910 |
+
vocoders = gr.Radio(label=translations["vocoder"], info=translations["vocoder_info"], choices=["Default", "MRF-HiFi-GAN", "RefineGAN"], value="Default", interactive=True)
|
| 911 |
+
with gr.Row():
|
| 912 |
+
deterministic = gr.Checkbox(label=translations["deterministic"], info=translations["deterministic_info"], value=False, interactive=True)
|
| 913 |
+
benchmark = gr.Checkbox(label=translations["benchmark"], info=translations["benchmark_info"], value=False, interactive=True)
|
| 914 |
+
with gr.Row():
|
| 915 |
+
model_author = gr.Textbox(label=translations["training_author"], info=translations["training_author_info"], value="", placeholder=translations["training_author"], interactive=True)
|
| 916 |
+
with gr.Row():
|
| 917 |
+
with gr.Column():
|
| 918 |
+
with gr.Accordion(translations["custom_pretrain_info"], open=False, visible=custom_pretrain.value and not not_use_pretrain.value) as pretrain_setting:
|
| 919 |
+
pretrained_D = gr.Dropdown(label=translations["pretrain_file"].format(dg="D"), choices=pretrainedD, value=pretrainedD[0] if len(pretrainedD) > 0 else '', interactive=True, allow_custom_value=True)
|
| 920 |
+
pretrained_G = gr.Dropdown(label=translations["pretrain_file"].format(dg="G"), choices=pretrainedG, value=pretrainedG[0] if len(pretrainedG) > 0 else '', interactive=True, allow_custom_value=True)
|
| 921 |
+
refesh_pretrain = gr.Button(translations["refesh"], scale=2)
|
| 922 |
+
with gr.Row():
|
| 923 |
+
training_info = gr.Textbox(label=translations["train_info"], value="", interactive=False)
|
| 924 |
+
with gr.Row():
|
| 925 |
+
with gr.Column():
|
| 926 |
+
with gr.Accordion(translations["export_model"], open=False):
|
| 927 |
+
with gr.Row():
|
| 928 |
+
model_file= gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 929 |
+
index_file = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 930 |
+
with gr.Row():
|
| 931 |
+
refesh_file = gr.Button(f"1. {translations['refesh']}", scale=2)
|
| 932 |
+
zip_model = gr.Button(translations["zip_model"], variant="primary", scale=2)
|
| 933 |
+
with gr.Row():
|
| 934 |
+
zip_output = gr.File(label=translations["output_zip"], file_types=[".zip"], interactive=False, visible=False)
|
| 935 |
+
with gr.Row():
|
| 936 |
+
vocoders.change(fn=pitch_guidance_lock, inputs=[vocoders], outputs=[training_f0])
|
| 937 |
+
training_f0.change(fn=vocoders_lock, inputs=[training_f0, vocoders], outputs=[vocoders])
|
| 938 |
+
unlock_full_method4.change(fn=unlock_f0, inputs=[unlock_full_method4], outputs=[extract_method])
|
| 939 |
+
with gr.Row():
|
| 940 |
+
refesh_file.click(fn=change_models_choices, inputs=[], outputs=[model_file, index_file])
|
| 941 |
+
zip_model.click(fn=zip_file, inputs=[training_name, model_file, index_file], outputs=[zip_output])
|
| 942 |
+
dataset_path.change(fn=lambda folder: os.makedirs(folder, exist_ok=True), inputs=[dataset_path], outputs=[])
|
| 943 |
+
with gr.Row():
|
| 944 |
+
upload.change(fn=visible, inputs=[upload], outputs=[upload_dataset])
|
| 945 |
+
overtraining_detector.change(fn=visible, inputs=[overtraining_detector], outputs=[threshold])
|
| 946 |
+
clean_dataset.change(fn=visible, inputs=[clean_dataset], outputs=[clean_dataset_strength])
|
| 947 |
+
with gr.Row():
|
| 948 |
+
custom_dataset.change(fn=lambda custom_dataset: [visible(custom_dataset), "dataset"],inputs=[custom_dataset], outputs=[dataset_path, dataset_path])
|
| 949 |
+
training_ver.change(fn=unlock_vocoder, inputs=[training_ver, vocoders], outputs=[vocoders])
|
| 950 |
+
vocoders.change(fn=unlock_ver, inputs=[training_ver, vocoders], outputs=[training_ver])
|
| 951 |
+
upload_dataset.upload(
|
| 952 |
+
fn=lambda files, folder: [shutil.move(f.name, os.path.join(folder, os.path.split(f.name)[1])) for f in files] if folder != "" else gr_warning(translations["dataset_folder1"]),
|
| 953 |
+
inputs=[upload_dataset, dataset_path],
|
| 954 |
+
outputs=[],
|
| 955 |
+
api_name="upload_dataset"
|
| 956 |
+
)
|
| 957 |
+
with gr.Row():
|
| 958 |
+
not_use_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
| 959 |
+
custom_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
| 960 |
+
refesh_pretrain.click(fn=change_pretrained_choices, inputs=[], outputs=[pretrained_D, pretrained_G])
|
| 961 |
+
with gr.Row():
|
| 962 |
+
preprocess_button.click(
|
| 963 |
+
fn=preprocess,
|
| 964 |
+
inputs=[
|
| 965 |
+
training_name,
|
| 966 |
+
training_sr,
|
| 967 |
+
cpu_core,
|
| 968 |
+
preprocess_cut,
|
| 969 |
+
process_effects,
|
| 970 |
+
dataset_path,
|
| 971 |
+
clean_dataset,
|
| 972 |
+
clean_dataset_strength
|
| 973 |
+
],
|
| 974 |
+
outputs=[preprocess_info],
|
| 975 |
+
api_name="preprocess"
|
| 976 |
+
)
|
| 977 |
+
with gr.Row():
|
| 978 |
+
embed_mode2.change(fn=visible_embedders, inputs=[embed_mode2], outputs=[extract_embedders])
|
| 979 |
+
extract_method.change(fn=hoplength_show, inputs=[extract_method], outputs=[extract_hop_length])
|
| 980 |
+
extract_embedders.change(fn=lambda extract_embedders: visible(extract_embedders == "custom"), inputs=[extract_embedders], outputs=[extract_embedders_custom])
|
| 981 |
+
with gr.Row():
|
| 982 |
+
extract_button.click(
|
| 983 |
+
fn=extract,
|
| 984 |
+
inputs=[
|
| 985 |
+
training_name,
|
| 986 |
+
training_ver,
|
| 987 |
+
extract_method,
|
| 988 |
+
training_f0,
|
| 989 |
+
extract_hop_length,
|
| 990 |
+
cpu_core,
|
| 991 |
+
gpu_number,
|
| 992 |
+
training_sr,
|
| 993 |
+
extract_embedders,
|
| 994 |
+
extract_embedders_custom,
|
| 995 |
+
onnx_f0_mode2,
|
| 996 |
+
embed_mode2
|
| 997 |
+
],
|
| 998 |
+
outputs=[extract_info],
|
| 999 |
+
api_name="extract"
|
| 1000 |
+
)
|
| 1001 |
+
with gr.Row():
|
| 1002 |
+
index_button.click(
|
| 1003 |
+
fn=create_index,
|
| 1004 |
+
inputs=[
|
| 1005 |
+
training_name,
|
| 1006 |
+
training_ver,
|
| 1007 |
+
index_algorithm
|
| 1008 |
+
],
|
| 1009 |
+
outputs=[training_info],
|
| 1010 |
+
api_name="create_index"
|
| 1011 |
+
)
|
| 1012 |
+
with gr.Row():
|
| 1013 |
+
training_button.click(
|
| 1014 |
+
fn=training,
|
| 1015 |
+
inputs=[
|
| 1016 |
+
training_name,
|
| 1017 |
+
training_ver,
|
| 1018 |
+
save_epochs,
|
| 1019 |
+
save_only_latest,
|
| 1020 |
+
save_every_weights,
|
| 1021 |
+
total_epochs,
|
| 1022 |
+
training_sr,
|
| 1023 |
+
train_batch_size,
|
| 1024 |
+
gpu_number,
|
| 1025 |
+
training_f0,
|
| 1026 |
+
not_use_pretrain,
|
| 1027 |
+
custom_pretrain,
|
| 1028 |
+
pretrained_G,
|
| 1029 |
+
pretrained_D,
|
| 1030 |
+
overtraining_detector,
|
| 1031 |
+
threshold,
|
| 1032 |
+
clean_up,
|
| 1033 |
+
cache_in_gpu,
|
| 1034 |
+
model_author,
|
| 1035 |
+
vocoders,
|
| 1036 |
+
checkpointing1,
|
| 1037 |
+
deterministic,
|
| 1038 |
+
benchmark
|
| 1039 |
+
],
|
| 1040 |
+
outputs=[training_info],
|
| 1041 |
+
api_name="training_model"
|
| 1042 |
+
)
|
| 1043 |
+
|
| 1044 |
with gr.TabItem(translations["audio_editing"], visible=configs.get("audioldm2", True)):
|
| 1045 |
gr.Markdown(translations["audio_editing_info"])
|
| 1046 |
with gr.Row():
|
|
|
|
| 1362 |
api_name="create_dataset"
|
| 1363 |
)
|
| 1364 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1365 |
with gr.TabItem(translations["fushion"], visible=configs.get("fushion_tab", True)):
|
| 1366 |
gr.Markdown(translations["fushion_markdown"])
|
| 1367 |
with gr.Row():
|