Spaces:
Runtime error
Runtime error
Update app_multi.py
Browse files- app_multi.py +467 -2
app_multi.py
CHANGED
|
@@ -72,7 +72,6 @@ app_css = '''
|
|
| 72 |
max-height: 100px;
|
| 73 |
float: right;
|
| 74 |
}
|
| 75 |
-
|
| 76 |
#model_info p {
|
| 77 |
margin: unset;
|
| 78 |
}
|
|
@@ -354,4 +353,470 @@ def youtube_downloader(
|
|
| 354 |
|
| 355 |
quiet = "--quiet --no-warnings" if quiet else ""
|
| 356 |
command = f"""
|
| 357 |
-
yt-dlp {quiet} -x --audio-format wav -f bestaudio -o "{output_filename}" --download-sections "*{start_time}-{end_time}" "{url_base}{video_identifier}" # noqa: E501
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
max-height: 100px;
|
| 73 |
float: right;
|
| 74 |
}
|
|
|
|
| 75 |
#model_info p {
|
| 76 |
margin: unset;
|
| 77 |
}
|
|
|
|
| 353 |
|
| 354 |
quiet = "--quiet --no-warnings" if quiet else ""
|
| 355 |
command = f"""
|
| 356 |
+
yt-dlp {quiet} -x --audio-format wav -f bestaudio -o "{output_filename}" --download-sections "*{start_time}-{end_time}" "{url_base}{video_identifier}" # noqa: E501
|
| 357 |
+
""".strip()
|
| 358 |
+
|
| 359 |
+
attempts = 0
|
| 360 |
+
while True:
|
| 361 |
+
try:
|
| 362 |
+
_ = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
|
| 363 |
+
except subprocess.CalledProcessError:
|
| 364 |
+
attempts += 1
|
| 365 |
+
if attempts == num_attempts:
|
| 366 |
+
return None
|
| 367 |
+
else:
|
| 368 |
+
break
|
| 369 |
+
|
| 370 |
+
if output_path.exists():
|
| 371 |
+
return output_path
|
| 372 |
+
else:
|
| 373 |
+
return None
|
| 374 |
+
|
| 375 |
+
def audio_separated(audio_input, progress=gr.Progress()):
    """Split an audio clip into vocal and accompaniment stems with Demucs.

    The clip is written to a temporary file in the working directory,
    ``python3 -m demucs --two-stems=vocals`` is run on it on the CPU with
    ``output`` as the output directory, and the temporary file is removed
    afterwards.

    Args:
        audio_input: Indexable pair whose two items are passed, in order, to
            ``write`` after the file name, or None when nothing was provided.
            (presumably Gradio's ``(sample_rate, samples)`` audio value and
            ``scipy.io.wavfile.write`` -- TODO confirm against the imports.)
        progress: Gradio progress tracker used to report coarse progress.

    Returns:
        A 3-tuple ``(vocals_path, no_vocals_path, message)``. On failure the
        two paths are None and ``message`` explains why.
    """
    progress(progress=0, desc="Starting...")

    if audio_input is None:
        return (None, None, 'Please input audio.')

    # Random prefix plus a timestamp keeps temp files of different requests
    # from overwriting each other.
    filename = (
        str(random.randint(10000, 99999))
        + datetime.now().strftime("%d%m%Y%H%M%S")
    )
    # `high_quality` is a module-level switch defined outside this block.
    extension = ".wav" if high_quality else ".mp3"
    temp_audio = filename + extension

    progress(progress=0.10, desc="Please wait...")
    os.makedirs("output", exist_ok=True)

    progress(progress=0.20, desc="Please wait...")
    # NOTE(review): the same `write` helper is used for the ".mp3" name, so
    # that file may actually hold WAV data -- confirm Demucs accepts it.
    write(temp_audio, audio_input[0], audio_input[1])

    progress(progress=0.50, desc="Please wait...")
    # An argument list (no shell) avoids quoting problems and lets us see
    # the exit status, which os.system() silently discarded.
    command_demucs = ["python3", "-m", "demucs", "--two-stems=vocals"]
    if not high_quality:
        command_demucs += ["--mp3", "--mp3-bitrate", "128"]
    command_demucs += ["-d", "cpu", temp_audio, "-o", "output"]

    try:
        completed = subprocess.run(command_demucs)
    finally:
        # Always remove the temporary input, even if Demucs could not start.
        if os.path.exists(temp_audio):
            os.remove(temp_audio)

    progress(progress=0.80, desc="Please wait...")

    vocals_path = "./output/htdemucs/" + filename + "/vocals" + extension
    no_vocals_path = "./output/htdemucs/" + filename + "/no_vocals" + extension

    stems_ready = os.path.exists(vocals_path) and os.path.exists(no_vocals_path)
    if completed.returncode != 0 or not stems_ready:
        # Do not hand paths to the UI that point at files that do not exist.
        return (None, None, 'Audio separation failed.')

    progress(progress=1.0, desc="Please wait...")
    return vocals_path, no_vocals_path, "Successfully..."
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
# https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/blob/main/infer-web.py#L118 # noqa
|
| 442 |
+
def vc_func(
    input_audio, model_index, pitch_adjust, f0_method, feat_ratio,
    filter_radius, rms_mix_rate, resample_option
):
    """Convert an audio clip with the selected voice model.

    The clip is converted to float32, down-mixed to mono, resampled to
    16 kHz and then passed to the model's ``pipeline`` together with the
    conversion settings.

    Args:
        input_audio: ``(sample_rate, samples)`` pair where ``samples`` is a
            NumPy array, or None when nothing was provided.
        model_index: Index into ``loaded_models``, or None.
        pitch_adjust: Pitch shift; converted with ``int()`` before use.
        f0_method: Forwarded unchanged to the pipeline.
        feat_ratio: Forwarded unchanged to the pipeline.
        filter_radius: Forwarded unchanged to the pipeline.
        rms_mix_rate: Forwarded unchanged to the pipeline.
        resample_option: ``'Disable resampling'`` or a string holding an
            integer sample rate.

    Returns:
        ``((sample_rate, audio), 'Success')`` on success, otherwise
        ``(None, message)`` describing what is missing or not allowed.
    """
    if input_audio is None:
        return (None, 'Please provide input audio.')

    if model_index is None:
        return (None, 'Please select a model.')

    # Reference: so-vits
    (audio_samp, audio_npy) = input_audio

    # An empty clip has nothing to convert; report it like a missing input.
    if audio_npy.size == 0:
        return (None, 'Please provide input audio.')

    # https://huggingface.co/spaces/zomehwh/rvc-models/blob/main/app.py#L49
    # Can be change well, we will see
    if (audio_npy.shape[0] / audio_samp) > 600 and in_hf_space:
        return (None, 'Input audio is longer than 600 secs.')

    model = loaded_models[model_index]

    # Bloody hell: https://stackoverflow.com/questions/26921836/
    if audio_npy.dtype != np.float32:
        if np.issubdtype(audio_npy.dtype, np.integer):
            # Scale integer PCM by the maximum value of its type.
            audio_npy = (
                audio_npy / np.iinfo(audio_npy.dtype).max
            ).astype(np.float32)
        else:
            # np.iinfo() rejects floating dtypes (e.g. float64), so those
            # are only cast, not rescaled.
            audio_npy = audio_npy.astype(np.float32)

    if len(audio_npy.shape) > 1:
        audio_npy = librosa.to_mono(audio_npy.transpose(1, 0))

    if audio_samp != 16000:
        audio_npy = librosa.resample(
            audio_npy,
            orig_sr=audio_samp,
            target_sr=16000
        )

    pitch_int = int(pitch_adjust)

    resample = (
        0 if resample_option == 'Disable resampling'
        else int(resample_option)
    )

    # Filled in by the pipeline; printed below as npy / f0 / infer timings.
    times = [0, 0, 0]

    checksum = hashlib.sha512()
    checksum.update(audio_npy.tobytes())

    output_audio = model['vc'].pipeline(
        hubert_model,
        model['net_g'],
        model['metadata'].get('speaker_id', 0),
        audio_npy,
        checksum.hexdigest(),
        times,
        pitch_int,
        f0_method,
        path.join('model', model['name'], model['metadata']['feat_index']),
        feat_ratio,
        model['if_f0'],
        filter_radius,
        model['target_sr'],
        resample,
        rms_mix_rate,
        'v2'
    )

    # Report the requested rate only when resampling was requested with a
    # rate of at least 16 kHz that differs from the model's own rate.
    out_sr = (
        resample if resample >= 16000 and model['target_sr'] != resample
        else model['target_sr']
    )

    print(f'npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s')
    return ((out_sr, output_audio), 'Success')
|
| 516 |
+
|
| 517 |
+
|
| 518 |
+
async def edge_tts_vc_func(
    input_text, model_index, tts_speaker, pitch_adjust, f0_method, feat_ratio,
    filter_radius, rms_mix_rate, resample_option
):
    """Synthesize speech for ``input_text`` and convert it with ``vc_func``.

    Args:
        input_text: Text to speak. None, empty and whitespace-only values
            are rejected before the TTS service is contacted.
        model_index: Index into ``loaded_models``, or None.
        tts_speaker: Index into ``tts_speakers_list``, or None.
        pitch_adjust, f0_method, feat_ratio, filter_radius, rms_mix_rate,
        resample_option: Forwarded unchanged to ``vc_func``.

    Returns:
        Whatever ``vc_func`` returns, or ``(None, message)`` when one of the
        required inputs is missing.
    """
    # A cleared text box yields '' rather than None, so test for blank text
    # instead of only for None.
    if input_text is None or not str(input_text).strip():
        return (None, 'Please provide TTS text.')

    if tts_speaker is None:
        return (None, 'Please select TTS speaker.')

    if model_index is None:
        return (None, 'Please select a model.')

    speaker = tts_speakers_list[tts_speaker]['ShortName']
    (tts_np, tts_sr) = await util.call_edge_tts(speaker, input_text)

    # vc_func expects the pair as (sample_rate, samples).
    return vc_func(
        (tts_sr, tts_np),
        model_index,
        pitch_adjust,
        f0_method,
        feat_ratio,
        filter_radius,
        rms_mix_rate,
        resample_option
    )
|
| 543 |
+
|
| 544 |
+
|
| 545 |
+
def update_model_info(model_index):
    """Build the Markdown shown in the model-info panel.

    Args:
        model_index: Index into ``loaded_models``, or None when no model is
            selected.

    Returns:
        A Markdown string: a prompt to pick a model when ``model_index`` is
        None, otherwise the model's name, author, source and note taken
        from its ``metadata`` mapping (author defaults to 'Anonymous',
        source to 'Unknown', note to an empty string).
    """
    if model_index is None:
        return (
            '### Model info\n'
            'Please select a model from dropdown above.'
        )

    metadata = loaded_models[model_index]['metadata']

    # The template has no icon placeholder, so the icon URL that used to be
    # computed here was never rendered; it is no longer computed, which also
    # avoids failing on a non-string `icon` entry.
    return (
        '### Model info\n'
        '**{name}**\n\n'
        'Author: {author}\n\n'
        'Source: {source}\n\n'
        '{note}'
    ).format(
        name=metadata.get('name'),
        author=metadata.get('author', 'Anonymous'),
        source=metadata.get('source', 'Unknown'),
        note=metadata.get('note', ''),
    )
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
def _example_vc(
    input_audio, model_index, pitch_adjust, f0_method, feat_ratio,
    filter_radius=3, rms_mix_rate=1, resample_option='Disable resampling'
):
    """Run ``vc_func`` for an example row and refresh the model-info panel.

    The examples gallery in this file supplies only the first five
    arguments, so the last three have defaults; they mirror the initial
    values of the corresponding UI controls (filter radius 3, mix rate 1,
    resampling disabled).

    Returns:
        ``(audio, message, model_info_markdown)``, i.e. the ``vc_func``
        result followed by ``update_model_info(model_index)``.
    """
    (audio, message) = vc_func(
        input_audio, model_index, pitch_adjust, f0_method, feat_ratio,
        filter_radius, rms_mix_rate, resample_option
    )
    return (
        audio,
        message,
        update_model_info(model_index)
    )
|
| 588 |
+
|
| 589 |
+
|
| 590 |
+
async def _example_edge_tts(
    input_text, model_index, tts_speaker, pitch_adjust, f0_method, feat_ratio,
    filter_radius=3, rms_mix_rate=1, resample_option='Disable resampling'
):
    """Run ``edge_tts_vc_func`` for an example row and refresh model info.

    The examples gallery in this file supplies only the first six
    arguments, so the last three have defaults; they mirror the initial
    values of the corresponding UI controls (filter radius 3, mix rate 1,
    resampling disabled).

    Returns:
        ``(audio, message, model_info_markdown)``, i.e. the
        ``edge_tts_vc_func`` result followed by
        ``update_model_info(model_index)``.
    """
    (audio, message) = await edge_tts_vc_func(
        input_text, model_index, tts_speaker, pitch_adjust, f0_method,
        feat_ratio, filter_radius, rms_mix_rate, resample_option
    )
    return (
        audio,
        message,
        update_model_info(model_index)
    )
|
| 603 |
+
|
| 604 |
+
|
| 605 |
+
with app:
|
| 606 |
+
gr.HTML("<center>"
|
| 607 |
+
"<h1>🥳🎶🎡 - AI歌手,RVC歌声转换 + AI变声</h1>"
|
| 608 |
+
"</center>")
|
| 609 |
+
gr.Markdown("### <center>🦄 - 能够自动提取视频中的声音,并去除背景音;Powered by [RVC-Project](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)</center>")
|
| 610 |
+
gr.Markdown("### <center>更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")
|
| 611 |
+
|
| 612 |
+
with gr.Tab("🤗 - B站视频提取声音"):
|
| 613 |
+
with gr.Row():
|
| 614 |
+
with gr.Column():
|
| 615 |
+
ydl_url_input = gr.Textbox(label="B站视频网址(可直接填写相应的BV号)", value = "https://www.bilibili.com/video/BV...")
|
| 616 |
+
start = gr.Number(value=0, label="起始时间 (秒)")
|
| 617 |
+
end = gr.Number(value=15, label="结束时间 (秒)")
|
| 618 |
+
ydl_url_submit = gr.Button("提取声音文件吧", variant="primary")
|
| 619 |
+
as_audio_submit = gr.Button("去除背景音吧", variant="primary")
|
| 620 |
+
with gr.Column():
|
| 621 |
+
ydl_audio_output = gr.Audio(label="Audio from Bilibili")
|
| 622 |
+
as_audio_input = ydl_audio_output
|
| 623 |
+
as_audio_vocals = gr.Audio(label="歌曲人声部分")
|
| 624 |
+
as_audio_no_vocals = gr.Audio(label="Music only", type="filepath", visible=False)
|
| 625 |
+
as_audio_message = gr.Textbox(label="Message", visible=False)
|
| 626 |
+
|
| 627 |
+
ydl_url_submit.click(fn=youtube_downloader, inputs=[ydl_url_input, start, end], outputs=[ydl_audio_output])
|
| 628 |
+
as_audio_submit.click(fn=audio_separated, inputs=[as_audio_input], outputs=[as_audio_vocals, as_audio_no_vocals, as_audio_message], show_progress=True, queue=True)
|
| 629 |
+
|
| 630 |
+
with gr.Row():
|
| 631 |
+
with gr.Column():
|
| 632 |
+
with gr.Tab('🎶 - 歌声转换'):
|
| 633 |
+
input_audio = as_audio_vocals
|
| 634 |
+
vc_convert_btn = gr.Button('进行歌声转换吧!', variant='primary')
|
| 635 |
+
full_song = gr.Button("加入歌曲伴奏吧!", variant="primary")
|
| 636 |
+
new_song = gr.Audio(label="AI歌手+伴奏", type="filepath")
|
| 637 |
+
|
| 638 |
+
with gr.Tab('🎙️ - 文本转语音'):
|
| 639 |
+
tts_input = gr.Textbox(
|
| 640 |
+
label='请填写您想要转换的文本(中英皆可)',
|
| 641 |
+
lines=3
|
| 642 |
+
)
|
| 643 |
+
tts_speaker = gr.Dropdown(
|
| 644 |
+
[
|
| 645 |
+
'%s (%s)' % (
|
| 646 |
+
s['FriendlyName'],
|
| 647 |
+
s['Gender']
|
| 648 |
+
)
|
| 649 |
+
for s in tts_speakers_list
|
| 650 |
+
],
|
| 651 |
+
label='请选择一个相应语言的说话人',
|
| 652 |
+
type='index'
|
| 653 |
+
)
|
| 654 |
+
|
| 655 |
+
tts_convert_btn = gr.Button('进行AI变声吧', variant='primary')
|
| 656 |
+
|
| 657 |
+
with gr.Tab("📺 - 音乐视频"):
|
| 658 |
+
with gr.Row():
|
| 659 |
+
with gr.Column():
|
| 660 |
+
inp1 = gr.Textbox(label="为视频配上精彩的文案吧(选填;英文)")
|
| 661 |
+
inp2 = new_song
|
| 662 |
+
inp3 = gr.Image(source='upload', type='filepath', label="上传一张背景图片吧")
|
| 663 |
+
btn = gr.Button("生成您的专属音乐视频吧", variant="primary")
|
| 664 |
+
|
| 665 |
+
with gr.Column():
|
| 666 |
+
out1 = gr.Video(label='您的专属音乐视频')
|
| 667 |
+
btn.click(fn=infer, inputs=[inp1, inp2, inp3], outputs=[out1])
|
| 668 |
+
|
| 669 |
+
pitch_adjust = gr.Slider(
|
| 670 |
+
label='Pitch',
|
| 671 |
+
minimum=-24,
|
| 672 |
+
maximum=24,
|
| 673 |
+
step=1,
|
| 674 |
+
value=0
|
| 675 |
+
)
|
| 676 |
+
f0_method = gr.Radio(
|
| 677 |
+
label='f0 methods',
|
| 678 |
+
choices=['pm', 'rmvpe'],
|
| 679 |
+
value='rmvpe',
|
| 680 |
+
interactive=True
|
| 681 |
+
)
|
| 682 |
+
|
| 683 |
+
with gr.Accordion('更多设置', open=False):
|
| 684 |
+
feat_ratio = gr.Slider(
|
| 685 |
+
label='Feature ratio',
|
| 686 |
+
minimum=0,
|
| 687 |
+
maximum=1,
|
| 688 |
+
step=0.1,
|
| 689 |
+
value=0.6
|
| 690 |
+
)
|
| 691 |
+
filter_radius = gr.Slider(
|
| 692 |
+
label='Filter radius',
|
| 693 |
+
minimum=0,
|
| 694 |
+
maximum=7,
|
| 695 |
+
step=1,
|
| 696 |
+
value=3
|
| 697 |
+
)
|
| 698 |
+
rms_mix_rate = gr.Slider(
|
| 699 |
+
label='Volume envelope mix rate',
|
| 700 |
+
minimum=0,
|
| 701 |
+
maximum=1,
|
| 702 |
+
step=0.1,
|
| 703 |
+
value=1
|
| 704 |
+
)
|
| 705 |
+
resample_rate = gr.Dropdown(
|
| 706 |
+
[
|
| 707 |
+
'Disable resampling',
|
| 708 |
+
'16000',
|
| 709 |
+
'22050',
|
| 710 |
+
'44100',
|
| 711 |
+
'48000'
|
| 712 |
+
],
|
| 713 |
+
label='Resample rate',
|
| 714 |
+
value='Disable resampling'
|
| 715 |
+
)
|
| 716 |
+
|
| 717 |
+
with gr.Column():
|
| 718 |
+
# Model select
|
| 719 |
+
model_index = gr.Dropdown(
|
| 720 |
+
[
|
| 721 |
+
'%s - %s' % (
|
| 722 |
+
m['metadata'].get('source', 'Unknown'),
|
| 723 |
+
m['metadata'].get('name')
|
| 724 |
+
)
|
| 725 |
+
for m in loaded_models
|
| 726 |
+
],
|
| 727 |
+
label='请选择您的AI歌手(必选)',
|
| 728 |
+
type='index'
|
| 729 |
+
)
|
| 730 |
+
|
| 731 |
+
# Model info
|
| 732 |
+
with gr.Box():
|
| 733 |
+
model_info = gr.Markdown(
|
| 734 |
+
'### AI歌手信息\n'
|
| 735 |
+
'Please select a model from dropdown above.',
|
| 736 |
+
elem_id='model_info'
|
| 737 |
+
)
|
| 738 |
+
|
| 739 |
+
output_audio = gr.Audio(label='AI歌手(无伴奏)', type="filepath")
|
| 740 |
+
output_msg = gr.Textbox(label='Output message')
|
| 741 |
+
|
| 742 |
+
multi_examples = multi_cfg.get('examples')
|
| 743 |
+
if (
|
| 744 |
+
multi_examples and
|
| 745 |
+
multi_examples.get('vc') and multi_examples.get('tts_vc')
|
| 746 |
+
):
|
| 747 |
+
with gr.Accordion('Sweet sweet examples', open=False):
|
| 748 |
+
with gr.Row():
|
| 749 |
+
# VC Example
|
| 750 |
+
if multi_examples.get('vc'):
|
| 751 |
+
gr.Examples(
|
| 752 |
+
label='Audio conversion examples',
|
| 753 |
+
examples=multi_examples.get('vc'),
|
| 754 |
+
inputs=[
|
| 755 |
+
input_audio, model_index, pitch_adjust, f0_method,
|
| 756 |
+
feat_ratio
|
| 757 |
+
],
|
| 758 |
+
outputs=[output_audio, output_msg, model_info],
|
| 759 |
+
fn=_example_vc,
|
| 760 |
+
cache_examples=args.cache_examples,
|
| 761 |
+
run_on_click=args.cache_examples
|
| 762 |
+
)
|
| 763 |
+
|
| 764 |
+
# Edge TTS Example
|
| 765 |
+
if multi_examples.get('tts_vc'):
|
| 766 |
+
gr.Examples(
|
| 767 |
+
label='TTS conversion examples',
|
| 768 |
+
examples=multi_examples.get('tts_vc'),
|
| 769 |
+
inputs=[
|
| 770 |
+
tts_input, model_index, tts_speaker, pitch_adjust,
|
| 771 |
+
f0_method, feat_ratio
|
| 772 |
+
],
|
| 773 |
+
outputs=[output_audio, output_msg, model_info],
|
| 774 |
+
fn=_example_edge_tts,
|
| 775 |
+
cache_examples=args.cache_examples,
|
| 776 |
+
run_on_click=args.cache_examples
|
| 777 |
+
)
|
| 778 |
+
|
| 779 |
+
vc_convert_btn.click(
|
| 780 |
+
vc_func,
|
| 781 |
+
[
|
| 782 |
+
input_audio, model_index, pitch_adjust, f0_method, feat_ratio,
|
| 783 |
+
filter_radius, rms_mix_rate, resample_rate
|
| 784 |
+
],
|
| 785 |
+
[output_audio, output_msg],
|
| 786 |
+
api_name='audio_conversion'
|
| 787 |
+
)
|
| 788 |
+
|
| 789 |
+
tts_convert_btn.click(
|
| 790 |
+
edge_tts_vc_func,
|
| 791 |
+
[
|
| 792 |
+
tts_input, model_index, tts_speaker, pitch_adjust, f0_method,
|
| 793 |
+
feat_ratio, filter_radius, rms_mix_rate, resample_rate
|
| 794 |
+
],
|
| 795 |
+
[output_audio, output_msg],
|
| 796 |
+
api_name='tts_conversion'
|
| 797 |
+
)
|
| 798 |
+
|
| 799 |
+
full_song.click(fn=mix, inputs=[output_audio, as_audio_no_vocals], outputs=[new_song])
|
| 800 |
+
|
| 801 |
+
model_index.change(
|
| 802 |
+
update_model_info,
|
| 803 |
+
inputs=[model_index],
|
| 804 |
+
outputs=[model_info],
|
| 805 |
+
show_progress=False,
|
| 806 |
+
queue=False
|
| 807 |
+
)
|
| 808 |
+
|
| 809 |
+
gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
|
| 810 |
+
gr.Markdown("### <center>🧸 - 如何使用此程序:填写视频网址和视频起止时间后,依次点击“提取声音文件吧”、“去除背景音吧”、“进行歌声转换吧!”、“加入歌曲伴奏吧!”四个按键即可。</center>")
|
| 811 |
+
gr.HTML('''
|
| 812 |
+
<div class="footer">
|
| 813 |
+
<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
|
| 814 |
+
</p>
|
| 815 |
+
</div>
|
| 816 |
+
''')
|
| 817 |
+
|
| 818 |
+
# Enable request queuing (one job at a time, at most 20 waiting; the API is
# exposed only when requested via args.api), then start the server with
# errors surfaced in the UI.
queue_settings = {
    'concurrency_count': 1,
    'max_size': 20,
    'api_open': args.api,
}
queued_app = app.queue(**queue_settings)
queued_app.launch(show_error=True)
|