Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
#13
by
hjbfd
- opened
app.py
CHANGED
|
@@ -46,7 +46,8 @@ logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
|
|
| 46 |
converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
|
| 47 |
converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
|
| 48 |
|
| 49 |
-
|
|
|
|
| 50 |
test_names = ["model.pth", "model.index"]
|
| 51 |
|
| 52 |
for url, filename in zip(test_model.split(", "), test_names):
|
|
@@ -64,9 +65,9 @@ for url, filename in zip(test_model.split(", "), test_names):
|
|
| 64 |
with open(filename, "wb") as f:
|
| 65 |
pass
|
| 66 |
|
| 67 |
-
title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
|
| 68 |
-
description = "
|
| 69 |
-
RESOURCES = "- You can
|
| 70 |
theme = args.theme
|
| 71 |
delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
|
| 72 |
|
|
@@ -83,15 +84,15 @@ async def get_voices_list(proxy=None):
|
|
| 83 |
"""Print all available voices."""
|
| 84 |
from edge_tts import list_voices
|
| 85 |
voices = await list_voices(proxy=proxy)
|
| 86 |
-
voices = sorted(voices, key=lambda voice: voice
|
| 87 |
|
| 88 |
table = [
|
| 89 |
{
|
| 90 |
-
"ShortName": voice
|
| 91 |
-
"Gender": voice
|
| 92 |
-
"ContentCategories": ", ".join(voice
|
| 93 |
-
"VoicePersonalities": ", ".join(voice
|
| 94 |
-
"FriendlyName": voice
|
| 95 |
}
|
| 96 |
for voice in voices
|
| 97 |
]
|
|
@@ -102,11 +103,8 @@ async def get_voices_list(proxy=None):
|
|
| 102 |
def find_files(directory):
|
| 103 |
file_paths = []
|
| 104 |
for filename in os.listdir(directory):
|
| 105 |
-
# Check if the file has the desired extension
|
| 106 |
if filename.endswith('.pth') or filename.endswith('.zip') or filename.endswith('.index'):
|
| 107 |
-
# If yes, add the file path to the list
|
| 108 |
file_paths.append(os.path.join(directory, filename))
|
| 109 |
-
|
| 110 |
return file_paths
|
| 111 |
|
| 112 |
|
|
@@ -120,7 +118,6 @@ def unzip_in_folder(my_zip, my_dir):
|
|
| 120 |
|
| 121 |
|
| 122 |
def find_my_model(a_, b_):
|
| 123 |
-
|
| 124 |
if a_ is None or a_.endswith(".pth"):
|
| 125 |
return a_, b_
|
| 126 |
|
|
@@ -179,7 +176,6 @@ def ensure_valid_file(url):
|
|
| 179 |
raise ValueError("No Content-Length header found")
|
| 180 |
|
| 181 |
file_size = int(content_length)
|
| 182 |
-
# print("debug", url, file_size)
|
| 183 |
if file_size > 900000000 and IS_ZERO_GPU:
|
| 184 |
raise ValueError("The file is too large. Max allowed is 900 MB.")
|
| 185 |
|
|
@@ -196,11 +192,10 @@ def clear_files(directory):
|
|
| 196 |
|
| 197 |
|
| 198 |
def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
|
| 199 |
-
|
| 200 |
-
if not url_data:
|
| 201 |
return None, None
|
| 202 |
|
| 203 |
-
if "," in
|
| 204 |
a_, b_ = url_data.split(",")
|
| 205 |
a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
|
| 206 |
else:
|
|
@@ -250,8 +245,6 @@ def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
|
|
| 250 |
except Exception as e:
|
| 251 |
raise e
|
| 252 |
finally:
|
| 253 |
-
# time.sleep(10)
|
| 254 |
-
# shutil.rmtree(directory)
|
| 255 |
t = threading.Thread(target=clear_files, args=(directory,))
|
| 256 |
t.start()
|
| 257 |
|
|
@@ -264,7 +257,6 @@ def add_audio_effects(audio_list, type_output):
|
|
| 264 |
try:
|
| 265 |
output_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
|
| 266 |
|
| 267 |
-
# Initialize audio effects plugins
|
| 268 |
board = Pedalboard(
|
| 269 |
[
|
| 270 |
HighpassFilter(),
|
|
@@ -273,7 +265,6 @@ def add_audio_effects(audio_list, type_output):
|
|
| 273 |
]
|
| 274 |
)
|
| 275 |
|
| 276 |
-
# Temporary WAV to hold processed data before exporting
|
| 277 |
temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
|
| 278 |
|
| 279 |
with AudioFile(audio_path) as f:
|
|
@@ -283,11 +274,9 @@ def add_audio_effects(audio_list, type_output):
|
|
| 283 |
effected = board(chunk, f.samplerate, reset=False)
|
| 284 |
o.write(effected)
|
| 285 |
|
| 286 |
-
# Convert with pydub to desired output type
|
| 287 |
audio_seg = AudioSegment.from_file(temp_wav, format=type_output)
|
| 288 |
audio_seg.export(output_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
|
| 289 |
|
| 290 |
-
# Clean up temp file
|
| 291 |
os.remove(temp_wav)
|
| 292 |
|
| 293 |
result.append(output_path)
|
|
@@ -300,7 +289,6 @@ def add_audio_effects(audio_list, type_output):
|
|
| 300 |
|
| 301 |
|
| 302 |
def apply_noisereduce(audio_list, type_output):
|
| 303 |
-
# https://github.com/sa-if/Audio-Denoiser
|
| 304 |
print("Noice reduce")
|
| 305 |
|
| 306 |
result = []
|
|
@@ -308,16 +296,10 @@ def apply_noisereduce(audio_list, type_output):
|
|
| 308 |
out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
|
| 309 |
|
| 310 |
try:
|
| 311 |
-
# Load audio file
|
| 312 |
audio = AudioSegment.from_file(audio_path)
|
| 313 |
-
|
| 314 |
-
# Convert audio to numpy array
|
| 315 |
samples = np.array(audio.get_array_of_samples())
|
| 316 |
-
|
| 317 |
-
# Reduce noise
|
| 318 |
reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
|
| 319 |
|
| 320 |
-
# Convert reduced noise signal back to audio
|
| 321 |
reduced_audio = AudioSegment(
|
| 322 |
reduced_noise.tobytes(),
|
| 323 |
frame_rate=audio.frame_rate,
|
|
@@ -325,7 +307,6 @@ def apply_noisereduce(audio_list, type_output):
|
|
| 325 |
channels=audio.channels
|
| 326 |
)
|
| 327 |
|
| 328 |
-
# Save reduced audio to file
|
| 329 |
reduced_audio.export(out_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
|
| 330 |
result.append(out_path)
|
| 331 |
|
|
@@ -367,7 +348,7 @@ def run(
|
|
| 367 |
steps,
|
| 368 |
):
|
| 369 |
if not audio_files:
|
| 370 |
-
raise ValueError("
|
| 371 |
|
| 372 |
if isinstance(audio_files, str):
|
| 373 |
audio_files = [audio_files]
|
|
@@ -411,17 +392,19 @@ def run(
|
|
| 411 |
|
| 412 |
def audio_conf():
|
| 413 |
return gr.File(
|
| 414 |
-
label="Audio
|
| 415 |
file_count="multiple",
|
| 416 |
type="filepath",
|
|
|
|
| 417 |
container=True,
|
| 418 |
)
|
| 419 |
|
| 420 |
|
| 421 |
def model_conf():
|
| 422 |
return gr.File(
|
| 423 |
-
label="Model
|
| 424 |
type="filepath",
|
|
|
|
| 425 |
height=130,
|
| 426 |
)
|
| 427 |
|
|
@@ -450,8 +433,9 @@ def pitch_lvl_conf():
|
|
| 450 |
|
| 451 |
def index_conf():
|
| 452 |
return gr.File(
|
| 453 |
-
label="Index
|
| 454 |
type="filepath",
|
|
|
|
| 455 |
height=130,
|
| 456 |
)
|
| 457 |
|
|
@@ -498,14 +482,15 @@ def consonant_protec_conf():
|
|
| 498 |
|
| 499 |
def button_conf():
|
| 500 |
return gr.Button(
|
| 501 |
-
"
|
| 502 |
variant="primary",
|
|
|
|
| 503 |
)
|
| 504 |
|
| 505 |
|
| 506 |
def output_conf():
|
| 507 |
return gr.File(
|
| 508 |
-
label="
|
| 509 |
file_count="multiple",
|
| 510 |
interactive=False,
|
| 511 |
)
|
|
@@ -514,25 +499,24 @@ def output_conf():
|
|
| 514 |
def active_tts_conf():
|
| 515 |
return gr.Checkbox(
|
| 516 |
False,
|
| 517 |
-
label="
|
| 518 |
-
# info="",
|
| 519 |
container=False,
|
| 520 |
)
|
| 521 |
|
| 522 |
|
| 523 |
def tts_voice_conf():
|
| 524 |
return gr.Dropdown(
|
| 525 |
-
label="
|
| 526 |
-
choices=
|
| 527 |
visible=False,
|
| 528 |
-
value=
|
| 529 |
)
|
| 530 |
|
| 531 |
|
| 532 |
def tts_text_conf():
|
| 533 |
return gr.Textbox(
|
| 534 |
value="",
|
| 535 |
-
placeholder="
|
| 536 |
label="Text",
|
| 537 |
visible=False,
|
| 538 |
lines=3,
|
|
@@ -541,7 +525,7 @@ def tts_text_conf():
|
|
| 541 |
|
| 542 |
def tts_button_conf():
|
| 543 |
return gr.Button(
|
| 544 |
-
"
|
| 545 |
variant="secondary",
|
| 546 |
visible=False,
|
| 547 |
)
|
|
@@ -550,8 +534,7 @@ def tts_button_conf():
|
|
| 550 |
def tts_play_conf():
|
| 551 |
return gr.Checkbox(
|
| 552 |
False,
|
| 553 |
-
label="
|
| 554 |
-
# info="",
|
| 555 |
container=False,
|
| 556 |
visible=False,
|
| 557 |
)
|
|
@@ -561,7 +544,6 @@ def sound_gui():
|
|
| 561 |
return gr.Audio(
|
| 562 |
value=None,
|
| 563 |
type="filepath",
|
| 564 |
-
# format="mp3",
|
| 565 |
autoplay=True,
|
| 566 |
visible=True,
|
| 567 |
interactive=False,
|
|
@@ -582,16 +564,16 @@ def steps_conf():
|
|
| 582 |
|
| 583 |
def format_output_gui():
|
| 584 |
return gr.Dropdown(
|
| 585 |
-
label="Format
|
| 586 |
choices=["wav", "mp3", "flac"],
|
| 587 |
value="wav",
|
| 588 |
)
|
| 589 |
|
|
|
|
| 590 |
def denoise_conf():
|
| 591 |
return gr.Checkbox(
|
| 592 |
False,
|
| 593 |
-
label="
|
| 594 |
-
# info="",
|
| 595 |
container=False,
|
| 596 |
visible=True,
|
| 597 |
)
|
|
@@ -600,8 +582,7 @@ def denoise_conf():
|
|
| 600 |
def effects_conf():
|
| 601 |
return gr.Checkbox(
|
| 602 |
False,
|
| 603 |
-
label="Reverb",
|
| 604 |
-
# info="",
|
| 605 |
container=False,
|
| 606 |
visible=True,
|
| 607 |
)
|
|
@@ -615,7 +596,13 @@ def infer_tts_audio(tts_voice, tts_text, play_tts):
|
|
| 615 |
os.makedirs(os.path.join(out_dir, folder_tts), exist_ok=True)
|
| 616 |
out_path = os.path.join(out_dir, folder_tts, "tts.mp3")
|
| 617 |
|
| 618 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
if play_tts:
|
| 620 |
return [out_path], out_path
|
| 621 |
return [out_path], None
|
|
@@ -636,8 +623,7 @@ def show_components_tts(value_active):
|
|
| 636 |
def down_active_conf():
|
| 637 |
return gr.Checkbox(
|
| 638 |
False,
|
| 639 |
-
label="URL
|
| 640 |
-
# info="",
|
| 641 |
container=False,
|
| 642 |
)
|
| 643 |
|
|
@@ -645,8 +631,8 @@ def down_active_conf():
|
|
| 645 |
def down_url_conf():
|
| 646 |
return gr.Textbox(
|
| 647 |
value="",
|
| 648 |
-
placeholder="
|
| 649 |
-
label="
|
| 650 |
visible=False,
|
| 651 |
lines=1,
|
| 652 |
)
|
|
@@ -654,7 +640,7 @@ def down_url_conf():
|
|
| 654 |
|
| 655 |
def down_button_conf():
|
| 656 |
return gr.Button(
|
| 657 |
-
"
|
| 658 |
variant="secondary",
|
| 659 |
visible=False,
|
| 660 |
)
|
|
@@ -671,7 +657,7 @@ def show_components_down(value_active):
|
|
| 671 |
|
| 672 |
CSS = """
|
| 673 |
#audio_tts {
|
| 674 |
-
visibility: hidden;
|
| 675 |
height: 0px;
|
| 676 |
width: 0px;
|
| 677 |
max-width: 0px;
|
|
@@ -684,168 +670,146 @@ def get_gui(theme):
|
|
| 684 |
gr.Markdown(title)
|
| 685 |
gr.Markdown(description)
|
| 686 |
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
fn=infer_tts_audio,
|
| 712 |
-
inputs=[tts_voice, tts_text, tts_active_play],
|
| 713 |
-
outputs=[aud, tts_play],
|
| 714 |
-
)
|
| 715 |
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
with gr.Row():
|
| 722 |
-
with gr.Column(scale=3):
|
| 723 |
-
down_url_gui = down_url_conf()
|
| 724 |
-
with gr.Column(scale=1):
|
| 725 |
-
down_button_gui = down_button_conf()
|
| 726 |
|
| 727 |
-
|
|
|
|
|
|
|
| 728 |
with gr.Row():
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 743 |
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
envel_r = envelope_ratio_conf()
|
| 750 |
-
const = consonant_protec_conf()
|
| 751 |
-
steps_gui = steps_conf()
|
| 752 |
-
format_out = format_output_gui()
|
| 753 |
-
with gr.Row():
|
| 754 |
-
with gr.Column():
|
| 755 |
-
with gr.Row():
|
| 756 |
-
denoise_gui = denoise_conf()
|
| 757 |
-
effects_gui = effects_conf()
|
| 758 |
-
button_base = button_conf()
|
| 759 |
-
output_base = output_conf()
|
| 760 |
-
|
| 761 |
-
button_base.click(
|
| 762 |
-
run,
|
| 763 |
-
inputs=[
|
| 764 |
-
aud,
|
| 765 |
-
model,
|
| 766 |
-
algo,
|
| 767 |
-
algo_lvl,
|
| 768 |
-
indx,
|
| 769 |
-
indx_inf,
|
| 770 |
-
res_fc,
|
| 771 |
-
envel_r,
|
| 772 |
-
const,
|
| 773 |
-
denoise_gui,
|
| 774 |
-
effects_gui,
|
| 775 |
-
format_out,
|
| 776 |
-
steps_gui,
|
| 777 |
-
],
|
| 778 |
-
outputs=[output_base],
|
| 779 |
-
)
|
| 780 |
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
],
|
|
|
|
|
|
|
| 816 |
|
| 817 |
-
],
|
| 818 |
-
fn=run,
|
| 819 |
-
inputs=[
|
| 820 |
-
aud,
|
| 821 |
-
model,
|
| 822 |
-
algo,
|
| 823 |
-
algo_lvl,
|
| 824 |
-
indx,
|
| 825 |
-
indx_inf,
|
| 826 |
-
res_fc,
|
| 827 |
-
envel_r,
|
| 828 |
-
const,
|
| 829 |
-
],
|
| 830 |
-
outputs=[output_base],
|
| 831 |
-
cache_examples=False,
|
| 832 |
-
)
|
| 833 |
gr.Markdown(RESOURCES)
|
| 834 |
|
| 835 |
return app
|
| 836 |
|
| 837 |
|
| 838 |
if __name__ == "__main__":
|
|
|
|
| 839 |
tts_voice_list = asyncio.new_event_loop().run_until_complete(get_voices_list(proxy=None))
|
|
|
|
|
|
|
| 840 |
voices = sorted([
|
| 841 |
-
(
|
| 842 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 843 |
])
|
| 844 |
|
|
|
|
| 845 |
app = get_gui(theme)
|
| 846 |
-
|
| 847 |
app.queue(default_concurrency_limit=40)
|
| 848 |
|
|
|
|
| 849 |
app.launch(
|
| 850 |
max_threads=40,
|
| 851 |
share=IS_COLAB,
|
|
@@ -853,4 +817,4 @@ if __name__ == "__main__":
|
|
| 853 |
quiet=False,
|
| 854 |
debug=IS_COLAB,
|
| 855 |
ssr_mode=False,
|
| 856 |
-
)
|
|
|
|
| 46 |
converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
|
| 47 |
converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
|
| 48 |
|
| 49 |
+
# مدل پیشفرض (اختیاری)
|
| 50 |
+
test_model = "https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.index?download=true"
|
| 51 |
test_names = ["model.pth", "model.index"]
|
| 52 |
|
| 53 |
for url, filename in zip(test_model.split(", "), test_names):
|
|
|
|
| 65 |
with open(filename, "wb") as f:
|
| 66 |
pass
|
| 67 |
|
| 68 |
+
title = "<center><strong><font size='7'>RVC⚡ZERO - Local Upload</font></strong></center>"
|
| 69 |
+
description = "Upload your own model (.pth) and audio files for voice conversion." if IS_ZERO_GPU else ""
|
| 70 |
+
RESOURCES = "- You can upload your custom RVC models and audio files directly."
|
| 71 |
theme = args.theme
|
| 72 |
delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
|
| 73 |
|
|
|
|
| 84 |
"""Print all available voices."""
|
| 85 |
from edge_tts import list_voices
|
| 86 |
voices = await list_voices(proxy=proxy)
|
| 87 |
+
voices = sorted(voices, key=lambda voice: voice.get("ShortName", ""))
|
| 88 |
|
| 89 |
table = [
|
| 90 |
{
|
| 91 |
+
"ShortName": voice.get("ShortName", "Unknown"),
|
| 92 |
+
"Gender": voice.get("Gender", "Unknown"),
|
| 93 |
+
"ContentCategories": ", ".join(voice.get("VoiceTag", {}).get("ContentCategories", [])),
|
| 94 |
+
"VoicePersonalities": ", ".join(voice.get("VoiceTag", {}).get("VoicePersonalities", [])),
|
| 95 |
+
"FriendlyName": voice.get("FriendlyName", voice.get("Name", "Unknown Voice")),
|
| 96 |
}
|
| 97 |
for voice in voices
|
| 98 |
]
|
|
|
|
| 103 |
def find_files(directory):
|
| 104 |
file_paths = []
|
| 105 |
for filename in os.listdir(directory):
|
|
|
|
| 106 |
if filename.endswith('.pth') or filename.endswith('.zip') or filename.endswith('.index'):
|
|
|
|
| 107 |
file_paths.append(os.path.join(directory, filename))
|
|
|
|
| 108 |
return file_paths
|
| 109 |
|
| 110 |
|
|
|
|
| 118 |
|
| 119 |
|
| 120 |
def find_my_model(a_, b_):
|
|
|
|
| 121 |
if a_ is None or a_.endswith(".pth"):
|
| 122 |
return a_, b_
|
| 123 |
|
|
|
|
| 176 |
raise ValueError("No Content-Length header found")
|
| 177 |
|
| 178 |
file_size = int(content_length)
|
|
|
|
| 179 |
if file_size > 900000000 and IS_ZERO_GPU:
|
| 180 |
raise ValueError("The file is too large. Max allowed is 900 MB.")
|
| 181 |
|
|
|
|
| 192 |
|
| 193 |
|
| 194 |
def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
|
| 195 |
+
if not url_
|
|
|
|
| 196 |
return None, None
|
| 197 |
|
| 198 |
+
if "," in url_
|
| 199 |
a_, b_ = url_data.split(",")
|
| 200 |
a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
|
| 201 |
else:
|
|
|
|
| 245 |
except Exception as e:
|
| 246 |
raise e
|
| 247 |
finally:
|
|
|
|
|
|
|
| 248 |
t = threading.Thread(target=clear_files, args=(directory,))
|
| 249 |
t.start()
|
| 250 |
|
|
|
|
| 257 |
try:
|
| 258 |
output_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
|
| 259 |
|
|
|
|
| 260 |
board = Pedalboard(
|
| 261 |
[
|
| 262 |
HighpassFilter(),
|
|
|
|
| 265 |
]
|
| 266 |
)
|
| 267 |
|
|
|
|
| 268 |
temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
|
| 269 |
|
| 270 |
with AudioFile(audio_path) as f:
|
|
|
|
| 274 |
effected = board(chunk, f.samplerate, reset=False)
|
| 275 |
o.write(effected)
|
| 276 |
|
|
|
|
| 277 |
audio_seg = AudioSegment.from_file(temp_wav, format=type_output)
|
| 278 |
audio_seg.export(output_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
|
| 279 |
|
|
|
|
| 280 |
os.remove(temp_wav)
|
| 281 |
|
| 282 |
result.append(output_path)
|
|
|
|
| 289 |
|
| 290 |
|
| 291 |
def apply_noisereduce(audio_list, type_output):
|
|
|
|
| 292 |
print("Noice reduce")
|
| 293 |
|
| 294 |
result = []
|
|
|
|
| 296 |
out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
|
| 297 |
|
| 298 |
try:
|
|
|
|
| 299 |
audio = AudioSegment.from_file(audio_path)
|
|
|
|
|
|
|
| 300 |
samples = np.array(audio.get_array_of_samples())
|
|
|
|
|
|
|
| 301 |
reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
|
| 302 |
|
|
|
|
| 303 |
reduced_audio = AudioSegment(
|
| 304 |
reduced_noise.tobytes(),
|
| 305 |
frame_rate=audio.frame_rate,
|
|
|
|
| 307 |
channels=audio.channels
|
| 308 |
)
|
| 309 |
|
|
|
|
| 310 |
reduced_audio.export(out_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
|
| 311 |
result.append(out_path)
|
| 312 |
|
|
|
|
| 348 |
steps,
|
| 349 |
):
|
| 350 |
if not audio_files:
|
| 351 |
+
raise ValueError("Please upload audio files")
|
| 352 |
|
| 353 |
if isinstance(audio_files, str):
|
| 354 |
audio_files = [audio_files]
|
|
|
|
| 392 |
|
| 393 |
def audio_conf():
|
| 394 |
return gr.File(
|
| 395 |
+
label="Upload Audio Files (wav, mp3, ogg, flac)",
|
| 396 |
file_count="multiple",
|
| 397 |
type="filepath",
|
| 398 |
+
file_types=[".wav", ".mp3", ".ogg", ".flac", ".m4a"],
|
| 399 |
container=True,
|
| 400 |
)
|
| 401 |
|
| 402 |
|
| 403 |
def model_conf():
|
| 404 |
return gr.File(
|
| 405 |
+
label="Upload Model File (.pth)",
|
| 406 |
type="filepath",
|
| 407 |
+
file_types=[".pth"],
|
| 408 |
height=130,
|
| 409 |
)
|
| 410 |
|
|
|
|
| 433 |
|
| 434 |
def index_conf():
|
| 435 |
return gr.File(
|
| 436 |
+
label="Upload Index File (.index) - Optional",
|
| 437 |
type="filepath",
|
| 438 |
+
file_types=[".index"],
|
| 439 |
height=130,
|
| 440 |
)
|
| 441 |
|
|
|
|
| 482 |
|
| 483 |
def button_conf():
|
| 484 |
return gr.Button(
|
| 485 |
+
"Convert Voice",
|
| 486 |
variant="primary",
|
| 487 |
+
size="lg",
|
| 488 |
)
|
| 489 |
|
| 490 |
|
| 491 |
def output_conf():
|
| 492 |
return gr.File(
|
| 493 |
+
label="Converted Audio",
|
| 494 |
file_count="multiple",
|
| 495 |
interactive=False,
|
| 496 |
)
|
|
|
|
| 499 |
def active_tts_conf():
|
| 500 |
return gr.Checkbox(
|
| 501 |
False,
|
| 502 |
+
label="Use Text-to-Speech",
|
|
|
|
| 503 |
container=False,
|
| 504 |
)
|
| 505 |
|
| 506 |
|
| 507 |
def tts_voice_conf():
|
| 508 |
return gr.Dropdown(
|
| 509 |
+
label="TTS Voice",
|
| 510 |
+
choices=[], # Will be populated later
|
| 511 |
visible=False,
|
| 512 |
+
value=None,
|
| 513 |
)
|
| 514 |
|
| 515 |
|
| 516 |
def tts_text_conf():
|
| 517 |
return gr.Textbox(
|
| 518 |
value="",
|
| 519 |
+
placeholder="Enter text to convert to speech...",
|
| 520 |
label="Text",
|
| 521 |
visible=False,
|
| 522 |
lines=3,
|
|
|
|
| 525 |
|
| 526 |
def tts_button_conf():
|
| 527 |
return gr.Button(
|
| 528 |
+
"Generate Speech",
|
| 529 |
variant="secondary",
|
| 530 |
visible=False,
|
| 531 |
)
|
|
|
|
| 534 |
def tts_play_conf():
|
| 535 |
return gr.Checkbox(
|
| 536 |
False,
|
| 537 |
+
label="Auto-play generated audio",
|
|
|
|
| 538 |
container=False,
|
| 539 |
visible=False,
|
| 540 |
)
|
|
|
|
| 544 |
return gr.Audio(
|
| 545 |
value=None,
|
| 546 |
type="filepath",
|
|
|
|
| 547 |
autoplay=True,
|
| 548 |
visible=True,
|
| 549 |
interactive=False,
|
|
|
|
| 564 |
|
| 565 |
def format_output_gui():
|
| 566 |
return gr.Dropdown(
|
| 567 |
+
label="Output Format:",
|
| 568 |
choices=["wav", "mp3", "flac"],
|
| 569 |
value="wav",
|
| 570 |
)
|
| 571 |
|
| 572 |
+
|
| 573 |
def denoise_conf():
|
| 574 |
return gr.Checkbox(
|
| 575 |
False,
|
| 576 |
+
label="Apply Noise Reduction",
|
|
|
|
| 577 |
container=False,
|
| 578 |
visible=True,
|
| 579 |
)
|
|
|
|
| 582 |
def effects_conf():
|
| 583 |
return gr.Checkbox(
|
| 584 |
False,
|
| 585 |
+
label="Apply Audio Effects (Reverb)",
|
|
|
|
| 586 |
container=False,
|
| 587 |
visible=True,
|
| 588 |
)
|
|
|
|
| 596 |
os.makedirs(os.path.join(out_dir, folder_tts), exist_ok=True)
|
| 597 |
out_path = os.path.join(out_dir, folder_tts, "tts.mp3")
|
| 598 |
|
| 599 |
+
# Extract ShortName from combined value (e.g., "en-US-EmmaMultilingualNeural-Female")
|
| 600 |
+
if tts_voice:
|
| 601 |
+
short_name = "-".join(tts_voice.split('-')[:-1])
|
| 602 |
+
else:
|
| 603 |
+
short_name = "en-US-EmmaMultilingualNeural"
|
| 604 |
+
|
| 605 |
+
asyncio.run(edge_tts.Communicate(tts_text, short_name).save(out_path))
|
| 606 |
if play_tts:
|
| 607 |
return [out_path], out_path
|
| 608 |
return [out_path], None
|
|
|
|
| 623 |
def down_active_conf():
|
| 624 |
return gr.Checkbox(
|
| 625 |
False,
|
| 626 |
+
label="Download from URL",
|
|
|
|
| 627 |
container=False,
|
| 628 |
)
|
| 629 |
|
|
|
|
| 631 |
def down_url_conf():
|
| 632 |
return gr.Textbox(
|
| 633 |
value="",
|
| 634 |
+
placeholder="Hugging Face model URL...",
|
| 635 |
+
label="Model URL",
|
| 636 |
visible=False,
|
| 637 |
lines=1,
|
| 638 |
)
|
|
|
|
| 640 |
|
| 641 |
def down_button_conf():
|
| 642 |
return gr.Button(
|
| 643 |
+
"Download Model",
|
| 644 |
variant="secondary",
|
| 645 |
visible=False,
|
| 646 |
)
|
|
|
|
| 657 |
|
| 658 |
CSS = """
|
| 659 |
#audio_tts {
|
| 660 |
+
visibility: hidden;
|
| 661 |
height: 0px;
|
| 662 |
width: 0px;
|
| 663 |
max-width: 0px;
|
|
|
|
| 670 |
gr.Markdown(title)
|
| 671 |
gr.Markdown(description)
|
| 672 |
|
| 673 |
+
with gr.Tab("Voice Conversion"):
|
| 674 |
+
# بخش آپلود فایلهای صوتی
|
| 675 |
+
gr.Markdown("### 📤 Upload Audio Files")
|
| 676 |
+
aud = audio_conf()
|
| 677 |
+
|
| 678 |
+
# بخش TTS
|
| 679 |
+
active_tts = active_tts_conf()
|
| 680 |
+
with gr.Row(visible=False) as tts_row:
|
| 681 |
+
with gr.Column(scale=1):
|
| 682 |
+
tts_text = tts_text_conf()
|
| 683 |
+
with gr.Column(scale=2):
|
| 684 |
+
with gr.Row():
|
| 685 |
+
with gr.Column():
|
| 686 |
+
with gr.Row():
|
| 687 |
+
tts_voice = tts_voice_conf()
|
| 688 |
+
tts_active_play = tts_play_conf()
|
| 689 |
+
tts_button = tts_button_conf()
|
| 690 |
+
tts_play = sound_gui()
|
| 691 |
+
|
| 692 |
+
active_tts.change(
|
| 693 |
+
fn=show_components_tts,
|
| 694 |
+
inputs=[active_tts],
|
| 695 |
+
outputs=[tts_voice, tts_text, tts_button, tts_active_play],
|
| 696 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
|
| 698 |
+
tts_button.click(
|
| 699 |
+
fn=infer_tts_audio,
|
| 700 |
+
inputs=[tts_voice, tts_text, tts_active_play],
|
| 701 |
+
outputs=[aud, tts_play],
|
| 702 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
|
| 704 |
+
# بخش مدل
|
| 705 |
+
gr.Markdown("### 🎯 Model Selection")
|
| 706 |
+
|
| 707 |
with gr.Row():
|
| 708 |
+
with gr.Column(scale=1):
|
| 709 |
+
model = model_conf()
|
| 710 |
+
gr.Markdown("*Upload your .pth model file*")
|
| 711 |
+
with gr.Column(scale=1):
|
| 712 |
+
indx = index_conf()
|
| 713 |
+
gr.Markdown("*Upload .index file (optional)*")
|
| 714 |
+
|
| 715 |
+
# بخش دانلود از URL
|
| 716 |
+
down_active_gui = down_active_conf()
|
| 717 |
+
down_info = gr.Markdown(
|
| 718 |
+
f"Download models from Hugging Face URLs",
|
| 719 |
+
visible=False
|
| 720 |
+
)
|
| 721 |
+
with gr.Row(visible=False) as url_row:
|
| 722 |
+
with gr.Column(scale=3):
|
| 723 |
+
down_url_gui = down_url_conf()
|
| 724 |
+
with gr.Column(scale=1):
|
| 725 |
+
down_button_gui = down_button_conf()
|
| 726 |
+
|
| 727 |
+
down_active_gui.change(
|
| 728 |
+
show_components_down,
|
| 729 |
+
[down_active_gui],
|
| 730 |
+
[down_info, down_url_gui, down_button_gui]
|
| 731 |
+
)
|
| 732 |
|
| 733 |
+
down_button_gui.click(
|
| 734 |
+
get_my_model,
|
| 735 |
+
[down_url_gui],
|
| 736 |
+
[model, indx]
|
| 737 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
|
| 739 |
+
# تنظیمات پیشرفته
|
| 740 |
+
with gr.Accordion(label="⚙️ Advanced Settings", open=False):
|
| 741 |
+
with gr.Row():
|
| 742 |
+
algo = pitch_algo_conf()
|
| 743 |
+
algo_lvl = pitch_lvl_conf()
|
| 744 |
+
|
| 745 |
+
with gr.Row():
|
| 746 |
+
indx_inf = index_inf_conf()
|
| 747 |
+
steps_gui = steps_conf()
|
| 748 |
+
|
| 749 |
+
with gr.Row():
|
| 750 |
+
res_fc = respiration_filter_conf()
|
| 751 |
+
envel_r = envelope_ratio_conf()
|
| 752 |
+
const = consonant_protec_conf()
|
| 753 |
+
|
| 754 |
+
with gr.Row():
|
| 755 |
+
format_out = format_output_gui()
|
| 756 |
+
denoise_gui = denoise_conf()
|
| 757 |
+
effects_gui = effects_conf()
|
| 758 |
+
|
| 759 |
+
# دکمه تبدیل
|
| 760 |
+
button_base = button_conf()
|
| 761 |
+
|
| 762 |
+
# نتیجه
|
| 763 |
+
gr.Markdown("### 🎵 Output")
|
| 764 |
+
output_base = output_conf()
|
| 765 |
+
|
| 766 |
+
button_base.click(
|
| 767 |
+
run,
|
| 768 |
+
inputs=[
|
| 769 |
+
aud,
|
| 770 |
+
model,
|
| 771 |
+
algo,
|
| 772 |
+
algo_lvl,
|
| 773 |
+
indx,
|
| 774 |
+
indx_inf,
|
| 775 |
+
res_fc,
|
| 776 |
+
envel_r,
|
| 777 |
+
const,
|
| 778 |
+
denoise_gui,
|
| 779 |
+
effects_gui,
|
| 780 |
+
format_out,
|
| 781 |
+
steps_gui,
|
| 782 |
],
|
| 783 |
+
outputs=[output_base],
|
| 784 |
+
)
|
| 785 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 786 |
gr.Markdown(RESOURCES)
|
| 787 |
|
| 788 |
return app
|
| 789 |
|
| 790 |
|
| 791 |
if __name__ == "__main__":
|
| 792 |
+
# Get voice list safely
|
| 793 |
tts_voice_list = asyncio.new_event_loop().run_until_complete(get_voices_list(proxy=None))
|
| 794 |
+
|
| 795 |
+
# Build voice dropdown options with safe .get() access
|
| 796 |
voices = sorted([
|
| 797 |
+
(
|
| 798 |
+
" - ".join(
|
| 799 |
+
reversed(
|
| 800 |
+
voice.get("FriendlyName", voice.get("Name", "Unknown Voice")).split("-")
|
| 801 |
+
)
|
| 802 |
+
).replace("Microsoft ", "").replace("Online (Natural)", f"({voice.get('Gender', 'Unknown')})").strip(),
|
| 803 |
+
f"{voice.get('ShortName', 'Unknown')}-{voice.get('Gender', 'Unknown')}"
|
| 804 |
+
)
|
| 805 |
+
for voice in tts_voice_list
|
| 806 |
])
|
| 807 |
|
| 808 |
+
# Initialize GUI
|
| 809 |
app = get_gui(theme)
|
|
|
|
| 810 |
app.queue(default_concurrency_limit=40)
|
| 811 |
|
| 812 |
+
# Launch app
|
| 813 |
app.launch(
|
| 814 |
max_threads=40,
|
| 815 |
share=IS_COLAB,
|
|
|
|
| 817 |
quiet=False,
|
| 818 |
debug=IS_COLAB,
|
| 819 |
ssr_mode=False,
|
| 820 |
+
)
|