# OpenVoice tone-color conversion Gradio demo.
import os
import torch
import argparse
import gradio as gr
# Command-line options for this demo script.
parser = argparse.ArgumentParser()
# parser.add_argument(
#     "--online_checkpoint_url",
#     default="https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_1226.zip",
# )
parser.add_argument(
    "--share", action="store_true", default=False, help="make link public"
)
args = parser.parse_args()
# first download the checkpoints from server
# if not os.path.exists("checkpoints/"):
#     print("Downloading OpenVoice checkpoint ...")
#     os.system(f"wget {args.online_checkpoint_url} -O ckpt.zip")
#     print("Extracting OpenVoice checkpoint ...")
#     ZipFile("ckpt.zip").extractall()
print("Starting OpenVoice")
from openvoice import se_extractor
from openvoice.api import ToneColorConverter
ckpt_converter = "checkpoints/converter"
device = "cuda" if torch.cuda.is_available() else "cpu"
output_dir = "outputs"
os.makedirs(output_dir, exist_ok=True)
tone_color_converter = ToneColorConverter(
f"{ckpt_converter}/config.json", device=device
)
tone_color_converter.load_ckpt(f"{ckpt_converter}/checkpoint.pth")
def predict(speaker_wav, transform_wav):
    """Convert the voice in *transform_wav* to the tone color of *speaker_wav*.

    Args:
        speaker_wav: Filepath of the reference (target speaker) audio.
        transform_wav: Filepath of the audio whose voice will be transformed.

    Returns:
        tuple: ``(info_text, output_audio_path_or_None, speaker_wav_or_None)``
        — the last two are ``None`` when tone-color extraction fails.
    """
    # initialize an empty info string shown to the user
    text_hint = ""

    # extract source_se (tone-color embedding of the audio to transform).
    # BUG FIX: this call was previously unguarded, so a failure here crashed
    # with an unhandled traceback instead of the in-UI error path used below.
    try:
        source_se = se_extractor.get_se(
            transform_wav,
            tone_color_converter,
        )
    except Exception as e:
        text_hint += f"[ERROR] Get source tone color error {str(e)} \n"
        gr.Warning(f"[ERROR] Get source tone color error {str(e)} \n")
        return (
            text_hint,
            None,
            None,
        )

    # note diffusion_conditioning not used on hifigan (default mode), it will be empty but need to pass it to model.inference
    try:
        target_se = se_extractor.get_se(
            speaker_wav,
            tone_color_converter,
        )
    except Exception as e:
        text_hint += f"[ERROR] Get target tone color error {str(e)} \n"
        # BUG FIX: the warning string was missing its f-prefix, so users saw
        # the literal text "{str(e)}" instead of the actual error message.
        gr.Warning(f"[ERROR] Get target tone color error {str(e)} \n")
        return (
            text_hint,
            None,
            None,
        )

    save_path = f"{output_dir}/output.wav"
    # Run the tone color converter
    tone_color_converter.convert(
        audio_src_path=transform_wav,
        src_se=source_se,
        tgt_se=target_se,
        output_path=save_path,
    )
    text_hint += f"""Get response successfully \n"""
    return (
        text_hint,
        save_path,
        speaker_wav,
    )
# ---- Gradio UI wiring ------------------------------------------------------
with gr.Blocks(analytics_enabled=False) as demo:
    with gr.Row():
        # Left column: the two audio inputs and the submit button.
        with gr.Column():
            reference_audio = gr.Audio(
                label="Reference Audio",
                type="filepath",
                value="examples/speaker0.mp3",
            )
            transform_audio = gr.Audio(
                label="Transform Audio",
                type="filepath",
                value=None,
            )
            send_button = gr.Button("Send", elem_id="send-btn", visible=True)
        # Right column: status text plus the converted and reference audio.
        with gr.Column():
            info_box = gr.Text(label="Info")
            synthesized_audio = gr.Audio(label="Synthesized Audio", autoplay=True)
            reference_used = gr.Audio(label="Reference Audio Used")

    # Route the button press through predict(reference, transform).
    send_button.click(
        predict,
        [reference_audio, transform_audio],
        outputs=[info_box, synthesized_audio, reference_used],
    )

demo.queue()
demo.launch(debug=True, show_api=True, share=args.share)