File size: 3,317 Bytes
f3fa36c
416031b
 
f3fa36c
 
 
416031b
 
 
 
 
 
 
 
 
f3fa36c
416031b
 
 
 
 
 
f3fa36c
416031b
f3fa36c
416031b
 
f3fa36c
416031b
 
 
 
 
 
 
 
f3fa36c
028cdeb
416031b
 
 
 
 
028cdeb
416031b
 
 
 
 
 
028cdeb
416031b
 
f3fa36c
416031b
 
 
f3fa36c
 
 
 
 
 
416031b
 
 
 
 
 
 
 
e293430
416031b
e261806
416031b
 
 
 
 
e293430
f3fa36c
 
 
 
 
 
 
028cdeb
f3fa36c
416031b
f3fa36c
416031b
 
028cdeb
416031b
 
f3fa36c
 
 
 
 
 
416031b
f3fa36c
 
416031b
 
 
 
 
f3fa36c
416031b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import torch
import argparse
import gradio as gr


# --- CLI arguments -----------------------------------------------------------
parser = argparse.ArgumentParser()
# checkpoint auto-download is disabled; checkpoints/ must exist locally
# parser.add_argument(
#     "--online_checkpoint_url",
#     default="https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_1226.zip",
# )
parser.add_argument(
    "--share", action="store_true", default=False, help="make link public"
)
args = parser.parse_args()

# first download the checkpoints from server
# if not os.path.exists("checkpoints/"):
#     print("Downloading OpenVoice checkpoint ...")
#     os.system(f"wget {args.online_checkpoint_url} -O ckpt.zip")
#     print("Extracting OpenVoice checkpoint ...")
#     ZipFile("ckpt.zip").extractall()

print("Starting OpenVoice")

# deferred import: openvoice pulls in heavy ML deps, so the "Starting" banner
# above prints before the slow import begins
from openvoice import se_extractor
from openvoice.api import ToneColorConverter

# --- model setup -------------------------------------------------------------
ckpt_converter = "checkpoints/converter"  # local converter checkpoint directory
device = "cuda" if torch.cuda.is_available() else "cpu"
output_dir = "outputs"  # converted audio is written here
os.makedirs(output_dir, exist_ok=True)
tone_color_converter = ToneColorConverter(
    f"{ckpt_converter}/config.json", device=device
)
tone_color_converter.load_ckpt(f"{ckpt_converter}/checkpoint.pth")


def predict(speaker_wav, transform_wav):
    """Convert the voice in *transform_wav* to sound like *speaker_wav*.

    Args:
        speaker_wav: filepath of the reference audio (target speaker).
        transform_wav: filepath of the audio whose voice is transformed.

    Returns:
        Tuple of (status text, path to the converted wav or None,
        the reference wav path or None). The two None cases signal an
        extraction failure to the UI.
    """
    # status messages accumulated for the UI "Info" box
    text_hint = ""

    # extract the source speaker embedding from the audio to be transformed;
    # guarded the same way as the target extraction below so a bad transform
    # clip surfaces a readable message instead of crashing the UI
    try:
        source_se = se_extractor.get_se(
            transform_wav,
            tone_color_converter,
        )
    except Exception as e:
        text_hint += f"[ERROR] Get source tone color error {str(e)} \n"
        gr.Warning(f"[ERROR] Get source tone color error {str(e)} \n")
        return (
            text_hint,
            None,
            None,
        )

    # note: diffusion_conditioning is not used in hifigan (default) mode; it
    # will be empty but still needs to be passed to model.inference
    try:
        target_se = se_extractor.get_se(
            speaker_wav,
            tone_color_converter,
        )
    except Exception as e:
        text_hint += f"[ERROR] Get target tone color error {str(e)} \n"
        # BUGFIX: was a plain string, so the UI showed the literal text
        # "{str(e)}"; must be an f-string to interpolate the message
        gr.Warning(f"[ERROR] Get target tone color error {str(e)} \n")
        return (
            text_hint,
            None,
            None,
        )

    save_path = f"{output_dir}/output.wav"
    # run the tone color converter: re-voice transform_wav with target_se
    tone_color_converter.convert(
        audio_src_path=transform_wav,
        src_se=source_se,
        tgt_se=target_se,
        output_path=save_path,
    )

    text_hint += f"""Get response successfully \n"""

    return (
        text_hint,
        save_path,
        speaker_wav,
    )


# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks(analytics_enabled=False) as demo:

    with gr.Row():
        # left column: inputs
        with gr.Column():
            # target speaker whose tone color will be cloned
            ref_gr = gr.Audio(
                label="Reference Audio",
                # info="Click on the ✎ button to upload your own target speaker audio",
                type="filepath",
                value="examples/speaker0.mp3",
            )
            # audio whose content is kept but whose voice is transformed
            tra_gr = gr.Audio(
                label="Transform Audio",
                # info="Click on the ✎ button to upload your own target transform audio",
                type="filepath",
                value=None,
            )

            tts_button = gr.Button("Send", elem_id="send-btn", visible=True)

        # right column: outputs
        with gr.Column():
            out_text_gr = gr.Text(label="Info")
            audio_gr = gr.Audio(label="Synthesized Audio", autoplay=True)
            ref_audio_gr = gr.Audio(label="Reference Audio Used")

            # wire the button to predict(speaker_wav, transform_wav);
            # outputs map 1:1 onto predict's returned 3-tuple
            tts_button.click(
                predict,
                [ref_gr, tra_gr],
                outputs=[out_text_gr, audio_gr, ref_audio_gr],
            )

# queue() enables request queuing so concurrent users don't collide
demo.queue()
demo.launch(debug=True, show_api=True, share=args.share)