File size: 2,881 Bytes
52c1ced
1942c17
52c1ced
1942c17
 
52c1ced
1942c17
 
52c1ced
 
 
1942c17
 
52c1ced
9626389
52c1ced
1942c17
52c1ced
 
 
 
 
 
1942c17
 
 
52c1ced
1942c17
 
 
9626389
1942c17
52c1ced
 
9626389
 
 
 
 
 
 
 
52c1ced
 
1942c17
52c1ced
1942c17
 
9626389
 
 
 
1942c17
 
52c1ced
1942c17
52c1ced
 
 
 
 
 
 
1942c17
 
 
 
 
 
9626389
1942c17
9626389
 
 
 
 
 
 
 
1942c17
 
52c1ced
 
 
9626389
1942c17
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import httpx
import torch
import gradio as gr
from tempfile import NamedTemporaryFile
from pathlib import Path

from mockingbirdforuse import MockingBird


# Single MockingBird instance shared by the whole app.
mockingbird = MockingBird()
# Model weights and reference recordings are stored in ./data next to this file.
mockingbird_path = Path(os.path.dirname(__file__)) / "data"
base_url = "https://al.smoe.top/d/Home/source/mockingbird/"


def _download_if_missing(relative_name: str, destination: Path) -> None:
    """Download ``relative_name`` (relative to ``base_url``) unless *destination* exists."""
    if destination.exists():
        return
    # base_url already ends with "/"; strip it so the joined URL has no "//".
    url = f"{base_url.rstrip('/')}/{relative_name}"
    torch.hub.download_url_to_file(url, str(destination))


# The data directory must exist before the first download can be written into it.
mockingbird_path.mkdir(parents=True, exist_ok=True)

# Files stored directly under the data directory (passed to load_model below).
for weight_name in ["encoder.pt", "g_hifigan.pt", "wavernn.pt"]:
    _download_if_missing(weight_name, mockingbird_path / weight_name)

# Each voice has its own sub-directory holding a reference recording and a
# synthesizer checkpoint named after the voice.
for model in ["azusa", "nanmei", "ltyai", "tianyi"]:
    model_path = mockingbird_path / model
    model_path.mkdir(parents=True, exist_ok=True)
    for file_name in ["record.wav", f"{model}.pt"]:
        _download_if_missing(f"{model}/{file_name}", model_path / file_name)

mockingbird.load_model(
    mockingbird_path / "encoder.pt",
    mockingbird_path / "g_hifigan.pt",
    mockingbird_path / "wavernn.pt",
)


def inference(
    text: str,
    model_name: str,
    vocoder_type: str = "HifiGan",
    style_idx: int = 0,
    min_stop_token: int = 9,
    steps: int = 2000,
) -> str:
    """Synthesize *text* with the selected voice and return the path of a WAV file.

    Args:
        text: Text to speak; converted with ``str()`` before synthesis.
        model_name: Name of the voice. Its checkpoint ``<model_name>.pt`` and
            reference ``record.wav`` are read from
            ``mockingbird_path / model_name``.
        vocoder_type: Forwarded unchanged to ``mockingbird.synthesize``.
        style_idx: Forwarded unchanged to ``mockingbird.synthesize``.
        min_stop_token: Forwarded unchanged to ``mockingbird.synthesize``.
        steps: Forwarded unchanged to ``mockingbird.synthesize``.

    Returns:
        Path of a newly created temporary ``.wav`` file. The file is created
        with ``delete=False`` and is not removed by this function.
    """
    model_path = mockingbird_path / model_name
    mockingbird.set_synthesizer(model_path / f"{model_name}.pt")

    # Synthesize before creating the temp file so that a failure here does
    # not leave an empty orphaned file behind.
    record = mockingbird.synthesize(
        text=str(text),
        input_wav=model_path / "record.wav",
        vocoder_type=vocoder_type,
        style_idx=style_idx,
        min_stop_token=min_stop_token,
        steps=steps,
    )

    # Write through the temp-file handle itself and let the context manager
    # close it; delete=False keeps the file on disk for the caller to read.
    # NOTE(review): `record` is presumably an io.BytesIO (it exposes getvalue()).
    with NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_file.write(record.getvalue())
    return wav_file.name


# Static texts shown around the demo.
title = "MockingBird"
description = "🚀AI拟声: 5秒内克隆您的声音并生成任意语音内容 Clone a voice in 5 seconds to generate arbitrary speech in real-time"
# Wrapped in a complete <p>...</p> pair so the footer is well-formed HTML.
article = "<p><a href='https://github.com/babysor/MockingBird'>Github Repo</a></p>"

# Build the web UI: the inputs are listed in the same order as the parameters
# of `inference`, and the returned file path is played back as audio.
gr.Interface(
    inference,
    [
        gr.Textbox(label="Input"),
        gr.Radio(
            ["azusa", "nanmei", "ltyai", "tianyi"],
            label="model type",
            value="azusa",
        ),
        gr.Radio(
            ["HifiGan", "WaveRNN"],
            label="Vocoder type",
            value="HifiGan",
        ),
        gr.Slider(minimum=-1, maximum=9, step=1, label="style idx", value=0),
        # step=1 keeps these two sliders on whole numbers, matching the
        # int-annotated parameters of `inference`.
        gr.Slider(minimum=3, maximum=9, step=1, label="min stop token", value=9),
        gr.Slider(minimum=200, maximum=2000, step=1, label="steps", value=2000),
    ],
    gr.Audio(type="filepath", label="Output"),
    title=title,
    description=description,
    article=article,
    examples=[
        ["阿梓不是你的电子播放器", "azusa", "HifiGan", 0, 9, 2000],
        ["不是", "nanmei", "HifiGan", 0, 9, 2000],
    ],
).launch()