Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,11 +4,46 @@ import gradio as gr
|
|
| 4 |
|
| 5 |
CHAR_LIMIT = 5000
|
| 6 |
|
| 7 |
-
MODEL = KModel().eval() # always cpu
|
| 8 |
PIPELINES = {lang: KPipeline(lang_code=lang, model=False) for lang in "ab"}
|
| 9 |
PIPELINES["a"].g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
|
| 10 |
PIPELINES["b"].g2p.lexicon.golds["kokoro"] = "kˈQkəɹQ"
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
def generate_first(text, voice="af_heart", speed=1):
|
| 13 |
text = text.strip()[:CHAR_LIMIT]
|
| 14 |
pipeline = PIPELINES[voice[0]]
|
|
@@ -16,17 +51,11 @@ def generate_first(text, voice="af_heart", speed=1):
|
|
| 16 |
for _, ps, _ in pipeline(text, voice, speed):
|
| 17 |
ref_s = pack[len(ps) - 1]
|
| 18 |
audio = MODEL(ps, ref_s, speed)
|
| 19 |
-
return (24000, audio.numpy())
|
| 20 |
-
return None
|
| 21 |
|
| 22 |
def predict(text, voice="af_heart", speed=1):
|
| 23 |
-
return generate_first(text, voice, speed)
|
| 24 |
-
|
| 25 |
-
def tokenize_first(text, voice="af_heart"):
|
| 26 |
-
pipeline = PIPELINES[voice[0]]
|
| 27 |
-
for _, ps, _ in pipeline(text, voice):
|
| 28 |
-
return ps
|
| 29 |
-
return ""
|
| 30 |
|
| 31 |
def generate_all(text, voice="af_heart", speed=1):
|
| 32 |
text = text.strip()[:CHAR_LIMIT]
|
|
@@ -44,14 +73,11 @@ def gpu():
|
|
| 44 |
with gr.Blocks() as app:
|
| 45 |
with gr.Row():
|
| 46 |
text_input = gr.Textbox(label="input text")
|
| 47 |
-
voice_input = gr.
|
| 48 |
speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="speed")
|
| 49 |
out_audio = gr.Audio(label="output audio", interactive=False, autoplay=True)
|
| 50 |
-
out_tokens = gr.Textbox(label="tokens", interactive=False)
|
| 51 |
gen_btn = gr.Button("generate")
|
| 52 |
-
|
| 53 |
-
gen_btn.click(fn=generate_first, inputs=[text_input, voice_input, speed_input], outputs=[out_audio, out_tokens])
|
| 54 |
-
token_btn.click(fn=tokenize_first, inputs=[text_input, voice_input], outputs=out_tokens)
|
| 55 |
|
| 56 |
if __name__ == "__main__":
|
| 57 |
app.launch()
|
|
|
|
| 4 |
|
| 5 |
CHAR_LIMIT = 5000
|
| 6 |
|
|
|
|
| 7 |
PIPELINES = {lang: KPipeline(lang_code=lang, model=False) for lang in "ab"}
|
| 8 |
PIPELINES["a"].g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
|
| 9 |
PIPELINES["b"].g2p.lexicon.golds["kokoro"] = "kˈQkəɹQ"
|
| 10 |
|
| 11 |
+
CHOICES = {
|
| 12 |
+
"🇺🇸 🚺 Heart ❤️": "af_heart",
|
| 13 |
+
"🇺🇸 🚺 Bella 🔥": "af_bella",
|
| 14 |
+
"🇺🇸 🚺 Nicole 🎧": "af_nicole",
|
| 15 |
+
"🇺🇸 🚺 Aoede": "af_aoede",
|
| 16 |
+
"🇺🇸 🚺 Kore": "af_kore",
|
| 17 |
+
"🇺🇸 🚺 Sarah": "af_sarah",
|
| 18 |
+
"🇺🇸 🚺 Nova": "af_nova",
|
| 19 |
+
"🇺🇸 🚺 Sky": "af_sky",
|
| 20 |
+
"🇺🇸 🚺 Alloy": "af_alloy",
|
| 21 |
+
"🇺🇸 🚺 Jessica": "af_jessica",
|
| 22 |
+
"🇺🇸 🚺 River": "af_river",
|
| 23 |
+
"🇺🇸 🚹 Michael": "am_michael",
|
| 24 |
+
"🇺🇸 🚹 Fenrir": "am_fenrir",
|
| 25 |
+
"🇺🇸 🚹 Puck": "am_puck",
|
| 26 |
+
"🇺🇸 🚹 Echo": "am_echo",
|
| 27 |
+
"🇺🇸 🚹 Eric": "am_eric",
|
| 28 |
+
"🇺🇸 🚹 Liam": "am_liam",
|
| 29 |
+
"🇺🇸 🚹 Onyx": "am_onyx",
|
| 30 |
+
"🇺🇸 🚹 Santa": "am_santa",
|
| 31 |
+
"🇺🇸 🚹 Adam": "am_adam",
|
| 32 |
+
"🇬🇧 🚺 Emma": "bf_emma",
|
| 33 |
+
"🇬🇧 🚺 Isabella": "bf_isabella",
|
| 34 |
+
"🇬🇧 🚺 Alice": "bf_alice",
|
| 35 |
+
"🇬🇧 🚺 Lily": "bf_lily",
|
| 36 |
+
"🇬🇧 🚹 George": "bm_george",
|
| 37 |
+
"🇬🇧 🚹 Fable": "bm_fable",
|
| 38 |
+
"🇬🇧 🚹 Lewis": "bm_lewis",
|
| 39 |
+
"🇬🇧 🚹 Daniel": "bm_daniel",
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
for v in CHOICES.values():
|
| 43 |
+
PIPELINES[v[0]].load_voice(v)
|
| 44 |
+
|
| 45 |
+
MODEL = KModel().eval()
|
| 46 |
+
|
| 47 |
def generate_first(text, voice="af_heart", speed=1):
|
| 48 |
text = text.strip()[:CHAR_LIMIT]
|
| 49 |
pipeline = PIPELINES[voice[0]]
|
|
|
|
| 51 |
for _, ps, _ in pipeline(text, voice, speed):
|
| 52 |
ref_s = pack[len(ps) - 1]
|
| 53 |
audio = MODEL(ps, ref_s, speed)
|
| 54 |
+
return (24000, audio.numpy())
|
| 55 |
+
return None
|
| 56 |
|
| 57 |
def predict(text, voice="af_heart", speed=1):
|
| 58 |
+
return generate_first(text, voice, speed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
def generate_all(text, voice="af_heart", speed=1):
|
| 61 |
text = text.strip()[:CHAR_LIMIT]
|
|
|
|
| 73 |
with gr.Blocks() as app:
|
| 74 |
with gr.Row():
|
| 75 |
text_input = gr.Textbox(label="input text")
|
| 76 |
+
voice_input = gr.Dropdown(list(CHOICES.items()), value="af_heart", label="voice")
|
| 77 |
speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="speed")
|
| 78 |
out_audio = gr.Audio(label="output audio", interactive=False, autoplay=True)
|
|
|
|
| 79 |
gen_btn = gr.Button("generate")
|
| 80 |
+
gen_btn.click(fn=generate_first, inputs=[text_input, voice_input, speed_input], outputs=out_audio)
|
|
|
|
|
|
|
| 81 |
|
| 82 |
if __name__ == "__main__":
|
| 83 |
app.launch()
|