Spaces:

Staticaliza
/

Voice

Paused

App Files Files Community

Staticaliza committed on Feb 2, 2025

Commit

36a226d

verified ·

1 Parent(s): 33d5f3b

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -15

app.py CHANGED Viewed

@@ -4,11 +4,46 @@ import gradio as gr
 CHAR_LIMIT = 5000
-MODEL = KModel().eval()  # always cpu
 PIPELINES = {lang: KPipeline(lang_code=lang, model=False) for lang in "ab"}
 PIPELINES["a"].g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
 PIPELINES["b"].g2p.lexicon.golds["kokoro"] = "kˈQkəɹQ"
 def generate_first(text, voice="af_heart", speed=1):
     text = text.strip()[:CHAR_LIMIT]
     pipeline = PIPELINES[voice[0]]
@@ -16,17 +51,11 @@ def generate_first(text, voice="af_heart", speed=1):
     for _, ps, _ in pipeline(text, voice, speed):
         ref_s = pack[len(ps) - 1]
         audio = MODEL(ps, ref_s, speed)
-        return (24000, audio.numpy()), ps
-    return None, ""
 def predict(text, voice="af_heart", speed=1):
-    return generate_first(text, voice, speed)[0]
-def tokenize_first(text, voice="af_heart"):
-    pipeline = PIPELINES[voice[0]]
-    for _, ps, _ in pipeline(text, voice):
-        return ps
-    return ""
 def generate_all(text, voice="af_heart", speed=1):
     text = text.strip()[:CHAR_LIMIT]
@@ -44,14 +73,11 @@ def gpu():
 with gr.Blocks() as app:
     with gr.Row():
         text_input = gr.Textbox(label="input text")
-        voice_input = gr.Textbox(label="voice", value="af_heart")
         speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="speed")
     out_audio = gr.Audio(label="output audio", interactive=False, autoplay=True)
-    out_tokens = gr.Textbox(label="tokens", interactive=False)
     gen_btn = gr.Button("generate")
-    token_btn = gr.Button("tokenize")
-    gen_btn.click(fn=generate_first, inputs=[text_input, voice_input, speed_input], outputs=[out_audio, out_tokens])
-    token_btn.click(fn=tokenize_first, inputs=[text_input, voice_input], outputs=out_tokens)
 if __name__ == "__main__":
     app.launch()

 CHAR_LIMIT = 5000
 PIPELINES = {lang: KPipeline(lang_code=lang, model=False) for lang in "ab"}
 PIPELINES["a"].g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
 PIPELINES["b"].g2p.lexicon.golds["kokoro"] = "kˈQkəɹQ"
+CHOICES = {
+    "🇺🇸 🚺 Heart ❤️": "af_heart",
+    "🇺🇸 🚺 Bella 🔥": "af_bella",
+    "🇺🇸 🚺 Nicole 🎧": "af_nicole",
+    "🇺🇸 🚺 Aoede": "af_aoede",
+    "🇺🇸 🚺 Kore": "af_kore",
+    "🇺🇸 🚺 Sarah": "af_sarah",
+    "🇺🇸 🚺 Nova": "af_nova",
+    "🇺🇸 🚺 Sky": "af_sky",
+    "🇺🇸 🚺 Alloy": "af_alloy",
+    "🇺🇸 🚺 Jessica": "af_jessica",
+    "🇺🇸 🚺 River": "af_river",
+    "🇺🇸 🚹 Michael": "am_michael",
+    "🇺🇸 🚹 Fenrir": "am_fenrir",
+    "🇺🇸 🚹 Puck": "am_puck",
+    "🇺🇸 🚹 Echo": "am_echo",
+    "🇺🇸 🚹 Eric": "am_eric",
+    "🇺🇸 🚹 Liam": "am_liam",
+    "🇺🇸 🚹 Onyx": "am_onyx",
+    "🇺🇸 🚹 Santa": "am_santa",
+    "🇺🇸 🚹 Adam": "am_adam",
+    "🇬🇧 🚺 Emma": "bf_emma",
+    "🇬🇧 🚺 Isabella": "bf_isabella",
+    "🇬🇧 🚺 Alice": "bf_alice",
+    "🇬🇧 🚺 Lily": "bf_lily",
+    "🇬🇧 🚹 George": "bm_george",
+    "🇬🇧 🚹 Fable": "bm_fable",
+    "🇬🇧 🚹 Lewis": "bm_lewis",
+    "🇬🇧 🚹 Daniel": "bm_daniel",
+}
+for v in CHOICES.values():
+    PIPELINES[v[0]].load_voice(v)
+MODEL = KModel().eval()
 def generate_first(text, voice="af_heart", speed=1):
     text = text.strip()[:CHAR_LIMIT]
     pipeline = PIPELINES[voice[0]]
     for _, ps, _ in pipeline(text, voice, speed):
         ref_s = pack[len(ps) - 1]
         audio = MODEL(ps, ref_s, speed)
+        return (24000, audio.numpy())
+    return None
 def predict(text, voice="af_heart", speed=1):
+    return generate_first(text, voice, speed)
 def generate_all(text, voice="af_heart", speed=1):
     text = text.strip()[:CHAR_LIMIT]
 with gr.Blocks() as app:
     with gr.Row():
         text_input = gr.Textbox(label="input text")
+        voice_input = gr.Dropdown(list(CHOICES.items()), value="af_heart", label="voice")
         speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="speed")
     out_audio = gr.Audio(label="output audio", interactive=False, autoplay=True)
     gen_btn = gr.Button("generate")
+    gen_btn.click(fn=generate_first, inputs=[text_input, voice_input, speed_input], outputs=out_audio)
 if __name__ == "__main__":
     app.launch()