Spaces:

PeterPinetree
/

HomeworkHelper

Runtime error

App Files Files Community

PeterPinetree committed on Mar 3, 2025

Commit

867ffb1

verified ·

1 Parent(s): ab3b679

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -21

app.py CHANGED Viewed

@@ -1,29 +1,50 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
-# Load Kokoro TTS Model
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model_name = "hexgrad/Kokoro-82M"
-model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name).to(device)
-processor = AutoProcessor.from_pretrained(model_name)
-def text_to_speech(text):
-    """Convert input text to speech using Kokoro TTS"""
-    inputs = processor(text, return_tensors="pt").to(device)
-    with torch.no_grad():
-        output = model.generate(**inputs)
-    return output.cpu().numpy()
-# Gradio Interface
-description = "Enter text and listen to the Kokoro TTS model read it aloud."
-demo = gr.Interface(
-    fn=text_to_speech,
-    inputs=gr.Textbox(placeholder="Type something here..."),
-    outputs=gr.Audio(type="numpy"),
-    title="Kokoro TTS - Text-to-Speech",
-    description=description,
 )
-demo.launch()

 import gradio as gr
 import torch
+import soundfile as sf
+import tempfile
+from kokoro_onnx import Kokoro
+# Instantiate the Kokoro TTS engine once at import time. NOTE(review): called with no arguments - confirm the installed kokoro_onnx version does not require model/voices file paths.
+kokoro = Kokoro()
+# Build the voice list for the dropdown, preferring whatever the engine itself reports
+try:
+    voices = kokoro.get_voices()  # Falls through to the except branch when `get_voices` is missing
+except AttributeError:
+    # Fallback: hard-coded voice names used when `get_voices()` is unavailable
+    voices = ['af', 'af_bella', 'af_nicole', 'af_sarah', 'af_sky',
+              'am_adam', 'am_michael', 'bf_emma', 'bf_isabella',
+              'bm_george', 'bm_lewis']
+def generate_speech(text, voice, speed, show_transcript):
+    """Convert input text to speech using Kokoro TTS"""
+    samples, sample_rate = kokoro.create(text, voice=voice, speed=float(speed))
+    # Save audio file temporarily
+    temp_file = tempfile.mktemp(suffix=".wav")
+    sf.write(temp_file, samples, sample_rate)
+    # Return audio and optional transcript
+    return temp_file, text if show_transcript else None
+# Gradio UI: the four inputs (text, voice, speed, transcript toggle) are listed in the same order as generate_speech's parameters; the two outputs receive its (audio path, transcript) return tuple
+interface = gr.Interface(
+    fn=generate_speech,
+    inputs=[
+        gr.Textbox(label="Input Text", lines=5, placeholder="Type here..."),
+        gr.Dropdown(choices=voices, label="Select Voice", value=voices[0]),
+        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
+        gr.Checkbox(label="Show Transcript", value=True)
+    ],
+    outputs=[
+        gr.Audio(label="Generated Speech"),
+        gr.Textbox(label="Transcript", visible=True)
+    ],
+    title="Educational Text-to-Speech",
+    description="Enter text, choose a voice, and generate speech. Use the transcript option to follow along while listening.",
+    allow_flagging="never"
 )
+# Start the Gradio app only when this file is executed as a script, not when it is imported
+if __name__ == "__main__":
+    interface.launch()