Update app.py
Browse files
app.py
CHANGED
|
@@ -1,18 +1,17 @@
|
|
| 1 |
import torch
|
| 2 |
-
from transformers import pipeline
|
| 3 |
-
import gradio as gr
|
| 4 |
import numpy as np
|
| 5 |
-
import
|
| 6 |
-
import
|
| 7 |
|
| 8 |
# -----------------------------
|
| 9 |
-
# LOAD PIPELINE
|
| 10 |
# -----------------------------
|
| 11 |
device = 0 if torch.cuda.is_available() else -1
|
|
|
|
| 12 |
tts_pipe = pipeline(
|
| 13 |
task="text-to-speech",
|
| 14 |
model="canopylabs/orpheus-3b-0.1-ft",
|
| 15 |
-
device=device
|
| 16 |
)
|
| 17 |
|
| 18 |
# -----------------------------
|
|
@@ -22,55 +21,50 @@ def tts_generate(text):
|
|
| 22 |
if not text.strip():
|
| 23 |
return None
|
| 24 |
|
| 25 |
-
|
| 26 |
-
output = tts_pipe(text)
|
| 27 |
-
except Exception as e:
|
| 28 |
-
print("Error:", e)
|
| 29 |
-
return None
|
| 30 |
|
| 31 |
audio = np.asarray(output["audio"], dtype=np.float32)
|
| 32 |
sr = output["sampling_rate"]
|
| 33 |
|
| 34 |
-
|
| 35 |
-
buffer = io.BytesIO()
|
| 36 |
-
sf.write(buffer, audio, sr, format="WAV")
|
| 37 |
-
buffer.seek(0)
|
| 38 |
-
|
| 39 |
-
return buffer
|
| 40 |
|
| 41 |
# -----------------------------
|
| 42 |
-
# SAMPLE TEXTS WITH
|
| 43 |
# -----------------------------
|
| 44 |
SAMPLES = [
|
| 45 |
-
"
|
| 46 |
-
"
|
| 47 |
-
"
|
| 48 |
-
"
|
| 49 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
]
|
| 51 |
|
| 52 |
# -----------------------------
|
| 53 |
-
# GRADIO
|
| 54 |
# -----------------------------
|
| 55 |
demo = gr.Interface(
|
| 56 |
fn=tts_generate,
|
| 57 |
inputs=gr.Textbox(
|
| 58 |
-
label="Enter text (
|
|
|
|
| 59 |
placeholder=SAMPLES[0],
|
| 60 |
-
lines=4
|
| 61 |
),
|
| 62 |
-
outputs=gr.Audio(type="
|
| 63 |
-
title="Orpheus‑3B TTS",
|
| 64 |
description=(
|
| 65 |
-
"
|
| 66 |
-
"
|
| 67 |
-
"-
|
| 68 |
-
"-
|
| 69 |
-
"-
|
| 70 |
-
"-
|
| 71 |
-
"-
|
| 72 |
-
"
|
| 73 |
-
"`[expressive] I'm very happy to see you today!`"
|
| 74 |
),
|
| 75 |
examples=[[s] for s in SAMPLES],
|
| 76 |
)
|
|
|
|
| 1 |
import torch
|
|
|
|
|
|
|
| 2 |
import numpy as np
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from transformers import pipeline
|
| 5 |
|
| 6 |
# -----------------------------
# LOAD PIPELINE (HF AUTH REQUIRED)
# -----------------------------
# 0 selects the first CUDA GPU; -1 means CPU (Hugging Face pipeline convention).
device = 0 if torch.cuda.is_available() else -1

# Text-to-speech pipeline for the Orpheus 3B fine-tuned model.
# NOTE(review): per the header above, downloading this model presumably needs a
# Hugging Face auth token configured in the environment — verify on deploy.
tts_pipe = pipeline(
    task="text-to-speech",
    model="canopylabs/orpheus-3b-0.1-ft",
    device=device,
)
|
| 16 |
|
| 17 |
# -----------------------------
|
|
|
|
| 21 |
def tts_generate(text):
    """Synthesize speech for *text* with the Orpheus TTS pipeline.

    Parameters
    ----------
    text : str | None
        Input text, optionally containing inline expressive tags
        (e.g. ``<laughs>``, ``<sighs>``).

    Returns
    -------
    tuple[int, numpy.ndarray] | None
        ``(sampling_rate, waveform)`` suitable for ``gr.Audio(type="numpy")``,
        or ``None`` when the input is missing or blank.
    """
    # Gradio can pass None (cleared textbox); the original `text.strip()`
    # raised AttributeError in that case. Treat None the same as blank input.
    if not text or not text.strip():
        return None

    output = tts_pipe(text)

    # The pipeline result exposes "audio" and "sampling_rate"; coerce the
    # waveform to float32 so the numpy-typed gr.Audio output accepts it.
    audio = np.asarray(output["audio"], dtype=np.float32)
    sr = output["sampling_rate"]

    return (sr, audio)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
# -----------------------------
# SAMPLE TEXTS WITH TAGS
# -----------------------------
# Demo prompts showing expressive tags at the start, middle, and end of a
# sentence. Order matters: SAMPLES[0] is reused as the textbox placeholder.
SAMPLES = [
    "Just end up crashing somewhere. <laughs> No, because remember last time? You fell asleep—",
    "But now that the cat's out of the bag, we can be the couple that we were always destined to be.",
    "Running through the grass, playing under the falling leaves. <laughs> My sweet little kit, the—",
    "Deal with it. I will. I'll just scowl and watch TV by myself <sighs>.",
    "Hmm… I don't know. <nervous laughter> This feels like a bad idea.",
    "I'm so tired today <yawning> but I still have so much work to do.",
    "Wait—did you hear that? <gasps> I swear something just moved.",
    "<whispers> Don't turn around. Just keep walking.",
    "Ugh… <scoffs> I can't believe this is happening again.",
    "Okay okay <laughs nervously> maybe it wasn't my best decision."
]
|
| 46 |
|
| 47 |
# -----------------------------
# GRADIO UI
# -----------------------------
# tts_generate returns (sampling_rate, ndarray), which matches the
# numpy-typed Audio output below; None (blank input) clears the player.
demo = gr.Interface(
    fn=tts_generate,
    inputs=gr.Textbox(
        label="Enter text (use expressive tags like <laughs>, <sighs>)",
        lines=5,
        placeholder=SAMPLES[0],  # first sample doubles as the placeholder hint
    ),
    outputs=gr.Audio(type="numpy", label="Generated Audio"),
    title="Orpheus‑3B Expressive TTS",
    description=(
        "Use expressive tags **inside the text**.\n\n"
        "Examples:\n"
        "- `<laughs>`\n"
        "- `<sighs>`\n"
        "- `<whispers>`\n"
        "- `<gasps>`\n"
        "- `<nervous laughter>`\n\n"
        "Tags can appear at the **start, middle, or end** of a sentence."
    ),
    examples=[[s] for s in SAMPLES],  # one-element rows: Interface has a single input
)
|