farid678 committed on
Commit
19f1a35
·
verified ·
1 Parent(s): c210c3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -37
app.py CHANGED
@@ -1,18 +1,17 @@
1
  import torch
2
- from transformers import pipeline
3
- import gradio as gr
4
  import numpy as np
5
- import soundfile as sf
6
- import io
7
 
8
  # -----------------------------
9
- # LOAD PIPELINE
10
  # -----------------------------
11
  device = 0 if torch.cuda.is_available() else -1
 
12
  tts_pipe = pipeline(
13
  task="text-to-speech",
14
  model="canopylabs/orpheus-3b-0.1-ft",
15
- device=device
16
  )
17
 
18
  # -----------------------------
@@ -22,55 +21,50 @@ def tts_generate(text):
22
  if not text.strip():
23
  return None
24
 
25
- try:
26
- output = tts_pipe(text)
27
- except Exception as e:
28
- print("Error:", e)
29
- return None
30
 
31
  audio = np.asarray(output["audio"], dtype=np.float32)
32
  sr = output["sampling_rate"]
33
 
34
- # Convert to bytes for Gradio Audio
35
- buffer = io.BytesIO()
36
- sf.write(buffer, audio, sr, format="WAV")
37
- buffer.seek(0)
38
-
39
- return buffer
40
 
41
  # -----------------------------
42
- # SAMPLE TEXTS WITH STYLE TAGS
43
  # -----------------------------
44
  SAMPLES = [
45
- "[neutral] Hello! This is a neutral English voice generated by Orpheus.",
46
- "[expressive] I'm really excited to show you how natural this voice sounds!",
47
- "[calm] Please relax and enjoy this calm and smooth narration.",
48
- "[narration] In the beginning, there was only silence.",
49
- "[conversation] Hey, are you coming to the meeting later today?",
 
 
 
 
 
50
  ]
51
 
52
  # -----------------------------
53
- # GRADIO INTERFACE
54
  # -----------------------------
55
  demo = gr.Interface(
56
  fn=tts_generate,
57
  inputs=gr.Textbox(
58
- label="Enter text (English supported with tags)",
 
59
  placeholder=SAMPLES[0],
60
- lines=4
61
  ),
62
- outputs=gr.Audio(type="file", label="Generated Audio"),
63
- title="Orpheus‑3B TTS",
64
  description=(
65
- "English TTS using **canopylabs/orpheus-3b-0.1-ft** via Transformers pipeline.\n\n"
66
- "Supported style tags examples:\n"
67
- "- `[neutral]`\n"
68
- "- `[expressive]`\n"
69
- "- `[calm]`\n"
70
- "- `[narration]`\n"
71
- "- `[conversation]`\n\n"
72
- "Example:\n"
73
- "`[expressive] I'm very happy to see you today!`"
74
  ),
75
  examples=[[s] for s in SAMPLES],
76
  )
 
1
  import torch
 
 
2
  import numpy as np
3
+ import gradio as gr
4
+ from transformers import pipeline
5
 
6
  # -----------------------------
7
+ # LOAD PIPELINE (HF AUTH REQUIRED)
8
  # -----------------------------
9
  device = 0 if torch.cuda.is_available() else -1
10
+
11
  tts_pipe = pipeline(
12
  task="text-to-speech",
13
  model="canopylabs/orpheus-3b-0.1-ft",
14
+ device=device,
15
  )
16
 
17
  # -----------------------------
 
21
  if not text.strip():
22
  return None
23
 
24
+ output = tts_pipe(text)
 
 
 
 
25
 
26
  audio = np.asarray(output["audio"], dtype=np.float32)
27
  sr = output["sampling_rate"]
28
 
29
+ return (sr, audio)
 
 
 
 
 
30
 
31
  # -----------------------------
32
+ # SAMPLE TEXTS WITH TAGS
33
  # -----------------------------
34
  SAMPLES = [
35
+ "Just end up crashing somewhere. <laughs> No, because remember last time? You fell asleep—",
36
+ "But now that the cat's out of the bag, we can be the couple that we were always destined to be.",
37
+ "Running through the grass, playing under the falling leaves. <laughs> My sweet little kit, the—",
38
+ "Deal with it. I will. I'll just scowl and watch TV by myself <sighs>.",
39
+ "Hmm… I don't know. <nervous laughter> This feels like a bad idea.",
40
+ "I'm so tired today <yawning> but I still have so much work to do.",
41
+ "Wait—did you hear that? <gasps> I swear something just moved.",
42
+ "<whispers> Don't turn around. Just keep walking.",
43
+ "Ugh… <scoffs> I can't believe this is happening again.",
44
+ "Okay okay <laughs nervously> maybe it wasn't my best decision."
45
  ]
46
 
47
  # -----------------------------
48
+ # GRADIO UI
49
  # -----------------------------
50
  demo = gr.Interface(
51
  fn=tts_generate,
52
  inputs=gr.Textbox(
53
+ label="Enter text (use expressive tags like <laughs>, <sighs>)",
54
+ lines=5,
55
  placeholder=SAMPLES[0],
 
56
  ),
57
+ outputs=gr.Audio(type="numpy", label="Generated Audio"),
58
+ title="Orpheus‑3B Expressive TTS",
59
  description=(
60
+ "Use expressive tags **inside the text**.\n\n"
61
+ "Examples:\n"
62
+ "- `<laughs>`\n"
63
+ "- `<sighs>`\n"
64
+ "- `<whispers>`\n"
65
+ "- `<gasps>`\n"
66
+ "- `<nervous laughter>`\n\n"
67
+ "Tags can appear at the **start, middle, or end** of a sentence."
 
68
  ),
69
  examples=[[s] for s in SAMPLES],
70
  )