farid678 committed on
Commit
9ed7609
·
verified ·
1 Parent(s): 7bacf1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -29
app.py CHANGED
@@ -3,6 +3,9 @@ import numpy as np
3
  import gradio as gr
4
  from transformers import pipeline
5
  import logging
 
 
 
6
 
7
  # -----------------------------
8
  # Reduce Transformers warnings
@@ -10,7 +13,7 @@ import logging
10
  logging.getLogger("transformers").setLevel(logging.ERROR)
11
 
12
  # -----------------------------
13
- # LOAD PIPELINE (HF AUTH REQUIRED)
14
  # -----------------------------
15
  device = 0 if torch.cuda.is_available() else -1
16
  tts_pipe = pipeline(
@@ -26,31 +29,25 @@ def tts_generate(text):
26
  if not text.strip():
27
  return None
28
 
29
- try:
30
- output = tts_pipe(text)
31
- except Exception as e:
32
- print("Error:", e)
33
- return None
34
 
35
  audio = np.asarray(output["audio"], dtype=np.float32)
36
  sr = output["sampling_rate"]
37
 
38
- return (sr, audio)
 
 
 
 
 
39
 
40
  # -----------------------------
41
- # SAMPLE TEXTS WITH EXPRESSIVE TAGS
42
  # -----------------------------
43
  SAMPLES = [
44
- "Just end up crashing somewhere. <laugh> No, because remember last time? You fell asleep—",
45
- "Running through the grass, playing under the falling leaves. <chuckle> My sweet little kit, the—",
46
- "Deal with it. I will. I'll just scowl and watch TV by myself <sigh>.",
47
  "Hmm… I don't know. <laugh> This feels like a bad idea. <gasp>",
48
  "I'm so tired today <yawn> but I still have so much work to do.",
49
- "<cough> Wait—did you hear that? <gasp> I swear something just moved.",
50
- "<whisper> Don't turn around. Just keep walking.",
51
- "Ugh… <sigh> I can't believe this is happening again.",
52
- "Okay okay <laughs nervously> maybe it wasn't my best decision.",
53
- "I just got a new sword, ooh, it's a mighty Anduril! <groan>"
54
  ]
55
 
56
  # -----------------------------
@@ -59,23 +56,14 @@ SAMPLES = [
59
  demo = gr.Interface(
60
  fn=tts_generate,
61
  inputs=gr.Textbox(
62
- label="Enter text (use expressive tags like <laugh>, <sigh>, <gasp>)",
63
  lines=5,
64
  placeholder=SAMPLES[0],
65
  ),
66
- outputs=gr.Audio(type="numpy", label="Generated Audio"),
67
- title="Orpheus3B Expressive TTS",
68
- description=(
69
- "You can add expressiveness to speech by inserting tags:\n"
70
- "- <laugh>, <chuckle>: For laughter sounds\n"
71
- "- <sigh>: For sighing sounds\n"
72
- "- <cough>, <sniffle>: For subtle interruptions\n"
73
- "- <groan>, <yawn>, <gasp>: For additional emotional expression\n\n"
74
- "Tags can appear at the start, middle, or end of a sentence.\n"
75
- "Example: `I can't believe it! <laugh>`"
76
- ),
77
  examples=[[s] for s in SAMPLES],
78
  )
79
 
80
  if __name__ == "__main__":
81
- demo.launch(ssr_mode=False)
 
3
  import gradio as gr
4
  from transformers import pipeline
5
  import logging
6
+ from scipy.io.wavfile import write
7
+ import uuid
8
+ import os
9
 
10
  # -----------------------------
11
  # Reduce Transformers warnings
 
13
  logging.getLogger("transformers").setLevel(logging.ERROR)
14
 
15
  # -----------------------------
16
+ # LOAD PIPELINE
17
  # -----------------------------
18
  device = 0 if torch.cuda.is_available() else -1
19
  tts_pipe = pipeline(
 
29
  if not text.strip():
30
  return None
31
 
32
+ output = tts_pipe(text)
 
 
 
 
33
 
34
  audio = np.asarray(output["audio"], dtype=np.float32)
35
  sr = output["sampling_rate"]
36
 
37
+ os.makedirs("outputs", exist_ok=True)
38
+ out_path = f"outputs/{uuid.uuid4().hex}.wav"
39
+
40
+ write(out_path, sr, audio)
41
+
42
+ return out_path
43
 
44
  # -----------------------------
45
+ # SAMPLE TEXTS
46
  # -----------------------------
47
  SAMPLES = [
48
+ "Just end up crashing somewhere. <laugh> No, because remember last time?",
 
 
49
  "Hmm… I don't know. <laugh> This feels like a bad idea. <gasp>",
50
  "I'm so tired today <yawn> but I still have so much work to do.",
 
 
 
 
 
51
  ]
52
 
53
  # -----------------------------
 
56
  demo = gr.Interface(
57
  fn=tts_generate,
58
  inputs=gr.Textbox(
59
+ label="Enter text (use expressive tags like <laugh>, <sigh>)",
60
  lines=5,
61
  placeholder=SAMPLES[0],
62
  ),
63
+ outputs=gr.Audio(type="filepath", label="Generated Audio"),
64
+ title="Orpheus-3B Expressive TTS",
 
 
 
 
 
 
 
 
 
65
  examples=[[s] for s in SAMPLES],
66
  )
67
 
68
  if __name__ == "__main__":
69
+ demo.launch(enable_queue=False)