Aranwer committed on
Commit
20c432f
·
verified ·
1 Parent(s): 9d610a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -47
app.py CHANGED
@@ -3,41 +3,56 @@ from TTS.api import TTS
3
  import tempfile
4
  import os
5
 
6
- # Load multilingual TTS model
7
- model_name = "tts_models/multilingual/multi-dataset/your_tts"
8
  tts = TTS(model_name)
9
 
10
- # Get available languages and speakers
11
- available_languages = list(tts.languages) # ['en', 'fr-fr', 'pt-br']
12
- available_speakers = tts.speakers
 
 
 
 
 
 
 
 
 
 
13
 
14
- def text_to_speech(text, language, speaker_name, speed, pitch):
 
 
 
15
  try:
16
- # Validate inputs
17
  if not text.strip():
18
  raise ValueError("Please enter some text")
19
 
20
- if language not in available_languages:
21
- raise ValueError(f"Language '{language}' not supported by this model")
22
-
23
- # Create parameters dictionary
24
- params = {
25
- "text": text,
26
- "speaker": speaker_name,
27
- "language": language,
28
- "file_path": None
29
- }
30
-
31
- # Add optional parameters
32
- if speed != 1.0:
33
- params["speed"] = speed
34
- if pitch != 1.0:
35
- params["pitch"] = pitch
36
-
37
- # Save to temporary WAV file
38
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
39
- params["file_path"] = f.name
40
- tts.tts_to_file(**params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  return f.name
42
 
43
  except Exception as e:
@@ -48,32 +63,61 @@ def create_download_link(audio_file):
48
  return None
49
  return gr.DownloadButton(label="Download Audio", value=audio_file)
50
 
51
- with gr.Blocks(title="Enhanced TTS App") as app:
52
- gr.Markdown("# Enhanced Multilingual Text-to-Speech")
53
- gr.Markdown(f"Supported languages: {', '.join(available_languages)}")
54
 
55
  with gr.Row():
56
  with gr.Column():
57
- text_input = gr.Textbox(label="Enter text", lines=5)
58
- language = gr.Dropdown(choices=available_languages, label="Language", value="en")
59
- speaker = gr.Dropdown(choices=available_speakers, label="Voice")
 
 
 
 
 
 
60
 
61
- with gr.Accordion("Advanced Settings", open=False):
62
- speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1,
63
- label="Speed (1.0 = normal)")
64
- pitch = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1,
65
- label="Pitch (1.0 = normal)")
 
 
 
 
 
 
 
 
66
 
67
- generate_btn = gr.Button("Generate Speech", variant="primary")
68
 
69
  with gr.Column():
70
- audio_output = gr.Audio(label="Generated Audio", type="filepath")
 
 
 
 
71
  download_section = gr.Group(visible=False)
72
 
 
 
 
 
 
 
 
 
 
 
 
73
  # Set up interactivity
74
  generate_btn.click(
75
  fn=text_to_speech,
76
- inputs=[text_input, language, speaker, speed, pitch],
77
  outputs=audio_output
78
  ).then(
79
  fn=lambda: gr.Group(visible=True),
@@ -84,18 +128,24 @@ with gr.Blocks(title="Enhanced TTS App") as app:
84
  outputs=download_section
85
  )
86
 
87
- # Update examples to only use supported languages
88
  gr.Examples(
89
  examples=[
90
- ["Hello, welcome to our text-to-speech application!", "en", available_speakers[0], 1.0, 1.0],
91
- ["Bonjour, bienvenue dans notre application!", "fr-fr", available_speakers[-1], 1.0, 1.0],
92
- ["Olá, bem-vindo ao nosso aplicativo!", "pt-br", available_speakers[0], 1.0, 1.0]
93
  ],
94
- inputs=[text_input, language, speaker, speed, pitch],
95
  outputs=audio_output,
96
  fn=text_to_speech,
97
- cache_examples=True
98
  )
99
 
100
  if __name__ == "__main__":
 
 
 
 
 
 
101
  app.launch()
 
3
  import tempfile
4
  import os
5
 
6
+ # Initialize TTS with a better English storytelling model
7
+ model_name = "tts_models/en/vctk/vits" # Better for English narration
8
  tts = TTS(model_name)
9
 
10
+ # Custom speaker labels for better narration options
11
+ speaker_labels = {
12
+ "p225": "Male, Young Adult",
13
+ "p226": "Female, Middle-Aged",
14
+ "p227": "Male, Mature Storyteller",
15
+ "p228": "Female, Young Adult",
16
+ "p229": "Male, Elderly Narrator",
17
+ "p230": "Female, Warm Storyteller",
18
+ "p231": "Male, Deep Voice",
19
+ "p232": "Female, Clear Articulation",
20
+ "p233": "Male, Authoritative",
21
+ "p234": "Female, Gentle Storyteller"
22
+ }
23
 
24
+ # Get available speakers and filter to our labeled ones
25
+ available_speakers = [spk for spk in tts.speakers if spk in speaker_labels]
26
+
27
+ def text_to_speech(text, speaker_name, speed, pitch):
28
  try:
 
29
  if not text.strip():
30
  raise ValueError("Please enter some text")
31
 
32
+ # Create temporary file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
34
+ # Generate with adjusted speed and pitch
35
+ tts.tts_to_file(
36
+ text=text,
37
+ speaker=speaker_name,
38
+ file_path=f.name,
39
+ speed=speed,
40
+ # Note: Some models may not support pitch directly
41
+ # We'll use a workaround for pitch adjustment
42
+ )
43
+
44
+ # Apply pitch adjustment if needed (using sox if available)
45
+ if pitch != 1.0:
46
+ try:
47
+ import sox
48
+ tfm = sox.Transformer()
49
+ tfm.pitch(pitch)
50
+ adjusted_file = f.name + "_adjusted.wav"
51
+ tfm.build_file(f.name, adjusted_file)
52
+ os.replace(adjusted_file, f.name)
53
+ except ImportError:
54
+ print("Sox not installed, pitch adjustment skipped")
55
+
56
  return f.name
57
 
58
  except Exception as e:
 
63
  return None
64
  return gr.DownloadButton(label="Download Audio", value=audio_file)
65
 
66
+ with gr.Blocks(title="Storytelling TTS App") as app:
67
+ gr.Markdown("# Professional Storytelling Text-to-Speech")
68
+ gr.Markdown("Perfect for audiobooks, podcasts, and narrative content")
69
 
70
  with gr.Row():
71
  with gr.Column():
72
+ text_input = gr.Textbox(label="Enter your story text", lines=8,
73
+ placeholder="Once upon a time...")
74
+
75
+ speaker = gr.Dropdown(
76
+ choices=available_speakers,
77
+ label="Narrator Voice",
78
+ value="p227", # Default to mature storyteller
79
+ format_func=lambda x: speaker_labels[x]
80
+ )
81
 
82
+ with gr.Accordion("Voice Adjustment", open=True):
83
+ speed = gr.Slider(
84
+ minimum=0.5, maximum=2.0,
85
+ value=1.0, step=0.1,
86
+ label="Speaking Rate",
87
+ info="1.0 = normal, lower for slower narration"
88
+ )
89
+ pitch = gr.Slider(
90
+ minimum=-5.0, maximum=5.0,
91
+ value=0.0, step=0.5,
92
+ label="Pitch Adjustment",
93
+ info="0 = normal, positive for higher pitch"
94
+ )
95
 
96
+ generate_btn = gr.Button("Generate Narration", variant="primary")
97
 
98
  with gr.Column():
99
+ audio_output = gr.Audio(
100
+ label="Generated Narration",
101
+ type="filepath",
102
+ elem_classes=["output-audio"]
103
+ )
104
  download_section = gr.Group(visible=False)
105
 
106
+ # Voice preview samples
107
+ with gr.Accordion("Preview Narrator Voices", open=False):
108
+ gr.Markdown("Listen to sample narration from each voice:")
109
+ with gr.Row():
110
+ for speaker_id in available_speakers[:3]:
111
+ gr.Audio(
112
+ value=f"https://example.com/samples/{speaker_id}.wav", # Replace with actual samples
113
+ label=speaker_labels[speaker_id],
114
+ visible=False # Set to True if you have sample files
115
+ )
116
+
117
  # Set up interactivity
118
  generate_btn.click(
119
  fn=text_to_speech,
120
+ inputs=[text_input, speaker, speed, pitch],
121
  outputs=audio_output
122
  ).then(
123
  fn=lambda: gr.Group(visible=True),
 
128
  outputs=download_section
129
  )
130
 
131
+ # Storytelling examples
132
  gr.Examples(
133
  examples=[
134
+ ["The old man sat by the fireplace, his eyes twinkling with memories of adventures past.", "p227", 0.9, 0.0],
135
+ ["In a quiet village nestled between the mountains, a young girl discovered a secret that would change everything.", "p234", 1.0, 0.5],
136
+ ["The detective examined the clue carefully, knowing this small piece of evidence could crack the entire case wide open.", "p231", 1.1, -1.0]
137
  ],
138
+ inputs=[text_input, speaker, speed, pitch],
139
  outputs=audio_output,
140
  fn=text_to_speech,
141
+ cache_examples=False
142
  )
143
 
144
  if __name__ == "__main__":
145
+ # Check whether sox is importable and print an install hint if it is not (pitch adjustment is skipped without it)
146
+ try:
147
+ import sox
148
+ except ImportError:
149
+ print("Consider installing sox for pitch adjustment: pip install sox")
150
+
151
  app.launch()