NeuralFalcon committed on
Commit
9ab845a
·
verified ·
1 Parent(s): 2dfd0aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -32
app.py CHANGED
@@ -1,3 +1,9 @@
 
 
 
 
 
 
1
  import gradio as gr
2
  import os
3
  import re
@@ -6,13 +12,6 @@ import scipy.io.wavfile
6
  import torch
7
  from pocket_tts import TTSModel
8
 
9
- #for voice clone
10
- from huggingface_hub import login
11
- hf_token = os.getenv("HF_TOKEN")
12
- if hf_token:
13
- login(token=hf_token)
14
-
15
-
16
  print("Loading TTS Model...")
17
  try:
18
  tts_model = TTSModel.load_model()
@@ -36,7 +35,7 @@ def get_tts_file_name(text, language="en"):
36
  )
37
 
38
  DEFAULT_VOICES = [
39
- "alba", "marius", "javert", "jean",
40
  "fantine", "cosette", "eponine", "azelma"
41
  ]
42
 
@@ -45,15 +44,15 @@ def generate_speech(text, mode, preset_voice, clone_audio_path):
45
  raise gr.Error("Please enter text to generate speech.")
46
 
47
  state = None
48
-
49
  if mode == "Default Voices":
50
  print(f"Using preset voice: {preset_voice}")
51
  state = tts_model.get_state_for_audio_prompt(preset_voice)
52
-
53
  else:
54
  if not clone_audio_path:
55
  raise gr.Error("Please upload a reference audio file for cloning.")
56
-
57
  print(f"Cloning voice from: {clone_audio_path}")
58
  try:
59
  state = tts_model.get_state_for_audio_prompt(clone_audio_path)
@@ -64,10 +63,10 @@ def generate_speech(text, mode, preset_voice, clone_audio_path):
64
 
65
  try:
66
  audio_tensor = tts_model.generate_audio(state, text)
67
-
68
  output_filename = get_tts_file_name(text)
69
  scipy.io.wavfile.write(output_filename, tts_model.sample_rate, audio_tensor.numpy())
70
-
71
  return output_filename
72
  except Exception as e:
73
  raise gr.Error(f"Generation failed: {str(e)}")
@@ -80,8 +79,8 @@ def toggle_inputs(mode):
80
 
81
 
82
  CUSTOM_CSS = """
83
- .gradio-container {
84
- font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, sans-serif;
85
  }
86
  .header-container {
87
  text-align: center;
@@ -98,8 +97,8 @@ CUSTOM_CSS = """
98
  opacity: 0.9;
99
  }
100
  .links-container a {
101
- text-decoration: none;
102
- color: #4a90e2;
103
  font-weight: 500;
104
  }
105
  .links-container a:hover {
@@ -165,22 +164,22 @@ HEADER_HTML = """
165
 
166
  with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=CUSTOM_CSS) as demo:
167
  gr.HTML(HEADER_HTML)
168
-
169
  with gr.Row():
170
  with gr.Column():
171
  text_input = gr.Textbox(
172
- label="Text Input",
173
- placeholder="Hi, how are you?",
174
  lines=3,
175
  value="Hi, how are you?"
176
  )
177
-
178
  mode_radio = gr.Radio(
179
  choices=["Default Voices", "Voice Clone"],
180
  value="Default Voices",
181
  label="TTS Mode"
182
  )
183
-
184
  with gr.Group():
185
  dropdown_input = gr.Dropdown(
186
  choices=DEFAULT_VOICES,
@@ -188,18 +187,22 @@ with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=CUSTOM_CSS) as demo:
188
  label="Select Voice",
189
  visible=True
190
  )
191
-
192
  audio_upload = gr.Audio(
193
  label="Upload Reference Audio (WAV recommended)",
194
  type="filepath",
195
  visible=False
196
  )
197
-
198
  generate_btn = gr.Button("Generate Audio", variant="primary")
199
-
200
  example_audio_url = "https://huggingface.co/kyutai/tts-voices/resolve/main/alba-mackenna/casual.wav"
201
 
202
- gr.Examples(
 
 
 
 
203
  examples=[
204
  ["Hello, I am Fantine. Nice to meet you.", "Default Voices", "fantine", None],
205
  ["I am Cosette, and the weather is lovely.", "Default Voices", "cosette", None],
@@ -210,17 +213,12 @@ with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=CUSTOM_CSS) as demo:
210
  inputs=[text_input, mode_radio, dropdown_input, audio_upload],
211
  label="Click on an Example to Try"
212
  )
213
-
214
- with gr.Column():
215
- output_audio = gr.Audio(label="Generated Speech", type="filepath")
216
-
217
-
218
  mode_radio.change(
219
  fn=toggle_inputs,
220
  inputs=[mode_radio],
221
  outputs=[dropdown_input, audio_upload]
222
  )
223
-
224
  generate_btn.click(
225
  fn=generate_speech,
226
  inputs=[text_input, mode_radio, dropdown_input, audio_upload],
 
1
+ #for voice clone
2
+ from huggingface_hub import login
3
+ hf_token = os.getenv("HF_TOKEN")
4
+ if hf_token:
5
+ login(token=hf_token)
6
+
7
  import gradio as gr
8
  import os
9
  import re
 
12
  import torch
13
  from pocket_tts import TTSModel
14
 
 
 
 
 
 
 
 
15
  print("Loading TTS Model...")
16
  try:
17
  tts_model = TTSModel.load_model()
 
35
  )
36
 
37
  DEFAULT_VOICES = [
38
+ "alba", "marius", "javert", "jean",
39
  "fantine", "cosette", "eponine", "azelma"
40
  ]
41
 
 
44
  raise gr.Error("Please enter text to generate speech.")
45
 
46
  state = None
47
+
48
  if mode == "Default Voices":
49
  print(f"Using preset voice: {preset_voice}")
50
  state = tts_model.get_state_for_audio_prompt(preset_voice)
51
+
52
  else:
53
  if not clone_audio_path:
54
  raise gr.Error("Please upload a reference audio file for cloning.")
55
+
56
  print(f"Cloning voice from: {clone_audio_path}")
57
  try:
58
  state = tts_model.get_state_for_audio_prompt(clone_audio_path)
 
63
 
64
  try:
65
  audio_tensor = tts_model.generate_audio(state, text)
66
+
67
  output_filename = get_tts_file_name(text)
68
  scipy.io.wavfile.write(output_filename, tts_model.sample_rate, audio_tensor.numpy())
69
+
70
  return output_filename
71
  except Exception as e:
72
  raise gr.Error(f"Generation failed: {str(e)}")
 
79
 
80
 
81
  CUSTOM_CSS = """
82
+ .gradio-container {
83
+ font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, sans-serif;
84
  }
85
  .header-container {
86
  text-align: center;
 
97
  opacity: 0.9;
98
  }
99
  .links-container a {
100
+ text-decoration: none;
101
+ color: #4a90e2;
102
  font-weight: 500;
103
  }
104
  .links-container a:hover {
 
164
 
165
  with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=CUSTOM_CSS) as demo:
166
  gr.HTML(HEADER_HTML)
167
+
168
  with gr.Row():
169
  with gr.Column():
170
  text_input = gr.Textbox(
171
+ label="Text Input",
172
+ placeholder="Hi, how are you?",
173
  lines=3,
174
  value="Hi, how are you?"
175
  )
176
+
177
  mode_radio = gr.Radio(
178
  choices=["Default Voices", "Voice Clone"],
179
  value="Default Voices",
180
  label="TTS Mode"
181
  )
182
+
183
  with gr.Group():
184
  dropdown_input = gr.Dropdown(
185
  choices=DEFAULT_VOICES,
 
187
  label="Select Voice",
188
  visible=True
189
  )
190
+
191
  audio_upload = gr.Audio(
192
  label="Upload Reference Audio (WAV recommended)",
193
  type="filepath",
194
  visible=False
195
  )
196
+
197
  generate_btn = gr.Button("Generate Audio", variant="primary")
198
+
199
  example_audio_url = "https://huggingface.co/kyutai/tts-voices/resolve/main/alba-mackenna/casual.wav"
200
 
201
+
202
+ with gr.Column():
203
+ output_audio = gr.Audio(label="Generated Speech", type="filepath")
204
+
205
+ gr.Examples(
206
  examples=[
207
  ["Hello, I am Fantine. Nice to meet you.", "Default Voices", "fantine", None],
208
  ["I am Cosette, and the weather is lovely.", "Default Voices", "cosette", None],
 
213
  inputs=[text_input, mode_radio, dropdown_input, audio_upload],
214
  label="Click on an Example to Try"
215
  )
 
 
 
 
 
216
  mode_radio.change(
217
  fn=toggle_inputs,
218
  inputs=[mode_radio],
219
  outputs=[dropdown_input, audio_upload]
220
  )
221
+
222
  generate_btn.click(
223
  fn=generate_speech,
224
  inputs=[text_input, mode_radio, dropdown_input, audio_upload],