ylankgz committed on
Commit
4e3722d
·
1 Parent(s): 949c8bd

Add additional settings slider

Browse files
Files changed (1) hide show
  1. app.py +32 -10
app.py CHANGED
@@ -128,6 +128,28 @@ with gr.Blocks(title="😻 KaniTTS - Text to Speech", theme=gr.themes.Default())
128
  value="Ready to generate speech",
129
  lines=3
130
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  # GPU generation event
133
  generate_btn.click(
@@ -144,21 +166,21 @@ with gr.Blocks(title="😻 KaniTTS - Text to Speech", theme=gr.themes.Default())
144
  with gr.Row():
145
 
146
  examples = [
147
- ["Anyway, um, so, um, tell me, tell me all about her. I mean, what's she like? Is she really, you know, pretty?", "male"],
148
- ["No, that does not make you a failure. No, sweetie, no. It just, uh, it just means that you're having a tough time...", "male"],
149
- ["I-- Oh, I am such an idiot sometimes. I'm so sorry. Um, I-I don't know where my head's at.", "male"],
150
- ["Got it. $300,000. I can definitely help you get a very good price for your property by selecting a realtor.", "female"],
151
- ["Holy fu- Oh my God! Don't you understand how dangerous it is, huh?", "male"],
152
- ["You make my days brighter, and my wildest dreams feel like reality. How do you do that?", "female"],
153
- ["Great, and just a couple quick questions so we can match you with the right buyer. Is your home address still 330 East Charleston Road?", "female"],
154
- ["Oh, yeah. I mean did you want to get a quick snack together or maybe something before you go?", "female"],
155
  ]
156
 
157
 
158
  gr.Examples(
159
  examples=examples,
160
- inputs=[text_input, model_dropdown],
161
- fn=lambda t=text_input: play_demo(t),
162
  outputs=[audio_output, time_report_output],
163
  cache_examples=True,
164
  )
 
128
  value="Ready to generate speech",
129
  lines=3
130
  )
131
+
132
+ with gr.Accordion("Settings", open=False):
133
+ temperature = gr.Slider(
134
+ minimum=0.1, maximum=1.5, value=0.6, step=0.05,
135
+ label="Temperature",
136
+ info="Higher values (0.7-1.0) create more expressive but less stable speech"
137
+ )
138
+ top_p = gr.Slider(
139
+ minimum=0.1, maximum=1.0, value=0.95, step=0.05,
140
+ label="Top P",
141
+ info="Nucleus sampling threshold"
142
+ )
143
+ repetition_penalty = gr.Slider(
144
+ minimum=1.0, maximum=2.0, value=1.1, step=0.05,
145
+ label="Repetition Penalty",
146
+ info="Higher values discourage repetitive patterns"
147
+ )
148
+ max_new_tokens = gr.Slider(
149
+ minimum=100, maximum=2000, value=1200, step=100,
150
+ label="Max Length",
151
+ info="Maximum length of generated audio (in tokens)"
152
+ )
153
 
154
  # GPU generation event
155
  generate_btn.click(
 
166
  with gr.Row():
167
 
168
  examples = [
169
+ ["Anyway, um, so, um, tell me, tell me all about her. I mean, what's she like? Is she really, you know, pretty?", "male", 0.6, 0.95, 1.1, 1200],
170
+ ["No, that does not make you a failure. No, sweetie, no. It just, uh, it just means that you're having a tough time...", "male", 0.6, 0.95, 1.1, 1200],
171
+ ["I-- Oh, I am such an idiot sometimes. I'm so sorry. Um, I-I don't know where my head's at.", "male", 0.6, 0.95, 1.1, 1200],
172
+ ["Got it. $300,000. I can definitely help you get a very good price for your property by selecting a realtor.", "female", 0.6, 0.95, 1.1, 1200],
173
+ ["Holy fu- Oh my God! Don't you understand how dangerous it is, huh?", "male", 0.6, 0.95, 1.1, 1200],
174
+ ["You make my days brighter, and my wildest dreams feel like reality. How do you do that?", "female", 0.6, 0.95, 1.1, 1200],
175
+ ["Great, and just a couple quick questions so we can match you with the right buyer. Is your home address still 330 East Charleston Road?", "female", 0.6, 0.95, 1.1, 1200],
176
+ ["Oh, yeah. I mean did you want to get a quick snack together or maybe something before you go?", "female", 0.6, 0.95, 1.1, 1200],
177
  ]
178
 
179
 
180
  gr.Examples(
181
  examples=examples,
182
+ inputs=[text_input, model_dropdown, temperature, top_p, repetition_penalty, max_new_tokens],
183
+ fn=generate_speech_gpu,
184
  outputs=[audio_output, time_report_output],
185
  cache_examples=True,
186
  )