Spaces:

nineninesix
/

KaniTTS

Running on Zero

App Files Files Community

ylankgz committed on Sep 18

Commit

4e3722d

1 Parent(s): 949c8bd

Add additional settings slider

Browse files

Files changed (1) hide show

app.py +32 -10

app.py CHANGED Viewed

@@ -128,6 +128,28 @@ with gr.Blocks(title="😻 KaniTTS - Text to Speech", theme=gr.themes.Default())
                 value="Ready to generate speech",
                 lines=3
             )
     # GPU generation event
     generate_btn.click(
@@ -144,21 +166,21 @@ with gr.Blocks(title="😻 KaniTTS - Text to Speech", theme=gr.themes.Default())
     with gr.Row():
         examples = [
-            ["Anyway, um, so, um, tell me, tell me all about her. I mean, what's she like? Is she really, you know, pretty?", "male"],
-            ["No, that does not make you a failure. No, sweetie, no. It just, uh, it just means that you're having a tough time...", "male"],
-            ["I-- Oh, I am such an idiot sometimes. I'm so sorry. Um, I-I don't know where my head's at.", "male"],
-            ["Got it. $300,000. I can definitely help you get a very good price for your property by selecting a realtor.", "female"],
-            ["Holy fu- Oh my God! Don't you understand how dangerous it is, huh?", "male"],
-            ["You make my days brighter, and my wildest dreams feel like reality. How do you do that?", "female"],
-            ["Great, and just a couple quick questions so we can match you with the right buyer. Is your home address still 330 East Charleston Road?", "female"],
-            ["Oh, yeah. I mean did you want to get a quick snack together or maybe something before you go?", "female"],
         ]
         gr.Examples(
             examples=examples,
-            inputs=[text_input, model_dropdown],
-            fn=lambda t=text_input: play_demo(t),
             outputs=[audio_output, time_report_output],
             cache_examples=True,
         )

                 value="Ready to generate speech",
                 lines=3
             )
+        with gr.Accordion("Settings", open=False):
+            temperature = gr.Slider(
+                minimum=0.1, maximum=1.5, value=0.6, step=0.05,
+                label="Temperature",
+                info="Higher values (0.7-1.0) create more expressive but less stable speech"
+            )
+            top_p = gr.Slider(
+                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
+                label="Top P",
+                info="Nucleus sampling threshold"
+            )
+            repetition_penalty = gr.Slider(
+                minimum=1.0, maximum=2.0, value=1.1, step=0.05,
+                label="Repetition Penalty",
+                info="Higher values discourage repetitive patterns"
+            )
+            max_new_tokens = gr.Slider(
+                minimum=100, maximum=2000, value=1200, step=100,
+                label="Max Length",
+                info="Maximum length of generated audio (in tokens)"
+            )
     # GPU generation event
     generate_btn.click(
     with gr.Row():
         examples = [
+            ["Anyway, um, so, um, tell me, tell me all about her. I mean, what's she like? Is she really, you know, pretty?", "male", 0.6, 0.95, 1.1, 1200],
+            ["No, that does not make you a failure. No, sweetie, no. It just, uh, it just means that you're having a tough time...", "male", 0.6, 0.95, 1.1, 1200],
+            ["I-- Oh, I am such an idiot sometimes. I'm so sorry. Um, I-I don't know where my head's at.", "male", 0.6, 0.95, 1.1, 1200],
+            ["Got it. $300,000. I can definitely help you get a very good price for your property by selecting a realtor.", "female", 0.6, 0.95, 1.1, 1200],
+            ["Holy fu- Oh my God! Don't you understand how dangerous it is, huh?", "male", 0.6, 0.95, 1.1, 1200],
+            ["You make my days brighter, and my wildest dreams feel like reality. How do you do that?", "female", 0.6, 0.95, 1.1, 1200],
+            ["Great, and just a couple quick questions so we can match you with the right buyer. Is your home address still 330 East Charleston Road?", "female", 0.6, 0.95, 1.1, 1200],
+            ["Oh, yeah. I mean did you want to get a quick snack together or maybe something before you go?", "female", 0.6, 0.95, 1.1, 1200],
         ]
         gr.Examples(
             examples=examples,
+            inputs=[text_input, model_dropdown, temperature, top_p, repetition_penalty, max_new_tokens],
+            fn=generate_speech_gpu,
             outputs=[audio_output, time_report_output],
             cache_examples=True,
         )