Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -34,9 +34,6 @@ DESCRIPTION = """
|
|
| 34 |
Text-to-speech with the [MisoLabs/MisoTTS](https://huggingface.co/MisoLabs/MisoTTS) model — an
|
| 35 |
8B [Sesame CSM](https://github.com/SesameAILabs/csm)-style model that generates Mimi audio codes
|
| 36 |
from text, with optional voice continuation from a reference clip.
|
| 37 |
-
|
| 38 |
-
Provide a reference audio + its transcript to clone a voice, or leave them empty for a default voice.
|
| 39 |
-
Outputs carry an imperceptible watermark identifying the audio as AI-generated.
|
| 40 |
"""
|
| 41 |
|
| 42 |
|
|
@@ -122,7 +119,7 @@ with gr.Blocks(title="Miso TTS 8B") as demo:
|
|
| 122 |
speaker_id = gr.Slider(0, 1, value=0, step=1, label="Speaker ID")
|
| 123 |
max_length = gr.Slider(2, 60, value=10, step=1, label="Max audio length (s)")
|
| 124 |
temperature = gr.Slider(
|
| 125 |
-
0.1, 1.5, value=0.
|
| 126 |
label="Temperature (auto-lowered when cloning a voice)",
|
| 127 |
)
|
| 128 |
topk = gr.Slider(1, 100, value=50, step=1, label="Top-k")
|
|
@@ -133,7 +130,7 @@ with gr.Blocks(title="Miso TTS 8B") as demo:
|
|
| 133 |
ref_audio.change(transcribe, inputs=[ref_audio], outputs=[ref_text])
|
| 134 |
# Cloning tracks the reference much more closely at low temperature.
|
| 135 |
ref_audio.change(
|
| 136 |
-
lambda p: 0.4 if p else 0.
|
| 137 |
)
|
| 138 |
|
| 139 |
run.click(
|
|
|
|
| 34 |
Text-to-speech with the [MisoLabs/MisoTTS](https://huggingface.co/MisoLabs/MisoTTS) model — an
|
| 35 |
8B [Sesame CSM](https://github.com/SesameAILabs/csm)-style model that generates Mimi audio codes
|
| 36 |
from text, with optional voice continuation from a reference clip.
|
|
|
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
|
| 39 |
|
|
|
|
| 119 |
speaker_id = gr.Slider(0, 1, value=0, step=1, label="Speaker ID")
|
| 120 |
max_length = gr.Slider(2, 60, value=10, step=1, label="Max audio length (s)")
|
| 121 |
temperature = gr.Slider(
|
| 122 |
+
0.1, 1.5, value=0.7, step=0.05,
|
| 123 |
label="Temperature (auto-lowered when cloning a voice)",
|
| 124 |
)
|
| 125 |
topk = gr.Slider(1, 100, value=50, step=1, label="Top-k")
|
|
|
|
| 130 |
ref_audio.change(transcribe, inputs=[ref_audio], outputs=[ref_text])
|
| 131 |
# Cloning tracks the reference much more closely at low temperature.
|
| 132 |
ref_audio.change(
|
| 133 |
+
lambda p: 0.4 if p else 0.7, inputs=[ref_audio], outputs=[temperature]
|
| 134 |
)
|
| 135 |
|
| 136 |
run.click(
|