MisoTTS

Runtime error

multimodalart (HF Staff) committed 3 days ago

Commit

c271af2

verified ·

1 Parent(s): 7847a40

Upload app.py with huggingface_hub

Files changed (1) hide show

app.py CHANGED Viewed

@@ -34,9 +34,6 @@ DESCRIPTION = """
 Text-to-speech with the [MisoLabs/MisoTTS](https://huggingface.co/MisoLabs/MisoTTS) model — an
 8B [Sesame CSM](https://github.com/SesameAILabs/csm)-style model that generates Mimi audio codes
 from text, with optional voice continuation from a reference clip.
-Provide a reference audio + its transcript to clone a voice, or leave them empty for a default voice.
-Outputs carry an imperceptible watermark identifying the audio as AI-generated.
 """
@@ -122,7 +119,7 @@ with gr.Blocks(title="Miso TTS 8B") as demo:
                 speaker_id = gr.Slider(0, 1, value=0, step=1, label="Speaker ID")
                 max_length = gr.Slider(2, 60, value=10, step=1, label="Max audio length (s)")
                 temperature = gr.Slider(
-                    0.1, 1.5, value=0.9, step=0.05,
                     label="Temperature (auto-lowered when cloning a voice)",
                 )
                 topk = gr.Slider(1, 100, value=50, step=1, label="Top-k")
@@ -133,7 +130,7 @@ with gr.Blocks(title="Miso TTS 8B") as demo:
     ref_audio.change(transcribe, inputs=[ref_audio], outputs=[ref_text])
     # Cloning tracks the reference much more closely at low temperature.
     ref_audio.change(
-        lambda p: 0.4 if p else 0.9, inputs=[ref_audio], outputs=[temperature]
     )
     run.click(

 Text-to-speech with the [MisoLabs/MisoTTS](https://huggingface.co/MisoLabs/MisoTTS) model — an
 8B [Sesame CSM](https://github.com/SesameAILabs/csm)-style model that generates Mimi audio codes
 from text, with optional voice continuation from a reference clip.
 """
                 speaker_id = gr.Slider(0, 1, value=0, step=1, label="Speaker ID")
                 max_length = gr.Slider(2, 60, value=10, step=1, label="Max audio length (s)")
                 temperature = gr.Slider(
+                    0.1, 1.5, value=0.7, step=0.05,
                     label="Temperature (auto-lowered when cloning a voice)",
                 )
                 topk = gr.Slider(1, 100, value=50, step=1, label="Top-k")
     ref_audio.change(transcribe, inputs=[ref_audio], outputs=[ref_text])
     # Cloning tracks the reference much more closely at low temperature.
     ref_audio.change(
+        lambda p: 0.4 if p else 0.7, inputs=[ref_audio], outputs=[temperature]
     )
     run.click(