multimodalart HF Staff committed on
Commit
c271af2
·
verified ·
1 Parent(s): 7847a40

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +2 -5
app.py CHANGED
@@ -34,9 +34,6 @@ DESCRIPTION = """
34
  Text-to-speech with the [MisoLabs/MisoTTS](https://huggingface.co/MisoLabs/MisoTTS) model — an
35
  8B [Sesame CSM](https://github.com/SesameAILabs/csm)-style model that generates Mimi audio codes
36
  from text, with optional voice continuation from a reference clip.
37
-
38
- Provide a reference audio + its transcript to clone a voice, or leave them empty for a default voice.
39
- Outputs carry an imperceptible watermark identifying the audio as AI-generated.
40
  """
41
 
42
 
@@ -122,7 +119,7 @@ with gr.Blocks(title="Miso TTS 8B") as demo:
122
  speaker_id = gr.Slider(0, 1, value=0, step=1, label="Speaker ID")
123
  max_length = gr.Slider(2, 60, value=10, step=1, label="Max audio length (s)")
124
  temperature = gr.Slider(
125
- 0.1, 1.5, value=0.9, step=0.05,
126
  label="Temperature (auto-lowered when cloning a voice)",
127
  )
128
  topk = gr.Slider(1, 100, value=50, step=1, label="Top-k")
@@ -133,7 +130,7 @@ with gr.Blocks(title="Miso TTS 8B") as demo:
133
  ref_audio.change(transcribe, inputs=[ref_audio], outputs=[ref_text])
134
  # Cloning tracks the reference much more closely at low temperature.
135
  ref_audio.change(
136
- lambda p: 0.4 if p else 0.9, inputs=[ref_audio], outputs=[temperature]
137
  )
138
 
139
  run.click(
 
34
  Text-to-speech with the [MisoLabs/MisoTTS](https://huggingface.co/MisoLabs/MisoTTS) model — an
35
  8B [Sesame CSM](https://github.com/SesameAILabs/csm)-style model that generates Mimi audio codes
36
  from text, with optional voice continuation from a reference clip.
 
 
 
37
  """
38
 
39
 
 
119
  speaker_id = gr.Slider(0, 1, value=0, step=1, label="Speaker ID")
120
  max_length = gr.Slider(2, 60, value=10, step=1, label="Max audio length (s)")
121
  temperature = gr.Slider(
122
+ 0.1, 1.5, value=0.7, step=0.05,
123
  label="Temperature (auto-lowered when cloning a voice)",
124
  )
125
  topk = gr.Slider(1, 100, value=50, step=1, label="Top-k")
 
130
  ref_audio.change(transcribe, inputs=[ref_audio], outputs=[ref_text])
131
  # Cloning tracks the reference much more closely at low temperature.
132
  ref_audio.change(
133
+ lambda p: 0.4 if p else 0.7, inputs=[ref_audio], outputs=[temperature]
134
  )
135
 
136
  run.click(