Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -62,7 +62,7 @@ def transcribe(ref_audio_path):
|
|
| 62 |
|
| 63 |
|
| 64 |
@spaces.GPU(duration=120)
|
| 65 |
-
def synthesize(text, ref_audio_path, ref_text, speaker_id, max_length_ms, temperature, topk):
|
| 66 |
text = (text or "").strip()
|
| 67 |
if not text:
|
| 68 |
raise gr.Error("Please enter some text to synthesize.")
|
|
@@ -92,7 +92,7 @@ def synthesize(text, ref_audio_path, ref_text, speaker_id, max_length_ms, temper
|
|
| 92 |
text=text,
|
| 93 |
speaker=int(speaker_id),
|
| 94 |
context=context,
|
| 95 |
-
max_audio_length_ms=float(max_length_ms),
|
| 96 |
temperature=float(temperature),
|
| 97 |
topk=int(topk),
|
| 98 |
)
|
|
@@ -120,7 +120,7 @@ with gr.Blocks(title="Miso TTS 8B") as demo:
|
|
| 120 |
)
|
| 121 |
with gr.Accordion("Advanced", open=False):
|
| 122 |
speaker_id = gr.Slider(0, 1, value=0, step=1, label="Speaker ID")
|
| 123 |
-
max_length = gr.Slider(
|
| 124 |
temperature = gr.Slider(
|
| 125 |
0.1, 1.5, value=0.9, step=0.05,
|
| 126 |
label="Temperature (auto-lowered when cloning a voice)",
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
@spaces.GPU(duration=120)
|
| 65 |
+
def synthesize(text, ref_audio_path, ref_text, speaker_id, max_length_s, temperature, topk):
|
| 66 |
text = (text or "").strip()
|
| 67 |
if not text:
|
| 68 |
raise gr.Error("Please enter some text to synthesize.")
|
|
|
|
| 92 |
text=text,
|
| 93 |
speaker=int(speaker_id),
|
| 94 |
context=context,
|
| 95 |
+
max_audio_length_ms=float(max_length_s) * 1000.0,
|
| 96 |
temperature=float(temperature),
|
| 97 |
topk=int(topk),
|
| 98 |
)
|
|
|
|
| 120 |
)
|
| 121 |
with gr.Accordion("Advanced", open=False):
|
| 122 |
speaker_id = gr.Slider(0, 1, value=0, step=1, label="Speaker ID")
|
| 123 |
+
max_length = gr.Slider(2, 60, value=10, step=1, label="Max audio length (s)")
|
| 124 |
temperature = gr.Slider(
|
| 125 |
0.1, 1.5, value=0.9, step=0.05,
|
| 126 |
label="Temperature (auto-lowered when cloning a voice)",
|