Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
-
import os
|
| 2 |
import gradio as gr
|
| 3 |
from voxcpm import VoxCPM
|
| 4 |
import soundfile as sf
|
| 5 |
import tempfile
|
|
|
|
| 6 |
|
| 7 |
model = None
|
| 8 |
|
|
@@ -12,18 +12,49 @@ def load_model():
|
|
| 12 |
model = VoxCPM.from_pretrained("openbmb/VoxCPM2", load_denoiser=False)
|
| 13 |
return model
|
| 14 |
|
| 15 |
-
def generate_speech(text):
|
| 16 |
m = load_model()
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
| 19 |
sf.write(tmp.name, wav, m.tts_model.sample_rate)
|
| 20 |
return tmp.name
|
| 21 |
|
| 22 |
demo = gr.Interface(
|
| 23 |
fn=generate_speech,
|
| 24 |
-
inputs=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
outputs=gr.Audio(label="Hasil suara"),
|
| 26 |
title="VoxCPM TTS API",
|
|
|
|
| 27 |
)
|
| 28 |
|
| 29 |
demo.launch()
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from voxcpm import VoxCPM
|
| 3 |
import soundfile as sf
|
| 4 |
import tempfile
|
| 5 |
+
import numpy as np
|
| 6 |
|
| 7 |
model = None
|
| 8 |
|
|
|
|
| 12 |
model = VoxCPM.from_pretrained("openbmb/VoxCPM2", load_denoiser=False)
|
| 13 |
return model
|
| 14 |
|
| 15 |
+
def _to_float32_audio(samples):
    """Return *samples* as a float32 array scaled for its source dtype.

    Signed-integer PCM is divided by the full-scale magnitude of its own
    dtype (32768 for int16, 2**31 for int32), so int16 input yields exactly
    the values the previous hard-coded ``/ 32768.0`` produced. Floating-point
    input is only cast, not rescaled. Any other dtype keeps the historical
    int16 assumption.
    """
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.floating):
        return samples.astype(np.float32)
    if np.issubdtype(samples.dtype, np.signedinteger):
        full_scale = -float(np.iinfo(samples.dtype).min)
        return (samples.astype(np.float64) / full_scale).astype(np.float32)
    return samples.astype(np.float32) / 32768.0


def generate_speech(text, ref_audio, mode):
    """Synthesize *text* to a WAV file and return the file's path.

    Args:
        text: Text to speak, passed to the model unchanged.
        ref_audio: ``None`` or a ``(sample_rate, samples)`` pair holding the
            reference recording used for voice cloning.
        mode: Selected mode label. "🎨 Voice Design" forces plain generation
            even when a reference recording was supplied.

    Returns:
        Path of a newly created ``.wav`` temporary file. The file is not
        deleted here; cleanup is left to the caller.
    """
    m = load_model()

    if mode == "🎨 Voice Design" or ref_audio is None:
        # Plain TTS / voice design driven by the description in the text.
        wav = m.generate(text=text, cfg_value=2.0, inference_timesteps=10)
    else:
        # Voice cloning from the reference recording.
        ref_sr, ref_wav = ref_audio
        # NOTE(review): multi-channel input is forwarded as-is; confirm
        # whether the model expects mono samples.
        wav = m.generate(
            text=text,
            ref_audio=_to_float32_audio(ref_wav),
            ref_sr=ref_sr,
            cfg_value=2.0,
            inference_timesteps=10,
        )

    # Reserve a unique path, then release the handle before soundfile opens
    # the same path, so no descriptor is leaked per request.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name
    sf.write(out_path, wav, m.tts_model.sample_rate)
    return out_path
|
| 36 |
|
| 37 |
# Input widgets, built in the same order as generate_speech's parameters
# (text, ref_audio, mode).
_text_box = gr.Textbox(
    label="Teks yang mau diucapkan",
    placeholder="Ketik teks di sini... (untuk Voice Design, awali dengan (deskripsi suara)teks)",
)
_reference_audio = gr.Audio(
    label="🎙️ Upload audio referensi (untuk Voice Cloning)",
    type="numpy",
    sources=["upload", "microphone"],
)
_mode_picker = gr.Radio(
    choices=["🎨 Voice Design", "🎙️ Voice Cloning"],
    value="🎙️ Voice Cloning",
    label="Mode",
)

# Wire the widgets to generate_speech; the output widget plays the returned file.
demo = gr.Interface(
    fn=generate_speech,
    inputs=[_text_box, _reference_audio, _mode_picker],
    outputs=gr.Audio(label="Hasil suara"),
    title="VoxCPM TTS API",
    description="**Voice Cloning:** upload audio referensi + ketik teks\n**Voice Design:** tulis (deskripsi suara)teks, tanpa audio referensi",
)

# Start the web server as soon as the module runs.
demo.launch()
|