# voxcpm-api / app.py
# Last commit by ypedz: "Update app.py" (42b69ad, verified)
import gradio as gr
from voxcpm import VoxCPM
import soundfile as sf
import tempfile
import numpy as np
# Process-wide cache for the VoxCPM instance; stays None until first use.
model = None


def load_model():
    """Return the shared VoxCPM model, creating it on the first call.

    The instance is stored in the module-level ``model`` variable so that
    later calls reuse it instead of calling ``from_pretrained`` again.
    """
    global model
    if model is not None:
        return model
    model = VoxCPM.from_pretrained("openbmb/VoxCPM2", load_denoiser=False)
    return model
def _pcm_to_float32_mono(samples):
    """Return *samples* as a 1-D float32 array scaled to roughly [-1.0, 1.0].

    Integer PCM is divided by the full-scale value of its own dtype (int16
    is still divided by 32768), unsigned PCM is re-centred around zero
    first, and floating-point input is only cast. Arrays with a second axis
    are averaged across that axis.
    """
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        info = np.iinfo(samples.dtype)
        if info.min == 0:
            # Unsigned PCM is offset-binary: silence sits at mid-range.
            half_range = (float(info.max) + 1.0) / 2.0
            samples = (samples - half_range) / half_range
        else:
            samples = samples / (float(info.max) + 1.0)
    samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # Multi-channel input is expected as (samples, channels).
        # NOTE(review): assumes the model wants a mono reference - confirm.
        samples = samples.mean(axis=1)
    return samples


def generate_speech(text, ref_audio, mode, *, cfg_value=2.0, inference_timesteps=10):
    """Synthesise *text* with VoxCPM and return the path of the WAV file.

    Args:
        text: Text to speak. For voice design the caller prefixes it with a
            "(voice description)" as explained in the UI.
        ref_audio: ``(sample_rate, samples)`` tuple from the audio input, or
            ``None`` when no reference clip was supplied.
        mode: Selected radio value. ``"🎨 Voice Design"`` forces plain
            generation; any other value requests voice cloning.
        cfg_value: Forwarded to ``model.generate`` as ``cfg_value``.
        inference_timesteps: Forwarded to ``model.generate`` as
            ``inference_timesteps``.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is not deleted by this function.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Teks tidak boleh kosong.")

    m = load_model()
    gen_kwargs = {
        "cfg_value": cfg_value,
        "inference_timesteps": inference_timesteps,
    }

    if mode == "🎨 Voice Design" or ref_audio is None:
        # Plain TTS / voice design from the description; also the fallback
        # when cloning was selected but no reference clip was given.
        wav = m.generate(text=text, **gen_kwargs)
    else:
        # Voice cloning from the reference audio.
        ref_sr, ref_wav = ref_audio
        wav = m.generate(
            text=text,
            ref_audio=_pcm_to_float32_mono(ref_wav),
            ref_sr=ref_sr,
            **gen_kwargs,
        )

    # Reserve a unique path and close the handle before soundfile reopens
    # it, so no descriptor is left open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name
    sf.write(out_path, wav, m.tts_model.sample_rate)
    return out_path
# Gradio UI: text + optional reference clip + mode selector -> generated audio.
# The "🎨 Voice Design" choice string must stay identical to the literal that
# generate_speech compares against; any other choice is treated as cloning.
demo = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Textbox(
            label="Teks yang mau diucapkan",
            placeholder="Ketik teks di sini... (untuk Voice Design, awali dengan (deskripsi suara)teks)",
        ),
        gr.Audio(
            # Label previously contained a mis-decoded microphone emoji.
            label="🎙️ Upload audio referensi (untuk Voice Cloning)",
            type="numpy",
            sources=["upload", "microphone"],
        ),
        gr.Radio(
            choices=["🎨 Voice Design", "🎙️ Voice Cloning"],
            value="🎙️ Voice Cloning",
            label="Mode",
        ),
    ],
    outputs=gr.Audio(label="Hasil suara"),
    title="VoxCPM TTS API",
    description="**Voice Cloning:** upload audio referensi + ketik teks\n**Voice Design:** tulis (deskripsi suara)teks, tanpa audio referensi",
)

# Start the web server as soon as the script runs.
demo.launch()