Spaces:
Running
Running
| import gradio as gr | |
| from voxcpm import VoxCPM | |
| import soundfile as sf | |
| import tempfile | |
| import numpy as np | |
# Process-wide cache slot for the VoxCPM instance; filled on first use.
model = None


def load_model():
    """Return the shared VoxCPM model, creating it on the first call.

    The instance is stored in the module-level ``model`` variable so that
    later calls reuse it instead of loading the checkpoint again. The
    checkpoint is "openbmb/VoxCPM2", loaded with ``load_denoiser=False``.

    Returns:
        The cached VoxCPM instance.
    """
    global model
    if model is not None:
        return model
    model = VoxCPM.from_pretrained("openbmb/VoxCPM2", load_denoiser=False)
    return model
def _to_float_mono(samples):
    """Convert raw Gradio audio samples to a mono float32 waveform.

    Integer input is scaled by the full-scale value of its own dtype, so
    int16 is still divided by 32768 while int32 input is no longer left far
    outside [-1.0, 1.0]. Floating-point input is taken as already normalized.
    Multi-channel input, laid out as (samples, channels), is averaged down to
    a single channel.
    """
    data = np.asarray(samples)
    if np.issubdtype(data.dtype, np.integer):
        full_scale = float(np.iinfo(data.dtype).max) + 1.0
        data = (data / full_scale).astype(np.float32)
    else:
        data = data.astype(np.float32)
    if data.ndim > 1:
        # NOTE(review): assumes the model wants one channel - confirm against
        # the installed voxcpm version.
        data = data.mean(axis=1)
    return data


def generate_speech(text, ref_audio, mode):
    """Synthesize ``text`` and return the path of the resulting WAV file.

    Args:
        text: Text to speak. For voice design the UI asks the user to prefix
            it with a voice description in parentheses.
        ref_audio: ``(sample_rate, samples)`` tuple from the
            ``gr.Audio(type="numpy")`` input, or ``None`` when no reference
            clip was supplied.
        mode: Label selected in the "Mode" radio group.

    Returns:
        Path to a temporary ``.wav`` file holding the generated speech. The
        file is created with ``delete=False`` and is not removed here.
    """
    m = load_model()

    # Match on the ASCII tail of the label: the emoji prefix of the radio
    # labels is encoding-damaged, so an exact comparison is fragile.
    wants_design = isinstance(mode, str) and mode.endswith("Voice Design")

    if wants_design or ref_audio is None:
        # Plain TTS / voice design driven by the description inside the text.
        wav = m.generate(text=text, cfg_value=2.0, inference_timesteps=10)
    else:
        # Voice cloning using the reference audio.
        ref_sr, ref_samples = ref_audio
        # NOTE(review): the ref_audio= / ref_sr= keyword names are kept from
        # the original call - verify them against the installed voxcpm API.
        wav = m.generate(
            text=text,
            ref_audio=_to_float_mono(ref_samples),
            ref_sr=ref_sr,
            cfg_value=2.0,
            inference_timesteps=10,
        )

    # Reserve a path and close the handle straight away so the descriptor is
    # not leaked and soundfile does not write to a file that is still open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name
    sf.write(out_path, wav, m.tts_model.sample_rate)
    return out_path
def _build_demo():
    """Assemble the Gradio interface that wraps ``generate_speech``.

    Components are created in the positional order ``generate_speech`` takes
    its arguments: text, reference audio, mode.
    """
    text_box = gr.Textbox(
        label="Teks yang mau diucapkan",
        placeholder="Ketik teks di sini... (untuk Voice Design, awali dengan (deskripsi suara)teks)",
    )
    reference_clip = gr.Audio(
        label="ποΈ Upload audio referensi (untuk Voice Cloning)",
        type="numpy",
        sources=["upload", "microphone"],
    )
    mode_picker = gr.Radio(
        choices=["π¨ Voice Design", "ποΈ Voice Cloning"],
        value="ποΈ Voice Cloning",
        label="Mode",
    )
    result_player = gr.Audio(label="Hasil suara")

    return gr.Interface(
        fn=generate_speech,
        inputs=[text_box, reference_clip, mode_picker],
        outputs=result_player,
        title="VoxCPM TTS API",
        description="**Voice Cloning:** upload audio referensi + ketik teks\n**Voice Design:** tulis (deskripsi suara)teks, tanpa audio referensi",
    )


demo = _build_demo()
demo.launch()