ypedz committed on
Commit
42b69ad
·
verified ·
1 Parent(s): c184f37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -4
app.py CHANGED
@@ -1,8 +1,8 @@
1
- import os
2
  import gradio as gr
3
  from voxcpm import VoxCPM
4
  import soundfile as sf
5
  import tempfile
 
6
 
7
  model = None
8
 
@@ -12,18 +12,49 @@ def load_model():
12
  model = VoxCPM.from_pretrained("openbmb/VoxCPM2", load_denoiser=False)
13
  return model
14
 
15
- def generate_speech(text):
16
  m = load_model()
17
- wav = m.generate(text=text, cfg_value=2.0, inference_timesteps=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
19
  sf.write(tmp.name, wav, m.tts_model.sample_rate)
20
  return tmp.name
21
 
22
  demo = gr.Interface(
23
  fn=generate_speech,
24
- inputs=gr.Textbox(label="Masukkan teks"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  outputs=gr.Audio(label="Hasil suara"),
26
  title="VoxCPM TTS API",
 
27
  )
28
 
29
  demo.launch()
 
 
1
  import gradio as gr
2
  from voxcpm import VoxCPM
3
  import soundfile as sf
4
  import tempfile
5
+ import numpy as np
6
 
7
  model = None
8
 
 
12
  model = VoxCPM.from_pretrained("openbmb/VoxCPM2", load_denoiser=False)
13
  return model
14
 
15
+ def generate_speech(text, ref_audio, mode):
16
  m = load_model()
17
+
18
+ if mode == "🎨 Voice Design" or ref_audio is None:
19
+ # TTS biasa / voice design dari deskripsi
20
+ wav = m.generate(text=text, cfg_value=2.0, inference_timesteps=10)
21
+ else:
22
+ # Voice cloning pakai audio referensi
23
+ ref_sr, ref_wav = ref_audio
24
+ ref_wav = ref_wav.astype(np.float32) / 32768.0
25
+ wav = m.generate(
26
+ text=text,
27
+ ref_audio=ref_wav,
28
+ ref_sr=ref_sr,
29
+ cfg_value=2.0,
30
+ inference_timesteps=10,
31
+ )
32
+
33
  tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
34
  sf.write(tmp.name, wav, m.tts_model.sample_rate)
35
  return tmp.name
36
 
37
  demo = gr.Interface(
38
  fn=generate_speech,
39
+ inputs=[
40
+ gr.Textbox(
41
+ label="Teks yang mau diucapkan",
42
+ placeholder="Ketik teks di sini... (untuk Voice Design, awali dengan (deskripsi suara)teks)"
43
+ ),
44
+ gr.Audio(
45
+ label="🎙️ Upload audio referensi (untuk Voice Cloning)",
46
+ type="numpy",
47
+ sources=["upload", "microphone"]
48
+ ),
49
+ gr.Radio(
50
+ choices=["🎨 Voice Design", "🎙️ Voice Cloning"],
51
+ value="🎙️ Voice Cloning",
52
+ label="Mode"
53
+ ),
54
+ ],
55
  outputs=gr.Audio(label="Hasil suara"),
56
  title="VoxCPM TTS API",
57
+ description="**Voice Cloning:** upload audio referensi + ketik teks\n**Voice Design:** tulis (deskripsi suara)teks, tanpa audio referensi"
58
  )
59
 
60
  demo.launch()