YatharthS committed on
Commit
f0e6eda
·
verified ·
1 Parent(s): d8433e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -42
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import sys
3
  import subprocess
4
- import time
5
 
6
  # 1. Clone the repo if it doesn't exist
7
  if not os.path.exists("LuxTTS"):
@@ -20,75 +20,126 @@ from zipvoice.luxvoice import LuxTTS
20
 
21
  # Init Model
22
  device = "cuda" if torch.cuda.is_available() else "cpu"
23
- # Note: 2 threads on a 2-core CPU is the bottleneck
24
- lux_tts = LuxTTS('YatharthS/LuxTTS', device=device, threads=2)
25
 
26
- def infer(text, audio_prompt, rms, t_shift, num_steps, speed, return_smooth):
 
 
 
 
 
 
 
 
 
27
  if audio_prompt is None or not text:
28
  return None, "Please provide text and reference audio."
29
-
30
  start_time = time.time()
31
-
32
- # Encode reference
33
- encoded_prompt = lux_tts.encode_prompt(audio_prompt, rms=rms)
34
-
 
 
 
 
35
  # Generate speech
36
  final_wav = lux_tts.generate_speech(
37
- text,
38
- encoded_prompt,
39
- num_steps=int(num_steps),
40
- t_shift=t_shift,
41
- speed=speed,
42
- return_smooth=return_smooth
43
  )
44
-
45
- end_time = time.time()
46
- duration = round(end_time - start_time, 2)
47
-
48
  final_wav = final_wav.cpu().squeeze(0).numpy()
49
  final_wav = (np.clip(final_wav, -1.0, 1.0) * 32767).astype(np.int16)
50
-
51
  stats_msg = f"✨ Generation complete in **{duration}s**."
52
  return (48000, final_wav), stats_msg
53
 
 
54
  # Gradio UI
 
55
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
56
  gr.Markdown("# 🎙️ LuxTTS Voice Cloning")
57
-
58
- # Info Panel
59
  gr.Markdown(
60
  """
61
- > **Note:** Processing may feel slow as this instance uses a **2-core CPU** (lower specs than most modern phones).
62
- >
63
- > **Tip:** If you notice words are being **cut off** at the end, try **lowering the speed** further.
64
  """
65
  )
66
-
67
  with gr.Row():
68
  with gr.Column():
69
- input_text = gr.Textbox(label="Text to Synthesize", value="Hey, what's up? I'm feeling really great!")
70
- input_audio = gr.Audio(label="Reference Audio (.wav)", type="filepath")
71
-
 
 
 
 
 
 
72
  with gr.Row():
73
- rms_val = gr.Number(value=0.01, label="RMS (Loudness)")
74
- t_shift_val = gr.Number(value=0.9, label="T-Shift")
75
- steps_val = gr.Slider(1, 10, value=4, step=1, label="Num Steps")
76
-
 
 
 
 
 
 
 
 
 
 
77
  with gr.Row():
78
- # Default speed set to 0.8
79
- speed_val = gr.Slider(0.5, 2.0, value=0.8, step=0.1, label="Speed (Lower = Longer/Clearer)")
80
- smooth_val = gr.Checkbox(label="Return Smooth", value=False)
81
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  btn = gr.Button("Generate Speech", variant="primary")
83
-
84
  with gr.Column():
85
  audio_out = gr.Audio(label="Result")
86
  status_text = gr.Markdown("Ready to generate...")
87
 
88
  btn.click(
89
- fn=infer,
90
- inputs=[input_text, input_audio, rms_val, t_shift_val, steps_val, speed_val, smooth_val],
91
- outputs=[audio_out, status_text]
 
 
 
 
 
 
 
 
 
92
  )
93
 
94
- demo.launch()
 
1
  import os
2
  import sys
3
  import subprocess
4
+ import time
5
 
6
  # 1. Clone the repo if it doesn't exist
7
  if not os.path.exists("LuxTTS"):
 
20
 
21
  # Init Model
22
  device = "cuda" if torch.cuda.is_available() else "cpu"
23
+ lux_tts = LuxTTS("YatharthS/LuxTTS", device=device, threads=2)
 
24
 
25
+ def infer(
26
+ text,
27
+ audio_prompt,
28
+ rms,
29
+ ref_duration,
30
+ t_shift,
31
+ num_steps,
32
+ speed,
33
+ return_smooth,
34
+ ):
35
  if audio_prompt is None or not text:
36
  return None, "Please provide text and reference audio."
37
+
38
  start_time = time.time()
39
+
40
+ # Encode reference (WITH duration)
41
+ encoded_prompt = lux_tts.encode_prompt(
42
+ audio_prompt,
43
+ duration=ref_duration,
44
+ rms=rms,
45
+ )
46
+
47
  # Generate speech
48
  final_wav = lux_tts.generate_speech(
49
+ text,
50
+ encoded_prompt,
51
+ num_steps=int(num_steps),
52
+ t_shift=t_shift,
53
+ speed=speed,
54
+ return_smooth=return_smooth,
55
  )
56
+
57
+ duration = round(time.time() - start_time, 2)
58
+
 
59
  final_wav = final_wav.cpu().squeeze(0).numpy()
60
  final_wav = (np.clip(final_wav, -1.0, 1.0) * 32767).astype(np.int16)
61
+
62
  stats_msg = f"✨ Generation complete in **{duration}s**."
63
  return (48000, final_wav), stats_msg
64
 
65
+ # =======================
66
  # Gradio UI
67
+ # =======================
68
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
69
  gr.Markdown("# 🎙️ LuxTTS Voice Cloning")
70
+
 
71
  gr.Markdown(
72
  """
73
+ > **Note:** This demo runs on a **2-core CPU**, so expect slower inference.
74
+ > **Tip:** If words get cut off, lower **Speed** or increase **Ref Duration**.
 
75
  """
76
  )
77
+
78
  with gr.Row():
79
  with gr.Column():
80
+ input_text = gr.Textbox(
81
+ label="Text to Synthesize",
82
+ value="Hey, what's up? I'm feeling really great!",
83
+ )
84
+ input_audio = gr.Audio(
85
+ label="Reference Audio (.wav)",
86
+ type="filepath",
87
+ )
88
+
89
  with gr.Row():
90
+ rms_val = gr.Number(
91
+ value=0.01,
92
+ label="RMS (Loudness)",
93
+ )
94
+ ref_duration_val = gr.Number(
95
+ value=5,
96
+ label="Reference Duration (sec)",
97
+ info="Lower = faster. Set ~1000 if you hear artifacts.",
98
+ )
99
+ t_shift_val = gr.Number(
100
+ value=0.9,
101
+ label="T-Shift",
102
+ )
103
+
104
  with gr.Row():
105
+ steps_val = gr.Slider(
106
+ 1,
107
+ 10,
108
+ value=4,
109
+ step=1,
110
+ label="Num Steps",
111
+ )
112
+ speed_val = gr.Slider(
113
+ 0.5,
114
+ 2.0,
115
+ value=0.8,
116
+ step=0.1,
117
+ label="Speed (Lower = Longer / Clearer)",
118
+ )
119
+ smooth_val = gr.Checkbox(
120
+ label="Return Smooth",
121
+ value=False,
122
+ )
123
+
124
  btn = gr.Button("Generate Speech", variant="primary")
125
+
126
  with gr.Column():
127
  audio_out = gr.Audio(label="Result")
128
  status_text = gr.Markdown("Ready to generate...")
129
 
130
  btn.click(
131
+ fn=infer,
132
+ inputs=[
133
+ input_text,
134
+ input_audio,
135
+ rms_val,
136
+ ref_duration_val,
137
+ t_shift_val,
138
+ steps_val,
139
+ speed_val,
140
+ smooth_val,
141
+ ],
142
+ outputs=[audio_out, status_text],
143
  )
144
 
145
+ demo.launch()