YatharthS committed on
Commit
ed16b98
·
verified ·
1 Parent(s): f0993f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -10
app.py CHANGED
@@ -1,16 +1,16 @@
1
  import os
2
  import sys
3
  import subprocess
 
4
 
5
  # 1. Clone the repo if it doesn't exist
6
  if not os.path.exists("LuxTTS"):
7
  subprocess.run(["git", "clone", "https://github.com/ysharma3501/LuxTTS.git"])
8
 
9
- # 2. Install requirements from the cloned folder
10
- # This ensures all dependencies (transformers, librosa, etc.) are present
11
  subprocess.run([sys.executable, "-m", "pip", "install", "-r", "LuxTTS/requirements.txt"])
12
 
13
- # 3. Add to path so the 'zipvoice' module is importable
14
  sys.path.append(os.path.abspath("LuxTTS"))
15
 
16
  import numpy as np
@@ -20,16 +20,19 @@ from zipvoice.luxvoice import LuxTTS
20
 
21
  # Init Model
22
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
23
  lux_tts = LuxTTS('YatharthS/LuxTTS', device=device, threads=2)
24
 
25
  def infer(text, audio_prompt, rms, t_shift, num_steps, speed, return_smooth):
26
  if audio_prompt is None or not text:
27
- return None
 
 
28
 
29
  # Encode reference
30
  encoded_prompt = lux_tts.encode_prompt(audio_prompt, rms=rms)
31
 
32
- # Generate speech with ALL params
33
  final_wav = lux_tts.generate_speech(
34
  text,
35
  encoded_prompt,
@@ -38,14 +41,29 @@ def infer(text, audio_prompt, rms, t_shift, num_steps, speed, return_smooth):
38
  speed=speed,
39
  return_smooth=return_smooth
40
  )
 
 
 
 
41
  final_wav = final_wav.cpu().squeeze(0).numpy()
42
  final_wav = (np.clip(final_wav, -1.0, 1.0) * 32767).astype(np.int16)
43
- return (48000, final_wav)
 
 
44
 
45
  # Gradio UI
46
- with gr.Blocks() as demo:
47
  gr.Markdown("# 🎙️ LuxTTS Voice Cloning")
48
 
 
 
 
 
 
 
 
 
 
49
  with gr.Row():
50
  with gr.Column():
51
  input_text = gr.Textbox(label="Text to Synthesize", value="Hey, what's up? I'm feeling really great!")
@@ -57,19 +75,20 @@ with gr.Blocks() as demo:
57
  steps_val = gr.Slider(1, 10, value=4, step=1, label="Num Steps")
58
 
59
  with gr.Row():
60
- speed_val = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Speed (Higher = Faster)")
 
61
  smooth_val = gr.Checkbox(label="Return Smooth", value=False)
62
 
63
  btn = gr.Button("Generate Speech", variant="primary")
64
 
65
  with gr.Column():
66
  audio_out = gr.Audio(label="Result")
 
67
 
68
- # Fixed: Passing all inputs to match the infer function signature
69
  btn.click(
70
  fn=infer,
71
  inputs=[input_text, input_audio, rms_val, t_shift_val, steps_val, speed_val, smooth_val],
72
- outputs=audio_out
73
  )
74
 
75
  demo.launch()
 
1
  import os
2
  import sys
3
  import subprocess
4
+ import time # Added for tracking duration
5
 
6
  # 1. Clone the repo if it doesn't exist
7
  if not os.path.exists("LuxTTS"):
8
  subprocess.run(["git", "clone", "https://github.com/ysharma3501/LuxTTS.git"])
9
 
10
+ # 2. Install requirements
 
11
  subprocess.run([sys.executable, "-m", "pip", "install", "-r", "LuxTTS/requirements.txt"])
12
 
13
+ # 3. Add to path
14
  sys.path.append(os.path.abspath("LuxTTS"))
15
 
16
  import numpy as np
 
20
 
21
  # Init Model
22
  device = "cuda" if torch.cuda.is_available() else "cpu"
23
+ # Note: 2 threads on a 2-core CPU is the bottleneck
24
  lux_tts = LuxTTS('YatharthS/LuxTTS', device=device, threads=2)
25
 
26
  def infer(text, audio_prompt, rms, t_shift, num_steps, speed, return_smooth):
  # Handler passed as `fn` to btn.click: synthesizes `text` using `audio_prompt`
  # as the reference audio. Always returns a 2-tuple (audio, status message):
  # audio is None when input is missing, otherwise (48000, int16 numpy array).
27
  # Guard: a reference clip and non-empty text are both required.
  if audio_prompt is None or not text:
28
+ return None, "Please provide text and reference audio."
29
+
30
  # Timer starts before prompt encoding and stops right after generation, so
  # the reported duration excludes the numpy/int16 conversion further down.
+ start_time = time.time()
31
 
32
  # Encode reference
33
  encoded_prompt = lux_tts.encode_prompt(audio_prompt, rms=rms)
34
 
35
+ # Generate speech
36
  final_wav = lux_tts.generate_speech(
37
  text,
38
  encoded_prompt,
  # NOTE(review): source lines 39-40 are not shown in this diff view;
  # presumably t_shift and num_steps are passed here — confirm against the full file.
 
41
  speed=speed,
42
  return_smooth=return_smooth
43
  )
44
+
45
+ end_time = time.time()
46
+ duration = round(end_time - start_time, 2)
47
+
48
  # Move the result to the CPU, drop the leading dimension and convert to numpy.
  final_wav = final_wav.cpu().squeeze(0).numpy()
49
  # Clamp to [-1.0, 1.0] and scale by 32767 to obtain int16 samples.
  final_wav = (np.clip(final_wav, -1.0, 1.0) * 32767).astype(np.int16)
50
+
51
+ stats_msg = f"✨ Generation complete in **{duration}s**."
52
  # First tuple element is the sample rate handed to the audio output (48000);
  # presumably this matches the model's native output rate — TODO confirm.
+ return (48000, final_wav), stats_msg
53
 
54
  # Gradio UI
  # Single-page Blocks layout using the Soft theme; started by demo.launch() below.
55
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
56
  gr.Markdown("# 🎙️ LuxTTS Voice Cloning")
57
 
58
+ # Info Panel
59
+ gr.Markdown(
60
+ """
61
+ > **Note:** Processing may feel slow as this instance uses a **2-core CPU** (lower specs than most modern phones).
62
+ >
63
+ > **Tip:** If you notice words are being **cut off** at the end, try **lowering the speed** further.
64
+ """
65
+ )
66
+
67
  with gr.Row():
68
  with gr.Column():
69
  input_text = gr.Textbox(label="Text to Synthesize", value="Hey, what's up? I'm feeling really great!")
  # NOTE(review): source lines 70-74 are not shown in this diff view;
  # input_audio, rms_val and t_shift_val (used by btn.click below) are
  # presumably defined there — confirm against the full file.
 
75
  steps_val = gr.Slider(1, 10, value=4, step=1, label="Num Steps")
76
 
77
  with gr.Row():
78
+ # Default speed set to 0.8
79
+ speed_val = gr.Slider(0.5, 2.0, value=0.8, step=0.1, label="Speed (Lower = Longer/Clearer)")
80
  smooth_val = gr.Checkbox(label="Return Smooth", value=False)
81
 
82
  btn = gr.Button("Generate Speech", variant="primary")
83
 
84
  with gr.Column():
85
  audio_out = gr.Audio(label="Result")
86
  # Markdown area that displays the status string infer returns with the audio.
+ status_text = gr.Markdown("Ready to generate...")
87
 
 
88
  # The inputs list follows the order of infer's parameters, and the two
  # outputs receive infer's (audio, status) return pair in that order.
  btn.click(
89
  fn=infer,
90
  inputs=[input_text, input_audio, rms_val, t_shift_val, steps_val, speed_val, smooth_val],
91
+ outputs=[audio_out, status_text]
92
  )
93
 
94
  demo.launch()