LuxTTS

Runtime error

App Files Files Community

YatharthS committed on Jan 23

Commit

ed16b98

verified ·

1 Parent(s): f0993f6

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -10

app.py CHANGED Viewed

@@ -1,16 +1,16 @@
 import os
 import sys
 import subprocess
 # 1. Clone the repo if it doesn't exist
 if not os.path.exists("LuxTTS"):
     subprocess.run(["git", "clone", "https://github.com/ysharma3501/LuxTTS.git"])
-# 2. Install requirements from the cloned folder
-# This ensures all dependencies (transformers, librosa, etc.) are present
 subprocess.run([sys.executable, "-m", "pip", "install", "-r", "LuxTTS/requirements.txt"])
-# 3. Add to path so the 'zipvoice' module is importable
 sys.path.append(os.path.abspath("LuxTTS"))
 import numpy as np
@@ -20,16 +20,19 @@ from zipvoice.luxvoice import LuxTTS
 # Init Model
 device = "cuda" if torch.cuda.is_available() else "cpu"
 lux_tts = LuxTTS('YatharthS/LuxTTS', device=device, threads=2)
 def infer(text, audio_prompt, rms, t_shift, num_steps, speed, return_smooth):
     if audio_prompt is None or not text:
-        return None
     # Encode reference
     encoded_prompt = lux_tts.encode_prompt(audio_prompt, rms=rms)
-    # Generate speech with ALL params
     final_wav = lux_tts.generate_speech(
         text,
         encoded_prompt,
@@ -38,14 +41,29 @@ def infer(text, audio_prompt, rms, t_shift, num_steps, speed, return_smooth):
         speed=speed,
         return_smooth=return_smooth
     )
     final_wav = final_wav.cpu().squeeze(0).numpy()
     final_wav = (np.clip(final_wav, -1.0, 1.0) * 32767).astype(np.int16)
-    return (48000, final_wav)
 # Gradio UI
-with gr.Blocks() as demo:
     gr.Markdown("# 🎙️ LuxTTS Voice Cloning")
     with gr.Row():
         with gr.Column():
             input_text = gr.Textbox(label="Text to Synthesize", value="Hey, what's up? I'm feeling really great!")
@@ -57,19 +75,20 @@ with gr.Blocks() as demo:
                 steps_val = gr.Slider(1, 10, value=4, step=1, label="Num Steps")
             with gr.Row():
-                speed_val = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Speed (Higher = Faster)")
                 smooth_val = gr.Checkbox(label="Return Smooth", value=False)
             btn = gr.Button("Generate Speech", variant="primary")
         with gr.Column():
             audio_out = gr.Audio(label="Result")
-    # Fixed: Passing all inputs to match the infer function signature
     btn.click(
         fn=infer,
         inputs=[input_text, input_audio, rms_val, t_shift_val, steps_val, speed_val, smooth_val],
-        outputs=audio_out
     )
 demo.launch()

 import os
 import sys
 import subprocess
+import time  # Added for tracking duration
 # 1. Clone the repo if it doesn't exist
 if not os.path.exists("LuxTTS"):
     subprocess.run(["git", "clone", "https://github.com/ysharma3501/LuxTTS.git"])
+# 2. Install requirements
 subprocess.run([sys.executable, "-m", "pip", "install", "-r", "LuxTTS/requirements.txt"])
+# 3. Add to path
 sys.path.append(os.path.abspath("LuxTTS"))
 import numpy as np
 # Init Model
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Note: 2 threads on a 2-core CPU is the bottleneck
 lux_tts = LuxTTS('YatharthS/LuxTTS', device=device, threads=2)
 def infer(text, audio_prompt, rms, t_shift, num_steps, speed, return_smooth):
     if audio_prompt is None or not text:
+        return None, "Please provide text and reference audio."
+    start_time = time.time()
     # Encode reference
     encoded_prompt = lux_tts.encode_prompt(audio_prompt, rms=rms)
+    # Generate speech
     final_wav = lux_tts.generate_speech(
         text,
         encoded_prompt,
         speed=speed,
         return_smooth=return_smooth
     )
+    end_time = time.time()
+    duration = round(end_time - start_time, 2)
     final_wav = final_wav.cpu().squeeze(0).numpy()
     final_wav = (np.clip(final_wav, -1.0, 1.0) * 32767).astype(np.int16)
+    stats_msg = f"✨ Generation complete in **{duration}s**."
+    return (48000, final_wav), stats_msg
 # Gradio UI
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🎙️ LuxTTS Voice Cloning")
+    # Info Panel
+    gr.Markdown(
+        """
+        > **Note:** Processing may feel slow as this instance uses a **2-core CPU** (lower specs than most modern phones).
+        >
+        > **Tip:** If you notice words are being **cut off** at the end, try **lowering the speed** further.
+        """
+    )
     with gr.Row():
         with gr.Column():
             input_text = gr.Textbox(label="Text to Synthesize", value="Hey, what's up? I'm feeling really great!")
                 steps_val = gr.Slider(1, 10, value=4, step=1, label="Num Steps")
             with gr.Row():
+                # Default speed set to 0.8
+                speed_val = gr.Slider(0.5, 2.0, value=0.8, step=0.1, label="Speed (Lower = Longer/Clearer)")
                 smooth_val = gr.Checkbox(label="Return Smooth", value=False)
             btn = gr.Button("Generate Speech", variant="primary")
         with gr.Column():
             audio_out = gr.Audio(label="Result")
+            status_text = gr.Markdown("Ready to generate...")
     btn.click(
         fn=infer,
         inputs=[input_text, input_audio, rms_val, t_shift_val, steps_val, speed_val, smooth_val],
+        outputs=[audio_out, status_text]
     )
 demo.launch()