| | import os |
| | import sys |
| | import subprocess |
| | import time |
| |
|
| | |
# --- Runtime bootstrap -------------------------------------------------
# The LuxTTS sources are not installed as a package: they are cloned next to
# the current working directory, their requirements are installed into the
# running interpreter, and the checkout is put on sys.path so that the
# `zipvoice` import further down can be resolved.
_LUXTTS_DIR = "LuxTTS"
_LUXTTS_REPO_URL = "https://github.com/ysharma3501/LuxTTS.git"

if not os.path.exists(_LUXTTS_DIR):
    # check=True: a failed clone raises CalledProcessError right here instead
    # of surfacing later as a confusing ModuleNotFoundError for `zipvoice`.
    subprocess.run(["git", "clone", _LUXTTS_REPO_URL], check=True)

# Install with the interpreter that is running this script so the packages
# land in the same environment the imports below are resolved from.
_pip_result = subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "-r",
        os.path.join(_LUXTTS_DIR, "requirements.txt"),
    ]
)
if _pip_result.returncode != 0:
    # Kept non-fatal on purpose: the dependencies may already be present.
    # The failure is reported so a later ImportError can be traced back here.
    print(
        "WARNING: installing LuxTTS requirements failed "
        f"(pip exit code {_pip_result.returncode}).",
        file=sys.stderr,
    )

# Make the cloned checkout importable.
sys.path.append(os.path.abspath(_LUXTTS_DIR))
| |
|
| | import numpy as np |
| | import gradio as gr |
| | import torch |
| | from zipvoice.luxvoice import LuxTTS |
| |
|
| | |
# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Created once at module level; `infer` reuses this instance on every call.
lux_tts = LuxTTS("YatharthS/LuxTTS", device=device, threads=2)
| |
|
def infer(text, audio_prompt, rms, t_shift, num_steps, speed, return_smooth):
    """Synthesize ``text`` using ``audio_prompt`` as the reference voice.

    Args:
        text: Text to synthesize. Empty or whitespace-only text is rejected.
        audio_prompt: Reference audio handed to ``lux_tts.encode_prompt``;
            ``None`` is rejected.
        rms: Forwarded as ``rms`` to ``lux_tts.encode_prompt``.
        t_shift: Forwarded as ``t_shift`` to ``lux_tts.generate_speech``.
        num_steps: Forwarded as ``num_steps`` after conversion to ``int``.
        speed: Forwarded as ``speed`` to ``lux_tts.generate_speech``.
        return_smooth: Forwarded as ``return_smooth`` to
            ``lux_tts.generate_speech``.

    Returns:
        A ``(audio, message)`` pair. When an input is missing, ``audio`` is
        ``None`` and ``message`` asks for text and reference audio. Otherwise
        ``audio`` is ``(48000, samples)`` with ``samples`` an int16 NumPy
        array, and ``message`` reports the generation time.
    """
    # Guard clause: whitespace-only text carries nothing to synthesize, so it
    # is treated the same as empty text instead of being sent to the model.
    if audio_prompt is None or not text or not text.strip():
        return None, "Please provide text and reference audio."

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    started = time.perf_counter()

    encoded_prompt = lux_tts.encode_prompt(audio_prompt, rms=rms)

    final_wav = lux_tts.generate_speech(
        text,
        encoded_prompt,
        num_steps=int(num_steps),  # sliders may deliver floats
        t_shift=t_shift,
        speed=speed,
        return_smooth=return_smooth,
    )

    # Only encoding + generation are timed; the conversion below is excluded.
    duration = round(time.perf_counter() - started, 2)

    # detach() keeps .numpy() from raising if the tensor still tracks grads.
    final_wav = final_wav.detach().cpu().squeeze(0).numpy()
    # Clip to [-1, 1] before scaling so out-of-range samples saturate instead
    # of wrapping around when cast to int16.
    final_wav = (np.clip(final_wav, -1.0, 1.0) * 32767).astype(np.int16)

    stats_msg = f"✨ Generation complete in **{duration}s**."
    # NOTE(review): 48000 Hz is hard-coded; presumably the model's output
    # sample rate — confirm against LuxTTS.
    return (48000, final_wav), stats_msg
| |
|
| | |
# Page layout: title and usage notes on top, then two columns — the inputs
# and tuning controls on the left, the generated audio and status on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ LuxTTS Voice Cloning")

    # Usage notes displayed under the title.
    gr.Markdown(
        """
    > **Note:** Processing may feel slow as this instance uses a **2-core CPU** (lower specs than most modern phones).
    >
    > **Tip:** If you notice words are being **cut off** at the end, try **lowering the speed** further.
    """
    )

    with gr.Row():
        # Left column: what to say, whose voice to use, and generation knobs.
        with gr.Column():
            input_text = gr.Textbox(
                label="Text to Synthesize",
                value="Hey, what's up? I'm feeling really great!",
            )
            # type="filepath": the callback receives a path, not raw samples.
            input_audio = gr.Audio(
                label="Reference Audio (.wav)",
                type="filepath",
            )

            with gr.Row():
                rms_val = gr.Number(value=0.01, label="RMS (Loudness)")
                t_shift_val = gr.Number(value=0.9, label="T-Shift")
                steps_val = gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=4,
                    step=1,
                    label="Num Steps",
                )

            with gr.Row():
                speed_val = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=0.8,
                    step=0.1,
                    label="Speed (Lower = Longer/Clearer)",
                )
                smooth_val = gr.Checkbox(label="Return Smooth", value=False)

            btn = gr.Button("Generate Speech", variant="primary")

        # Right column: synthesized audio plus a status line.
        with gr.Column():
            audio_out = gr.Audio(label="Result")
            status_text = gr.Markdown("Ready to generate...")

    # The order of these components must match the parameter order of `infer`.
    infer_inputs = [
        input_text,
        input_audio,
        rms_val,
        t_shift_val,
        steps_val,
        speed_val,
        smooth_val,
    ]
    btn.click(fn=infer, inputs=infer_inputs, outputs=[audio_out, status_text])
| |
|
# Start serving the `demo` interface; no arguments are passed, so Gradio's
# default launch settings apply.
demo.launch()