File size: 3,180 Bytes
f239f3d ed16b98 f239f3d 928010a f239f3d ed16b98 f239f3d ed16b98 f239f3d 60591b3 f239f3d cefdd32 f239f3d 928010a f239f3d ed16b98 f239f3d ed16b98 f239f3d 928010a f239f3d 928010a ed16b98 f239f3d ed16b98 2a9225b 60591b3 ed16b98 f239f3d ed16b98 928010a ed16b98 f239f3d 928010a f239f3d 928010a f239f3d 928010a ed16b98 928010a f239f3d 928010a ed16b98 f239f3d 928010a ed16b98 928010a f239f3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import os
import sys
import subprocess
import time # Added for tracking duration
# Bootstrap: make the LuxTTS sources importable before the model import below.

# 1. Clone the repo if it doesn't exist
if not os.path.exists("LuxTTS"):
    # check=True: without the checkout nothing below can work, so a failed
    # clone should stop here instead of surfacing later as an unrelated
    # pip / ImportError.
    subprocess.run(
        ["git", "clone", "https://github.com/ysharma3501/LuxTTS.git"],
        check=True,
    )

# 2. Install requirements
# A failed install is reported but not fatal: the environment may already
# provide the packages, in which case the imports that follow still succeed.
_pip_result = subprocess.run(
    [sys.executable, "-m", "pip", "install", "-r", "LuxTTS/requirements.txt"]
)
if _pip_result.returncode != 0:
    print(
        "WARNING: installing LuxTTS/requirements.txt failed "
        f"(pip exit code {_pip_result.returncode}); later imports may fail.",
        file=sys.stderr,
    )

# 3. Add to path
sys.path.append(os.path.abspath("LuxTTS"))
import numpy as np
import gradio as gr
import torch
from zipvoice.luxvoice import LuxTTS
# Init Model
# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Note: 2 threads on a 2-core CPU is the bottleneck
lux_tts = LuxTTS(
    "YatharthS/LuxTTS",
    device=device,
    threads=2,
)
# Sample rate (Hz) reported to Gradio together with the generated waveform.
_OUTPUT_SAMPLE_RATE = 48000


def infer(text, audio_prompt, rms, t_shift, num_steps, speed, return_smooth):
    """Synthesize ``text`` in the voice of the reference audio.

    Args:
        text: Text to synthesize. Empty, whitespace-only or ``None`` text is
            rejected before the model is touched.
        audio_prompt: Reference audio passed to ``lux_tts.encode_prompt``;
            ``None`` is rejected.
        rms: Forwarded to ``encode_prompt`` as its ``rms`` argument.
        t_shift: Forwarded to ``generate_speech`` as ``t_shift``.
        num_steps: Forwarded to ``generate_speech`` as ``num_steps`` after
            being coerced to ``int``.
        speed: Forwarded to ``generate_speech`` as ``speed``.
        return_smooth: Forwarded to ``generate_speech`` as ``return_smooth``.

    Returns:
        A ``(audio, status)`` pair. ``audio`` is ``(sample_rate, samples)``
        with ``samples`` an ``int16`` NumPy array, or ``None`` when the inputs
        were rejected. ``status`` is a Markdown message for the UI.
    """
    # Whitespace-only text carries nothing to synthesize, so treat it like
    # empty text instead of handing it to the model.
    if audio_prompt is None or not text or not text.strip():
        return None, "Please provide text and reference audio."

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments that happen mid-generation.
    start_time = time.perf_counter()

    # Encode reference
    encoded_prompt = lux_tts.encode_prompt(audio_prompt, rms=rms)

    # Generate speech
    final_wav = lux_tts.generate_speech(
        text,
        encoded_prompt,
        num_steps=int(num_steps),
        t_shift=t_shift,
        speed=speed,
        return_smooth=return_smooth,
    )

    # Only prompt encoding and generation are timed; the conversion below is not.
    duration = round(time.perf_counter() - start_time, 2)

    # Move to host memory, drop the leading axis, then convert to 16-bit PCM.
    # Clipping first keeps out-of-range samples from wrapping around in int16.
    final_wav = final_wav.cpu().squeeze(0).numpy()
    final_wav = (np.clip(final_wav, -1.0, 1.0) * 32767).astype(np.int16)

    stats_msg = f"✨ Generation complete in **{duration}s**."
    # NOTE(review): the sample rate is fixed regardless of return_smooth —
    # confirm the model emits the same rate in both modes.
    return (_OUTPUT_SAMPLE_RATE, final_wav), stats_msg
# Gradio UI
# Left column: inputs and generation settings. Right column: the synthesized
# audio and a status line. The button wires both columns to infer().
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ LuxTTS Voice Cloning")

    # Info Panel
    gr.Markdown(
        "\n"
        "> **Note:** Processing may feel slow as this instance uses a **2-core CPU** (lower specs than most modern phones).\n"
        ">\n"
        "> **Tip:** If you notice words are being **cut off** at the end, try **lowering the speed** further.\n"
    )

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Text to Synthesize",
                value="Hey, what's up? I'm feeling really great!",
            )
            # type="filepath": infer() receives the upload as a path string.
            input_audio = gr.Audio(label="Reference Audio (.wav)", type="filepath")

            with gr.Row():
                rms_val = gr.Number(value=0.01, label="RMS (Loudness)")
                t_shift_val = gr.Number(value=0.9, label="T-Shift")
                steps_val = gr.Slider(1, 10, value=4, step=1, label="Num Steps")

            with gr.Row():
                # Default speed set to 0.8
                speed_val = gr.Slider(
                    0.5,
                    2.0,
                    value=0.8,
                    step=0.1,
                    label="Speed (Lower = Longer/Clearer)",
                )
                smooth_val = gr.Checkbox(label="Return Smooth", value=False)

            btn = gr.Button("Generate Speech", variant="primary")

        with gr.Column():
            audio_out = gr.Audio(label="Result")
            status_text = gr.Markdown("Ready to generate...")

    # Input order must match infer()'s positional parameters; the two outputs
    # receive its (audio, status) return pair.
    btn.click(
        fn=infer,
        inputs=[
            input_text,
            input_audio,
            rms_val,
            t_shift_val,
            steps_val,
            speed_val,
            smooth_val,
        ],
        outputs=[audio_out, status_text],
    )

demo.launch()