LuxTTS

Running

App Files Files Community

LuxTTS / app.py

YatharthS

Update app.py

ed16b98 verified about 1 month ago

raw

history blame contribute delete

3.18 kB

	import os
	import sys
	import subprocess
	import time # Added for tracking duration

	# 1. Fetch the LuxTTS sources on first start. check=True raises
	#    CalledProcessError when git exits non-zero, so the remaining steps never
	#    run against a missing checkout.
	if not os.path.exists("LuxTTS"):
	    subprocess.run(
	        ["git", "clone", "https://github.com/ysharma3501/LuxTTS.git"],
	        check=True,
	    )

	# 2. Install the checkout's requirements with the running interpreter's pip.
	#    This stays best-effort (no exception is raised), but a non-zero exit
	#    status is reported on stderr instead of being dropped silently.
	pip_result = subprocess.run(
	    [sys.executable, "-m", "pip", "install", "-r", "LuxTTS/requirements.txt"]
	)
	if pip_result.returncode != 0:
	    print(
	        f"warning: pip install exited with status {pip_result.returncode}",
	        file=sys.stderr,
	    )

	# 3. Append the checkout's absolute path to the import search path.
	sys.path.append(os.path.abspath("LuxTTS"))

	import numpy as np
	import gradio as gr
	import torch
	from zipvoice.luxvoice import LuxTTS

	# Model setup: run on CUDA when torch reports it as available, else on CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Author's note: 2 threads on a 2-core CPU is the bottleneck.
	lux_tts = LuxTTS("YatharthS/LuxTTS", device=device, threads=2)

	def infer(text, audio_prompt, rms, t_shift, num_steps, speed, return_smooth):
	    """Synthesize ``text`` in the voice of ``audio_prompt`` using ``lux_tts``.

	    Args:
	        text: Text to synthesize. Empty, ``None`` or whitespace-only text is
	            rejected.
	        audio_prompt: Reference audio handed to ``lux_tts.encode_prompt``;
	            ``None`` is rejected.
	        rms: Forwarded to ``encode_prompt`` as ``rms``.
	        t_shift: Forwarded to ``generate_speech`` as ``t_shift``.
	        num_steps: Forwarded to ``generate_speech`` as ``num_steps`` after
	            conversion to ``int``.
	        speed: Forwarded to ``generate_speech`` as ``speed``.
	        return_smooth: Forwarded to ``generate_speech`` as ``return_smooth``.

	    Returns:
	        ``((48000, samples), message)`` where ``samples`` is an int16 NumPy
	        array and ``message`` reports the generation time, or
	        ``(None, message)`` when the text or the reference audio is missing.
	    """
	    # Whitespace-only text carries nothing to synthesize, so it is rejected
	    # together with empty text and a missing reference clip.
	    if audio_prompt is None or not text or not text.strip():
	        return None, "Please provide text and reference audio."

	    # perf_counter is monotonic, so the measured duration cannot be skewed
	    # by system clock adjustments the way time.time() can.
	    start_time = time.perf_counter()

	    # Encode reference
	    encoded_prompt = lux_tts.encode_prompt(audio_prompt, rms=rms)

	    # Generate speech
	    final_wav = lux_tts.generate_speech(
	        text,
	        encoded_prompt,
	        num_steps=int(num_steps),
	        t_shift=t_shift,
	        speed=speed,
	        return_smooth=return_smooth,
	    )

	    duration = round(time.perf_counter() - start_time, 2)

	    # Bring the result to host memory, drop the leading axis, then clamp to
	    # [-1, 1] and scale to 16-bit integer samples.
	    final_wav = final_wav.cpu().squeeze(0).numpy()
	    final_wav = (np.clip(final_wav, -1.0, 1.0) * 32767).astype(np.int16)

	    stats_msg = f"✨ Generation complete in {duration}s."
	    return (48000, final_wav), stats_msg

	# Gradio UI: one page with the controls in a left column and the result in a
	# right column.
	soft_theme = gr.themes.Soft()
	with gr.Blocks(theme=soft_theme) as demo:
	    gr.Markdown("# 🎙️ LuxTTS Voice Cloning")

	    # Info panel. The lines of the literal are kept exactly as they appear
	    # in the file so the Markdown text passed to Gradio is unchanged.
	    gr.Markdown(
	"""
	> Note: Processing may feel slow as this instance uses a 2-core CPU (lower specs than most modern phones).
	>
	> Tip: If you notice words are being cut off at the end, try lowering the speed further.
	"""
	    )

	    with gr.Row():
	        # Left column: text, reference clip, tuning controls and the trigger.
	        with gr.Column():
	            text_box = gr.Textbox(
	                label="Text to Synthesize",
	                value="Hey, what's up? I'm feeling really great!",
	            )
	            reference_audio = gr.Audio(
	                label="Reference Audio (.wav)",
	                type="filepath",
	            )

	            with gr.Row():
	                rms_input = gr.Number(label="RMS (Loudness)", value=0.01)
	                t_shift_input = gr.Number(label="T-Shift", value=0.9)
	                steps_slider = gr.Slider(
	                    minimum=1,
	                    maximum=10,
	                    value=4,
	                    step=1,
	                    label="Num Steps",
	                )

	            with gr.Row():
	                # Speed starts at 0.8.
	                speed_slider = gr.Slider(
	                    minimum=0.5,
	                    maximum=2.0,
	                    value=0.8,
	                    step=0.1,
	                    label="Speed (Lower = Longer/Clearer)",
	                )
	                smooth_toggle = gr.Checkbox(value=False, label="Return Smooth")

	            generate_button = gr.Button("Generate Speech", variant="primary")

	        # Right column: synthesized audio and the status line.
	        with gr.Column():
	            result_audio = gr.Audio(label="Result")
	            status_markdown = gr.Markdown("Ready to generate...")

	    # The input order must match infer's positional parameters; the two
	    # outputs receive infer's (audio, message) return value.
	    infer_inputs = [
	        text_box,
	        reference_audio,
	        rms_input,
	        t_shift_input,
	        steps_slider,
	        speed_slider,
	        smooth_toggle,
	    ]
	    infer_outputs = [result_audio, status_markdown]
	    generate_button.click(fn=infer, inputs=infer_inputs, outputs=infer_outputs)

	demo.launch()