SevenLabs

Running

App Files Files Community

SevenLabs / app.py

Xlnk

Update app.py

504450b verified 17 days ago

raw

history blame contribute delete

2.9 kB

	import gradio as gr
	import torch
	import numpy as np
	from soprano import SopranoTTS
	from scipy.io.wavfile import write as wav_write
	import tempfile
	import os

	# Select the compute device once at import time: CUDA if torch can see a GPU, else CPU.
	if torch.cuda.is_available():
	    DEVICE = "cuda"
	else:
	    DEVICE = "cpu"
	print(f"Using device: {DEVICE}")

	# Sample rate handed to the audio player and to the WAV writer together with the waveform.
	SAMPLE_RATE = 32000

	# One shared model instance, built when the module is loaded and reused for every request.
	model = SopranoTTS(
	    backend="auto",  # original note: automatically chooses the best backend for the device
	    device=DEVICE,
	    cache_size_mb=100,  # original note: only relevant for CUDA
	    decoder_batch_size=1,
	)

	# No @spaces.GPU decorator here - it is not needed for CPU support
	def tts_stream(text, temperature, top_p, repetition_penalty, state):
	    """Synthesize speech for ``text`` and yield it for playback and for later download.

	    This is a generator that yields exactly one ``(audio, state)`` pair per call.

	    Args:
	        text: Text to synthesize. ``None``, empty, or whitespace-only input is
	            treated as "nothing to do".
	        temperature: Forwarded unchanged to ``model.infer``.
	        top_p: Forwarded unchanged to ``model.infer``.
	        repetition_penalty: Forwarded unchanged to ``model.infer``.
	        state: Previously stored waveform; passed through untouched when there
	            is no text to synthesize.

	    Yields:
	        ``(None, state)`` for blank input, otherwise
	        ``((SAMPLE_RATE, waveform), waveform)`` where ``waveform`` is a NumPy array.
	    """
	    # Guard against None as well as blank strings so a cleared textbox cannot
	    # crash the handler with an AttributeError on ``.strip()``.
	    if not text or not text.strip():
	        yield None, state
	        return

	    out = model.infer(
	        text,
	        temperature=temperature,
	        top_p=top_p,
	        repetition_penalty=repetition_penalty,
	    )

	    # Move the result to host memory and convert it to NumPy
	    # (assumes ``model.infer`` returns a torch tensor - TODO confirm).
	    audio_np = out.cpu().numpy()

	    # First element feeds the audio player, second is kept as state for download.
	    yield (SAMPLE_RATE, audio_np), audio_np

	def save_audio(state):
	    """Write the stored waveform to a temporary ``.wav`` file and return its path.

	    Args:
	        state: The waveform kept from the last generation, or ``None`` if
	            nothing has been generated yet.

	    Returns:
	        Path of the newly written WAV file, or ``None`` when ``state`` is
	        ``None`` or empty. The file is not deleted by this function.

	    Raises:
	        Whatever ``wav_write`` raises; the temporary file is removed first.
	    """
	    # ``len(...)`` rather than truthiness: a multi-element NumPy array has no
	    # unambiguous boolean value.
	    if state is None or len(state) == 0:
	        return None

	    # mkstemp returns an open descriptor; close it right away because the WAV
	    # writer opens the path itself.
	    fd, path = tempfile.mkstemp(suffix=".wav")
	    os.close(fd)

	    try:
	        wav_write(path, SAMPLE_RATE, state)
	    except Exception:
	        # Do not leave an orphaned temp file behind when the write fails.
	        if os.path.exists(path):
	            os.remove(path)
	        raise
	    return path

	# UI definition: one row with two columns - text input and sampling controls
	# in the first, audio output and download in the second.
	# NOTE(review): the nesting below was reconstructed from a source whose
	# indentation had been flattened - confirm it matches the intended layout.
	with gr.Blocks() as demo:
	    # Holds the most recently generated waveform so "Download" can save it.
	    state_audio = gr.State(None)

	    with gr.Row():
	        with gr.Column():
	            gr.Markdown("# SevenLabs")

	            text_in = gr.Textbox(
	                label="Input Text",
	                placeholder="Enter text to synthesize...",
	                value="SevenLabs is an extremely lightweight text to speech model designed to produce highly realistic speech at unprecedented speed.",
	                lines=4,
	            )

	            # Sampling parameters; passed straight through to tts_stream.
	            with gr.Accordion("Advanced options", open=False):
	                temperature = gr.Slider(
	                    0.0, 1.0, value=1, step=0.05, label="Temperature"
	                )
	                top_p = gr.Slider(
	                    0.0, 1.0, value=0.95, step=0.01, label="Top-p"
	                )
	                repetition_penalty = gr.Slider(
	                    1.0, 2.0, value=1.2, step=0.05, label="Repetition penalty"
	                )

	            gen_btn = gr.Button("Generate")

	        with gr.Column():
	            audio_out = gr.Audio(
	                label="Output Audio",
	                autoplay=True,
	                streaming=False,
	            )

	            download_btn = gr.Button("Download")
	            file_out = gr.File(label="Download file")

	            # Empty Markdown component kept from the original layout.
	            gr.Markdown()

	    # Generate: synthesize, play the result, and remember it in state_audio.
	    gen_btn.click(
	        fn=tts_stream,
	        inputs=[text_in, temperature, top_p, repetition_penalty, state_audio],
	        outputs=[audio_out, state_audio],
	    )

	    # Download: write the remembered waveform to a WAV file and offer it.
	    download_btn.click(
	        fn=save_audio,
	        inputs=[state_audio],
	        outputs=[file_out],
	    )

	demo.queue()
	demo.launch()