Soprano-RVC

Runtime error

App Files Files Community

Soprano-RVC / app.py

ekwek

Update app.py

46db302 verified about 1 month ago

raw

history blame

3.34 kB

	import gradio as gr
	import torch
	import numpy as np
	from soprano import SopranoTTS
	from scipy.io.wavfile import write as wav_write
	import tempfile
	import os
	import spaces

	# Run on the GPU when torch reports CUDA support, otherwise fall back to CPU.
	if torch.cuda.is_available():
	    DEVICE = "cuda"
	else:
	    DEVICE = "cpu"
	print(DEVICE)

	# Sample rate paired with every audio array handed to the UI and the WAV writer.
	SAMPLE_RATE = 32000

	# Build the TTS model a single time at import so all requests share it.
	model = SopranoTTS(
	    backend="auto", device=DEVICE, cache_size_mb=100, decoder_batch_size=1
	)


	@spaces.GPU
	def tts_stream(text, temperature, top_p, repetition_penalty, state):
	    """Synthesize ``text`` and stream the audio back chunk by chunk.

	    This is a generator: each yielded item is a pair
	    ``(audio, accumulated)`` matching the two outputs it is wired to.

	    Args:
	        text: Text to synthesize. ``None``, empty or whitespace-only text
	            produces a single ``(None, state)`` item and no synthesis.
	        temperature: Forwarded to ``model.infer_stream``.
	        top_p: Forwarded to ``model.infer_stream``.
	        repetition_penalty: Forwarded to ``model.infer_stream``.
	        state: Previously stored audio; only passed through unchanged when
	            there is nothing to synthesize.

	    Yields:
	        ``((SAMPLE_RATE, chunk), accumulated)`` where ``chunk`` is the newest
	        float32 array and ``accumulated`` is the concatenation of every
	        chunk produced so far in this call.
	    """
	    # The textbox value may arrive as None (e.g. from an API caller); treat
	    # that like empty input instead of failing on ``None.strip()``.
	    if text is None or not text.strip():
	        yield None, state
	        return

	    chunks = []
	    stream = model.infer_stream(
	        text,
	        chunk_size=20,
	        temperature=temperature,
	        top_p=top_p,
	        repetition_penalty=repetition_penalty,
	    )

	    for chunk in stream:
	        # Only tensor items carry audio here; anything else is skipped.
	        if not isinstance(chunk, torch.Tensor):
	            continue
	        audio_np = chunk.detach().cpu().numpy().astype(np.float32)
	        chunks.append(audio_np)
	        # Stream the new piece and refresh the stored full-length audio so a
	        # download taken at any point reflects everything generated so far.
	        yield (SAMPLE_RATE, audio_np), np.concatenate(chunks)


	def save_audio(state):
	    """Write the accumulated audio to a temporary ``.wav`` file.

	    Args:
	        state: Array of audio samples, or ``None`` when nothing has been
	            generated yet.

	    Returns:
	        Path of the newly written ``.wav`` file, or ``None`` when ``state``
	        is ``None`` or empty.

	    Raises:
	        Whatever the WAV writer raises; the temporary file is removed first
	        so a failed export does not leave an orphaned file behind.
	    """
	    if state is None or len(state) == 0:
	        return None

	    fd, path = tempfile.mkstemp(suffix=".wav")
	    # mkstemp hands back an open descriptor; the writer reopens by path.
	    os.close(fd)
	    try:
	        wav_write(path, SAMPLE_RATE, state)
	    except Exception:
	        # Best-effort cleanup: the original error is what the caller needs.
	        try:
	            os.remove(path)
	        except OSError:
	            pass
	        raise
	    return path


	with gr.Blocks() as demo:
	    # Carries the audio accumulated by the generator over to the download step.
	    state_audio = gr.State(None)

	    with gr.Row():
	        # First column: prompt, sampling controls and the generate button.
	        with gr.Column():
	            gr.Markdown("# Soprano Demo")

	            text_in = gr.Textbox(
	                label="Input Text", placeholder="Enter text to synthesize...", lines=4
	            )

	            with gr.Accordion("Advanced options", open=False):
	                temperature = gr.Slider(
	                    minimum=0.0, maximum=1.0, value=0.3, step=0.05, label="Temperature"
	                )
	                top_p = gr.Slider(
	                    minimum=0.0, maximum=1.0, value=0.95, step=0.01, label="Top-p"
	                )
	                repetition_penalty = gr.Slider(
	                    minimum=0.5,
	                    maximum=2.0,
	                    value=1.2,
	                    step=0.05,
	                    label="Repetition penalty",
	                )

	            gen_btn = gr.Button("Generate")

	        # Second column: streamed playback, download controls and usage tips.
	        with gr.Column():
	            audio_out = gr.Audio(label="Output Audio", autoplay=True, streaming=True)
	            download_btn = gr.Button("Download")
	            file_out = gr.File(label="Download file")

	            usage_tips = (
	                "Usage tips:\n\n"
	                "- Soprano works best when each sentence is between 2 and 15 seconds long.\n"
	                "- Although Soprano recognizes numbers and some special characters, it occasionally mispronounces them. Best results can be achieved by converting these into their phonetic form. (1+1 -> one plus one, etc)\n"
	                "- If Soprano produces unsatisfactory results, you can easily regenerate it for a new, potentially better generation. You may also change the sampling settings for more varied results."
	            )
	            gr.Markdown(usage_tips)

	    # "Generate" streams audio into the player and updates the stored audio.
	    gen_btn.click(
	        fn=tts_stream,
	        inputs=[text_in, temperature, top_p, repetition_penalty, state_audio],
	        outputs=[audio_out, state_audio],
	    )

	    # "Download" writes the stored audio to a file and offers it to the user.
	    download_btn.click(fn=save_audio, inputs=[state_audio], outputs=[file_out])

	demo.queue()
	demo.launch()