Spaces:

SentiVue
/

pt-PT_TTS_Demo

Sleeping

App Files Files Community

pt-PT_TTS_Demo / app.py

m-nagy

fix: pass original voice name to generate_speech instead of display name

b0728cf 2 months ago

raw

history blame contribute delete

7.73 kB

	import gradio as gr
	import requests
	import os
	from deployment_options import voice_id_2_name, defualt_values, voice_name_2_note

	import uuid

	# Base address of the inference Space; the TTS route lives underneath it.
	API_URL = "https://sentivue-endpoint.hf.space"
	ENDPOINT_URL = f"{API_URL}/v1/tts"
	# Bearer token for the endpoint, read from the `endpoint_READ` environment variable.
	ENDPOINT_TOKEN = os.environ.get("endpoint_READ")

	# Startup diagnostics: which URL is targeted and whether a token is present
	# (the token value itself is never printed).
	print("Public demo will call endpoint: " + ENDPOINT_URL)
	print("Token loaded: " + ("Yes" if ENDPOINT_TOKEN else "No"))

	# Plain voice names known to the deployment.
	voice_names = [*voice_id_2_name.values()]

	# Maps the dropdown label "<voice name> (<note>)" back to the plain voice name.
	voice_names_display_dict = dict(
	    (f"{name} ({voice_name_2_note[name]})", name) for name in voice_names
	)
	voice_names_display_default = defualt_values["voice_name"]

	def generate_speech(text: str, voice_name: str):
	    """
	    Synthesize `text` with the selected voice by calling the TTS endpoint.

	    The body returned by ``POST {ENDPOINT_URL}/{voice_id}`` is streamed into a
	    uniquely named ``speech_<voice_id>_<id>.wav`` file in the current working
	    directory.

	    Args:
	        text: Text to synthesize. Missing, empty or whitespace-only text is
	            rejected without contacting the endpoint.
	        voice_name: Voice name as found among the values of
	            ``voice_id_2_name`` (not the decorated dropdown label).

	    Returns:
	        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
	        of the written file on success and ``None`` on any failure, in which
	        case ``status_message`` describes the problem. The function does not
	        raise for request or I/O errors; they are reported via the message.
	    """
	    # `not text` also covers None, so a missing value produces a status
	    # message instead of an AttributeError on `.strip()`.
	    if not text or not text.strip():
	        return None, "Please enter some text"

	    if not ENDPOINT_TOKEN:
	        return None, "Error: endpoint_READ token not found in environment"

	    output_path = None
	    try:
	        # The endpoint is addressed by voice id, while callers pass the name.
	        voice_name_2_id = {name: vid for vid, name in voice_id_2_name.items()}
	        if voice_name not in voice_name_2_id:
	            error_msg = f"Error: unknown voice {voice_name!r}"
	            print(error_msg)
	            return None, error_msg
	        voice_id = voice_name_2_id[voice_name]

	        url = f"{ENDPOINT_URL}/{voice_id}"
	        payload = {"text": text}

	        print(f"Sending request to: {url}")
	        print(f"Payload: {payload}")

	        # The context manager guarantees the streamed connection is released
	        # even if writing the file fails part-way through.
	        with requests.post(
	            url,
	            headers={
	                "Authorization": f"Bearer {ENDPOINT_TOKEN}",
	                "Content-Type": "application/json",
	            },
	            json=payload,
	            # Bound only the connect phase so an unreachable host cannot hang
	            # the worker; the read phase stays unbounded, as in the original
	            # (presumably synthesis can take a while -- confirm before
	            # adding a read timeout).
	            timeout=(10, None),
	            stream=True,
	        ) as response:
	            response.raise_for_status()

	            # Unique file name in the current directory (not a temp file).
	            generation_id = str(uuid.uuid4())[:15]
	            output_path = f"speech_{voice_id}_{generation_id}.wav"

	            with open(output_path, "wb") as f:
	                for chunk in response.iter_content(chunk_size=8192):
	                    if chunk:
	                        f.write(chunk)

	        return output_path, "Success!"

	    except requests.exceptions.RequestException as e:
	        error_msg = f"Error calling endpoint: {str(e)}"
	    except Exception as e:
	        error_msg = f"Unexpected error: {str(e)}"

	    # Failure path: do not leave a truncated audio file behind.
	    if output_path is not None:
	        try:
	            os.remove(output_path)
	        except OSError:
	            pass  # file was never created or is already gone

	    print(error_msg)
	    return None, error_msg

	def check_readiness():
	    """
	    Poll ``{API_URL}/health`` and translate the answer into UI updates.

	    Returns:
	        A 3-tuple ``(button, status_message, timer)``:

	        * endpoint reports ``"ready"``: an enabled "Generate Speech" button,
	          the ready message, and an inactive timer (polling stops);
	        * endpoint answers but is not ready: a disabled button showing the
	          "2/2" progress message, and an active timer;
	        * the health call fails or does not return a JSON object: a disabled
	          button showing the "1/2" progress message, and an active timer.
	    """
	    health_url = f"{API_URL}/health"

	    try:
	        health = requests.get(
	            health_url,
	            headers={
	                "Authorization": f"Bearer {ENDPOINT_TOKEN}",
	            },
	            timeout=5,
	        )
	        ready = bool(health.json().get("ready"))
	    except Exception:
	        # /health didn't respond (or the reply was not a JSON object).
	        # `Exception` rather than a bare `except` so KeyboardInterrupt and
	        # SystemExit are not swallowed by the polling loop.
	        msg = "⏳ 1/2: Preparing our server, it takes around 2 minutes..."
	        print(health_url, msg)
	        return gr.Button(msg, interactive=False), msg, gr.Timer(active=True)  # CONTINUE

	    if ready:
	        msg = "✅ Ready"
	        print(health_url, msg)
	        return gr.Button("🔊 Generate Speech", interactive=True), msg, gr.Timer(active=False)  # STOP

	    msg = "🔄 2/2: Preparing our model, it takes a few seconds..."
	    print(health_url, msg)
	    return gr.Button(msg, interactive=False), msg, gr.Timer(active=True)  # CONTINUE


	# ── Gradio Interface ────────────────────────────────────────────────────────
	# Builds the demo page: bilingual (pt-PT / English) header and notes, a text
	# box and voice dropdown as inputs, a generate button, an audio player and a
	# status box as outputs, example texts and a footer. A timer polls
	# check_readiness() to enable the button, and both the button click and the
	# textbox submit event call generate_speech().

	with gr.Blocks(
	title="pt-PT TTS - Demo",
	# NOTE(review): the CSS comment inside the string says 110%, but `zoom: 1.2`
	# is 120% -- confirm which one is intended.
	css="""
	body {
	zoom: 1.2; /* 110% zoom */
	}
	"""
	) as demo:

	# Header Section: title and one-line description (Portuguese, then English)
	gr.Markdown(
	"""
	# 🎙️ Síntese de Voz em Português Europeu (pt-PT) — Public Preview
	Síntese de voz natural em português europeu (pt-PT), com prosódia fluida e pronúncia correta de números.
	<small>High-quality European Portuguese (pt-PT) speech synthesis with natural prosody and accurate number pronunciation.</small>
	"""
	)

	# Technical specifications (Portuguese list, English summary in <small>)
	gr.Markdown(
	"""
	### Especificações Técnicas
	- Tamanho do modelo: ~3B parâmetros
	- Arquitetura: Backbone de TTS baseado em LLM
	- Dados de Treino: +11k horas de voz pt-PT curada

	<small>
	Model Size: ~3B parameters \| Architecture: LLM-based TTS backbone \| Training Data: +11k hours of curated pt-PT speech
	</small>
	"""
	)

	# Browser recommendation note (Portuguese, then English)
	gr.Markdown(
	"""
	Nota: Para melhor desempenho e compatibilidade de áudio, recomendamos o uso do Google Chrome.
	<small>
	Note: For best audio performance and compatibility, we recommend using Google Chrome.
	</small>
	"""
	)



	# gr.Markdown("---")

	# Main Generation Interface
	# gr.Markdown("## Generate Speech")

	with gr.Row():
	# Left Column - Input Controls
	with gr.Column(scale=5):
	# Text to synthesize; passed as the first input to generate_speech
	text_input = gr.Textbox(
	label="📝 Text to Synthesize",
	placeholder="Enter Portuguese text here... (e.g., 'Olá! Este é um teste do sistema de síntese de voz.')",
	lines=6,
	max_lines=10,
	)

	with gr.Row():
	# Choices are (display label, voice name) pairs taken from
	# voice_names_display_dict; the initial value is the plain voice name
	# from defualt_values, which is also what generate_speech looks up.
	voice_dropdown = gr.Dropdown(
	choices=list(voice_names_display_dict.items()),
	value=voice_names_display_default,
	label="🎭 Voice Selection",
	info="More voices coming soon"
	)

	# Created disabled; check_readiness() returns an interactive button for
	# this output once the endpoint's /health reports ready.
	submit_btn = gr.Button(
	"🔊 Generate Speech",
	variant="primary",
	size="lg",
	interactive=False,
	)

	# Right Column - Output
	with gr.Column(scale=4):
	# type="filepath": generate_speech returns the path of the file it wrote
	audio_output = gr.Audio(
	label="🔊 Generated Audio",
	type="filepath",
	autoplay=False,
	)

	# Read-only box that shows the message returned by generate_speech or
	# check_readiness
	status_text = gr.Textbox(
	label="Status",
	interactive=False,
	)

	# Example Inputs: sample sentences that fill text_input
	gr.Markdown("### 💡 Example Texts")
	gr.Examples(
	examples=[
	["Olá! Bem-vindo ao sistema de síntese de voz em português europeu."],
	["A temperatura hoje está entre 5 e 9 graus Celsius."],
	["Lisboa é a capital de Portugal, fundada antes do ano 1200."]
	],
	inputs=text_input,
	)


	# Footer (Portuguese, then English)
	gr.Markdown(
	"""
	<div style="text-align: center">
	Criado com ❤️ pela SentiVue
	</div>
	<div style="text-align: center">
	Built with ❤️ by SentiVue
	</div>
	"""
	)

	# Readiness polling timer; value=1 is the tick interval (seconds, per the
	# Gradio Timer documentation).
	timer = gr.Timer(value=1)


	# demo.load(fn=wake_server, outputs=[submit_btn, status_text])
	# Each tick runs check_readiness(); its third return value updates the timer
	# itself, so polling stops once it returns gr.Timer(active=False).
	timer.tick(fn=check_readiness, outputs=[submit_btn, status_text, timer])

	# Event Handlers
	# Button click: synthesize the current text with the selected voice
	submit_btn.click(
	fn=generate_speech,
	inputs=[text_input, voice_dropdown],
	outputs=[audio_output, status_text],
	)

	# Submitting from the textbox triggers the same handler.
	# NOTE(review): unlike the button, this path is not disabled while the
	# endpoint is still warming up -- confirm that is intended.
	text_input.submit(
	fn=generate_speech,
	inputs=[text_input, voice_dropdown],
	outputs=[audio_output, status_text]
	)

	# Enable request queuing and start the app
	demo.queue().launch()