Spaces:

SentiVue
/

pt-PT_TTS_Demo

Sleeping

App Files Files Community

pt-PT_TTS_Demo / app-struture-elements.py

m-nagy

UI: update UI and add space info

e280b3f about 2 months ago

raw

history blame contribute delete

7.25 kB

	import gradio as gr
	import requests
	import os
	from deployment_options import voice_id_2_name, defualt_values

	import uuid

	# Base URL of the private TTS service; generate_speech appends the voice id.
	ENDPOINT_URL = "https://sentivue-endpoint.hf.space/v1/tts"
	# Bearer token for the service, read from the `endpoint_READ` environment
	# variable (None when the variable is unset).
	ENDPOINT_TOKEN = os.environ.get("endpoint_READ")

	# Start-up diagnostics: report only whether a token exists, never its value.
	print(f"Public demo will call endpoint: {ENDPOINT_URL}")
	print(f"Token loaded: {'Yes' if ENDPOINT_TOKEN else 'No'}")

	# Display names of the configured voices, in the mapping's iteration order.
	voice_names = [*voice_id_2_name.values()]
	def generate_speech(text: str, voice_name: str):
	    """Synthesize ``text`` with the chosen voice via the private TTS endpoint.

	    The audio returned by the endpoint is streamed into a uniquely named
	    ``speech_<voice_id>_<id>.wav`` file in the current working directory.

	    Args:
	        text: Text to synthesize. Empty, whitespace-only or missing text is
	            rejected without calling the endpoint.
	        voice_name: Display name of the voice; it is translated back to a
	            voice id through ``voice_id_2_name``.

	    Returns:
	        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
	        of the written WAV file on success and ``None`` on any failure.
	        Failures are reported through ``status_message`` instead of being
	        raised.
	    """
	    # `not text` also covers None, which would otherwise crash on .strip().
	    if not text or not text.strip():
	        return None, "Please enter some text"

	    if not ENDPOINT_TOKEN:
	        return None, "Error: endpoint_READ token not found in environment"

	    # (connect, read) timeouts in seconds. Without a timeout a stalled
	    # endpoint would block this worker forever; the read timeout is kept
	    # generous because synthesis can take a while.
	    request_timeout = (10, 300)

	    try:
	        # The UI hands us the display name; the endpoint expects the id.
	        voice_name_2_id = {name: vid for vid, name in voice_id_2_name.items()}
	        if voice_name not in voice_name_2_id:
	            return None, f"Error: unknown voice: {voice_name!r}"
	        voice_id = voice_name_2_id[voice_name]

	        url = f"{ENDPOINT_URL}/{voice_id}"
	        payload = {"text": text}

	        print(f"Sending request to: {url}")
	        print(f"Payload: {payload}")

	        # Unique file name so concurrent generations do not overwrite each other.
	        generation_id = str(uuid.uuid4())[:15]
	        output_path = f"speech_{voice_id}_{generation_id}.wav"

	        # The context manager releases the streamed connection on every path.
	        with requests.post(
	            url,
	            headers={
	                "Authorization": f"Bearer {ENDPOINT_TOKEN}",
	                "Content-Type": "application/json",
	            },
	            json=payload,
	            timeout=request_timeout,
	            stream=True,
	        ) as response:
	            response.raise_for_status()
	            _write_stream_to_file(response, output_path)

	        return output_path, "Success!"

	    except requests.exceptions.RequestException as e:
	        error_msg = f"Error calling endpoint: {str(e)}"
	        print(error_msg)
	        return None, error_msg
	    except Exception as e:
	        # UI boundary: turn anything unexpected into a status message.
	        error_msg = f"Unexpected error: {str(e)}"
	        print(error_msg)
	        return None, error_msg


	def _write_stream_to_file(response, output_path):
	    """Write the streamed response body to ``output_path``, removing the file if the write fails."""
	    try:
	        with open(output_path, "wb") as f:
	            for chunk in response.iter_content(chunk_size=8192):
	                if chunk:
	                    f.write(chunk)
	    except Exception:
	        # Do not leave a truncated WAV behind; cleanup is best-effort and the
	        # original error is what the caller needs to see.
	        try:
	            os.remove(output_path)
	        except OSError:
	            pass
	        raise


	# ── Gradio Interface ────────────────────────────────────────────────────────

	# Builds the demo page: header and model specs, an input/output row, example
	# texts, three informational cards, a footer, and the event wiring.
	# NOTE(review): indentation is flattened in this view, so the exact nesting
	# of the Row/Column contexts below must be confirmed against the real file.
	# NOTE(review): the CSS comment says "110% zoom" but `zoom: 1.2` is 120%.
	with gr.Blocks(
	title="pt-PT TTS - Demo",
	css="""
	body {
	zoom: 1.2; /* 110% zoom */
	}
	"""
	) as demo:

	# Header Section
	gr.Markdown(
	"""
	# 🎙️ European Portuguese Text-to-Speech

	High-quality, natural-sounding speech synthesis for pt-PT with human-like prosody and accurate number pronunciation.
	"""
	)

	# Model Information Card
	# (Disabled variant below wrapped the same specs in a collapsed accordion.)
	# with gr.Accordion("📋 Model Information", open=False):
	# gr.Markdown(
	# """
	# ### Technical Specifications
	# - Model Size: ~3B parameters
	# - Architecture: LLM-based TTS backbone
	# - Training Data: +11k hours of curated pt-PT speech
	# """
	# )
	gr.Markdown(
	"""
	### Technical Specifications
	- Model Size: ~3B parameters
	- Architecture: LLM-based TTS backbone
	- Training Data: +11k hours of curated pt-PT speech
	"""
	)


	# gr.Markdown("---")

	# Main Generation Interface
	# gr.Markdown("## Generate Speech")

	with gr.Row():
	# Left Column - Input Controls
	with gr.Column(scale=5):
	# First input passed to generate_speech.
	text_input = gr.Textbox(
	label="📝 Text to Synthesize",
	placeholder="Enter Portuguese text here... (e.g., 'Olá! Este é um teste do sistema de síntese de voz.')",
	lines=6,
	max_lines=10,
	)

	with gr.Row():
	# Choices are the display names from voice_id_2_name; the selected
	# name is the second input passed to generate_speech.
	voice_dropdown = gr.Dropdown(
	choices=voice_names,
	value=defualt_values['voice_name'],
	label="🎭 Voice Selection",
	info="More voices coming soon"
	)

	submit_btn = gr.Button(
	"🎵 Generate Speech",
	variant="primary",
	size="lg"
	)

	# Right Column - Output
	with gr.Column(scale=4):
	# type="filepath" matches generate_speech, which returns the path of
	# the WAV file it wrote (or None on failure).
	audio_output = gr.Audio(
	label="🔊 Generated Audio",
	type="filepath",
	autoplay=False,
	)

	# Read-only box showing the status message returned by generate_speech.
	status_text = gr.Textbox(
	label="Status",
	interactive=False,
	)

	# Example Inputs
	# Sample sentences bound to the text box only; the voice is not part of an example.
	gr.Markdown("### 💡 Example Texts")
	gr.Examples(
	examples=[
	["Olá! Bem-vindo ao sistema de síntese de voz em português europeu."],
	["A temperatura hoje está entre 15 e 20 graus Celsius."],
	["Lisboa é a capital de Portugal, fundada antes do ano 1200."]
	],
	inputs=text_input,
	)

	gr.Markdown("---")

	# Information Section
	with gr.Row():
	with gr.Column():
	# NOTE(review): this voice list is static text, while the dropdown
	# choices come from voice_id_2_name — keep the two in sync.
	gr.Markdown(
	"""
	### 🎤 Available Voices

	Current Voice:
	- André (Default)

	Coming Soon:
	- Additional voices
	- Extended emotion control
	- Prosody control via tags
	"""
	)

	with gr.Column():
	gr.Markdown(
	"""
	### 🔌 API Access

	Status: Coming soon

	The API will allow programmatic access to the TTS system with full voice control and streaming support.
	"""
	)

	with gr.Column():
	gr.Markdown(
	"""
	### 🎨 Fine-tuning

	Status: Coming soon

	Requirements:
	- ~1.5 hours of recorded speech
	- Create custom voice clones
	- Maintain natural prosody
	"""
	)

	# Footer
	gr.Markdown(
	"""
	<div style="text-align: center">
	Built with ❤️ for European Portuguese • Powered by advanced LLM-based TTS
	</div>
	"""
	)

	# Event Handlers
	# The button's click event and the text box's submit event are wired to the
	# same handler with identical inputs and outputs.
	submit_btn.click(
	fn=generate_speech,
	inputs=[text_input, voice_dropdown],
	outputs=[audio_output, status_text],
	)

	text_input.submit(
	fn=generate_speech,
	inputs=[text_input, voice_dropdown],
	outputs=[audio_output, status_text]
	)

	# Enable Gradio's request queue and start the app.
	demo.queue().launch()