Spaces:

SentiVue
/

pt-PT_TTS_Demo

Sleeping

App Files Files Community

pt-PT_TTS_Demo / app-divide-text.py

m-nagy

UI: update UI and add space info

e280b3f 3 months ago

raw

history blame contribute delete

4.64 kB

	import gradio as gr
	import requests
	import os
	from deployment_options import voice_id_2_name, defualt_values

	import uuid

	# Base URL of the remote TTS service; each request appends "/<voice id>" to it.
	ENDPOINT_URL = "https://sentivue-endpoint.hf.space/v1/tts"
	# Bearer token taken from the "endpoint_READ" environment variable
	# (None when the variable is not set).
	ENDPOINT_TOKEN = os.environ.get("endpoint_READ")

	# Startup diagnostics: show the target URL and whether a token is present,
	# without printing the token value itself.
	print(f"Public demo will call endpoint: {ENDPOINT_URL}")
	print("Token loaded: Yes" if ENDPOINT_TOKEN else "Token loaded: No")

	# Display names offered by the voice dropdown.
	voice_names = [*voice_id_2_name.values()]
	def generate_speech(text: str, voice_name: str):
	    """Synthesize ``text`` with the chosen voice via the remote TTS endpoint.

	    Posts the text as JSON to ``{ENDPOINT_URL}/{voice_id}``, streams the
	    response body into a uniquely named ``speech_<voice_id>_<id>.wav`` file in
	    the current working directory, and returns that file's path.

	    Args:
	        text: Text to synthesize. ``None``, empty, or whitespace-only input
	            is rejected.
	        voice_name: Display name of the voice; it must be one of the values
	            of ``voice_id_2_name``.

	    Returns:
	        A ``(audio_path, status_message)`` tuple. ``audio_path`` is ``None``
	        whenever no audio was produced, in which case the message describes
	        the problem. Failures are reported through this tuple rather than
	        raised.
	    """
	    # `not text` also covers None, which would otherwise crash on .strip().
	    if not text or not text.strip():
	        return None, "Please enter some text"

	    if not ENDPOINT_TOKEN:
	        return None, "Error: endpoint_READ token not found in environment"

	    output_path = None
	    try:
	        # The UI passes the display name; the endpoint URL needs the voice id.
	        voice_name_2_id = {name: vid for vid, name in voice_id_2_name.items()}
	        if voice_name not in voice_name_2_id:
	            return None, f"Error: unknown voice '{voice_name}'"
	        voice_id = voice_name_2_id[voice_name]

	        request_url = f"{ENDPOINT_URL}/{voice_id}"
	        payload = {"text": text}

	        print(f"Sending request to: {request_url}")
	        print(f"Payload: {payload}")

	        # requests applies no timeout by default, so an unresponsive endpoint
	        # would block this handler forever. The values are (connect, read)
	        # seconds; the read timeout is deliberately generous because
	        # synthesis may take a while -- TODO tune to the endpoint.
	        # The context manager releases the streamed connection on every path.
	        with requests.post(
	            request_url,
	            headers={
	                "Authorization": f"Bearer {ENDPOINT_TOKEN}",
	                "Content-Type": "application/json",
	            },
	            json=payload,
	            timeout=(10, 300),
	            stream=True,
	        ) as response:
	            response.raise_for_status()

	            # Saved as a regular file in the current directory (not a temp
	            # file); a truncated UUID keeps concurrent requests from
	            # overwriting each other's output.
	            generation_id = str(uuid.uuid4())[:15]
	            output_path = f"speech_{voice_id}_{generation_id}.wav"

	            with open(output_path, "wb") as f:
	                for chunk in response.iter_content(chunk_size=8192):
	                    if chunk:
	                        f.write(chunk)

	        return output_path, "Success!"

	    except requests.exceptions.RequestException as e:
	        error_msg = f"Error calling endpoint: {e}"
	    except Exception as e:
	        # Boundary handler for the UI callback: report instead of crashing.
	        error_msg = f"Unexpected error: {e}"

	    # Only reached on failure. Remove a partially written file so a truncated
	    # download is not left behind.
	    if output_path is not None:
	        try:
	            os.remove(output_path)
	        except OSError:
	            # Best-effort cleanup; the file may never have been created.
	            pass

	    print(error_msg)
	    return None, error_msg


	# ── Gradio Interface ────────────────────────────────────────────────────────

	# Build the demo page and start serving it.
	# NOTE(review): the indentation of this section was lost in this copy of the
	# file, so the exact nesting of the layout containers below (which widgets
	# belong to which Row/Column) must be confirmed against the real source.
	with gr.Blocks(title="pt-PT TTS - Demo") as demo:
	# Introductory Markdown text for the page.
	gr.Markdown("""
	# pt-PT Text-to-Speech Demo

	High-quality, natural-sounding speech synthesis for European Portuguese (pt-PT) with human-like prosody and accurate number pronunciation.

	### Voices
	- Current: André
	- More voices: Coming soon, with extended emotion and prosody control via tags

	""")

	with gr.Row():
	# Column with scale=3; it starts with the text input.
	with gr.Column(scale=3):
	# Multi-line box (4 lines) for the text to synthesize.
	text_input = gr.Textbox(
	label="Text to speak",
	placeholder="Write something here...",
	lines=4
	)

	# Voice selector: choices are the display names in voice_names, and the
	# initial selection comes from defualt_values['voice_name'].
	voice_dropdown = gr.Dropdown(
	choices=voice_names,
	value=defualt_values['voice_name'],
	label="Voice"
	)

	submit_btn = gr.Button("Generate Speech", variant="primary")

	# Column with scale=2; it starts with the audio player.
	with gr.Column(scale=2):
	# Player for the generated audio; autoplay is disabled.
	audio_output = gr.Audio(
	label="Generated Speech",
	autoplay=False
	)

	# Read-only box that shows the status message returned by generate_speech.
	status_text = gr.Textbox(label="Status", interactive=False)

	# Clicking the button calls generate_speech(text, voice_name); its two return
	# values fill the audio player and the status box, in that order.
	submit_btn.click(
	fn=generate_speech,
	inputs=[text_input, voice_dropdown],
	outputs=[audio_output, status_text]
	)

	# Static Markdown text with model, API and fine-tuning information.
	gr.Markdown("""

	### Info
	- Language: pt-PT (European Portuguese only)
	- Voice: André
	- Model size: ~3B parameters
	- Architecture: LLM-based TTS backbone
	- Training data: +11k hours of curated pt-PT speech
	- Inference: Streaming audio generation via remote endpoint

	### API
	- Status: Coming soon

	### Fine-tuning
	- Status: Coming soon
	- Requirements: ~1.5 hours of recorded speech to fine-tune a custom voice

	The system is designed for natural prosody, accent fidelity, and long-form synthesis in European Portuguese.
	""")

	# Enable Gradio's request queue, then launch the app.
	demo.queue().launch()