TTS-Spaces-Arena

Running

App Files Files Community

TTS-Spaces-Arena / play_tts_space.py

Pendrokar

XTTS fixed

dc96c59 2 months ago

raw

history blame contribute delete

6.74 kB

	#!/usr/bin/env python3
	"""
	Command-line tool to synthesize and play TTS audio from a Hugging Face Space.

	Usage:
	python play_tts_space.py <space_url> [text]

	Arguments:
	space_url The Hugging Face Space URL (e.g., srinivasbilla/llasa-3b-tts)
	text Optional text to synthesize (default: "Hello world!")

	Example:
	python play_tts_space.py srinivasbilla/llasa-3b-tts "Hello world!"
	"""

	import os
	import shutil
	import subprocess
	import sys
	import tempfile
	from pathlib import Path

	from gradio_client import Client, file

	from test_overrides import _get_param_examples, _override_params, AVAILABLE_MODELS, HF_SPACES


	# Command-line players in order of preference. Each is paired with the file
	# suffixes it is known to decode; None means the player accepts many formats
	# and its exit status is trusted to report an unsupported file.
	_AUDIO_PLAYERS = (
	    ('aplay', ('.wav', '.au', '.voc')),
	    ('paplay', None),
	    ('ogg123', ('.ogg', '.oga')),
	    ('ffplay', None),
	    ('mpg123', ('.mp3',)),
	)

	# Every suffix claimed by a format-specific player above.
	_KNOWN_AUDIO_SUFFIXES = frozenset(
	    suffix
	    for _, formats in _AUDIO_PLAYERS
	    if formats is not None
	    for suffix in formats
	)


	def play_audio(audio_path: str):
	    """Play an audio file on Linux with the first player that succeeds.

	    Players are tried in the order aplay, paplay, ogg123, ffplay, mpg123.
	    A format-specific player is skipped when the file has a recognised
	    suffix that belongs to a different player (for example aplay is not
	    used for an .mp3 file); files with an unrecognised suffix are offered
	    to every installed player.

	    Args:
	        audio_path: Path of the audio file to play.

	    Returns:
	        True if a player ran and exited with status 0, otherwise False
	        (missing file, no usable player installed, or every player failed).
	    """
	    audio_path = Path(audio_path)

	    if not audio_path.exists():
	        print(f"Error: Audio file not found: {audio_path}")
	        return False

	    suffix = audio_path.suffix.lower()
	    attempted = False

	    for player, formats in _AUDIO_PLAYERS:
	        # Skip players that cannot decode this (recognised) format.
	        if formats is not None and suffix in _KNOWN_AUDIO_SUFFIXES and suffix not in formats:
	            continue
	        # shutil.which avoids depending on an external `which` binary.
	        if shutil.which(player) is None:
	            continue

	        command = [player]
	        if player == 'ffplay':
	            # ffplay requires -autoexit and -nodisp for non-interactive use
	            command += ['-autoexit', '-nodisp']
	        command.append(str(audio_path))

	        print(f"Playing audio using {player}...")
	        attempted = True
	        try:
	            completed = subprocess.run(command)
	        except OSError as exc:
	            # The binary vanished or is not executable; try the next player.
	            print(f"Warning: could not run {player}: {exc}")
	            continue

	        if completed.returncode == 0:
	            return True
	        # A failed player must not be reported as success; fall through.
	        print(f"Warning: {player} exited with status {completed.returncode}")

	    if attempted:
	        print("Error: Audio playback failed with every available player.")
	    else:
	        print("Error: No suitable audio player found.")
	        print("Please install one of: aplay, paplay, ogg123, ffplay, mpg123")
	    return False


	# File suffixes recognised as audio when scanning a prediction result.
	_AUDIO_SUFFIXES = ('.wav', '.mp3', '.ogg')


	def _find_audio_path(result):
	    """Return the first audio file path found in a prediction result, or None."""
	    if isinstance(result, str):
	        # A bare string result is taken to be the path itself.
	        return result
	    if isinstance(result, dict):
	        candidates = result.values()
	    elif isinstance(result, (list, tuple)):
	        candidates = result
	    else:
	        return None
	    for item in candidates:
	        if isinstance(item, str) and item.endswith(_AUDIO_SUFFIXES):
	            return item
	    return None


	def synthesize_and_play(space_url: str, text: str = "Hello world!"):
	    """
	    Synthesize text using a Hugging Face Space and play the audio.

	    The endpoint to call is taken from HF_SPACES[space_url]['function']; it
	    is matched against the Space's named endpoints first and then against
	    its unnamed endpoints (by function index).

	    Args:
	        space_url: The Hugging Face Space URL (e.g., username/space-name).
	            It must have an entry in HF_SPACES.
	        text: The text to synthesize.

	    Returns:
	        The result of play_audio() for the generated file, or False when
	        the Space is not configured, cannot be reached, does not expose the
	        configured endpoint, the prediction fails, or no audio file can be
	        found in the prediction result.
	    """
	    print(f"Connecting to Space: {space_url}")
	    print(f"Text to synthesize: '{text}'")

	    # Resolve the configuration before touching the network so a missing
	    # entry is reported as a readable error instead of a KeyError traceback.
	    try:
	        space_config = HF_SPACES[space_url]
	        function = space_config['function']
	    except KeyError:
	        print(f"Error: Space '{space_url}' has no usable entry in HF_SPACES")
	        return False

	    # Initialize client and fetch the API description
	    try:
	        client = Client(space_url, token=os.getenv('HF_TOKEN'))
	        endpoints = client.view_api(
	            all_endpoints=True,
	            print_info=False,
	            return_format='dict',
	        )
	    except Exception as e:
	        print(f"Error connecting to Space: {e}")
	        return False

	    named_endpoints = endpoints.get('named_endpoints') or {}
	    unnamed_endpoints = endpoints.get('unnamed_endpoints') or {}

	    api_name = None
	    fn_index = None

	    # Locate the configured function among the endpoints the Space exposes
	    if function in named_endpoints:
	        api_name = function
	        parameters = named_endpoints[api_name]['parameters']
	    elif str(function) in unnamed_endpoints:
	        # Unnamed endpoints are keyed by the stringified function index.
	        fn_index = function
	        parameters = unnamed_endpoints[str(fn_index)]['parameters']
	    else:
	        print("Error: Could not find a suitable API endpoint")
	        print(f"Configured function {function!r} is not exposed by the Space")
	        return False

	    # Get parameter examples
	    end_parameters = _get_param_examples(parameters)
	    print(f"Endpoint parameters: {end_parameters}")

	    # Apply any overrides
	    space_inputs = _override_params(end_parameters, space_url)

	    # Put the text into the inputs
	    text_set = False
	    if isinstance(space_inputs, dict):
	        # Keyword inputs: the configuration names the entry that holds the text.
	        if space_inputs and 'text_param_index' in space_config:
	            space_inputs[space_config['text_param_index']] = text
	            text_set = True
	    elif space_inputs:
	        # List input - set first element (usually text)
	        space_inputs[0] = text
	        text_set = True

	    if not text_set:
	        print("Warning: Could not determine which parameter to set for text input")

	    print(f"Final inputs: {space_inputs}")

	    # Address the endpoint the same way for keyword and positional inputs;
	    # an unnamed endpoint must be selected through fn_index.
	    if api_name is not None:
	        endpoint_kwargs = {'api_name': api_name}
	    else:
	        endpoint_kwargs = {'fn_index': fn_index}

	    # Make prediction
	    try:
	        if isinstance(space_inputs, dict):
	            result = client.predict(**space_inputs, **endpoint_kwargs)
	        else:
	            result = client.predict(*space_inputs, **endpoint_kwargs)
	    except Exception as e:
	        print(f"Error during prediction: {e}")
	        return False

	    print(f"Result: {result}")

	    # Extract audio file path from result
	    audio_path = _find_audio_path(result)

	    if not audio_path:
	        print("Error: Could not find audio file in result")
	        return False

	    print(f"Audio file: {audio_path}")

	    # Play the audio
	    return play_audio(audio_path)


	def main():
	    """Command-line entry point.

	    Reads the model key from the first argument and the optional text from
	    the second, resolves the key through AVAILABLE_MODELS, and exits with
	    status 0 when synthesize_and_play() succeeds and 1 otherwise. With no
	    arguments the module docstring is printed and the exit status is 1.
	    """
	    if len(sys.argv) < 2:
	        print(__doc__)
	        sys.exit(1)

	    model_key = sys.argv[1]
	    try:
	        space_url = AVAILABLE_MODELS[model_key]
	    except KeyError:
	        # Report an unknown model as a CLI error rather than a traceback.
	        print(f"Error: Unknown model '{model_key}'")
	        print("Available models: " + ", ".join(str(name) for name in AVAILABLE_MODELS))
	        sys.exit(1)

	    text = sys.argv[2] if len(sys.argv) > 2 else "Hello world!"

	    success = synthesize_and_play(space_url, text)
	    sys.exit(0 if success else 1)


	# Run the CLI only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()