Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Command-line tool to synthesize and play TTS audio from a Hugging Face Space. | |
| Usage: | |
| python play_tts_space.py <space_url> [text] | |
| Arguments: | |
| space_url The Hugging Face Space URL (e.g., srinivasbilla/llasa-3b-tts) | |
| text Optional text to synthesize (default: "Hello world!") | |
| Example: | |
| python play_tts_space.py srinivasbilla/llasa-3b-tts "Hello world!" | |
| """ | |
| import os | |
| import sys | |
| import subprocess | |
| import tempfile | |
| from pathlib import Path | |
| from gradio_client import Client, file | |
| from test_overrides import _get_param_examples, _override_params, AVAILABLE_MODELS, HF_SPACES | |
def play_audio(audio_path: str):
    """Play an audio file on Linux with the first command-line player that works.

    Candidate players are looked up on ``PATH`` and tried in order. A player
    only counts as successful when it exits with status 0; otherwise the next
    candidate is tried, so one broken or format-incompatible player does not
    mask a working one.

    Args:
        audio_path: Path of the audio file to play.

    Returns:
        True if some player exited successfully, False if the file does not
        exist, no candidate player is installed, or every player failed.
    """
    import shutil  # local import so the block does not depend on file-level edits

    audio_path = Path(audio_path)
    if not audio_path.exists():
        print(f"Error: Audio file not found: {audio_path}")
        return False

    # Default order of preference (used for .wav and unknown suffixes).
    default_players = ['aplay', 'paplay', 'ogg123', 'ffplay', 'mpg123']
    # aplay does not decode compressed audio and would emit noise for these
    # formats, so restrict compressed files to players that can decode them.
    players_by_suffix = {
        '.mp3': ['mpg123', 'ffplay'],
        '.ogg': ['ogg123', 'paplay', 'ffplay'],
    }
    players = players_by_suffix.get(audio_path.suffix.lower(), default_players)

    found_player = False
    for player in players:
        executable = shutil.which(player)
        if executable is None:
            continue
        found_player = True

        print(f"Playing audio using {player}...")
        command = [executable]
        if player == 'ffplay':
            # ffplay requires -autoexit and -nodisp for non-interactive use
            command += ['-autoexit', '-nodisp']
        command.append(str(audio_path))

        try:
            completed = subprocess.run(command)
        except OSError as exc:
            # e.g. the binary vanished or is not executable; try the next one.
            print(f"Warning: could not run {player}: {exc}")
            continue

        if completed.returncode == 0:
            return True
        print(f"Warning: {player} exited with status {completed.returncode}")

    if found_player:
        print("Error: None of the installed audio players could play the file.")
    else:
        print("Error: No suitable audio player found.")
        print(f"Please install one of: {', '.join(players)}")
    return False
def _extract_audio_path(result):
    """Return the first audio file path found in a prediction result, or None."""
    audio_suffixes = ('.wav', '.mp3', '.ogg')

    if isinstance(result, str):
        # A bare string is taken to be the path itself, whatever its suffix.
        return result
    if isinstance(result, dict):
        candidates = result.values()
    elif isinstance(result, (list, tuple)):
        candidates = result
    else:
        return None

    for candidate in candidates:
        if isinstance(candidate, str) and candidate.endswith(audio_suffixes):
            return candidate
    return None


def synthesize_and_play(space_url: str, text: str = "Hello world!"):
    """
    Synthesize text using a Hugging Face Space and play the audio.

    The endpoint to call and the parameter that receives the text are taken
    from the ``HF_SPACES`` entry for ``space_url`` (keys ``'function'`` and
    ``'text_param_index'``).

    Args:
        space_url: The Hugging Face Space URL (e.g., username/space-name);
            it must have an entry in ``HF_SPACES``.
        text: The text to synthesize

    Returns:
        The result of ``play_audio`` on success; False if the space has no
        ``HF_SPACES`` entry, the configured endpoint is not exposed by the
        Space, the prediction fails, or no audio file is found in the result.
    """
    print(f"Connecting to Space: {space_url}")
    print(f"Text to synthesize: '{text}'")

    # Fail with a readable message before any network traffic instead of
    # raising a bare KeyError halfway through.
    try:
        space_config = HF_SPACES[space_url]
    except KeyError:
        print(f"Error: Space '{space_url}' has no entry in HF_SPACES")
        return False

    # Initialize client
    client = Client(space_url, token=os.getenv('HF_TOKEN'))

    # Get API endpoints
    endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')

    # The configured 'function' is used as an endpoint name when the Space
    # exposes named endpoints, otherwise as an index into the unnamed ones.
    api_name = None
    fn_index = None
    if endpoints.get('named_endpoints'):
        api_name = space_config['function']
    elif endpoints.get('unnamed_endpoints'):
        fn_index = space_config['function']

    # Get endpoint parameters, tolerating a configured endpoint that the
    # Space does not actually expose.
    if api_name:
        endpoint_info = endpoints['named_endpoints'].get(api_name)
    elif fn_index is not None:
        endpoint_info = endpoints['unnamed_endpoints'].get(str(fn_index))
    else:
        endpoint_info = None
    if endpoint_info is None:
        print("Error: Could not find a suitable API endpoint")
        return False
    parameters = endpoint_info['parameters']

    # Get parameter examples
    end_parameters = _get_param_examples(parameters)
    print(f"Endpoint parameters: {end_parameters}")

    # Apply any overrides
    space_inputs = _override_params(end_parameters, space_url)

    # Set the text input
    text_set = False
    if space_inputs:
        if isinstance(space_inputs, dict):
            # NOTE(review): 'text_param_index' is used as a dict key here, so
            # it presumably holds the parameter name -- confirm in HF_SPACES.
            space_inputs[space_config['text_param_index']] = text
        else:
            # List input - set first element (usually text)
            space_inputs[0] = text
        text_set = True
    if not text_set:
        print("Warning: Could not determine which parameter to set for text input")
    print(f"Final inputs: {space_inputs}")

    # Address the endpoint the same way for keyword and positional inputs, so
    # an unnamed endpoint is still selected when the inputs are a dict.
    endpoint_kwargs = {'api_name': api_name} if api_name else {'fn_index': fn_index}

    # Make prediction
    try:
        if isinstance(space_inputs, dict):
            result = client.predict(**space_inputs, **endpoint_kwargs)
        else:
            result = client.predict(*space_inputs, **endpoint_kwargs)
    except Exception as e:
        # Top-level boundary: report any client/Space failure and give up.
        print(f"Error during prediction: {e}")
        return False
    print(f"Result: {result}")

    # Extract audio file path from result
    audio_path = _extract_audio_path(result)
    if not audio_path:
        print("Error: Could not find audio file in result")
        return False
    print(f"Audio file: {audio_path}")

    # Play the audio
    return play_audio(audio_path)
def main():
    """Command-line entry point.

    Reads the model/space argument and optional text from ``sys.argv``, runs
    ``synthesize_and_play`` and exits with status 0 on success, 1 otherwise.
    With no arguments, prints the module usage text and exits with status 1.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)

    requested = sys.argv[1]
    try:
        # Known model keys are translated to their space URL.
        space_url = AVAILABLE_MODELS[requested]
    except KeyError:
        # The usage text documents passing a space URL directly, so an
        # argument that is not a known model key is used as the URL itself.
        space_url = requested

    text = sys.argv[2] if len(sys.argv) > 2 else "Hello world!"
    success = synthesize_and_play(space_url, text)
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()