Spaces:

Canadies
/

edge-tts

Sleeping

App Files Files Community

edge-tts / app.py

Canadies

update

e3a073e 11 months ago

raw

history blame contribute delete

9.67 kB

	import gradio as gr
	import edge_tts
	import asyncio
	import tempfile
	import os
	import json

	# Global variable to track the currently selected voice.
	# Normally holds the voice's position in the list loaded from voices.json
	# (not its row in the sorted table); None means no voice is selected.
	selected_voice_index = None

	async def update_voices_to_json():
	    """Fetch the Edge TTS voice list and cache it in ``voices.json``.

	    The current table selection is cleared afterwards, because an index
	    into the previous list says nothing about the refreshed one.

	    Returns:
	        A status message for display in the UI.
	    """
	    global selected_voice_index

	    available = await edge_tts.list_voices()
	    with open("voices.json", "w") as handle:
	        json.dump(available, handle)

	    # Reset selected voice when updating
	    selected_voice_index = None
	    return "Voices updated successfully to voices.json"

	async def get_voices():
	    """Load ``voices.json`` and group voice labels by locale.

	    Each label has the form ``"<ShortName> - <DisplayName>"``. Voices
	    without a ``Locale`` entry are filed under ``"Unknown"``.

	    Returns:
	        A dict mapping locale to an alphabetically sorted list of labels.
	    """
	    with open("voices.json", "r") as handle:
	        catalog = json.load(handle)

	    grouped = {}
	    for entry in catalog:
	        label = f"{entry.get('ShortName')} - {entry.get('DisplayName')}"
	        grouped.setdefault(entry.get("Locale", "Unknown"), []).append(label)

	    # Sort the labels inside every locale bucket
	    for labels in grouped.values():
	        labels.sort()

	    return grouped

	async def text_to_speech(text, voice, rate, pitch):
	    """Synthesize *text* with Edge TTS and save it to a temporary MP3 file.

	    Args:
	        text: The text to speak. ``None``, empty, or whitespace-only text
	            is rejected.
	        voice: A label of the form ``"<ShortName> - <DisplayName>"``; only
	            the part before ``" - "`` is passed to Edge TTS.
	        rate: Speech-rate adjustment in percent (sent as e.g. ``"+10%"``).
	        pitch: Pitch adjustment in Hz (sent as e.g. ``"-5Hz"``).

	    Returns:
	        ``(path, None)`` on success, where *path* is the generated MP3
	        file, or ``(None, message)`` when the input fails validation.

	    Raises:
	        Whatever ``edge_tts`` raises while synthesizing; the temporary
	        file is removed before the exception propagates.
	    """
	    if not text or not text.strip():
	        return None, "Please enter text to convert."
	    if not voice:
	        return None, "Please select a voice."

	    voice_short_name = voice.split(" - ")[0]
	    # Slider values can arrive as floats (e.g. 5.0), which the "d" format
	    # code rejects with ValueError, so normalize to int first.
	    rate_str = f"{int(round(rate)):+d}%"
	    pitch_str = f"{int(round(pitch)):+d}Hz"
	    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

	    # Only reserve a unique path here; the handle is closed before edge-tts
	    # writes to it so the file is not held open twice (Edge TTS outputs mp3).
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
	        tmp_path = tmp_file.name

	    try:
	        await communicate.save(tmp_path)
	    except BaseException:
	        # delete=False means nobody else cleans up: do not leave an orphaned
	        # (empty or partial) file behind when synthesis fails or is cancelled.
	        try:
	            os.remove(tmp_path)
	        except OSError:
	            pass
	        raise

	    return tmp_path, None

	async def tts_interface(text, voice, rate, pitch):
	    """Gradio callback: run text-to-speech and surface any validation message.

	    Returns a pair for the audio output and the warning output. When
	    ``text_to_speech`` reports a message it is handed to ``gr.Warning`` and
	    that call's result becomes the second element; otherwise it is ``None``.
	    """
	    audio_path, message = await text_to_speech(text, voice, rate, pitch)
	    if not message:
	        return audio_path, None
	    return audio_path, gr.Warning(message)

	async def get_voices_table():
	    """Build the contents of the voices table from ``voices.json``.

	    Columns are ``"Select"`` followed by the alphabetically sorted union of
	    all keys found in any voice entry. Rows are ordered by
	    ``(Locale, DisplayName)``, with a missing value treated as an empty
	    string; ties keep their order from the file.

	    Returns:
	        A tuple ``(headers, rows, original_indices)`` where ``rows[r]`` is
	        the list of cell values for table row ``r`` and
	        ``original_indices[r]`` is the position of that voice in the list
	        stored in ``voices.json``.
	    """
	    with open("voices.json", "r") as f:
	        voices = json.load(f)

	    # Entries may not all share the same keys, so take the union.
	    columns = sorted({key for voice in voices for key in voice})
	    headers = ["Select"] + columns

	    def sort_key(index):
	        # Sort on the voice's own fields rather than on table column
	        # positions: a positional fallback would pick an arbitrary column
	        # (or run off the end of a short row) when a field is absent.
	        voice = voices[index]
	        return (voice.get("Locale", ""), voice.get("DisplayName", ""))

	    sorted_indices = sorted(range(len(voices)), key=sort_key)

	    sorted_voice_data = []
	    for index in sorted_indices:
	        voice = voices[index]
	        # First cell marks whether this voice is the current selection.
	        row = [create_select_button(index == selected_voice_index)]
	        for key in columns:
	            value = voice.get(key, "")
	            # Nested structures are shown as their JSON text.
	            if isinstance(value, (dict, list)):
	                value = json.dumps(value)
	            row.append(value)
	        sorted_voice_data.append(row)

	    return (headers, sorted_voice_data, sorted_indices)

	def create_select_button(is_selected):
	    """Return the text for a row's "Select" cell.

	    A truthy *is_selected* yields the checked label, anything else the
	    plain one.
	    """
	    return "✓ Selected" if is_selected else "Select"

	async def select_voice_from_table(evt: gr.SelectData):
	    """Handle voice selection from the table.

	    Resolves the clicked table row to a voice, records it as the current
	    selection, and returns updates for the language dropdown, the voice
	    dropdown, and the table (in that order).

	    Args:
	        evt: Gradio selection event; ``evt.index[0]`` is read as the
	            clicked row number.

	    Returns:
	        A 3-tuple of ``gr.update`` objects. If the row number does not
	        correspond to a table row, the dropdowns and the selection are left
	        unchanged and only the table is refreshed.
	    """
	    global selected_voice_index

	    row_index = evt.index[0]
	    with open("voices.json", "r") as f:
	        voices = json.load(f)

	    # The table is displayed sorted; sorted_indices[r] is the position in
	    # `voices` of the voice shown in row r. Using it (instead of sorting
	    # again here) guarantees the clicked row and the chosen voice agree.
	    headers, table_rows, sorted_indices = await get_voices_table()

	    if not 0 <= row_index < len(sorted_indices):
	        # Nothing sensible to select for a row the table does not have.
	        return (
	            gr.update(),
	            gr.update(),
	            gr.update(headers=headers, value=table_rows),
	        )

	    selected_voice_index = sorted_indices[row_index]
	    selected_voice = voices[selected_voice_index]

	    # Use the same locale default and label format as get_voices() so the
	    # values set below are actual entries of the two dropdowns.
	    locale = selected_voice.get("Locale", "Unknown")
	    voice_full_name = f"{selected_voice.get('ShortName')} - {selected_voice.get('DisplayName')}"

	    # Get all voices for the selected language
	    voices_by_language = await get_voices()
	    voice_choices = voices_by_language.get(locale, [])

	    # Rebuild the table so the "Select" column reflects the new selection
	    headers, table_rows, _ = await get_voices_table()

	    # Return updates for both dropdowns and the table
	    return (
	        gr.update(value=locale),
	        gr.update(value=voice_full_name, choices=[""] + voice_choices),
	        gr.update(headers=headers, value=table_rows),
	    )

	async def update_voices_handler():
	    """Gradio callback: refresh ``voices.json`` and rebuild the voices table.

	    Returns the status message followed by an update carrying the table's
	    new headers and rows.
	    """
	    status = await update_voices_to_json()
	    headers, rows, _ = await get_voices_table()
	    return status, gr.update(headers=headers, value=rows)

	async def filter_voices_by_language(language):
	    """Gradio callback: offer only the voices of the chosen language.

	    The returned update always starts the choices with an empty entry; a
	    language with no known voices yields just that empty entry.
	    """
	    grouped = await get_voices()
	    options = [""]
	    if language in grouped:
	        options = options + grouped[language]
	    return gr.update(choices=options)

	# Build the Gradio Blocks UI (text input, language/voice dropdowns, rate and
	# pitch sliders, audio output, voices table) and wire up its event handlers.
	# Returns the Blocks object; launching is left to the caller.
	# NOTE(review): indentation was lost in this copy of the file, so the nesting
	# of the Row/Column contexts below cannot be read off this text — restore it
	# from the original source before running.
	async def create_demo():
	# Voice data is read from voices.json once, while the UI is being built.
	voices_by_language = await get_voices()
	languages = sorted(list(voices_by_language.keys()))
	# Tuple of (headers, rows, original indices); only the first two are used here.
	voices_table_data = await get_voices_table()

	with gr.Blocks(analytics_enabled=False) as demo:
	gr.Markdown("# 🎙️ Edge TTS Text-to-Speech")

	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown("## Text-to-Speech with Microsoft Edge TTS")
	gr.Markdown("""
	Convert text to speech using Microsoft Edge TTS.
	Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
	""")

	# Button that refreshes voices.json, plus a Markdown slot for its status message
	with gr.Row():
	update_btn = gr.Button("🔄 Update Voice List")
	update_status = gr.Markdown("")

	with gr.Row():
	with gr.Column():
	text_input = gr.Textbox(label="Input Text", lines=5)

	# Language and Voice selection
	# The voice dropdown starts with only an empty choice; it is filled by the
	# language-change handler or by a click on the voices table.
	language_dropdown = gr.Dropdown(choices=[""] + languages, label="Select Language", value="")
	voice_dropdown = gr.Dropdown(choices=[""], label="Select Voice", value="", allow_custom_value=True)

	# Connect language selection to voice filtering
	language_dropdown.change(
	fn=filter_voices_by_language,
	inputs=language_dropdown,
	outputs=voice_dropdown
	)

	# Slider values are passed to tts_interface as the rate (%) and pitch (Hz) arguments
	rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1)
	pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)

	generate_btn = gr.Button("Generate Speech", variant="primary")

	audio_output = gr.Audio(label="Generated Audio", type="filepath")
	# Hidden Markdown component that receives the second value returned by tts_interface
	warning_md = gr.Markdown(label="Warning", visible=False)

	# Generate speech from the current text, voice, rate and pitch
	generate_btn.click(
	fn=tts_interface,
	inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
	outputs=[audio_output, warning_md]
	)

	gr.Markdown("## Available Voices - Click any row to select a voice")
	# Read-only table initialised with the headers and rows computed above
	voices_table = gr.Dataframe(
	headers=voices_table_data[0],
	value=voices_table_data[1],
	label="Available Voices",
	interactive=False,
	wrap=True
	)

	# Set up the click handler for the voices table
	# The handler updates both dropdowns and redraws the table itself.
	voices_table.select(
	fn=select_voice_from_table,
	outputs=[language_dropdown, voice_dropdown, voices_table]
	)

	# Update both the status message and the voices table when update button is clicked
	update_btn.click(
	fn=update_voices_handler,
	outputs=[update_status, voices_table]
	)


	return demo

	async def main():
	    """Build the UI, enable its request queue, and launch the app."""
	    app = await create_demo()
	    # Queue with a default concurrency limit of 5 per event
	    app.queue(default_concurrency_limit=5)
	    app.launch(show_api=False)

	# Script entry point: when the file is executed directly, run the async
	# main() to completion on a new event loop.
	if __name__ == "__main__":
	asyncio.run(main())