import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
import json

# Index (into the list stored in voices.json) of the voice currently marked as
# selected in the voices table, or None when no voice is selected.
# Set by select_voice_from_table, reset by update_voices_to_json, and read by
# get_voices_table when it renders the "Select" column.
selected_voice_index = None

async def update_voices_to_json():
    """Fetch the voice list from edge-tts and store it in voices.json.

    The current table selection is cleared afterwards, because it is an index
    into the previous contents of the file.

    Returns:
        A status message confirming the update.
    """
    global selected_voice_index

    fetched = await edge_tts.list_voices()
    with open("voices.json", "w") as out_file:
        json.dump(fetched, out_file)

    # Any previously selected index refers to the old list; drop it.
    selected_voice_index = None
    return "Voices updated successfully to voices.json"

async def get_voices():
    """Load voices.json and group dropdown labels by locale.

    Returns:
        A dict mapping each locale string to a sorted list of
        "<ShortName> - <DisplayName>" labels. Voices without a "Locale" key
        are grouped under "Unknown". Locales appear in first-seen order.
    """
    with open("voices.json", "r") as handle:
        entries = json.load(handle)

    grouped = {}
    for entry in entries:
        label = f"{entry.get('ShortName')} - {entry.get('DisplayName')}"
        grouped.setdefault(entry.get("Locale", "Unknown"), []).append(label)

    # Alphabetical order inside each locale group.
    for labels in grouped.values():
        labels.sort()

    return grouped

async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* with edge-tts and save the audio to a temporary mp3.

    Args:
        text: Text to speak. None, empty, or whitespace-only text is rejected.
        voice: Dropdown label of the form "<ShortName> - <display name>"; only
            the part before the first " - " is passed to edge-tts.
        rate: Speech-rate adjustment in percent (0 keeps the default).
        pitch: Pitch adjustment in Hz (0 keeps the default).

    Returns:
        A ``(path, warning)`` tuple: ``(mp3_path, None)`` on success, or
        ``(None, message)`` when the input is rejected.

    Raises:
        Whatever ``edge_tts.Communicate.save`` raises; the temporary file is
        removed before the exception propagates.
    """
    # Guard against None as well as blank text (a cleared textbox may send either).
    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0]
    # The "+d" format code rejects floats, so normalise slider values to int.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a unique .mp3 path (Edge TTS actually outputs mp3 format) and
    # close our handle before edge-tts opens the same path for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # Do not leave an empty or partial file behind on failure/cancellation.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return tmp_path, None

async def tts_interface(text, voice, rate, pitch):
    """Gradio click handler that wraps text_to_speech.

    Returns:
        A ``(audio, warning_output)`` pair. When text_to_speech reports a
        warning message, the second element is the result of calling
        ``gr.Warning`` with that message; otherwise it is None.
    """
    audio, warning = await text_to_speech(text, voice, rate, pitch)
    warning_output = gr.Warning(warning) if warning else None
    return audio, warning_output

async def get_voices_table():
    """Build the headers and rows for the voices table from voices.json.

    Returns:
        A ``(headers, rows, order)`` tuple where

        * ``headers`` is "Select" followed by every key found in any voice,
          sorted alphabetically;
        * ``rows`` holds one list per voice (select label first, then one cell
          per header; dict/list values are JSON-encoded, missing keys become
          ""), sorted by the "Locale" and "DisplayName" columns;
        * ``order[k]`` is the index in voices.json of the voice shown in
          ``rows[k]``.
    """
    global selected_voice_index

    with open("voices.json", "r") as handle:
        voices = json.load(handle)

    # Union of keys across all voices, in a stable alphabetical order.
    columns = sorted({key for voice in voices for key in voice})
    headers = ["Select"] + columns

    rows = []
    for position, voice in enumerate(voices):
        cells = [create_select_button(position == selected_voice_index)]
        for column in columns:
            cell = voice.get(column, "")
            # Nested structures are shown as their JSON text.
            if isinstance(cell, (dict, list)):
                cell = json.dumps(cell)
            cells.append(cell)
        rows.append(cells)

    # Columns used as sort keys; fall back to fixed positions if absent.
    locale_col = headers.index("Locale") if "Locale" in headers else 1
    name_col = headers.index("DisplayName") if "DisplayName" in headers else 2

    # Stable sort of the original positions by (locale, name) cell values.
    order = sorted(
        range(len(rows)),
        key=lambda idx: (rows[idx][locale_col], rows[idx][name_col]),
    )
    ordered_rows = [rows[idx] for idx in order]

    return (headers, ordered_rows, order)

def create_select_button(is_selected):
    """Return the text for a row's "Select" cell.

    A truthy *is_selected* yields the checked label, anything else the plain
    "Select" label.
    """
    return "✓ Selected" if is_selected else "Select"

async def select_voice_from_table(evt: gr.SelectData):
    """Handle a click on the voices table and select the clicked voice.

    The clicked row is mapped back to its entry in voices.json through the
    row order returned by get_voices_table, so the voice that gets selected is
    always the one displayed in that row.

    Args:
        evt: Gradio selection event; ``evt.index[0]`` is the clicked row.

    Returns:
        Three updates, for the language dropdown, the voice dropdown and the
        voices table. If the clicked row does not exist in the current voice
        list, three no-op updates are returned and nothing changes.
    """
    global selected_voice_index

    row_index = evt.index[0]
    with open("voices.json", "r") as f:
        voices = json.load(f)

    # sorted_indices[k] is the voices.json index of the voice in table row k.
    _, _, sorted_indices = await get_voices_table()

    if not 0 <= row_index < len(sorted_indices):
        # Click on a row that is not in the current list: leave the UI as is
        # rather than indexing out of range.
        return gr.update(), gr.update(), gr.update()

    selected_voice_index = sorted_indices[row_index]
    selected_voice = voices[selected_voice_index]

    # get_voices groups voices without a locale under "Unknown"; use the same
    # default so the language dropdown and the choices lookup agree with it.
    locale = selected_voice.get("Locale", "Unknown")
    short_name = selected_voice.get("ShortName", "")

    # Get all voices for the selected language
    voices_by_language = await get_voices()
    voice_choices = voices_by_language.get(locale, [])

    # Prefer the exact label get_voices produced for this voice so that the
    # dropdown value matches one of its choices; fall back to building one.
    fallback_label = f"{short_name} - {selected_voice.get('DisplayName', '')}"
    voice_full_name = next(
        (label for label in voice_choices if label.split(" - ")[0] == short_name),
        fallback_label,
    )

    # Re-render the table so the "Select" column reflects the new selection.
    updated_table = await get_voices_table()

    # Return updates for both dropdowns and the table
    return (
        gr.update(value=locale),
        gr.update(value=voice_full_name, choices=[""] + voice_choices),
        gr.update(headers=updated_table[0], value=updated_table[1]),
    )

async def update_voices_handler():
    """Refresh voices.json and rebuild the voices table.

    Returns:
        The status message from update_voices_to_json and a table update
        carrying the new headers and rows.
    """
    status_message = await update_voices_to_json()
    headers, rows, _ = await get_voices_table()
    return status_message, gr.update(headers=headers, value=rows)

async def filter_voices_by_language(language):
    """Return a dropdown update listing the voices of *language*.

    The choices always start with an empty entry; an unknown language yields
    only that empty entry.
    """
    voices_by_language = await get_voices()
    matching = voices_by_language.get(language, [])
    return gr.update(choices=[""] + matching)

async def create_demo():
    """Build the Gradio Blocks UI for the text-to-speech app.

    Reads voices.json (through get_voices and get_voices_table) to populate
    the language dropdown and the voices table, creates the input/output
    components, and wires the event handlers.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # Initial data for the language dropdown and the voices table.
    voices_by_language = await get_voices()
    languages = sorted(list(voices_by_language.keys()))
    voices_table_data = await get_voices_table()
    
    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown("# 🎙️ Edge TTS Text-to-Speech")
        
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## Text-to-Speech with Microsoft Edge TTS")
                gr.Markdown("""
                Convert text to speech using Microsoft Edge TTS. 
                Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
                """)
        
        # Button that refreshes voices.json, plus a status line for its result.
        with gr.Row():
            update_btn = gr.Button("🔄 Update Voice List")
            update_status = gr.Markdown("")
        
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(label="Input Text", lines=5)
                
                # Language and Voice selection
                # The voice dropdown starts empty; its choices are filled in
                # once a language is picked (or a table row is clicked).
                language_dropdown = gr.Dropdown(choices=[""] + languages, label="Select Language", value="")
                voice_dropdown = gr.Dropdown(choices=[""], label="Select Voice", value="", allow_custom_value=True)
                
                # Connect language selection to voice filtering
                language_dropdown.change(
                    fn=filter_voices_by_language,
                    inputs=language_dropdown,
                    outputs=voice_dropdown
                )
                
                # Slider values are passed to tts_interface as rate (%) and pitch (Hz).
                rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1)
                pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
                
                generate_btn = gr.Button("Generate Speech", variant="primary")
                
                audio_output = gr.Audio(label="Generated Audio", type="filepath")
                # Hidden component that receives the second output of tts_interface.
                warning_md = gr.Markdown(label="Warning", visible=False)
                
                generate_btn.click(
                    fn=tts_interface,
                    inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
                    outputs=[audio_output, warning_md]
                )
        
        gr.Markdown("## Available Voices - Click any row to select a voice")
        # Read-only table: voices_table_data is (headers, rows, row order).
        voices_table = gr.Dataframe(
            headers=voices_table_data[0],
            value=voices_table_data[1],
            label="Available Voices",
            interactive=False,
            wrap=True
        )
        
        # Set up the click handler for the voices table
        # The handler updates both dropdowns and re-renders the table.
        voices_table.select(
            fn=select_voice_from_table,
            outputs=[language_dropdown, voice_dropdown, voices_table]
        )
        
        # Update both the status message and the voices table when update button is clicked
        update_btn.click(
            fn=update_voices_handler,
            outputs=[update_status, voices_table]
        )
        
    
    return demo

async def main():
    """Build the Gradio app, enable its request queue, and launch it."""
    app = await create_demo()
    app.queue(default_concurrency_limit=5)
    app.launch(show_api=False)

# Build and launch the app only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())