import asyncio
import os
import tempfile
import zipfile

import edge_tts
import gradio as gr
import requests
import soundfile as sf
from infer_rvc_python import BaseLoader

# Directory to store downloaded models (each imported model gets its own
# sub-folder named after the model). The path is relative, so it is resolved
# against the working directory of the process.
MODEL_DIR = "voice_models"
# exist_ok=True: do not fail when the folder is already present.
os.makedirs(MODEL_DIR, exist_ok=True)

# Initialize the RVC Loader (CPU mode for the Hugging Face Free Tier)
# This will automatically download required background models on its first run
# NOTE(review): hubert_path=None / rmvpe_path=None presumably tell the library
# to use its default weights — confirm against the infer_rvc_python docs.
print("Initializing RVC Engine...")
# Single converter instance used by the speech-generation handler below.
rvc_converter = BaseLoader(only_cpu=True, hubert_path=None, rmvpe_path=None)

# --- Helper Functions ---

def _is_safe_model_name(model_name):
    """Return True if *model_name* stays strictly below the folder it is joined onto."""
    if not isinstance(model_name, str) or "\0" in model_name:
        return False
    # A drive prefix (Windows) would make os.path.join discard the base folder.
    if os.path.splitdrive(model_name)[0]:
        return False
    parts = model_name.replace("\\", "/").split("/")
    # An empty first part means a leading separator, i.e. an absolute path;
    # a ".." part would climb out of the base folder.
    if parts[0] == "" or ".." in parts:
        return False
    # Names such as "." or "./" would resolve to the base folder itself.
    return any(part not in ("", ".") for part in parts)


def download_and_extract_model(zip_url, model_name):
    """Download a zip archive and extract it into the model's own folder.

    Args:
        zip_url: Direct link to the zip archive containing the RVC files.
        model_name: Name of the sub-folder of MODEL_DIR to extract into. It
            must be a relative name that cannot escape MODEL_DIR (no ".."
            parts, no absolute path, no drive prefix).

    Returns:
        A human-readable status string: a success message, a warning when the
        archive contained no ".pth" file, or an error message. Failures are
        reported through the returned string rather than raised.
    """
    if not zip_url or not model_name:
        return "Error: Please provide both a URL and a Model Name."

    # The name comes straight from a text box and is turned into a path, so
    # refuse anything that would write outside the models directory.
    if not _is_safe_model_name(model_name):
        return "Error: Model Name must be a relative name without '..' components."

    model_folder = os.path.join(MODEL_DIR, model_name)
    zip_path = os.path.join(model_folder, "model.zip")

    try:
        os.makedirs(model_folder, exist_ok=True)

        # timeout=(connect, read) keeps a stalled server from hanging the
        # request forever; the context manager releases the connection.
        with requests.get(zip_url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(model_folder)

        # Verify if .pth exists anywhere in the extracted tree
        pth_found = any(
            name.endswith(".pth")
            for _root, _dirs, files in os.walk(model_folder)
            for name in files
        )

        if pth_found:
            return f"Success! Model '{model_name}' downloaded and imported."
        return "Warning: Downloaded successfully, but no .pth file was found in the zip."

    except Exception as e:
        # UI boundary: report the failure as a status string instead of raising.
        return f"Error downloading model: {str(e)}"
    finally:
        # Best-effort cleanup so a failed download or a corrupt archive does
        # not leave model.zip behind; the file may not exist at all.
        try:
            os.remove(zip_path)
        except OSError:
            pass

async def generate_base_tts(text, output_path, voice="en-US-ChristopherNeural"):
    """Generate the base audio for *text* with Edge-TTS and save it to a file.

    Args:
        text: The text to synthesize.
        output_path: Path of the file the synthesized audio is saved to.
        voice: Edge-TTS voice name. Defaults to the neutral male voice
            ("en-US-ChristopherNeural") that was previously hard-coded.
    """
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_path)

def text_to_custom_speech(text, model_name, pitch_adjustment):
    """Synthesize *text*, then convert the voice with the named RVC model.

    Args:
        text: The text to speak.
        model_name: Name of a previously imported model (a sub-folder of
            MODEL_DIR).
        pitch_adjustment: Pitch level passed to the RVC converter as
            ``pitch_lvl``.

    Returns:
        A ``(audio_path, status)`` tuple. ``audio_path`` is the path of the
        converted audio file, or ``None`` on failure; ``status`` is a
        human-readable message. Failures are reported through the status
        string rather than raised.
    """
    if not text:
        return None, "Error: Please enter some text."

    # Check the name before building a path from it: os.path.join would raise
    # on None, and that would happen outside the try block below.
    if not model_name:
        return None, "Error: Please import a valid model first."

    model_folder = os.path.join(MODEL_DIR, model_name)

    # Only accept folders strictly inside MODEL_DIR, so a name such as "../x"
    # or an absolute path cannot point the converter at arbitrary directories.
    models_root = os.path.abspath(MODEL_DIR)
    resolved_folder = os.path.abspath(model_folder)
    inside_models_root = resolved_folder.startswith(models_root + os.sep)
    if not inside_models_root or not os.path.isdir(model_folder):
        return None, "Error: Please import a valid model first."

    base_audio_path = None
    try:
        # 1. Find the .pth and .index files for the requested model.
        #    When several files match, the last one encountered wins.
        pth_file = None
        index_file = None
        for root, _dirs, files in os.walk(model_folder):
            for file in files:
                if file.endswith(".pth"):
                    pth_file = os.path.join(root, file)
                if file.endswith(".index"):
                    index_file = os.path.join(root, file)

        if not pth_file:
            return None, "Error: No .pth file found for this model."

        # 2. Generate Base TTS into a per-request temporary file; a fixed file
        #    name in the working directory would be shared (and overwritten)
        #    by overlapping requests.
        fd, base_audio_path = tempfile.mkstemp(prefix="tts_base_", suffix=".wav")
        os.close(fd)
        asyncio.run(generate_base_tts(text, base_audio_path))

        # 3. Apply RVC Voice Conversion
        rvc_converter.apply_conf(
            tag=model_name,
            file_model=pth_file,
            pitch_algo="rmvpe",
            pitch_lvl=pitch_adjustment,
            file_index=index_file or "",
            index_influence=0.66,
            respiration_median_filtering=3,
            envelope_ratio=0.25,
            consonant_breath_protection=0.33
        )

        result_array, sample_rate = rvc_converter.generate_from_cache(
            audio_data=base_audio_path,
            tag=model_name,
        )

        # Save the final converted audio to its own unique file. It is not
        # deleted here because the caller still has to read it after return.
        fd, output_audio_path = tempfile.mkstemp(prefix="tts_rvc_", suffix=".wav")
        os.close(fd)
        sf.write(file=output_audio_path, samplerate=sample_rate, data=result_array)

        return output_audio_path, "Speech generated successfully with custom voice!"

    except Exception as e:
        # UI boundary: report the failure as a status string instead of raising.
        return None, f"Error generating speech: {str(e)}"
    finally:
        # The intermediate TTS audio is only needed during conversion.
        if base_audio_path is not None:
            try:
                os.remove(base_audio_path)
            except OSError:
                pass

# --- Gradio User Interface ---

# Two-column page: model import on the left, speech generation on the right.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🎙️ RVC Text-to-Speech Space")
    gr.Markdown("Import a custom voice model via a direct zip link, then generate text-to-speech using that voice.")

    with gr.Row():
        # Left column: download a model archive and register it under a name.
        with gr.Column():
            gr.Markdown("### 1. Import Voice Model")
            model_url_input = gr.Textbox(
                label="Model Zip URL (e.g., HuggingFace resolve link)",
                placeholder="https://huggingface.co/...",
            )
            model_name_input = gr.Textbox(
                label="Model Name",
                placeholder="e.g., needlev2",
            )
            import_btn = gr.Button("Done (Import Model)", variant="primary")
            import_status = gr.Textbox(label="Import Status", interactive=False)

        # Right column: text, pitch, and the resulting audio plus status.
        with gr.Column():
            gr.Markdown("### 2. Text to Speech")
            text_input = gr.Textbox(
                label="Enter Text",
                lines=4,
                placeholder="Type what you want the voice to say here...",
            )
            pitch_slider = gr.Slider(
                minimum=-24,
                maximum=24,
                step=1,
                value=0,
                label="Pitch Adjustment (Set to +12 for Female voices, 0 or -12 for Male)",
            )
            generate_btn = gr.Button("Done (Generate Speech)", variant="primary")

            audio_output = gr.Audio(label="Generated Audio", type="filepath")
            generation_status = gr.Textbox(label="Status", interactive=False)

    # Event wiring: each button calls its handler with the listed inputs and
    # routes the return value(s) to the listed output component(s).
    import_btn.click(
        download_and_extract_model,
        [model_url_input, model_name_input],
        import_status,
    )
    generate_btn.click(
        text_to_custom_speech,
        [text_input, model_name_input, pitch_slider],
        [audio_output, generation_status],
    )

# Start the web server for the interface.
app.launch()