"""PANINI LLM — multi-model language tutoring app (Gradio UI).

Pipeline: an LLM (via the Hugging Face Inference API) builds a lesson,
edge-tts produces a native-speaker reference, Whisper transcribes the
learner's recording, and phonemizer/espeak renders both target and
attempt as IPA so the LLM can give anatomical pronunciation feedback.
"""

import os
import asyncio
import edge_tts
import librosa   # NOTE(review): librosa/torch/numpy/pandas/re appear unused here — confirm before removing
import torch
import numpy as np
import pandas as pd
import re
import gradio as gr
from phonemizer import phonemize
from transformers import pipeline
from huggingface_hub import InferenceClient

# --- AUTHENTICATION ---
HF_TOKEN = os.getenv("HF_TOKEN")

# --- CONFIGURATION ---
# We use 3B to 9B models because they are the most stable on the free Inference API.
LLM_MODELS = {
    "Llama 3.2 3B (Fastest)": "meta-llama/Llama-3.2-3B-Instruct",
    "Qwen 2.5 7B (Most Accurate)": "Qwen/Qwen2.5-7B-Instruct",
    "Gemma 2 9B (Excellent English)": "google/gemma-2-9b-it",
}

# Per-language settings: BCP-47 code, espeak-ng phonemizer language id, edge-tts voice.
LANGUAGES = {
    "English (US)": {"code": "en-US", "ipa": "en-us", "voice": "en-US-ChristopherNeural"},
    "German": {"code": "de-DE", "ipa": "de", "voice": "de-DE-ConradNeural"},
    "French": {"code": "fr-FR", "ipa": "fr-fr", "voice": "fr-FR-HenriNeural"},
    "Spanish": {"code": "es-ES", "ipa": "es", "voice": "es-ES-AlvaroNeural"},
    "Chinese (Mandarin)": {"code": "zh-CN", "ipa": "cmn", "voice": "zh-CN-XiaoxiaoNeural"},
}

# Load ASR model (Whisper Tiny for CPU efficiency); device=-1 pins the pipeline to CPU.
print("Loading Whisper ASR...")
asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)


# --- FUNCTIONS ---
def get_llm_response(model_id, system_prompt, user_prompt):
    """Run one chat completion against the HF Inference API.

    Args:
        model_id: Hub repo id of the instruct model (a value of LLM_MODELS).
        system_prompt: system-role instruction for the model.
        user_prompt: user-role message.

    Returns:
        The assistant's reply text, or a human-readable error string
        (this function never raises — the UI renders whatever it returns).
    """
    # Fixed: Removed the 'provider' argument to prevent TypeError
    client = InferenceClient(model=model_id, token=HF_TOKEN)
    try:
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
        output = client.chat_completion(
            messages,
            max_tokens=500,
            stream=False,
        )
        return output.choices[0].message.content
    except Exception as e:  # boundary: surface any API failure as UI text
        err = str(e)
        if "503" in err:
            # 503 means the model container is cold-starting on HF's side.
            return "⏳ The model is currently loading on Hugging Face servers. Please wait 30 seconds and try again."
        return f"PANINI LLM Note: {err}"


def generate_curriculum(model_name, language, topic):
    """Build a short lesson plan for `topic` in `language` with the chosen LLM."""
    model_id = LLM_MODELS[model_name]
    # Reconstructed string literal (was split across lines in the mangled source).
    system_prompt = f"You are PANINI LLM, a world-class {language} teacher. Create a focused lesson plan."
    user_prompt = f"Topic: {topic}. Provide 5 useful words/phrases in {language} with English translations, then give one expert learning tip."
    return get_llm_response(model_id, system_prompt, user_prompt)


async def play_target_audio(text, lang_name):
    """Synthesize `text` with the language's edge-tts voice; return the mp3 path.

    Returns None when there is nothing to speak.
    """
    if not text:
        return None
    voice = LANGUAGES[lang_name]["voice"]
    output_path = "target.mp3"
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_path)
    return output_path


def analyze_speech(model_name, lang_name, target_text, audio_path):
    """Compare the learner's recording against the target phrase.

    Returns a 3-tuple matching the UI outputs:
        (ASR transcript, user IPA string, LLM coaching feedback).
    """
    if not audio_path or not target_text:
        return "Incomplete data.", "", "Please provide both text and recording."

    # 1. ASR Transcription
    asr_res = asr_pipe(audio_path)["text"].strip()

    # 2. Linguistic IPA Layer
    ipa_code = LANGUAGES[lang_name]["ipa"]
    try:
        # Requires espeak-ng installed via packages.txt
        target_ipa = phonemize(target_text, language=ipa_code, backend='espeak', strip=True)
        user_ipa = phonemize(asr_res, language=ipa_code, backend='espeak', strip=True)
    except Exception:  # narrowed from bare except: best-effort, IPA is optional
        target_ipa = "IPA Unavailable"
        user_ipa = "IPA Unavailable"

    # 3. LLM Anatomical Feedback
    model_id = LLM_MODELS[model_name]
    system_prompt = "You are a professional Speech-Language Pathologist. Compare the student's pronunciation to the target using IPA."
    user_prompt = (
        f"Target: '{target_text}' (IPA: /{target_ipa}/). "
        f"Student: '{asr_res}' (IPA: /{user_ipa}/). "
        f"Identify the primary phonetic error and give 1 specific anatomical tip (tongue/lip placement) in English."
    )
    feedback = get_llm_response(model_id, system_prompt, user_prompt)

    return asr_res, f"/{user_ipa}/", feedback


# --- UI DESIGN ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate"),
               css=".gradio-container {max-width: 950px !important}") as demo:
    # Reconstructed header (the original HTML string was split across lines;
    # its markup tags were lost in the mangled source — TODO confirm styling).
    gr.HTML(
        "<h1 style='text-align:center'>PANINI LLM</h1>"
        "<p style='text-align:center'>Intelligent Multi-Model Language Tutoring</p>"
    )

    with gr.Tab("Step 1: Curriculum Creation"):
        with gr.Row():
            llm_choice = gr.Dropdown(list(LLM_MODELS.keys()), label="Select AI Teacher (LLM)", value="Qwen 2.5 7B (Most Accurate)")
            lang_choice = gr.Dropdown(list(LANGUAGES.keys()), label="Language", value="English (US)")
        topic_input = gr.Textbox(label="Lesson Topic", placeholder="e.g., Ordering Food, Job Interview, Airport Travel")
        btn_gen = gr.Button("📚 Build My Lesson", variant="primary")
        curr_output = gr.Markdown("---")

    with gr.Tab("Step 2: Pronunciation Practice"):
        with gr.Row():
            target_word = gr.Textbox(label="Word/Phrase to Practice", placeholder="Copy a phrase from Step 1 here")
            btn_tts = gr.Button("🔊 Play Native AI", scale=0)
        audio_ref = gr.Audio(label="Teacher Reference", type="filepath")
        with gr.Row():
            audio_user = gr.Audio(label="Your Voice Recording", sources=["microphone"], type="filepath")
        btn_analyze = gr.Button("🚀 Analyze My Accent", variant="primary")
        with gr.Row():
            out_transcript = gr.Textbox(label="AI Heard")
            out_ipa = gr.Textbox(label="Your Phonetics (IPA)")
        out_feedback = gr.Markdown("### Feedback from the AI Coach")

    # Event Wireup
    btn_gen.click(generate_curriculum, inputs=[llm_choice, lang_choice, topic_input], outputs=curr_output)
    # play_target_audio is a coroutine; bridge it into Gradio's sync callback via asyncio.run.
    btn_tts.click(fn=lambda t, l: asyncio.run(play_target_audio(t, l)),
                  inputs=[target_word, lang_choice], outputs=audio_ref)
    btn_analyze.click(analyze_speech,
                      inputs=[llm_choice, lang_choice, target_word, audio_user],
                      outputs=[out_transcript, out_ipa, out_feedback])

# Run app (guarded so importing this module does not start the server)
if __name__ == "__main__":
    demo.launch()