import os

import gradio as gr
from deepgram import DeepgramClient, PrerecordedOptions, SpeakOptions
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- Configuration ---
# 1. API key: set DEEPGRAM_API_KEY in your environment variables.
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY", "YOUR_DEEPGRAM_KEY_HERE")

# 2. Model config
REPO_ID = "Kezovic/iris-q4gguf-v2"
FILENAME = "llama-3.2-1b-instruct.Q4_K_M.gguf"
CONTEXT_WINDOW = 4096
MAX_NEW_TOKENS = 512
TEMPERATURE = 0.7

# --- Initialize Deepgram ---
if DEEPGRAM_API_KEY == "YOUR_DEEPGRAM_KEY_HERE":
    print("WARNING: Please set your DEEPGRAM_API_KEY.")

deepgram = DeepgramClient(DEEPGRAM_API_KEY)

# --- Model Loading ---
llm = None


def load_llm():
    """Downloads the GGUF model and initializes llama.cpp."""
    global llm
    print("Downloading LLM...")
    try:
        model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
        # n_threads=2 is a good fit for the free Hugging Face CPU tier.
        llm = Llama(
            model_path=model_path,
            n_ctx=CONTEXT_WINDOW,
            n_threads=2,
            verbose=False,
        )
        print("LLM loaded successfully!")
        return llm
    except Exception as e:
        print(f"Error loading model: {e}")
        return None


# Load the model on startup.
load_llm()


# --- 1. Speech-to-Text (Deepgram) ---
def transcribe_audio(audio_filepath):
    """Sends an audio file to Deepgram and returns the transcript."""
    if not audio_filepath:
        return ""
    try:
        # Read the recording into memory and hand Deepgram the raw bytes.
        with open(audio_filepath, "rb") as audio_file:
            buffer_data = audio_file.read()
        payload = {"buffer": buffer_data}
        options = PrerecordedOptions(
            smart_format=True,
            model="nova-2",
            language="en-US",
        )
        response = deepgram.listen.rest.v("1").transcribe_file(payload, options)
        return response.results.channels[0].alternatives[0].transcript
    except Exception as e:
        print(f"STT Error: {e}")
        return ""


# --- 2. Text-to-Speech (Deepgram) ---
def text_to_speech(text):
    """Sends text to Deepgram and returns the path to the synthesized audio file."""
    try:
        # linear16 audio in a WAV container, so the output file is a .wav.
        filename = "output_response.wav"
        options = SpeakOptions(
            model="aura-asteria-en",  # Other voices: aura-helios-en, etc.
            encoding="linear16",
            container="wav",
        )
        # Save the synthesized audio to a file.
        deepgram.speak.rest.v("1").save(filename, {"text": text}, options)
        return filename
    except Exception as e:
        print(f"TTS Error: {e}")
        return None


# --- 3. Main Pipeline ---
def process_conversation(audio_input):
    """
    1. Transcribe audio (STT)
    2. Query the LLM
    3. Synthesize speech (TTS)
    """
    if llm is None:
        return "Model not loaded.", None, "System Error: Model failed to load."
    # Step A: Transcribe
    user_text = transcribe_audio(audio_input)
    if not user_text:
        return "Could not hear audio.", None, ""
    print(f"User said: {user_text}")

    # Step B: LLM inference, using the prompt format this model expects.
    full_prompt = f"### Human: {user_text}\n### Assistant:"
    output = llm(
        prompt=full_prompt,
        max_tokens=MAX_NEW_TOKENS,
        temperature=TEMPERATURE,
        stop=["### Human:"],
        echo=False,
    )
    response_text = output["choices"][0]["text"].strip()
    print(f"LLM said: {response_text}")

    # Step C: Speak the response.
    output_audio_path = text_to_speech(response_text)

    # Return: transcription (display), audio (playback), LLM text (display).
    return user_text, output_audio_path, response_text


# --- Gradio UI ---
with gr.Blocks(title=f"Voice Chat with {FILENAME}") as demo:
    gr.Markdown(f"## 🗣️ Deepgram Voice Chat with {FILENAME}")

    with gr.Row():
        # Input column
        with gr.Column():
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Speak Now",
            )
            submit_btn = gr.Button("Submit Audio", variant="primary")

        # Output column
        with gr.Column():
            audio_output = gr.Audio(
                label="Assistant Voice",
                autoplay=True,  # Automatically plays the response.
                interactive=False,
            )
            # Debugging / visuals
            user_transcript = gr.Textbox(label="You said:")
            ai_response_text = gr.Textbox(label="AI Response:")

    # Event listener
    submit_btn.click(
        fn=process_conversation,
        inputs=[audio_input],
        outputs=[user_transcript, audio_output, ai_response_text],
    )

if __name__ == "__main__":
    demo.launch()
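
# --- Quick start (a sketch; assumes the standard PyPI package names and that
# this file is saved as app.py — adjust to your setup) ---
#
#     pip install gradio llama-cpp-python huggingface_hub deepgram-sdk
#     export DEEPGRAM_API_KEY="..."
#     python app.py
#
# To smoke-test the pipeline without the UI, call it directly with a short
# recording (the "sample.wav" filename here is hypothetical):
#
#     transcript, audio_path, reply = process_conversation("sample.wav")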