"""GGUF voice assistant: Whisper STT -> llama.cpp chat -> Coqui TTS, served via Gradio."""

import os
import re
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import torch
from ddgs import DDGS
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from transformers import pipeline
from TTS.api import TTS

# --- Device Setup ---
# CPU-only deployment (e.g. a free Hugging Face Space); the Llama load below
# matches this with n_gpu_layers=0.
device = "cpu"

# --- 1. STT Setup (Whisper) ---
print("Loading Whisper...")
STT_MODEL_NAME = "openai/whisper-tiny.en"
stt_pipe = pipeline("automatic-speech-recognition", model=STT_MODEL_NAME, device=device)

# --- 2. LLM Setup (Llama.cpp) ---
print("Setting up Llama.cpp...")
# Token is only required if the model repo is private/gated; None is fine otherwise.
HF_API_TOKEN = os.getenv("HF_TOKEN")

print("Downloading gzsol/model_1b GGUF...")
model_path = hf_hub_download(
    repo_id="gzsol/model_1b",
    filename="model.gguf",
    token=HF_API_TOKEN,
)

# Diagnostics: confirm the download actually landed on disk before loading.
print(f"Model path: {model_path}")
print(f"File exists: {os.path.exists(model_path)}")
if os.path.exists(model_path):
    print(f"File size: {os.path.getsize(model_path)} bytes")
    print(f"File size: {os.path.getsize(model_path) / (1024**3):.2f} GiB")

print(f"Loading model from {model_path}...")
# n_ctx=2048 bounds the prompt (history + web context); chat_with_bot relies on this.
llm = Llama(model_path=model_path, n_gpu_layers=0, n_ctx=2048)
# --- 3. TTS Setup (Coqui) ---
print("Loading TTS...")
TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
tts_model = TTS(model_name=TTS_MODEL_NAME, progress_bar=False)


# --- Core Functions ---
def get_web_context(message):
    """Return a short web-search context string for *message*, or None.

    Only searches when the message contains a keyword that suggests the user
    wants fresh information; otherwise the network round-trip is skipped.
    Any search failure is swallowed (best-effort) so the chat flow never breaks.
    """
    search_keywords = [
        "current", "latest", "recent", "today", "now", "news",
        "weather", "price", "2024", "2025", "what is happening",
        "score", "match",
    ]
    if not any(keyword in message.lower() for keyword in search_keywords):
        return None

    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(message, max_results=3))
        if not results:
            print("No search results found")
            return None
        print(f"Found {len(results)} results:")
        context = "Current information from web search:\n"
        for i, result in enumerate(results):
            print(f"Result {i+1}: {result['title']}")
            print(f" Body: {result['body'][:100]}...")
            context += f"- {result['title']}: {result['body'][:200]}...\n"
        return context
    except Exception as e:
        print(f"Search error: {e}")
        return None


def _clean_llm_output(raw):
    """Strip quoting and chat-template artifacts the small GGUF model emits.

    The model sometimes leaks stray quotes, trailing punctuation, degenerate
    numbered-list loops, a hallucinated next "User:" turn, or raw
    [{'text': ..., 'type': 'text'}] fragments; each pass below removes one
    failure mode.
    """
    text = raw.strip()
    text = text.strip("'\"")
    text = text.rstrip(",:;")
    text = text.strip("'\"")
    # Remove degenerate repeated-number runs like "1.2.3.4..." (10+ repeats).
    text = re.sub(r"(\d+\.){10,}", "", text)
    # Cut off any hallucinated follow-up user turn.
    if "User:" in text:
        text = text.split("User:")[0].strip()
    # Strip leaked chat-template JSON wrappers.
    text = text.replace("[{", "").replace("}]", "")
    text = text.replace("'text':", "").replace('"text":', "")
    text = text.replace("'type': 'text'", "").replace('"type": "text"', "")
    if ", 'type'" in text or ', "type"' in text:
        text = text.split(", 'type'")[0].split(', "type"')[0].strip()
    # One final strip of any residue the passes above exposed.
    return text.strip("'\",:;")


def chat_with_bot(message, history):
    """Generate an assistant reply for *message*.

    Args:
        message: user utterance; blank/None input is a no-op.
        history: list of {"role", "content"} dicts (Gradio "messages" format),
            or None for a fresh conversation.

    Returns:
        (updated_history, response_text) — on error the error message is
        appended to history so the UI shows what happened.
    """
    if history is None:
        history = []
    if not message or not message.strip():
        return history, ""

    try:
        web_context = get_web_context(message=message)

        # Build conversation context from history.
        conversation = ""
        for h in history:
            role = "User" if h.get("role") == "user" else "Assistant"
            conversation += f"{role}: {h.get('content', '')}\n"

        if web_context:
            # Grounded answering: restrict the model to the search results
            # (history is deliberately omitted to keep the context focused).
            prompt = f"""Answer ONLY using this information:
{web_context}

Question: {message}

Answer:"""
            print("The web context has been added to the prompt")
        else:
            prompt = f"""You are a helpful assistant. Answer naturally and conversationally.

{conversation}User: {message}
Assistant:"""

        print("Generating response with Llama...")
        # stop sequences prevent the model from writing the user's next turn.
        response = llm(
            prompt,
            max_tokens=200,
            temperature=0.7,
            top_p=0.95,
            stop=["User:", "\nUser:"],
        )
        response_str = _clean_llm_output(response["choices"][0]["text"])

        if not response_str:
            response_str = "I received an empty response. Please try again."
            print("Warning: Empty response from LLM")

        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response_str})
        return history, response_str

    except Exception as e:
        import traceback

        error_trace = traceback.format_exc()
        print(f"LLM Error: {e}")
        print(f"Full traceback:\n{error_trace}")
        error_msg = f"Error generating response: {str(e) if str(e) else 'Unknown error occurred'}"
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": error_msg})
        return history, error_msg


def text_to_speech_from_chat(chat_response):
    """Takes the chat response and converts it to speech.

    Returns (wav_path_or_None, status_message). Error responses from the
    chat step are not synthesized.
    """
    if not chat_response or chat_response.startswith("Error"):
        return None, "No valid response to synthesize."

    output_path = None
    try:
        # delete=False so Coqui can reopen the path by name; Gradio serves
        # the file afterwards, so it is only removed on failure below.
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        output_path = temp_file.name
        temp_file.close()

        tts_model.tts_to_file(text=chat_response, file_path=output_path)
        return output_path, "Speech synthesis complete."
    except Exception as e:
        # Don't leak a partial temp file on failure.
        if output_path and os.path.exists(output_path):
            os.remove(output_path)
        return None, f"Error during TTS: {e}"


def speech_to_text_and_chat(audio_file_path, history):
    """Performs STT, then Chatbot generation, returning the final response text and audio.

    Returns (transcription, updated_history, response_text, wav_path, status).
    """
    if audio_file_path is None:
        return "Please upload an audio file.", history, "", None, "Awaiting input."

    # 1. STT
    try:
        result = stt_pipe(audio_file_path)
        transcribed_text = result["text"]
    except Exception as e:
        return f"Error during STT: {e}", history, "", None, f"Error during STT: {e}"

    # 2. Chatbot (GGUF model)
    updated_history, last_response_text = chat_with_bot(transcribed_text, history)

    # 3. TTS
    audio_path, status_text = text_to_speech_from_chat(last_response_text)

    return (
        transcribed_text,
        updated_history,
        last_response_text,
        audio_path,
        status_text,
    )


# --- Gradio Interface ---
custom_css = """
#status { font-weight: bold; color: #2563eb; }
.chatbot { height: 400px; }
"""

# FIX: custom_css was defined but never passed to gr.Blocks, so the
# #status / .chatbot rules were dead.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# 🗣️ GGUF Voice Assistant (Running your model_1b)")
    gr.Markdown("**Note:** This app uses `gzsol/model_1b` (GGUF) on CPU.")

    # llama_cpp handles context via the prompt we build, so no extra state
    # components are needed beyond the Chatbot history itself.
    with gr.Tabs():
        # --- TAB 1: FULL VOICE CHAT ---
        with gr.TabItem("🗣️ Voice Assistant"):
            # FIX: chat_with_bot produces {"role", "content"} dicts, so the
            # Chatbot must use type="messages" (the default tuple format
            # cannot render that history).
            voice_chat_history = gr.Chatbot(
                label="Conversation Log",
                elem_classes=["chatbot"],
                type="messages",
                value=[],
            )
            with gr.Row():
                audio_in = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Input Audio",
                )
                voice_audio_out = gr.Audio(label="AI Voice Response", autoplay=True)

            voice_transcription = gr.Textbox(label="User Transcription")
            voice_response_text = gr.Textbox(label="AI Response (Text)")
            voice_status = gr.Textbox(elem_id="status", label="Status")

            run_btn = gr.Button("Transcribe, Chat & Speak", variant="primary")
            clear_voice_btn = gr.Button("Clear")

            run_btn.click(
                fn=speech_to_text_and_chat,
                inputs=[audio_in, voice_chat_history],
                outputs=[
                    voice_transcription,
                    voice_chat_history,
                    voice_response_text,
                    voice_audio_out,
                    voice_status,
                ],
            )
            # FIX: also reset the transcription box, which Clear left behind.
            clear_voice_btn.click(
                lambda: (None, [], "", "", None, ""),
                None,
                [
                    audio_in,
                    voice_chat_history,
                    voice_transcription,
                    voice_response_text,
                    voice_audio_out,
                    voice_status,
                ],
            )

        # --- TAB 2: TEXT CHAT ---
        with gr.TabItem("💬 Text Chat"):
            chatbot = gr.Chatbot(
                label="Conversation",
                elem_classes=["chatbot"],
                type="messages",
                value=[],
            )
            msg = gr.Textbox(label="Message")
            submit_btn = gr.Button("Send")
            clear_btn = gr.Button("Clear")

            def chat_text_wrapper(message, history):
                """Adapter: the text tab only needs the updated history back."""
                h, _ = chat_with_bot(message, history)
                return h

            msg.submit(chat_text_wrapper, [msg, chatbot], [chatbot]).then(
                lambda: "", None, msg
            )
            submit_btn.click(chat_text_wrapper, [msg, chatbot], [chatbot]).then(
                lambda: "", None, msg
            )
            clear_btn.click(lambda: [], None, chatbot)

demo.launch()