import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- 1. MODEL LOADING ---
# We still load the quantized GGUF model, which is perfect for CPU.
# We will focus on Llama-3 as it's best for a general-purpose assistant.
model_name_or_path = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
model_file = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"

try:
    model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_file)
except Exception as e:
    # Chain the original exception so the download failure's traceback survives.
    raise RuntimeError(f"Failed to download the model. Error: {e}") from e

# Load the model with llama-cpp-python.
# n_ctx is the context window size; 2048 is a safe bet for CPU Spaces.
# n_gpu_layers=0 ensures it runs entirely on the CPU.
try:
    llm = Llama(
        model_path=model_path,
        n_ctx=2048,
        n_threads=4,  # Set to a reasonable number of threads for the CPU
        n_gpu_layers=0,
        verbose=False
    )
except Exception as e:
    raise RuntimeError(f"Failed to load the GGUF model. Error: {e}") from e

# --- 2. THE "BRAIN'S INSTRUCTION MANUAL" (SYSTEM PROMPT) ---
# This is the most critical part. We tell the AI how to behave.
# This prompt guides it to be helpful, analytical, and honest about its limitations.
SYSTEM_PROMPT = """You are 'NexusAI', a helpful and highly intelligent AI assistant built by a creative developer. Your primary goal is to provide comprehensive, insightful, and helpful responses. You must be robust and handle any user input, no matter how brief or poorly phrased.

When a user asks a question, follow these steps:
1. **Analyze the Intent:** First, understand the user's *true* goal. If they ask "cost to build building?", they don't want you to invent a number. They need a *checklist* of cost categories to research. If their question is vague, identify what they are likely trying to accomplish.
2. **Provide a Direct Answer:** If you can directly answer, do so clearly and concisely.
3. **Elaborate and Add Value:** After the direct answer, provide deeper context, explain the "why" behind the answer, and offer related suggestions or next steps. Give the user more than they asked for.
4. **Acknowledge Limitations:** You are not a real-time calculator, a search engine, or a financial advisor. If a question requires real-world, live data (like prices, stock quotes, personal advice), you MUST state that you cannot provide it. Instead, provide a framework or a list of steps the user can take to find the information themselves. NEVER invent facts.
5. **Maintain a Friendly, Encouraging Tone:** Be a partner in the user's creative or analytical process.
"""


# --- 3. THE GRADIO CHAT INTERFACE ---
def predict(message, history):
    """Generate a streaming reply for one chat turn.

    Called by gr.ChatInterface for each new user message.

    Args:
        message: The new user input string.
        history: The prior conversation. Gradio supplies either the legacy
            pair format ``[[user, assistant], ...]`` or, with
            ``type="messages"``, openai-style dicts
            ``[{"role": ..., "content": ...}, ...]``. Both are accepted.

    Yields:
        The assistant response accumulated so far (token-by-token streaming).
    """
    # Start every conversation with the system prompt, then replay history.
    chat_history_formatted = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history:
        if isinstance(turn, dict):
            # openai-style message dict; skip empty/None placeholder entries.
            if turn.get("content"):
                chat_history_formatted.append(
                    {"role": turn["role"], "content": turn["content"]}
                )
        else:
            # Legacy [user_message, assistant_response] pair. The assistant
            # slot can be None (e.g. an interrupted turn) — skip None/empty
            # entries rather than sending null content to the model.
            user_msg, assistant_msg = turn
            if user_msg:
                chat_history_formatted.append({"role": "user", "content": user_msg})
            if assistant_msg:
                chat_history_formatted.append(
                    {"role": "assistant", "content": assistant_msg}
                )

    # Add the latest user message.
    chat_history_formatted.append({"role": "user", "content": message})

    # Use the model to generate a response stream.
    # stream=True allows the text to appear token-by-token for a better UX.
    generator = llm.create_chat_completion(
        messages=chat_history_formatted,
        max_tokens=1024,
        temperature=0.7,
        stream=True
    )

    # Yield partial responses to create the streaming effect.
    partial_message = ""
    for chunk in generator:
        delta = chunk['choices'][0]['delta']
        if 'content' in delta:
            partial_message += delta['content']
            yield partial_message
# We use gr.ChatInterface, which creates a complete chat UI for us.
# It manages history, input boxes, and message display automatically.
example_prompts = [
    ["How do I learn to code?"],
    ["Explain the concept of 'supply and demand' like I'm five."],
    ["I want to build a PC, where do I start?"],
    ["I am building a building, how much would it cost me"],  # The "bad" prompt from before!
]

demo = gr.ChatInterface(
    fn=predict,
    title="🤖 NexusAI Assistant",
    description="A powerful, conversational AI running on a Hugging Face CPU. Ask me anything!",
    examples=example_prompts,
    theme="soft",
)

demo.launch()