import re

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# 1. Download Model
# Fetch the quantized GGUF weights file from the Hugging Face Hub; the value
# returned by hf_hub_download is handed to Llama as its model_path below.
print("⬇️ Downloading model...")
model_path = hf_hub_download(
    filename="mistral-7b-v0.3.Q4_K_M.gguf",
    repo_id="XY26/dual-frame-mistral-7",
)

# 2. Load Engine
# Engine settings are gathered in one place so they are easy to tune.
# `llm` is the module-level handle that smart_response() calls.
print("⚙️ Loading engine...")
_engine_options = dict(
    n_ctx=4096,
    n_threads=2,
    verbose=False,
)
llm = Llama(model_path=model_path, **_engine_options)

# Decision / evaluation keywords (English & French). A message is routed to
# the dual-frame "opinion" prompt when one of its words STARTS with a trigger.
_OPINION_TRIGGERS = (
    # English
    "should", "opinion", "think", "good", "bad", "pros", "cons",
    "benefit", "risk", "impact", "why", "better", "safe", "worth",
    "buy", "choose", "prefer", "best",
    # French
    "sain", "dangereux", "mieux", "avis", "pensez", "effet",
    "faut", "acheter", "choisir", "bien", "mal", "pourquoi",
)

# Triggers are anchored at a word start (leading \b only) instead of being
# matched as raw substrings. This keeps inflected forms ("benefits", "risks",
# "thinking", "effets") while no longer firing on mid-word hits such as
# "default" (faut), "normal"/"animal" (mal), "ambient" (bien) or "Davis" (avis).
# Known limitation: short stems still match longer words that begin with them
# (e.g. "consider" via "cons").
_OPINION_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(t) for t in _OPINION_TRIGGERS) + r")",
    re.IGNORECASE,
)

# Text the opinion-mode answer is primed with. It is the tail of the prompt,
# so the model continues from it and it has to be re-attached to the output.
_POSITIVE_PREFIX = "**POSITIVE FRAMING:**"

# Sequences at which generation is cut off.
_STOP_SEQUENCES = ["</s>", "###", "User:", "Instruction:"]

# Reply used when the user submits nothing but whitespace.
_EMPTY_MESSAGE_REPLY = "Please enter a question."


def _is_opinion_request(message):
    """Return True when *message* contains a word starting with a trigger."""
    return _OPINION_PATTERN.search(message) is not None


def _build_prompt(message, is_opinion):
    """Return ``(prompt, primed_prefix)`` for the selected mode.

    ``primed_prefix`` is the text the prompt already ends with (empty in chat
    mode). Because generation runs with ``echo=False`` it is not part of the
    model output and must be prepended to the generated text by the caller.
    """
    if is_opinion:
        # We start writing the response FOR the model: ending the prompt with
        # the positive-frame heading makes the model continue from there.
        prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Analyze the following topic. You must provide the answer in two distinct frames:
1. POSITIVE FRAMING (Arguments For / Benefits)
2. NEGATIVE FRAMING (Arguments Against / Risks)

Topic: {message}

### Response:
**POSITIVE FRAMING:**"""
        return prompt, _POSITIVE_PREFIX

    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
You are a helpful assistant. Answer the following question concisely and directly: {message}

### Response:
"""
    return prompt, ""


def smart_response(message: str, history: list) -> str:
    """Answer *message* in chat mode or in dual-frame opinion mode.

    The message is scanned for decision/evaluation keywords. If one is found,
    the model is prompted to give a positive and a negative framing of the
    topic; otherwise it is asked for a concise, direct answer.

    Args:
        message: The user's latest chat message.
        history: Previous conversation turns as supplied by the chat UI.
            Accepted for interface compatibility; it is not used, so every
            message is answered without conversational context.

    Returns:
        The generated reply. In opinion mode it starts with the
        ``**POSITIVE FRAMING:**`` heading. For an empty or whitespace-only
        message a fixed hint is returned and the model is not invoked.
    """
    # Guard: an empty instruction only makes the model improvise.
    if not message or not message.strip():
        return _EMPTY_MESSAGE_REPLY

    is_opinion = _is_opinion_request(message)
    if is_opinion:
        print(f"🧠 OPINION MODE: {message}")
    else:
        print(f"ℹ️ CHAT MODE: {message}")

    prompt, primed_prefix = _build_prompt(message, is_opinion)

    output = llm(
        prompt,
        max_tokens=512,
        temperature=0.7,
        stop=_STOP_SEQUENCES,
        echo=False,
    )
    text = output["choices"][0]["text"].strip()

    # Join only the non-empty parts: chat mode has no prefix, so the reply
    # must not begin with a stray separator space.
    return " ".join(part for part in (primed_prefix, text) if part)

# 3. Launch Interface
# Wire smart_response into a Gradio chat UI. `demo` stays a module-level
# name so the app object is available when this file is imported.
_interface_settings = {
    "title": "Dual-Mind Mistral 🧠",
    "description": "Ask a Fact (Normal Chat) or an Opinion (Dual Perspectives).",
}
demo = gr.ChatInterface(fn=smart_response, **_interface_settings)

if __name__ == "__main__":
    # Enable the request queue, then serve on all interfaces at port 7860.
    queued_app = demo.queue()
    queued_app.launch(server_port=7860, server_name="0.0.0.0")