"""
SurvivalAI Pro — HF Space chat interface.

Runs the V1 LoRA-finetuned Phi-3 GGUF (Q4_K_M) locally inside the Space via
llama-cpp-python. The Space is CPU-only; on paid CPU Upgrade hardware we get
~6-10 tokens/sec. The GGUF is fetched from a separate HF model repo at cold
start because the 2.4 GB file exceeds Space repo limits.
"""

import os
from pathlib import Path

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama


# ── Config ───────────────────────────────────────────────────────────────────
# Hub repo (private) holding the quantised weights, and the file to fetch.
MODEL_REPO = "Znilsson/survivalai-phi3-gguf"
MODEL_FILENAME = "survivalai-phi3-Q4_K_M.gguf"

# Settings handed to the Llama constructor. The thread count defaults to 4
# and can be overridden through the N_THREADS environment variable.
N_CTX = 4096
N_THREADS = int(os.getenv("N_THREADS", "4"))
N_BATCH = 256

# Sampling settings passed with every completion request.
MAX_TOKENS = 400
TEMPERATURE = 0.7
TOP_P = 0.9

# Instruction preamble, written one sentence per entry and joined with single
# spaces into one paragraph.
SYSTEM_MSG = " ".join(
    [
        "You are SurvivalAI, an expert survival and civilizational knowledge assistant.",
        (
            "You provide accurate, practical, and potentially life-saving information "
            "about wilderness survival, emergency preparedness, first aid, food "
            "procurement, water purification, shelter construction, navigation, and "
            "rebuilding civilization."
        ),
        "Your responses are clear, actionable, and thorough.",
        (
            "The user is in an off-grid context — assume no doctor, no Poison Control, "
            "no internet, no professional help is available."
        ),
        "Give the best answer you can with the knowledge you have.",
    ]
)

# Phi-3 single-turn layout: user text, end marker, then the assistant header.
PHI3_TMPL = "<|user|>\n{user}<|end|>\n<|assistant|>\n"
# Passed as the stop sequences for generation.
STOP_TOKENS = ["<|end|>", "<|user|>", "<|endoftext|>"]


# ── Model download + load (cold start) ───────────────────────────────────────
print(f"Fetching {MODEL_FILENAME} from {MODEL_REPO}...")
# Use /data as the download cache when that path exists; otherwise pass None
# so the hub client falls back to its own default location.
_cache_dir = "/data" if Path("/data").exists() else None
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILENAME,
    token=os.environ.get("HF_TOKEN"),  # required if repo is private
    cache_dir=_cache_dir,
)
print(f"Model file: {model_path}")

print(f"Loading Llama (n_ctx={N_CTX}, n_threads={N_THREADS})...")
# Constructor settings gathered in one place; verbose=False is passed to the
# Llama constructor unchanged.
_llama_settings = {
    "model_path": model_path,
    "n_ctx": N_CTX,
    "n_threads": N_THREADS,
    "n_batch": N_BATCH,
    "verbose": False,
}
llm = Llama(**_llama_settings)
print("Model loaded. Ready.")


# ── Chat function ────────────────────────────────────────────────────────────
def _message_text(content):
    """Return the plain text carried by a role/content message's ``content``."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        # Structured part such as {"type": "text", "text": "..."}; parts with
        # no string "text" field contribute nothing.
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, (list, tuple)):
        # NOTE(review): presumably the list-of-parts shape some Gradio
        # releases use for message content; verify against the installed version.
        return "".join(_message_text(part) for part in content)
    return str(content)


def _history_pairs(history):
    """Normalise chat history into ``[user_text, assistant_text]`` pairs.

    Accepts either pair-style entries ``(user, assistant)`` or dict entries
    with ``"role"`` and ``"content"`` keys. Missing (``None``) texts become
    empty strings so they are never rendered as the literal text "None".
    """
    pairs = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _message_text(entry.get("content"))
            if role == "user":
                pairs.append([text, ""])
            elif role == "assistant":
                if pairs:
                    # Attach the reply to the most recent user turn; a second
                    # consecutive reply is appended on a new line.
                    previous = pairs[-1][1]
                    pairs[-1][1] = f"{previous}\n{text}" if previous else text
                else:
                    pairs.append(["", text])
            # Any other role is skipped: the system message is injected by
            # build_prompt itself.
        else:
            user_text, reply = entry
            pairs.append([
                "" if user_text is None else user_text,
                "" if reply is None else reply,
            ])
    return pairs


def build_prompt(history, user_msg):
    """Build a Phi-3 prompt incorporating system message + chat history.

    Phi-3 chat template uses <|system|>, <|user|>, <|assistant|>, <|end|>.
    We collapse the system message into the first user turn for simplicity
    (this is the same approach used during training/eval).

    Args:
        history: Earlier turns of the conversation, either as
            ``(user, assistant)`` pairs or as dicts with ``"role"`` and
            ``"content"`` keys. ``None`` or an empty list means this is the
            first turn.
        user_msg: The message the user just sent.

    Returns:
        The full prompt string, ending with the assistant header so the
        model continues with its reply.
    """
    pairs = _history_pairs(history)

    if not pairs:
        # Embed system msg as a preamble inside the first user turn so behavior
        # matches what the eval rubric saw during training.
        first_user = f"{SYSTEM_MSG}\n\nQuestion: {user_msg}"
        # NOTE(review): the trailing newline after <|assistant|> is stripped
        # here but kept on follow-up turns below. Left as-is to preserve the
        # existing prompts; confirm which form matches training.
        return PHI3_TMPL.format(user=first_user).rstrip("\n")

    parts = []
    # Replay history, with the system preamble on the oldest turn.
    for i, (u, a) in enumerate(pairs):
        if i == 0:
            u = f"{SYSTEM_MSG}\n\nQuestion: {u}"
        parts.append(f"{PHI3_TMPL.format(user=u)}{a}<|end|>")
    # Add current turn, leaving the assistant header open for generation.
    parts.append(PHI3_TMPL.format(user=user_msg))
    return "\n".join(parts)


def _fit_prompt(history, message):
    """Build the prompt, dropping the oldest turns until it leaves room to answer.

    The prompt is kept to at most ``N_CTX - MAX_TOKENS`` tokens so that a full
    reply still fits in the context window. If the message alone is too long,
    the untrimmed single-turn prompt is returned and the model call is left to
    report the problem.
    """
    turns = list(history or [])
    budget = N_CTX - MAX_TOKENS
    while True:
        prompt = build_prompt(turns, message)
        # special=True so the chat markers are parsed as special tokens
        # rather than as plain text.
        used = len(llm.tokenize(prompt.encode("utf-8"), special=True))
        if used <= budget or not turns:
            return prompt
        # Drop the oldest entry. When entries are role/content dicts, also
        # drop any assistant reply left without its user message.
        turns = turns[1:]
        while turns and isinstance(turns[0], dict) and turns[0].get("role") == "assistant":
            turns = turns[1:]


def chat_fn(message, history):
    """Generator: yields incremental partial responses for streaming UI.

    Args:
        message: The user's latest message.
        history: Earlier turns of the conversation as supplied by the UI.

    Yields:
        The reply accumulated so far after each streamed chunk. If an error
        occurs, a final value is yielded that keeps any text already
        generated and appends ``[ERROR: ...]``.
    """
    accum = ""
    try:
        # Built inside the try so prompt/tokenisation failures are reported
        # in the chat like any other error.
        prompt = _fit_prompt(history, message)
        stream = llm(
            prompt,
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            stop=STOP_TOKENS,
            stream=True,
        )
        for chunk in stream:
            accum += chunk["choices"][0]["text"]
            yield accum
    except Exception as e:
        # Top-level UI boundary: log the failure and surface it to the user
        # without discarding the part of the answer already shown.
        print(f"chat_fn failed: {e!r}", flush=True)
        error_note = f"[ERROR: {e}]"
        yield f"{accum}\n\n{error_note}" if accum else error_note


# ── UI ───────────────────────────────────────────────────────────────────────
# Starter prompts offered as examples in the chat UI.
EXAMPLES = [
    "I cut my leg badly with an axe in the woods. Walk me through what to do.",
    "How do I find drinkable water if I'm stuck in a forest with no supplies?",
    (
        "It's getting dark and dropping below freezing. "
        "How do I build a shelter from what's around?"
    ),
    "What edible plants are common in temperate North American forests?",
    "I need to navigate without a compass. How do I find north?",
]

# Markdown description for the interface, written line by line; the empty
# entries produce the leading/trailing newlines and the blank lines between
# paragraphs.
DESCRIPTION = "\n".join(
    [
        "",
        "**SurvivalAI Pro** — fine-tuned off-grid survival assistant, running fully on CPU inside this Space.",
        "",
        "Built on Phi-3-mini-4k-instruct, fine-tuned on ~150,000 survival-knowledge Q/A pairs covering medical",
        "first aid, water, food, shelter, fire, navigation, signaling, foraging, hunting, and tools.",
        "",
        "⚠️ **Prototype — not for clinical or life-critical use.** This model can produce confident-sounding",
        "but incorrect specifics for trap categories like exact drug dosages or precise frequencies. For",
        "survival-skill guidance it scores well; for precise numerical specifics, double-check with an",
        "authoritative reference.",
        "",
    ]
)

# Chat UI wired to the streaming generator, with the starter prompts shown
# as examples and example caching turned off.
demo = gr.ChatInterface(
    chat_fn,
    title="🏕️ SurvivalAI Pro",
    description=DESCRIPTION,
    examples=EXAMPLES,
    cache_examples=False,
    theme=gr.themes.Soft(),
)


if __name__ == "__main__":
    # Enable queuing, then serve on all interfaces at port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.queue().launch(**launch_options)