""" SurvivalAI Pro — HF Space chat interface. Runs the V1 LoRA-finetuned Phi-3 GGUF (Q4_K_M) locally inside the Space via llama-cpp-python. The Space is CPU-only; on paid CPU Upgrade hardware we get ~6-10 tokens/sec. The GGUF is fetched from a separate HF model repo at cold start because the 2.4 GB file exceeds Space repo limits. """ import os from pathlib import Path import gradio as gr from huggingface_hub import hf_hub_download from llama_cpp import Llama # ── Config ─────────────────────────────────────────────────────────────────── MODEL_REPO = "Znilsson/survivalai-phi3-gguf" # private model repo MODEL_FILENAME = "survivalai-phi3-Q4_K_M.gguf" N_CTX = 4096 N_THREADS = int(os.environ.get("N_THREADS", "4")) N_BATCH = 256 MAX_TOKENS = 400 TEMPERATURE = 0.7 TOP_P = 0.9 SYSTEM_MSG = ( "You are SurvivalAI, an expert survival and civilizational knowledge " "assistant. You provide accurate, practical, and potentially life-saving " "information about wilderness survival, emergency preparedness, first aid, " "food procurement, water purification, shelter construction, navigation, " "and rebuilding civilization. Your responses are clear, actionable, and " "thorough. The user is in an off-grid context — assume no doctor, no " "Poison Control, no internet, no professional help is available. Give " "the best answer you can with the knowledge you have." ) # Phi-3 chat template PHI3_TMPL = "<|user|>\n{user}<|end|>\n<|assistant|>\n" STOP_TOKENS = ["<|end|>", "<|user|>", "<|endoftext|>"] # ── Model download + load (cold start) ─────────────────────────────────────── print(f"Fetching {MODEL_FILENAME} from {MODEL_REPO}...") model_path = hf_hub_download( repo_id = MODEL_REPO, filename = MODEL_FILENAME, token = os.environ.get("HF_TOKEN"), # required if repo is private cache_dir = "/data" if Path("/data").exists() else None, ) print(f"Model file: {model_path}") print(f"Loading Llama (n_ctx={N_CTX}, n_threads={N_THREADS})...") llm = Llama( model_path = model_path, n_ctx = N_CTX, n_threads = N_THREADS, n_batch = N_BATCH, verbose = False, ) print("Model loaded. Ready.") # ── Chat function ──────────────────────────────────────────────────────────── def build_prompt(history, user_msg): """Build a Phi-3 prompt incorporating system message + chat history. Phi-3 chat template uses <|system|>, <|user|>, <|assistant|>, <|end|>. We collapse the system message into the first user turn for simplicity (this is the same approach used during training/eval). """ parts = [] # Embed system msg as a preamble inside the first user turn so behavior # matches what the eval rubric saw during training. if not history: first_user = f"{SYSTEM_MSG}\n\nQuestion: {user_msg}" parts.append(PHI3_TMPL.format(user=first_user).rstrip("\n")) else: # Replay history for i, (u, a) in enumerate(history): if i == 0: u = f"{SYSTEM_MSG}\n\nQuestion: {u}" parts.append(f"<|user|>\n{u}<|end|>\n<|assistant|>\n{a}<|end|>") # Add current turn parts.append(f"<|user|>\n{user_msg}<|end|>\n<|assistant|>\n") return "\n".join(parts) def chat_fn(message, history): """Generator: yields incremental partial responses for streaming UI.""" prompt = build_prompt(history, message) accum = "" try: for chunk in llm( prompt, max_tokens = MAX_TOKENS, temperature = TEMPERATURE, top_p = TOP_P, stop = STOP_TOKENS, stream = True, ): tok = chunk["choices"][0]["text"] accum += tok yield accum except Exception as e: yield f"[ERROR: {e}]" # ── UI ─────────────────────────────────────────────────────────────────────── EXAMPLES = [ "I cut my leg badly with an axe in the woods. Walk me through what to do.", "How do I find drinkable water if I'm stuck in a forest with no supplies?", "It's getting dark and dropping below freezing. How do I build a shelter from what's around?", "What edible plants are common in temperate North American forests?", "I need to navigate without a compass. How do I find north?", ] DESCRIPTION = """ **SurvivalAI Pro** — fine-tuned off-grid survival assistant, running fully on CPU inside this Space. Built on Phi-3-mini-4k-instruct, fine-tuned on ~150,000 survival-knowledge Q/A pairs covering medical first aid, water, food, shelter, fire, navigation, signaling, foraging, hunting, and tools. ⚠️ **Prototype — not for clinical or life-critical use.** This model can produce confident-sounding but incorrect specifics for trap categories like exact drug dosages or precise frequencies. For survival-skill guidance it scores well; for precise numerical specifics, double-check with an authoritative reference. """ demo = gr.ChatInterface( fn = chat_fn, title = "🏕️ SurvivalAI Pro", description = DESCRIPTION, examples = EXAMPLES, cache_examples = False, theme = gr.themes.Soft(), ) if __name__ == "__main__": demo.queue().launch(server_name="0.0.0.0", server_port=7860)