# Space: Znilsson / SurvivalAI Pro — initial deploy (commit 44522b6, verified)
"""
SurvivalAI Pro β€” HF Space chat interface.
Runs the V1 LoRA-finetuned Phi-3 GGUF (Q4_K_M) locally inside the Space via
llama-cpp-python. The Space is CPU-only; on paid CPU Upgrade hardware we get
~6-10 tokens/sec. The GGUF is fetched from a separate HF model repo at cold
start because the 2.4 GB file exceeds Space repo limits.
"""
import os
from pathlib import Path
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# ── Config ───────────────────────────────────────────────────────────────────
MODEL_REPO = "Znilsson/survivalai-phi3-gguf" # private model repo
MODEL_FILENAME = "survivalai-phi3-Q4_K_M.gguf"
N_CTX = 4096
N_THREADS = int(os.environ.get("N_THREADS", "4"))
N_BATCH = 256
MAX_TOKENS = 400
TEMPERATURE = 0.7
TOP_P = 0.9
SYSTEM_MSG = (
"You are SurvivalAI, an expert survival and civilizational knowledge "
"assistant. You provide accurate, practical, and potentially life-saving "
"information about wilderness survival, emergency preparedness, first aid, "
"food procurement, water purification, shelter construction, navigation, "
"and rebuilding civilization. Your responses are clear, actionable, and "
"thorough. The user is in an off-grid context β€” assume no doctor, no "
"Poison Control, no internet, no professional help is available. Give "
"the best answer you can with the knowledge you have."
)
# Phi-3 chat template
PHI3_TMPL = "<|user|>\n{user}<|end|>\n<|assistant|>\n"
STOP_TOKENS = ["<|end|>", "<|user|>", "<|endoftext|>"]
# ── Model download + load (cold start) ───────────────────────────────────────
# Runs once at import time. Downloading + mmap-loading a ~2.4 GB GGUF means a
# slow cold start; subsequent requests reuse the loaded model.
print(f"Fetching {MODEL_FILENAME} from {MODEL_REPO}...")
model_path = hf_hub_download(
repo_id = MODEL_REPO,
filename = MODEL_FILENAME,
token = os.environ.get("HF_TOKEN"), # required if repo is private
# Prefer the Space's persistent /data volume so the download survives
# restarts; fall back to the default HF cache otherwise.
cache_dir = "/data" if Path("/data").exists() else None,
)
print(f"Model file: {model_path}")
print(f"Loading Llama (n_ctx={N_CTX}, n_threads={N_THREADS})...")
llm = Llama(
model_path = model_path,
n_ctx = N_CTX,
n_threads = N_THREADS,
n_batch = N_BATCH,
verbose = False,  # keep llama.cpp's per-token logging out of Space logs
)
print("Model loaded. Ready.")
# ── Chat function ────────────────────────────────────────────────────────────
def build_prompt(history, user_msg):
    """Build a Phi-3-format prompt from the chat history plus the new turn.

    Phi-3 chat markup uses <|user|>, <|assistant|>, and <|end|> tags. The
    system message is collapsed into the first user turn for simplicity
    (this is the same approach used during training/eval) instead of being
    emitted as a separate <|system|> block.

    Args:
        history: list of (user_message, assistant_message) pairs from
            previous turns, as supplied by gr.ChatInterface.
        user_msg: the current user message.

    Returns:
        Prompt string ending with "<|assistant|>\\n", ready for generation.

    Note: the previous implementation rstrip'ed the first-turn prompt, so a
    fresh conversation ended with "<|assistant|>" (no newline) while every
    later turn ended with "<|assistant|>\\n". The trailing newline is now
    emitted consistently, matching PHI3_TMPL.
    """
    parts = []
    # Replay earlier turns, embedding the system message in the very first
    # user turn so behavior matches what the eval rubric saw during training.
    for i, (u, a) in enumerate(history):
        if i == 0:
            u = f"{SYSTEM_MSG}\n\nQuestion: {u}"
        parts.append(f"<|user|>\n{u}<|end|>\n<|assistant|>\n{a}<|end|>")
    # Current turn. With no history this IS the first turn, so it carries
    # the system preamble itself.
    if not history:
        user_msg = f"{SYSTEM_MSG}\n\nQuestion: {user_msg}"
    parts.append(f"<|user|>\n{user_msg}<|end|>\n<|assistant|>\n")
    return "\n".join(parts)
def chat_fn(message, history):
    """Stream the model's reply, yielding the growing partial response.

    Gradio's ChatInterface treats a generator fn as a streaming handler:
    each yielded string replaces the displayed assistant message.
    """
    partial = ""
    try:
        stream = llm(
            build_prompt(history, message),
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            stop=STOP_TOKENS,
            stream=True,
        )
        for piece in stream:
            partial += piece["choices"][0]["text"]
            yield partial
    except Exception as e:
        # UI boundary: surface the failure in-chat rather than crashing
        # the event handler.
        yield f"[ERROR: {e}]"
# ── UI ───────────────────────────────────────────────────────────────────────
EXAMPLES = [
"I cut my leg badly with an axe in the woods. Walk me through what to do.",
"How do I find drinkable water if I'm stuck in a forest with no supplies?",
"It's getting dark and dropping below freezing. How do I build a shelter from what's around?",
"What edible plants are common in temperate North American forests?",
"I need to navigate without a compass. How do I find north?",
]
DESCRIPTION = """
**SurvivalAI Pro** β€” fine-tuned off-grid survival assistant, running fully on CPU inside this Space.
Built on Phi-3-mini-4k-instruct, fine-tuned on ~150,000 survival-knowledge Q/A pairs covering medical
first aid, water, food, shelter, fire, navigation, signaling, foraging, hunting, and tools.
⚠️ **Prototype β€” not for clinical or life-critical use.** This model can produce confident-sounding
but incorrect specifics for trap categories like exact drug dosages or precise frequencies. For
survival-skill guidance it scores well; for precise numerical specifics, double-check with an
authoritative reference.
"""
# Chat UI; chat_fn is a generator, so responses stream token by token.
demo = gr.ChatInterface(
fn = chat_fn,
title = "🏕️ SurvivalAI Pro",
description = DESCRIPTION,
examples = EXAMPLES,
cache_examples = False,  # examples would require inference at build time
theme = gr.themes.Soft(),
)
if __name__ == "__main__":
    # queue() is required for streaming generator handlers; 0.0.0.0:7860 is
    # the standard bind address/port for a Gradio app inside an HF Space.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)