# Space: Znilsson / SurvivalAI Pro — initial deploy (commit 44522b6, verified)
"""
SurvivalAI Pro β€” HF Space chat interface.
Runs the V1 LoRA-finetuned Phi-3 GGUF (Q4_K_M) locally inside the Space via
llama-cpp-python. The Space is CPU-only; on paid CPU Upgrade hardware we get
~6-10 tokens/sec. The GGUF is fetched from a separate HF model repo at cold
start because the 2.4 GB file exceeds Space repo limits.
"""
import os
from pathlib import Path
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# ── Config ───────────────────────────────────────────────────────────────────
MODEL_REPO = "Znilsson/survivalai-phi3-gguf" # private model repo
MODEL_FILENAME = "survivalai-phi3-Q4_K_M.gguf"
N_CTX = 4096
N_THREADS = int(os.environ.get("N_THREADS", "4"))
N_BATCH = 256
MAX_TOKENS = 400
TEMPERATURE = 0.7
TOP_P = 0.9
SYSTEM_MSG = (
"You are SurvivalAI, an expert survival and civilizational knowledge "
"assistant. You provide accurate, practical, and potentially life-saving "
"information about wilderness survival, emergency preparedness, first aid, "
"food procurement, water purification, shelter construction, navigation, "
"and rebuilding civilization. Your responses are clear, actionable, and "
"thorough. The user is in an off-grid context β€” assume no doctor, no "
"Poison Control, no internet, no professional help is available. Give "
"the best answer you can with the knowledge you have."
)
# Phi-3 chat template
PHI3_TMPL = "<|user|>\n{user}<|end|>\n<|assistant|>\n"
STOP_TOKENS = ["<|end|>", "<|user|>", "<|endoftext|>"]
# ── Model download + load (cold start) ───────────────────────────────────────
# Runs once at import time. Downloading + mmap-loading a ~2.4 GB GGUF means a
# slow cold start; subsequent requests reuse the loaded model.
print(f"Fetching {MODEL_FILENAME} from {MODEL_REPO}...")
model_path = hf_hub_download(
repo_id = MODEL_REPO,
filename = MODEL_FILENAME,
token = os.environ.get("HF_TOKEN"), # required if repo is private
# Prefer the Space's persistent /data volume so the download survives
# restarts; fall back to the default HF cache otherwise.
cache_dir = "/data" if Path("/data").exists() else None,
)
print(f"Model file: {model_path}")
print(f"Loading Llama (n_ctx={N_CTX}, n_threads={N_THREADS})...")
llm = Llama(
model_path = model_path,
n_ctx = N_CTX,
n_threads = N_THREADS,
n_batch = N_BATCH,
verbose = False,  # keep llama.cpp's per-token logging out of Space logs
)
print("Model loaded. Ready.")
# ── Chat function ────────────────────────────────────────────────────────────
def build_prompt(history, user_msg):
    """Build a Phi-3-format prompt from the chat history plus the new turn.

    Phi-3 chat markup uses <|user|>, <|assistant|>, and <|end|> tags. The
    system message is collapsed into the first user turn for simplicity
    (this is the same approach used during training/eval) instead of being
    emitted as a separate <|system|> block.

    Args:
        history: list of (user_message, assistant_message) pairs from
            previous turns, as supplied by gr.ChatInterface.
        user_msg: the current user message.

    Returns:
        Prompt string ending with "<|assistant|>\\n", ready for generation.

    Note: the previous implementation rstrip'ed the first-turn prompt, so a
    fresh conversation ended with "<|assistant|>" (no newline) while every
    later turn ended with "<|assistant|>\\n". The trailing newline is now
    emitted consistently, matching PHI3_TMPL.
    """
    parts = []
    # Replay earlier turns, embedding the system message in the very first
    # user turn so behavior matches what the eval rubric saw during training.
    for i, (u, a) in enumerate(history):
        if i == 0:
            u = f"{SYSTEM_MSG}\n\nQuestion: {u}"
        parts.append(f"<|user|>\n{u}<|end|>\n<|assistant|>\n{a}<|end|>")
    # Current turn. With no history this IS the first turn, so it carries
    # the system preamble itself.
    if not history:
        user_msg = f"{SYSTEM_MSG}\n\nQuestion: {user_msg}"
    parts.append(f"<|user|>\n{user_msg}<|end|>\n<|assistant|>\n")
    return "\n".join(parts)
def chat_fn(message, history):
    """Stream the model's reply, yielding the growing partial response.

    Gradio's ChatInterface treats a generator fn as a streaming handler:
    each yielded string replaces the displayed assistant message.
    """
    partial = ""
    try:
        stream = llm(
            build_prompt(history, message),
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            stop=STOP_TOKENS,
            stream=True,
        )
        for piece in stream:
            partial += piece["choices"][0]["text"]
            yield partial
    except Exception as e:
        # UI boundary: surface the failure in-chat rather than crashing
        # the event handler.
        yield f"[ERROR: {e}]"
# ── UI ───────────────────────────────────────────────────────────────────────
EXAMPLES = [
"I cut my leg badly with an axe in the woods. Walk me through what to do.",
"How do I find drinkable water if I'm stuck in a forest with no supplies?",
"It's getting dark and dropping below freezing. How do I build a shelter from what's around?",
"What edible plants are common in temperate North American forests?",
"I need to navigate without a compass. How do I find north?",
]
DESCRIPTION = """
**SurvivalAI Pro** β€” fine-tuned off-grid survival assistant, running fully on CPU inside this Space.
Built on Phi-3-mini-4k-instruct, fine-tuned on ~150,000 survival-knowledge Q/A pairs covering medical
first aid, water, food, shelter, fire, navigation, signaling, foraging, hunting, and tools.
⚠️ **Prototype β€” not for clinical or life-critical use.** This model can produce confident-sounding
but incorrect specifics for trap categories like exact drug dosages or precise frequencies. For
survival-skill guidance it scores well; for precise numerical specifics, double-check with an
authoritative reference.
"""
# Chat UI; chat_fn is a generator, so responses stream token by token.
demo = gr.ChatInterface(
fn = chat_fn,
title = "🏕️ SurvivalAI Pro",
description = DESCRIPTION,
examples = EXAMPLES,
cache_examples = False,  # examples would require inference at build time
theme = gr.themes.Soft(),
)
if __name__ == "__main__":
    # queue() is required for streaming generator handlers; 0.0.0.0:7860 is
    # the standard bind address/port for a Gradio app inside an HF Space.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)