"""
SurvivalAI Pro — HF Space chat interface.

Runs the V1 LoRA-finetuned Phi-3 GGUF (Q4_K_M) locally inside the Space via
llama-cpp-python. The Space is CPU-only; on paid CPU Upgrade hardware we get
~6-10 tokens/sec. The GGUF is fetched from a separate HF model repo at cold
start because the 2.4 GB file exceeds Space repo limits.
"""
import os
from pathlib import Path
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# ── Config ───────────────────────────────────────────────────────────────────
# Hugging Face model repo and file name of the quantized GGUF weights.
MODEL_REPO = "Znilsson/survivalai-phi3-gguf"  # private model repo
MODEL_FILENAME = "survivalai-phi3-Q4_K_M.gguf"

# Runtime settings handed to the Llama constructor at load time.
N_CTX = 4096
# Thread count can be overridden per deployment through the N_THREADS env var.
# `or "4"` also covers a variable that is set but empty, which would otherwise
# crash int() at cold start; a non-numeric value still fails loudly.
N_THREADS = int(os.environ.get("N_THREADS") or "4")
N_BATCH = 256

# Sampling settings applied to every completion request.
MAX_TOKENS = 400
TEMPERATURE = 0.7
TOP_P = 0.9

# Instruction preamble that build_prompt() embeds in the first user turn.
# NOTE(review): the dash after "off-grid context" was restored from a
# mis-encoded character; confirm it matches the text used during training.
SYSTEM_MSG = (
    "You are SurvivalAI, an expert survival and civilizational knowledge "
    "assistant. You provide accurate, practical, and potentially life-saving "
    "information about wilderness survival, emergency preparedness, first aid, "
    "food procurement, water purification, shelter construction, navigation, "
    "and rebuilding civilization. Your responses are clear, actionable, and "
    "thorough. The user is in an off-grid context — assume no doctor, no "
    "Poison Control, no internet, no professional help is available. Give "
    "the best answer you can with the knowledge you have."
)

# Phi-3 chat template: a single user turn followed by an open assistant turn.
PHI3_TMPL = "<|user|>\n{user}<|end|>\n<|assistant|>\n"
# Generation stops as soon as the model emits any of these markers.
STOP_TOKENS = ["<|end|>", "<|user|>", "<|endoftext|>"]
# ── Model download + load (cold start) ───────────────────────────────────────
# Runs once at import time: resolve the GGUF file to a local path, then load
# it into a llama.cpp model that chat_fn() reuses for every request.
print(f"Fetching {MODEL_FILENAME} from {MODEL_REPO}...")

# Use /data as the download cache when that directory exists; otherwise pass
# None and let hf_hub_download choose its own location.
_cache_root = "/data" if Path("/data").exists() else None
_hub_token = os.environ.get("HF_TOKEN")  # required if repo is private

model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILENAME,
    token=_hub_token,
    cache_dir=_cache_root,
)
print(f"Model file: {model_path}")

print(f"Loading Llama (n_ctx={N_CTX}, n_threads={N_THREADS})...")
_llama_options = {
    "model_path": model_path,
    "n_ctx": N_CTX,
    "n_threads": N_THREADS,
    "n_batch": N_BATCH,
    "verbose": False,
}
llm = Llama(**_llama_options)
print("Model loaded. Ready.")
# ── Chat function ────────────────────────────────────────────────────────────
def _message_text(content):
    """Return the plain text carried by one message's ``content`` value."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # NOTE(review): assumes list content is a sequence of strings and/or
        # {"text": ...} blocks; anything else (files, components) is skipped.
        pieces = []
        for item in content:
            if isinstance(item, str):
                pieces.append(item)
            elif isinstance(item, dict) and isinstance(item.get("text"), str):
                pieces.append(item["text"])
        return "\n".join(pieces)
    return ""


def _history_to_pairs(history):
    """Normalize chat history into a list of (user, assistant) text pairs."""
    pairs = []
    pending_user = None  # user message still waiting for its assistant reply
    for entry in history or []:
        if isinstance(entry, dict):
            # Role/content message dicts.
            role = entry.get("role")
            text = _message_text(entry.get("content"))
            if role == "user":
                if pending_user is not None:
                    # Previous user turn never got a reply; keep it anyway.
                    pairs.append((pending_user, ""))
                pending_user = text
            elif role == "assistant":
                pairs.append((pending_user or "", text))
                pending_user = None
            # Any other role is ignored: the prompt has no slot for it.
        else:
            # Pair format: (user, assistant). A missing side becomes "" so the
            # literal text "None" never ends up in the prompt.
            user_text, assistant_text = entry
            pairs.append((
                "" if user_text is None else user_text,
                "" if assistant_text is None else assistant_text,
            ))
    if pending_user is not None:
        pairs.append((pending_user, ""))
    return pairs


def build_prompt(history, user_msg):
    """Build a Phi-3 prompt incorporating system message + chat history.

    Phi-3 chat template uses <|system|>, <|user|>, <|assistant|>, <|end|>.
    We collapse the system message into the first user turn for simplicity
    (this is the same approach used during training/eval).

    Args:
        history: Earlier turns, oldest first. Accepts either a sequence of
            ``(user, assistant)`` pairs or a sequence of
            ``{"role": ..., "content": ...}`` dicts. ``None`` or an empty
            sequence means this is the first turn.
        user_msg: The message the assistant should answer now.

    Returns:
        The full prompt string, ending with an open ``<|assistant|>`` turn.
    """
    turns = _history_to_pairs(history)

    if not turns:
        # First turn: the system message is a preamble inside the user turn so
        # behavior matches what the eval rubric saw during training.
        first_user = f"{SYSTEM_MSG}\n\nQuestion: {user_msg}"
        return PHI3_TMPL.format(user=first_user).rstrip("\n")

    parts = []
    # Replay history; only the very first user turn carries the preamble.
    for index, (user_text, assistant_text) in enumerate(turns):
        if index == 0:
            user_text = f"{SYSTEM_MSG}\n\nQuestion: {user_text}"
        parts.append(
            f"<|user|>\n{user_text}<|end|>\n<|assistant|>\n{assistant_text}<|end|>"
        )
    # Add current turn, leaving the assistant turn open for generation.
    parts.append(f"<|user|>\n{user_msg}<|end|>\n<|assistant|>\n")
    return "\n".join(parts)
def chat_fn(message, history):
    """Stream the assistant's reply to ``message`` for the chat UI.

    This is a generator: after every chunk received from the model it yields
    the reply accumulated so far, so the UI can redraw the growing message.

    Args:
        message: The user's new message.
        history: Earlier turns of the conversation, passed through unchanged
            to build_prompt().

    Yields:
        The partial reply text, growing with each chunk. If prompt building
        or generation fails, the final item is an ``[ERROR: ...]`` notice,
        appended to whatever text had already been produced.
    """
    reply = ""
    try:
        # Building the prompt sits inside the try so a malformed history is
        # reported in the chat the same way as a generation failure.
        prompt = build_prompt(history, message)
        stream = llm(
            prompt,
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            stop=STOP_TOKENS,
            stream=True,
        )
        for chunk in stream:
            reply += chunk["choices"][0]["text"]
            yield reply
    except Exception as exc:
        # Top-level boundary for the UI: write the traceback to stderr for
        # debugging, then report the error without discarding streamed text.
        import traceback

        traceback.print_exc()
        notice = f"[ERROR: {exc}]"
        yield f"{reply}\n\n{notice}" if reply else notice
# ── UI ───────────────────────────────────────────────────────────────────────
# Starter prompts offered as clickable examples in the chat interface.
EXAMPLES = [
    "I cut my leg badly with an axe in the woods. Walk me through what to do.",
    "How do I find drinkable water if I'm stuck in a forest with no supplies?",
    "It's getting dark and dropping below freezing. How do I build a shelter from what's around?",
    "What edible plants are common in temperate North American forests?",
    "I need to navigate without a compass. How do I find north?",
]

# Markdown shown under the title. The blank line before the warning keeps it
# as its own paragraph instead of running into the preceding text.
DESCRIPTION = """
**SurvivalAI Pro** — fine-tuned off-grid survival assistant, running fully on CPU inside this Space.
Built on Phi-3-mini-4k-instruct, fine-tuned on ~150,000 survival-knowledge Q/A pairs covering medical
first aid, water, food, shelter, fire, navigation, signaling, foraging, hunting, and tools.

⚠️ **Prototype — not for clinical or life-critical use.** This model can produce confident-sounding
but incorrect specifics for trap categories like exact drug dosages or precise frequencies. For
survival-skill guidance it scores well; for precise numerical specifics, double-check with an
authoritative reference.
"""
# Chat UI: every submitted message is answered by the chat_fn generator.
# NOTE(review): the emoji in the title was reconstructed from a mis-encoded
# character sequence; confirm it is the intended glyph.
demo = gr.ChatInterface(
    fn=chat_fn,
    title="🏕️ SurvivalAI Pro",
    description=DESCRIPTION,
    examples=EXAMPLES,
    cache_examples=False,
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    # Enable the request queue, then listen on all interfaces at port 7860.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)