| """ | |
| SurvivalAI Pro β HF Space chat interface. | |
| Runs the V1 LoRA-finetuned Phi-3 GGUF (Q4_K_M) locally inside the Space via | |
| llama-cpp-python. The Space is CPU-only; on paid CPU Upgrade hardware we get | |
| ~6-10 tokens/sec. The GGUF is fetched from a separate HF model repo at cold | |
| start because the 2.4 GB file exceeds Space repo limits. | |
| """ | |
import os
from pathlib import Path

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# ── Config ───────────────────────────────────────────────────────────────────
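# N_CTX caps the combined prompt + completion window; N_BATCH trades memory
# for prompt-processing speed; N_THREADS defaults to 4 but can be overridden
# via the N_THREADS env var to match the hardware's vCPU count.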
MODEL_REPO = "Znilsson/survivalai-phi3-gguf"  # private model repo
MODEL_FILENAME = "survivalai-phi3-Q4_K_M.gguf"
N_CTX = 4096
N_THREADS = int(os.environ.get("N_THREADS", "4"))
N_BATCH = 256
MAX_TOKENS = 400
TEMPERATURE = 0.7
TOP_P = 0.9

SYSTEM_MSG = (
    "You are SurvivalAI, an expert survival and civilizational knowledge "
    "assistant. You provide accurate, practical, and potentially life-saving "
    "information about wilderness survival, emergency preparedness, first aid, "
    "food procurement, water purification, shelter construction, navigation, "
    "and rebuilding civilization. Your responses are clear, actionable, and "
    "thorough. The user is in an off-grid context – assume no doctor, no "
    "Poison Control, no internet, no professional help is available. Give "
    "the best answer you can with the knowledge you have."
)

# Phi-3 chat template
PHI3_TMPL = "<|user|>\n{user}<|end|>\n<|assistant|>\n"
STOP_TOKENS = ["<|end|>", "<|user|>", "<|endoftext|>"]
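
# For reference, a first-turn prompt rendered with PHI3_TMPL by build_prompt()
# below looks like this (system message abbreviated):
#
#   <|user|>
#   You are SurvivalAI, ... Question: How do I find north?<|end|>
#   <|assistant|>
#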
# ── Model download + load (cold start) ───────────────────────────────────────
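# /data is the Space's persistent-storage mount (when enabled); caching the
# GGUF there lets restarts skip the 2.4 GB re-download.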
| print(f"Fetching {MODEL_FILENAME} from {MODEL_REPO}...") | |
| model_path = hf_hub_download( | |
| repo_id = MODEL_REPO, | |
| filename = MODEL_FILENAME, | |
| token = os.environ.get("HF_TOKEN"), # required if repo is private | |
| cache_dir = "/data" if Path("/data").exists() else None, | |
| ) | |
| print(f"Model file: {model_path}") | |
| print(f"Loading Llama (n_ctx={N_CTX}, n_threads={N_THREADS})...") | |
llm = Llama(
    model_path = model_path,
    n_ctx      = N_CTX,
    n_threads  = N_THREADS,
    n_batch    = N_BATCH,
    verbose    = False,
)
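# Optional warm-up, left commented out: a 1-token generation here would pay
# llama.cpp's first-call setup cost at cold start rather than on the first
# user request.
# llm("<|user|>\nping<|end|>\n<|assistant|>\n", max_tokens=1)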
| print("Model loaded. Ready.") | |
| # ββ Chat function ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def build_prompt(history, user_msg):
    """Build a Phi-3 prompt incorporating system message + chat history.

    Phi-3's chat template uses <|system|>, <|user|>, <|assistant|>, <|end|>.
    We collapse the system message into the first user turn for simplicity
    (the same approach used during training/eval).
    """
    parts = []
    # Embed the system msg as a preamble inside the first user turn so
    # behavior matches what the eval rubric saw during training.
    if not history:
        first_user = f"{SYSTEM_MSG}\n\nQuestion: {user_msg}"
        # Keep the template's trailing "<|assistant|>\n" intact so the
        # first-turn prompt ends exactly like the multi-turn branch below.
        parts.append(PHI3_TMPL.format(user=first_user))
    else:
        # Replay history (Gradio tuple-style: a list of [user, assistant]
        # pairs), prepending the system msg to the first user turn.
        for i, (u, a) in enumerate(history):
            if i == 0:
                u = f"{SYSTEM_MSG}\n\nQuestion: {u}"
            parts.append(f"<|user|>\n{u}<|end|>\n<|assistant|>\n{a}<|end|>")
        # Add the current turn.
        parts.append(f"<|user|>\n{user_msg}<|end|>\n<|assistant|>\n")
    return "\n".join(parts)
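
# Note: build_prompt replays the full history, so a very long chat can
# overflow N_CTX. A minimal guard (a sketch; not wired into chat_fn) would
# drop the oldest turns:
def clip_history(history, max_turns=8):
    """Keep only the most recent turns so the replayed prompt stays under
    N_CTX. The max_turns=8 default is an assumption, not a measured limit."""
    return history[-max_turns:]
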
def chat_fn(message, history):
    """Generator: yields incremental partial responses for streaming UI."""
    prompt = build_prompt(history, message)
    accum = ""
    try:
        for chunk in llm(
            prompt,
            max_tokens  = MAX_TOKENS,
            temperature = TEMPERATURE,
            top_p       = TOP_P,
            stop        = STOP_TOKENS,
            stream      = True,
        ):
            tok = chunk["choices"][0]["text"]
            accum += tok
            yield accum
    except Exception as e:
        yield f"[ERROR: {e}]"

# ── UI ────────────────────────────────────────────────────────────────────────
EXAMPLES = [
    "I cut my leg badly with an axe in the woods. Walk me through what to do.",
    "How do I find drinkable water if I'm stuck in a forest with no supplies?",
    "It's getting dark and dropping below freezing. How do I build a shelter from what's around?",
    "What edible plants are common in temperate North American forests?",
    "I need to navigate without a compass. How do I find north?",
]

DESCRIPTION = """
**SurvivalAI Pro** – a fine-tuned off-grid survival assistant, running fully on CPU inside this Space.
Built on Phi-3-mini-4k-instruct, fine-tuned on ~150,000 survival-knowledge Q/A pairs covering medical
first aid, water, food, shelter, fire, navigation, signaling, foraging, hunting, and tools.

⚠️ **Prototype – not for clinical or life-critical use.** This model can produce confident-sounding
but incorrect specifics in trap categories like exact drug dosages or precise frequencies. For
survival-skill guidance it scores well; for precise numerical specifics, double-check against an
authoritative reference.
"""

demo = gr.ChatInterface(
    fn             = chat_fn,
    title          = "🏕️ SurvivalAI Pro",
    description    = DESCRIPTION,
    examples       = EXAMPLES,
    cache_examples = False,
    theme          = gr.themes.Soft(),
)
if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)
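
# Local run (assuming the same dependencies as the Space's requirements.txt):
#   pip install gradio huggingface_hub llama-cpp-python
#   HF_TOKEN=<your token> python app.py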