# chatbot / app.py — Hugging Face Space by d-e-e-k-11
# Fix: auto-download model from HF model repo at startup (commit c55e854)
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
MODEL_REPO = "d-e-e-k-11/llama-2-7b-chat-ggml"
MODEL_FILE = "llama-2-7b-chat.ggmlv3.q2_K.bin"
LOCAL_PATH = "/tmp/llama-model.bin"

# ─── Load Model ──────────────────────────────────────────────────────
# Download the quantized GGML weights from the HF Hub (if not already
# cached) and load them with llama-cpp-python.  Any failure is logged and
# leaves `llm` as None so chat() can degrade to a placeholder response.
llm = None
print("Checking for model...")

# A previously failed run can leave LOCAL_PATH as a *dangling* symlink:
# os.path.exists() follows the link and reports False, but os.symlink()
# below would then raise FileExistsError and startup would wedge forever.
# Clear any stale link before deciding whether to download.
if os.path.lexists(LOCAL_PATH) and not os.path.exists(LOCAL_PATH):
    os.remove(LOCAL_PATH)

if not os.path.exists(LOCAL_PATH):
    print(f"Downloading model from {MODEL_REPO} ...")
    try:
        cached = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
        # Symlink the HF cache entry into /tmp so the rest of the app
        # always sees one stable path regardless of cache layout.
        os.symlink(cached, LOCAL_PATH)
        print("Model downloaded via hf_hub_download.")
    except Exception as e:
        # Deliberate best-effort startup: log and fall through so the UI
        # still launches with placeholder responses.
        print(f"Download failed: {e}")

if os.path.exists(LOCAL_PATH):
    print("Loading Llama-2 model into memory...")
    try:
        llm = Llama(model_path=LOCAL_PATH, n_ctx=2048, n_threads=4, verbose=False)
        print("Model ready!")
    except Exception as e:
        print(f"Failed to load model: {e}")
else:
    print("Model file not found. Chatbot will return placeholder responses.")
# ─── Chat Function ───────────────────────────────────────────────────
def build_prompt(message, history):
    """Assemble a Llama-2-chat prompt from the conversation history.

    Follows the official Llama-2 chat template: the ``<<SYS>>`` block is
    embedded *inside* the first ``[INST] ... [/INST]`` pair (the original
    code opened an ``[INST]`` for the system block, never closed it, and
    immediately opened a second one — a malformed prompt), and every
    completed turn is terminated with ``</s>``.  Only the last 5 turns are
    kept to stay within the 2048-token context window.

    Args:
        message: The new user message.
        history: List of ``(user_msg, bot_msg)`` pairs from Gradio.

    Returns:
        The fully formatted prompt string.
    """
    system = "You are a helpful, respectful AI assistant."
    sys_block = f"<<SYS>>\n{system}\n<</SYS>>\n\n"
    # Past turns plus the pending one; None marks "no reply yet".
    turns = list(history[-5:]) + [(message, None)]
    parts = []
    for i, (user_msg, bot_msg) in enumerate(turns):
        # The system prompt is prepended only to the very first user turn.
        user_text = f"{sys_block}{user_msg}" if i == 0 else user_msg
        if bot_msg is None:
            parts.append(f"[INST] {user_text} [/INST]")
        else:
            parts.append(f"[INST] {user_text} [/INST] {bot_msg} </s>")
    return "".join(parts)


def chat(message, history):
    """Gradio chat callback: generate a reply with the loaded Llama model.

    Args:
        message: The user's new message.
        history: List of ``(user_msg, bot_msg)`` pairs maintained by Gradio.

    Returns:
        The model's reply, or a placeholder string while/if the model
        failed to load (``llm`` is None).
    """
    if llm is None:
        return (
            "Model is still loading or unavailable. "
            "Please wait a moment and try again, or check the Space logs."
        )
    prompt = build_prompt(message, history)
    output = llm(
        prompt,
        max_tokens=512,
        stop=["[/INST]", "</s>", "User:"],
        echo=False,
    )
    return output["choices"][0]["text"].strip()
# ─── Gradio UI ───────────────────────────────────────────────────────
# Markdown blurb shown under the title in the chat UI.
_CHAT_DESCRIPTION = (
    "**Offline AI chatbot** powered by Llama-2-7B (GGMLv3 Q2_K quantized).\n\n"
    "Model is downloaded automatically from Hugging Face on startup (~2.7 GB). "
    "First load may take a few minutes."
)

# Clickable starter prompts displayed below the input box.
_EXAMPLE_PROMPTS = [
    "What is machine learning?",
    "Write a Python function to reverse a string.",
    "Explain quantum computing in simple terms.",
    "What are the planets in the solar system?",
]

# Chat UI wired to chat(), soft blue/slate theme, stock retry/undo/clear
# controls with explicit labels.
demo = gr.ChatInterface(
    chat,
    title="Llama-2-7B Chatbot",
    description=_CHAT_DESCRIPTION,
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate"),
    examples=_EXAMPLE_PROMPTS,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
)

if __name__ == "__main__":
    demo.launch()