# Hugging Face Spaces page header (captured status: "Runtime error")
import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# ─── Configuration ───────────────────────────────────────────────────
MODEL_REPO = "d-e-e-k-11/llama-2-7b-chat-ggml"
MODEL_FILE = "llama-2-7b-chat.ggmlv3.q2_K.bin"
LOCAL_PATH = "/tmp/llama-model.bin"

# ─── Load Model ──────────────────────────────────────────────────────
# Global model handle. Stays None when download/load fails so chat()
# can degrade gracefully instead of the Space crashing at import time.
llm = None

print("Checking for model...")
if not os.path.exists(LOCAL_PATH):
    print(f"Downloading model from {MODEL_REPO} ...")
    try:
        cached = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
        # os.path.exists() is False for a DANGLING symlink, yet
        # os.symlink() would still raise FileExistsError on it —
        # remove any stale link before re-creating it.
        if os.path.islink(LOCAL_PATH):
            os.unlink(LOCAL_PATH)
        os.symlink(cached, LOCAL_PATH)
        print("Model downloaded via hf_hub_download.")
    except Exception as e:
        # Best-effort: log and fall through; llm stays None.
        print(f"Download failed: {e}")

if os.path.exists(LOCAL_PATH):
    print("Loading Llama-2 model into memory...")
    try:
        # NOTE(review): GGMLv3 files require llama-cpp-python <= 0.1.78;
        # newer builds only read GGUF — confirm the pinned version.
        llm = Llama(model_path=LOCAL_PATH, n_ctx=2048, n_threads=4, verbose=False)
        print("Model ready!")
    except Exception as e:
        print(f"Failed to load model: {e}")
else:
    print("Model file not found. Chatbot will return placeholder responses.")
| # βββ Chat Function βββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# ─── Chat Function ───────────────────────────────────────────────────
def chat(message, history):
    """Generate a reply from the local Llama-2 model.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns as supplied by gr.ChatInterface.

    Returns
    -------
    str
        The model's reply, or a placeholder string when the model
        is not loaded (llm is None).
    """
    if llm is None:
        return (
            "Model is still loading or unavailable. "
            "Please wait a moment and try again, or check the Space logs."
        )

    # Llama-2 chat format requires the <<SYS>> block INSIDE the first
    # [INST] ... [/INST] pair, together with that turn's user message.
    # (The original emitted an orphan "[INST] <<SYS>>...<</SYS>>" that
    # was never closed, corrupting the prompt whenever history existed.)
    sys_block = "<<SYS>>\nYou are a helpful, respectful AI assistant.\n<</SYS>>\n\n"

    # Build context from the last 5 turns (keeps prompt within n_ctx=2048).
    parts = []
    first = True
    for user_msg, bot_msg in history[-5:]:
        prefix = sys_block if first else ""
        parts.append(f"[INST] {prefix}{user_msg} [/INST] {bot_msg} </s>")
        first = False

    prefix = sys_block if first else ""
    prompt = "".join(parts) + f"[INST] {prefix}{message} [/INST]"

    output = llm(
        prompt,
        max_tokens=512,
        stop=["[/INST]", "</s>", "User:"],
        echo=False,
    )
    return output["choices"][0]["text"].strip()
# ─── Gradio UI ───────────────────────────────────────────────────────
# NOTE(review): the retry_btn / undo_btn / clear_btn keyword arguments
# were removed from gr.ChatInterface in Gradio 5; passing them raises
# TypeError at import, which shows up on Spaces as "Runtime error".
# They are dropped here (Gradio 5 provides those controls built-in) —
# confirm against the Gradio version pinned in requirements.txt.
demo = gr.ChatInterface(
    fn=chat,
    title="Llama-2-7B Chatbot",
    description=(
        "**Offline AI chatbot** powered by Llama-2-7B (GGMLv3 Q2_K quantized).\n\n"
        "Model is downloaded automatically from Hugging Face on startup (~2.7 GB). "
        "First load may take a few minutes."
    ),
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="slate",
    ),
    examples=[
        "What is machine learning?",
        "Write a Python function to reverse a string.",
        "Explain quantum computing in simple terms.",
        "What are the planets in the solar system?",
    ],
)

if __name__ == "__main__":
    demo.launch()