import subprocess, sys

# Runtime pin of Gradio before importing it (see the requirements.txt note
# below for the usual Spaces alternative).
subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio==4.44.0", "--force-reinstall", "--no-deps"])

import gradio as gr

try:
    from llama_cpp import Llama
    print("llama-cpp-python already installed.")
except ImportError:
    print("Installing llama-cpp-python (fast CPU wheel)...")
    try:
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-cache-dir",
            "https://github.com/yownas/llama-cpp-python-wheels/releases/download/v0.3.16/llama_cpp_python-0.3.16+cpuavx-cp310-cp310-linux_x86_64.whl"
        ])
        print("llama-cpp-python installed from wheel.")
    except Exception as e:
        print(f"Wheel install failed ({e}); falling back to PyPI...")
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-cache-dir",
            "llama-cpp-python==0.3.16", "--force-reinstall"
        ])
    from llama_cpp import Llama  # indented inside the except so it runs only after the install
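# --- Sketch (assumption, not part of the original app): on Spaces the usual
# way to avoid runtime pip installs like the ones above is to declare pins in
# a requirements.txt next to app.py, e.g.:
#     gradio==4.44.0
#     llama-cpp-python==0.3.16
#     huggingface_hub
# The builder installs these before app.py starts, so a bad wheel URL fails
# in the build log instead of crashing the running app.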
from huggingface_hub import hf_hub_download

# === Model: known-good GGUF build (hugging-quants repo) ===
MODEL_REPO = "hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF"
MODEL_FILE = "llama-3.2-3b-instruct-q4_k_m.gguf"

print("Downloading Llama 3.2 3B Instruct (Q4_K_M)...")
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    local_dir="./models",
    local_dir_use_symlinks=False
)
print(f"Model downloaded: {model_path}")
| print("Loading model into memory (20β40 sec)...") | |
| llm = Llama( | |
| model_path=model_path, | |
| n_ctx=8192, | |
| n_threads=8, | |
| n_batch=512, | |
| n_gpu_layers=0, | |
| verbose=False | |
| ) | |
| print("Model loaded β ready to chat!") | |
def chat(message, history):
    """Append one user turn, run a non-streaming completion, return updated history."""
    if not message.strip():
        return history, ""
    # Rebuild the full conversation in OpenAI chat format from Gradio's
    # (user, bot) tuple history.
    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
        stop=["<|eot_id|>", "<|end_of_text|>"],
        stream=False
    )
    bot_response = response["choices"][0]["message"]["content"].strip()
    history.append((message, bot_response))
    return history, ""
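# Optional sketch (assumption, not wired into the UI below): llama-cpp-python
# also supports stream=True, yielding OpenAI-style delta chunks. Gradio
# accepts a generator as an event handler, so swapping chat_stream for chat
# in the click/submit handlers gives token-by-token output.
def chat_stream(message, history):
    if not message.strip():
        yield history, ""
        return
    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    partial = ""
    history = history + [(message, partial)]
    for chunk in llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
        stop=["<|eot_id|>", "<|end_of_text|>"],
        stream=True,
    ):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            history[-1] = (message, partial)
            yield history, ""
    yield history, ""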
| # === CSS & UI (perfect) === | |
| custom_css = """ | |
| @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap'); | |
| body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; } | |
| .gradio-container { max-width: 1400px !important; border: 1px solid #00ff00 !important; box-shadow: 0 0 10px rgba(0,255,0,0.3) !important; } | |
| *, h1, h2, h3, label, p { color: #00ff00 !important; } | |
| .message { background: #1a1a1a !important; border-left: 3px solid #00ff00 !important; padding: 12px !important; } | |
| .user { border-left-color: #00cc00 !important; } | |
| input, textarea { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; } | |
| button { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; } | |
| button:hover { background: #00ff00 !important; color: #000 !important; } | |
| .primary { background: #00ff00 !important; color: #000 !important; } | |
| footer { display: none !important; } | |
| """ | |
with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
    gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Llama 3.2 3B Ready\n> Type your query below...\n```")
    chatbot = gr.Chatbot(height=600)
    with gr.Row():
        msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
        submit = gr.Button("SEND", scale=1, variant="primary")
    gr.Examples(["What is the capital of France?", "Write a haiku about AI"], inputs=msg)
    gr.ClearButton([msg, chatbot], value="CLEAR")
    # Wire both the SEND button and Enter-in-textbox to the same handler.
    submit.click(chat, [msg, chatbot], [chatbot, msg])
    msg.submit(chat, [msg, chatbot], [chatbot, msg])
if __name__ == "__main__":
    # share=True only matters when running this file locally; on Spaces
    # Gradio logs a warning and ignores it.
    demo.queue().launch(share=True, server_name="0.0.0.0", server_port=7860)