# NOTE(review): the lines that were here ("Spaces:", "Build error", file size,
# commit hashes, and a line-number gutter) were Hugging Face page chrome picked
# up by the scrape, not program text; kept only as this comment so the file parses.
import subprocess, sys

# Pin Gradio to a known-good release before importing it; the Spaces base image
# may ship a different version.  --no-deps keeps the forced reinstall fast.
subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio==4.44.0", "--force-reinstall", "--no-deps"])
import gradio as gr

# llama-cpp-python: prefer a prebuilt CPU (AVX) wheel so we skip the slow
# source build; fall back to a plain PyPI install if the wheel URL fails.
try:
    from llama_cpp import Llama
    print("llama-cpp-python already installed.")
except ImportError:
    print("Installing llama-cpp-python (fast CPU wheel)...")
    try:
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-cache-dir",
            "https://github.com/yownas/llama-cpp-python-wheels/releases/download/v0.3.16/llama_cpp_python-0.3.16+cpuavx-cp310-cp310-linux_x86_64.whl"
        ])
        print("llama-cpp-python installed from wheel.")
    except Exception as e:
        # Surface the failure reason (was discarded) and fix the garbled arrow glyph.
        print(f"Wheel failed ({e}) -> falling back to PyPI...")
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-cache-dir",
            "llama-cpp-python==0.3.16", "--force-reinstall"
        ])
    # Re-import now that one of the install paths has succeeded.
    from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# === Model: quantized Llama 3.2 3B Instruct (Q4_K_M GGUF) ===
MODEL_REPO = "hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF"
MODEL_FILE = "llama-3.2-3b-instruct-q4_k_m.gguf"

print("Downloading Llama 3.2 3B Instruct (Q4_K_M)...")
# NOTE: local_dir_use_symlinks was removed — it is deprecated and ignored by
# current huggingface_hub releases; local_dir alone yields a real file copy.
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    local_dir="./models",
)
print(f"Model downloaded: {model_path}")

print("Loading model into memory (20-40 sec)...")
llm = Llama(
    model_path=model_path,
    n_ctx=8192,        # context window (tokens)
    n_threads=8,       # CPU threads
    n_batch=512,       # prompt-eval batch size
    n_gpu_layers=0,    # CPU-only inference
    verbose=False,
)
print("Model loaded -- ready to chat!")
def chat(message, history):
    """Produce one assistant turn for the Gradio chatbot.

    Args:
        message: the user's new message (str).
        history: list of (user, assistant) tuples, as kept by gr.Chatbot.

    Returns:
        (updated_history, ""): the empty string clears the input textbox.
    """
    # Ignore empty / whitespace-only submissions.
    if not message.strip():
        return history, ""

    # Rebuild the full conversation in OpenAI-style chat-message format.
    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
        stop=["<|eot_id|>", "<|end_of_text|>"],  # Llama 3 end-of-turn markers
        stream=False,
    )
    bot_response = response["choices"][0]["message"]["content"].strip()
    history.append((message, bot_response))
    return history, ""
# === CSS: green-on-black "terminal" look ===
# Monospace fonts, green borders/text everywhere, and the Gradio footer hidden.
# !important is used throughout to override Gradio's built-in theme styles.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; }
.gradio-container { max-width: 1400px !important; border: 1px solid #00ff00 !important; box-shadow: 0 0 10px rgba(0,255,0,0.3) !important; }
*, h1, h2, h3, label, p { color: #00ff00 !important; }
.message { background: #1a1a1a !important; border-left: 3px solid #00ff00 !important; padding: 12px !important; }
.user { border-left-color: #00cc00 !important; }
input, textarea { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
button { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
button:hover { background: #00ff00 !important; color: #000 !important; }
.primary { background: #00ff00 !important; color: #000 !important; }
footer { display: none !important; }
"""
# === UI layout: terminal-styled chat interface ===
with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
    gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Llama 3.2 3B Ready\n> Type your query below...\n```")
    chatbot = gr.Chatbot(height=600)
    with gr.Row():
        msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
        submit = gr.Button("SEND", scale=1, variant="primary")
    gr.Examples(["What is the capital of France?", "Write a haiku about AI"], inputs=msg)
    gr.ClearButton([msg, chatbot], value="CLEAR")
    # Both the SEND button and pressing Enter in the textbox route through chat();
    # chat() returns (history, "") so the textbox is cleared after each turn.
    submit.click(chat, [msg, chatbot], [chatbot, msg])
    msg.submit(chat, [msg, chatbot], [chatbot, msg])
if __name__ == "__main__":
    # queue() serializes requests so the single CPU-bound model isn't re-entered;
    # 0.0.0.0:7860 is the standard Spaces bind address.  (A stray trailing "|"
    # scrape artifact was removed here — it was a syntax error.)
    demo.queue().launch(share=True, server_name="0.0.0.0", server_port=7860)