import gradio as gr
import subprocess
import sys

# Install llama-cpp-python at runtime if it is missing (CPU-only wheel first,
# PyPI source build as a fallback).
try:
    from llama_cpp import Llama
    print("llama-cpp-python already installed.")
except ImportError:
    print("Installing llama-cpp-python (fast CPU wheel)...")
    try:
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-cache-dir",
            "https://github.com/yownas/llama-cpp-python-wheels/releases/download/v0.3.16/llama_cpp_python-0.3.16+cpuavx-cp310-cp310-linux_x86_64.whl"
        ])
        print("llama-cpp-python installed from wheel.")
    except Exception:
        print("Wheel failed → falling back to PyPI (builds from source, slower)...")
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-cache-dir",
            "llama-cpp-python==0.3.16", "--force-reinstall"
        ])
    from llama_cpp import Llama  # re-import now that the package is installed
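# Optional sanity check (a sketch; assumes the import above succeeded):
# import llama_cpp
# print("llama_cpp version:", llama_cpp.__version__)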
from huggingface_hub import hf_hub_download

# === WORKING PUBLIC MODEL ===
MODEL_REPO = "QuantFactory/Meta-Llama-3.2-3B-Instruct-GGUF"
MODEL_FILE = "Meta-Llama-3.2-3B-Instruct-Q4_K_M.gguf"

print("Downloading Llama 3.2 3B Instruct (Q4_K_M)...")
# local_dir_use_symlinks is deprecated in recent huggingface_hub and no
# longer needed; a plain local_dir download suffices.
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    local_dir="./models",
)
print(f"Model downloaded: {model_path}")
print("Loading model into memory (20β40 sec)...")
llm = Llama(
model_path=model_path,
n_ctx=8192,
n_threads=8,
n_batch=512,
n_gpu_layers=0,
verbose=False
)
print("Model loaded β ready to chat!")
def chat(message, history):
    """Run one turn of the conversation and append it to the tuple-style history."""
    if not message.strip():
        return history, ""
    # Rebuild the full conversation so the chat template sees every turn.
    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
        stop=["<|eot_id|>", "<|end_of_text|>"],
        stream=False
    )
    bot_response = response["choices"][0]["message"]["content"].strip()
    history.append((message, bot_response))
    return history, ""
# === CSS & UI (perfect) ===
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; }
.gradio-container { max-width: 1400px !important; border: 1px solid #00ff00 !important; box-shadow: 0 0 10px rgba(0,255,0,0.3) !important; }
*, h1, h2, h3, label, p { color: #00ff00 !important; }
.message { background: #1a1a1a !important; border-left: 3px solid #00ff00 !important; padding: 12px !important; }
.user { border-left-color: #00cc00 !important; }
input, textarea { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
button { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
button:hover { background: #00ff00 !important; color: #000 !important; }
.primary { background: #00ff00 !important; color: #000 !important; }
footer { display: none !important; }
"""
with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
    gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Llama 3.2 3B Ready\n> Type your query below...\n```")
    chatbot = gr.Chatbot(height=600)  # tuple-style history, matching chat() above
    with gr.Row():
        msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
        submit = gr.Button("SEND", scale=1, variant="primary")
    gr.Examples(["What is the capital of France?", "Write a haiku about AI"], inputs=msg)
    gr.ClearButton([msg, chatbot], value="CLEAR")
    submit.click(chat, [msg, chatbot], [chatbot, msg])
    msg.submit(chat, [msg, chatbot], [chatbot, msg])
if __name__ == "__main__":
    demo.queue(max_size=20)
    # share=True is ignored on Hugging Face Spaces (the app is already public)
    # but is harmless when running locally.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True, show_error=True)