import subprocess, sys

# Pin Gradio to a known-good version; --no-deps keeps pip from touching the
# preinstalled dependency tree.
subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio==4.44.0", "--force-reinstall", "--no-deps"])
import gradio as gr
# Ensure llama-cpp-python is available: prefer a prebuilt CPU (AVX) wheel to
# avoid compiling from source, then fall back to PyPI if the wheel fails.
try:
    from llama_cpp import Llama
    print("llama-cpp-python already installed.")
except ImportError:
    print("Installing llama-cpp-python (fast CPU wheel)...")
    try:
        # The wheel targets CPython 3.10 on x86-64 Linux; on any other
        # platform it will fail and we fall through to PyPI below.
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-cache-dir",
            "https://github.com/yownas/llama-cpp-python-wheels/releases/download/v0.3.16/llama_cpp_python-0.3.16+cpuavx-cp310-cp310-linux_x86_64.whl"
        ])
        print("llama-cpp-python installed from wheel.")
    except Exception as e:
        print(f"Wheel install failed ({e}); falling back to PyPI...")
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-cache-dir",
            "llama-cpp-python==0.3.16", "--force-reinstall"
        ])
    from llama_cpp import Llama  # import now that the package is installed
from huggingface_hub import hf_hub_download
# === Model: Llama 3.2 3B Instruct, Q4_K_M GGUF (hugging-quants repo) ===
MODEL_REPO = "hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF"
MODEL_FILE = "llama-3.2-3b-instruct-q4_k_m.gguf"
print("Downloading Llama 3.2 3B Instruct (Q4_K_M)...")
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    local_dir="./models",
    local_dir_use_symlinks=False
)
print(f"Model downloaded: {model_path}")
print("Loading model into memory (20–40 sec)...")
llm = Llama(
    model_path=model_path,
    n_ctx=8192,       # context window in tokens
    n_threads=8,      # CPU threads; could also derive from os.cpu_count()
    n_batch=512,      # prompt-processing batch size
    n_gpu_layers=0,   # CPU-only inference
    verbose=False
)
print("Model loaded β€” ready to chat!")
def chat(message, history):
    """Append one user/assistant exchange to the chat history."""
    if not message.strip():
        return history, ""
    # Rebuild the full conversation in OpenAI-style chat format.
    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
        stop=["<|eot_id|>", "<|end_of_text|>"],
        stream=False
    )
    bot_response = response["choices"][0]["message"]["content"].strip()
    history.append((message, bot_response))
    return history, ""
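# Streaming variant (a sketch, not wired up by default): llama-cpp-python's
# create_chat_completion(stream=True) yields OpenAI-style chunks whose
# choices[0]["delta"] may carry a "content" piece. Binding this generator in
# place of `chat` in the click/submit handlers below would make the chatbot
# render tokens as they arrive instead of waiting for the full reply.
def chat_stream(message, history):
    if not message.strip():
        yield history, ""
        return
    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    partial = ""
    for chunk in llm.create_chat_completion(
        messages=messages, max_tokens=512, temperature=0.7, top_p=0.9,
        stop=["<|eot_id|>", "<|end_of_text|>"], stream=True,
    ):
        delta = chunk["choices"][0]["delta"]
        if delta.get("content"):
            partial += delta["content"]
            yield history + [(message, partial)], ""
    yield history + [(message, partial)], ""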
# === CSS & UI ===
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; }
.gradio-container { max-width: 1400px !important; border: 1px solid #00ff00 !important; box-shadow: 0 0 10px rgba(0,255,0,0.3) !important; }
*, h1, h2, h3, label, p { color: #00ff00 !important; }
.message { background: #1a1a1a !important; border-left: 3px solid #00ff00 !important; padding: 12px !important; }
.user { border-left-color: #00cc00 !important; }
input, textarea { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
button { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
button:hover { background: #00ff00 !important; color: #000 !important; }
.primary { background: #00ff00 !important; color: #000 !important; }
footer { display: none !important; }
"""
with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
    gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Llama 3.2 3B Ready\n> Type your query below...\n```")
    chatbot = gr.Chatbot(height=600)
    with gr.Row():
        msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
        submit = gr.Button("SEND", scale=1, variant="primary")
    gr.Examples(["What is the capital of France?", "Write a haiku about AI"], inputs=msg)
    gr.ClearButton([msg, chatbot], value="CLEAR")
    # Both the SEND button and Enter in the textbox trigger the same handler.
    submit.click(chat, [msg, chatbot], [chatbot, msg])
    msg.submit(chat, [msg, chatbot], [chatbot, msg])
if __name__ == "__main__":
    # queue() makes concurrent requests wait in line instead of hitting the
    # single CPU-bound model in parallel.
    demo.queue().launch(share=True, server_name="0.0.0.0", server_port=7860)