import gradio as gr
import subprocess
import sys

try:
    from llama_cpp import Llama
    print("llama-cpp-python already installed.")
except ImportError:
    print("Installing llama-cpp-python (fast CPU wheel)...")
    try:
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-cache-dir",
            "https://github.com/yownas/llama-cpp-python-wheels/releases/download/v0.3.16/llama_cpp_python-0.3.16+cpuavx-cp310-cp310-linux_x86_64.whl"
        ])
        print("llama-cpp-python installed from wheel.")
    except Exception as e:
        print(f"Wheel install failed ({e}) -> falling back to PyPI...")
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--no-cache-dir",
            "llama-cpp-python==0.3.16", "--force-reinstall"
        ])
    from llama_cpp import Llama  # import inside the except block, once installation has succeeded

from huggingface_hub import hf_hub_download

# === Model: public GGUF quantization of Llama 3.2 3B Instruct ===
MODEL_REPO = "QuantFactory/Meta-Llama-3.2-3B-Instruct-GGUF"
MODEL_FILE = "Meta-Llama-3.2-3B-Instruct-Q4_K_M.gguf"

print("Downloading Llama 3.2 3B Instruct (Q4_K_M)...")
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    local_dir="./models",
    local_dir_use_symlinks=False  # store a real copy (flag is ignored by newer huggingface_hub)
)
print(f"Model downloaded: {model_path}")

print("Loading model into memory (20–40 sec)...")
llm = Llama(
    model_path=model_path,
    n_ctx=8192,       # context window (tokens)
    n_threads=8,      # CPU threads used for inference
    n_batch=512,      # prompt-processing batch size
    n_gpu_layers=0,   # CPU-only: offload no layers to GPU
    verbose=False
)
print("Model loaded β€” ready to chat!")

def chat(message, history):
    """Build the prompt from the tuple-style history, query the model, and append the reply."""
    if not message.strip():
        return history, ""

    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
        stop=["<|eot_id|>", "<|end_of_text|>"],
        stream=False
    )

    bot_response = response["choices"][0]["message"]["content"].strip()
    history.append((message, bot_response))
    return history, ""
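
# A hedged sketch of a streaming variant, not wired into the UI below: the
# create_chat_completion call above sets stream=False; with stream=True,
# llama-cpp-python yields OpenAI-style chunks whose "delta" may carry new
# "content". chat_stream is a hypothetical name; the parameters mirror chat().
def chat_stream(message, history):
    if not message.strip():
        yield history, ""
        return
    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    history.append((message, ""))  # placeholder entry, filled in as tokens arrive
    partial = ""
    for chunk in llm.create_chat_completion(
        messages=messages, max_tokens=512, temperature=0.7, top_p=0.9,
        stop=["<|eot_id|>", "<|end_of_text|>"], stream=True
    ):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            history[-1] = (message, partial)  # update the last exchange in place
            yield history, ""
# To try it, pass chat_stream instead of chat to submit.click / msg.submit;
# Gradio treats generator handlers as incremental updates.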

# === CSS & UI: green-on-black terminal theme ===
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; }
.gradio-container { max-width: 1400px !important; border: 1px solid #00ff00 !important; box-shadow: 0 0 10px rgba(0,255,0,0.3) !important; }
*, h1, h2, h3, label, p { color: #00ff00 !important; }
.message { background: #1a1a1a !important; border-left: 3px solid #00ff00 !important; padding: 12px !important; }
.user { border-left-color: #00cc00 !important; }
input, textarea { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
button { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
button:hover { background: #00ff00 !important; color: #000 !important; }
.primary { background: #00ff00 !important; color: #000 !important; }
footer { display: none !important; }
"""

with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
    gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Llama 3.2 3B Ready\n> Type your query below...\n```")
    chatbot = gr.Chatbot(height=600)
    with gr.Row():
        msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
        submit = gr.Button("SEND", scale=1, variant="primary")
    gr.Examples(["What is the capital of France?", "Write a haiku about AI"], inputs=msg)
    gr.ClearButton([msg, chatbot], value="CLEAR")
    submit.click(chat, [msg, chatbot], [chatbot, msg])
    msg.submit(chat, [msg, chatbot], [chatbot, msg])

if __name__ == "__main__":
    demo.queue(max_size=20)
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True, show_error=True)