# chatbot / app.py
# eleeter's picture
# Update app.py
# 860685d verified
# Raw
# History Blame Contribute Delete
# 2.52 kB
import os
import shutil
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# ── Load model onto CPU ──
MODEL_CACHE = "/tmp/smollm2_360m.gguf"

# A cache file smaller than this is treated as missing or truncated.
_MIN_MODEL_BYTES = 1_000_000


def _ensure_model_cached(cache_path=MODEL_CACHE):
    """Make sure the GGUF model file is present at ``cache_path``.

    If the file is missing or smaller than ``_MIN_MODEL_BYTES`` it is fetched
    with ``hf_hub_download`` and copied into place; otherwise the existing
    file is reused.

    Args:
        cache_path: Destination path of the cached model file.

    Returns:
        ``cache_path``, for convenience.
    """
    if os.path.exists(cache_path) and os.path.getsize(cache_path) >= _MIN_MODEL_BYTES:
        print(f"Cache hit — {os.path.getsize(cache_path)/1e6:.0f} MB")
        return cache_path

    print("Downloading model...")
    downloaded = hf_hub_download(
        repo_id="bartowski/SmolLM2-360M-Instruct-GGUF",
        filename="SmolLM2-360M-Instruct-Q4_K_M.gguf",
    )
    # Copy under a temporary name and rename afterwards: an interrupted copy
    # must never leave a truncated file at cache_path, because any file above
    # the size threshold would be accepted as a valid cache on the next start.
    partial_path = cache_path + ".part"
    shutil.copy2(downloaded, partial_path)
    os.replace(partial_path, cache_path)
    print("Model cached.")
    return cache_path


_ensure_model_cached()

print("Loading model...")
llm = Llama(
    model_path=MODEL_CACHE,
    n_ctx=512,        # context window, in tokens (prompt + completion)
    n_threads=2,
    n_batch=32,
    n_gpu_layers=0,   # nothing offloaded to a GPU: CPU-only inference
    use_mlock=False,
    verbose=False,
)
print("Model ready!")
# ── Inference ──
# Number of most recent user/assistant exchanges replayed into the prompt.
_MAX_HISTORY_EXCHANGES = 3


def _content_text(content):
    """Return the plain text of a message ``content`` value, or ``""``.

    Accepts a plain string, or a list of content parts of the form
    ``{"type": "text", "text": ...}`` (non-text parts are ignored).
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        return "".join(
            str(part.get("text") or "")
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )
    return ""


def _history_turns(history, max_exchanges=_MAX_HISTORY_EXCHANGES):
    """Flatten the chat history into ``(role, text)`` turns.

    Two history shapes are understood:

    * pair style: each entry is ``[user_text, assistant_text]``;
    * message style: each entry is ``{"role": ..., "content": ...}``.

    Only the most recent ``max_exchanges`` exchanges are kept (that is
    ``2 * max_exchanges`` entries in message style), and empty turns are
    dropped.
    """
    entries = list(history or [])
    if any(isinstance(entry, dict) for entry in entries):
        # One dict per turn, so one exchange spans two entries.
        recent = entries[-2 * max_exchanges:]
    else:
        recent = entries[-max_exchanges:]

    turns = []
    for entry in recent:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _content_text(entry.get("content"))
            if role in ("user", "assistant") and text:
                turns.append((role, text))
        elif isinstance(entry, (list, tuple)):
            user_msg = entry[0] if len(entry) > 0 else ""
            asst_msg = entry[1] if len(entry) > 1 else ""
            if user_msg:
                turns.append(("user", user_msg))
            if asst_msg:
                turns.append(("assistant", asst_msg))
    return turns


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a model reply for ``message`` as a growing string.

    Builds a ChatML prompt from the system message, the most recent history
    turns and the new user message, then streams the completion from the
    module-level ``llm``.

    Args:
        message: The new user message.
        history: Previous conversation, either as ``[user, assistant]`` pairs
            or as ``{"role", "content"}`` dicts.
        system_message: Text placed in the ChatML ``system`` turn.
        max_tokens: Upper bound on generated tokens (cast to ``int``).
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    # Build ChatML prompt
    segments = [f"<|im_start|>system\n{system_message}<|im_end|>\n"]
    segments.extend(
        f"<|im_start|>{role}\n{text}<|im_end|>\n"
        for role, text in _history_turns(history)
    )
    # The trailing open assistant turn is what the model completes.
    segments.append(
        f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    )
    prompt = "".join(segments)

    response_text = ""
    for chunk in llm(
        prompt,
        max_tokens=int(max_tokens),  # slider values may arrive as floats
        temperature=temperature,
        top_p=top_p,
        top_k=20,
        repeat_penalty=1.1,
        # Stop at the end of the assistant turn or if a new turn is opened.
        stop=["<|im_end|>", "<|im_start|>"],
        stream=True,
    ):
        response_text += chunk["choices"][0]["text"]
        yield response_text
# ── UI ──
# Layout: a title, a sidebar holding the login button and the generation
# controls, and a chat panel whose handler is `respond`.
with gr.Blocks() as demo:
    gr.Markdown("# 🛠️ Minecraft Modding Log Analyzer")

    with gr.Sidebar():
        gr.LoginButton()
        gr.Markdown("---")
        system_msg = gr.Textbox(
            value="You are an Elite Minecraft Modder who fixes Fabric and Forge crash logs.",
            label="System Prompt",
            lines=4,
        )
        # NOTE(review): the model is loaded with n_ctx=512 earlier in this
        # file, so prompt + completion cannot exceed 512 tokens; consider
        # aligning this slider's range with n_ctx.
        tokens = gr.Slider(128, 2048, value=512, label="Max Tokens")
        temp = gr.Slider(0.1, 1.0, value=0.3, label="Temp")
        top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-P")

    # The sidebar controls are passed to `respond` after (message, history),
    # in this order: system_message, max_tokens, temperature, top_p.
    gr.ChatInterface(
        respond,
        additional_inputs=[system_msg, tokens, temp, top_p],
    )


if __name__ == "__main__":
    demo.launch()