"""Gradio chat UI that streams completions from a local SmolLM2 GGUF model.

On start-up the script makes sure a quantized SmolLM2-360M-Instruct model is
present at ``MODEL_CACHE`` (downloading it from the Hugging Face Hub when it
is missing), loads it on CPU with llama-cpp-python, and builds a Gradio
``Blocks`` app whose chat callback is :func:`respond`.
"""

import os
import shutil

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# ── Load model onto CPU ──
MODEL_CACHE = "/tmp/smollm2_360m.gguf"

# A cached file smaller than this is treated as missing/corrupt and re-fetched.
MIN_MODEL_BYTES = 1_000_000

# Number of past user/assistant exchanges replayed into the prompt.
HISTORY_EXCHANGES = 3


def _ensure_model_cached(cache_path):
    """Make sure the GGUF model file exists at *cache_path*.

    When the file is absent or smaller than ``MIN_MODEL_BYTES`` it is
    downloaded from the Hugging Face Hub and copied into place. The copy goes
    to a temporary sibling file first and is then renamed, so an interrupted
    copy cannot leave a truncated file at *cache_path* that would later pass
    the size check.
    """
    if (
        os.path.exists(cache_path)
        and os.path.getsize(cache_path) >= MIN_MODEL_BYTES
    ):
        print(f"Cache hit — {os.path.getsize(cache_path)/1e6:.0f} MB")
        return

    print("Downloading model...")
    downloaded = hf_hub_download(
        repo_id="bartowski/SmolLM2-360M-Instruct-GGUF",
        filename="SmolLM2-360M-Instruct-Q4_K_M.gguf",
    )
    partial_path = f"{cache_path}.part"
    shutil.copy2(downloaded, partial_path)
    # os.replace is atomic when source and destination share a filesystem.
    os.replace(partial_path, cache_path)
    print("Model cached.")


_ensure_model_cached(MODEL_CACHE)

print("Loading model...")
llm = Llama(
    model_path=MODEL_CACHE,
    n_ctx=512,
    n_threads=2,
    n_batch=32,
    n_gpu_layers=0,
    use_mlock=False,
    verbose=False,
)
print("Model ready!")


# ── Inference ──
def _content_to_text(content):
    """Return the plain-text portion of a messages-style ``content`` value.

    A string is returned unchanged. For a list, the ``"text"`` values of its
    dict items are joined with newlines. Anything else yields ``""``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # NOTE(review): assumes list content holds dicts with a "text" key
        # for textual parts — confirm against the installed Gradio version.
        return "\n".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    return ""


def _recent_turns(history):
    """Return the most recent history as a list of ``(role, text)`` turns.

    Two history shapes are understood:

    * pair style – each item is a ``[user, assistant]`` list/tuple; the last
      ``HISTORY_EXCHANGES`` items are used.
    * messages style – each item is a dict with ``"role"`` and ``"content"``;
      the last ``2 * HISTORY_EXCHANGES`` items are used so the window covers
      the same number of exchanges.

    Items of any other shape, and empty messages, are skipped.
    """
    history = history or []
    turns = []

    if history and isinstance(history[0], dict):
        for item in history[-2 * HISTORY_EXCHANGES:]:
            if not isinstance(item, dict):
                continue
            role = item.get("role")
            text = _content_to_text(item.get("content"))
            if role in ("user", "assistant") and text:
                turns.append((role, text))
        return turns

    for exchange in history[-HISTORY_EXCHANGES:]:
        # Skip malformed entries instead of failing on a short pair.
        if not isinstance(exchange, (list, tuple)) or len(exchange) < 2:
            continue
        user_msg, asst_msg = exchange[0], exchange[1]
        if user_msg:
            turns.append(("user", user_msg))
        if asst_msg:
            turns.append(("assistant", asst_msg))
    return turns


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream the assistant's reply to *message*.

    Builds a ChatML prompt from the system message, the recent chat history
    and the new user message, then runs the model in streaming mode.

    Args:
        message: The new user message.
        history: Prior conversation, in pair style or messages style (see
            ``_recent_turns``). May be empty or ``None``.
        system_message: Text placed in the ChatML ``system`` turn.
        max_tokens: Upper bound on generated tokens; converted to ``int``.
        temperature: Sampling temperature passed to the model.
        top_p: Nucleus-sampling threshold passed to the model.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    # Build ChatML prompt
    parts = [f"<|im_start|>system\n{system_message}<|im_end|>\n"]
    parts.extend(
        f"<|im_start|>{role}\n{text}<|im_end|>\n"
        for role, text in _recent_turns(history)
    )
    parts.append(
        f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    )
    prompt = "".join(parts)

    # NOTE(review): the UI slider allows up to 2048 tokens while the model is
    # loaded with n_ctx=512, and prompt plus completion must fit in that
    # window — consider aligning the two.
    response_text = ""
    for chunk in llm(
        prompt,
        max_tokens=int(max_tokens),  # slider values may arrive as floats
        temperature=temperature,
        top_p=top_p,
        top_k=20,
        repeat_penalty=1.1,
        stop=["<|im_end|>", "<|im_start|>"],
        stream=True,
    ):
        response_text += chunk["choices"][0]["text"]
        yield response_text


# ── UI ──
with gr.Blocks() as demo:
    gr.Markdown("# 🛠️ Minecraft Modding Log Analyzer")

    # Generation settings live in the sidebar and are forwarded to respond()
    # through ChatInterface's additional_inputs.
    with gr.Sidebar():
        gr.LoginButton()
        gr.Markdown("---")
        system_msg = gr.Textbox(
            value="You are an Elite Minecraft Modder who fixes Fabric and Forge crash logs.",
            label="System Prompt",
            lines=4,
        )
        tokens = gr.Slider(128, 2048, value=512, label="Max Tokens")
        temp = gr.Slider(0.1, 1.0, value=0.3, label="Temp")
        top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-P")

    gr.ChatInterface(
        respond,
        additional_inputs=[system_msg, tokens, temp, top_p],
    )

if __name__ == "__main__":
    demo.launch()