| import os |
| import shutil |
| import gradio as gr |
| from llama_cpp import Llama |
| from huggingface_hub import hf_hub_download |
|
|
| |
# Local path where the GGUF weights are kept so later starts can skip the
# download step.
MODEL_CACHE = "/tmp/smollm2_360m.gguf"

# A cached file smaller than 1 MB is treated as missing or truncated.
try:
    _cached_bytes = os.path.getsize(MODEL_CACHE)
except OSError:
    # Covers "file does not exist" as well as unreadable paths.
    _cached_bytes = 0

if _cached_bytes < 1_000_000:
    print("Downloading model...")
    downloaded = hf_hub_download(
        repo_id="bartowski/SmolLM2-360M-Instruct-GGUF",
        filename="SmolLM2-360M-Instruct-Q4_K_M.gguf",
    )
    # Copy to a sibling temp file first and rename afterwards: if the copy is
    # interrupted, MODEL_CACHE is never left holding a partial file that is
    # large enough to pass the size check on the next start.
    _partial_path = f"{MODEL_CACHE}.part"
    shutil.copy2(downloaded, _partial_path)
    os.replace(_partial_path, MODEL_CACHE)
    print("Model cached.")
else:
    print(f"Cache hit — {_cached_bytes/1e6:.0f} MB")
|
|
print("Loading model...")

# Keyword arguments handed verbatim to the Llama constructor; collected in
# one mapping so the runtime configuration can be read at a glance.
_llama_settings = {
    "model_path": MODEL_CACHE,
    "n_ctx": 512,
    "n_threads": 2,
    "n_batch": 32,
    "n_gpu_layers": 0,
    "use_mlock": False,
    "verbose": False,
}

# Module-level model instance used by the chat callback.
llm = Llama(**_llama_settings)

print("Model ready!")
|
|
| |
def _content_text(content):
    """Flatten one chat message's content into plain text ("" when empty)."""
    if not content:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        # Structured content part; only its "text" field is used, if present.
        return str(content.get("text") or "")
    if isinstance(content, list):
        return "".join(_content_text(part) for part in content)
    return str(content)


def _history_turns(history, max_exchanges=3):
    """Return the most recent non-empty turns as (role, text) pairs.

    Accepts both history shapes: a list of [user, assistant] pairs, or a list
    of {"role": ..., "content": ...} dicts.
    """
    entries = list(history or [])
    if any(isinstance(entry, dict) for entry in entries):
        # One dict per message, so an exchange spans roughly two entries.
        recent = entries[-2 * max_exchanges:]
    else:
        recent = entries[-max_exchanges:]

    turns = []
    for entry in recent:
        if isinstance(entry, dict):
            role = entry.get("role")
            if role in ("user", "assistant"):
                turns.append((role, _content_text(entry.get("content"))))
        elif isinstance(entry, (list, tuple)):
            # Pair layout: position 0 is the user text, position 1 the reply.
            turns.extend(
                zip(("user", "assistant"), (_content_text(part) for part in entry))
            )
    return [(role, text) for role, text in turns if text]


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a reply to ``message`` from the module-level ``llm``.

    Builds a prompt delimited by ``<|im_start|>`` / ``<|im_end|>`` markers
    from the system message, the last few history turns and the new user
    message, then yields the accumulated response text after every streamed
    chunk.

    Args:
        message: The new user message.
        history: Prior conversation, either [user, assistant] pairs or
            role/content dicts; ``None`` or empty means no prior turns.
        system_message: Text placed in the system turn of the prompt.
        max_tokens: Upper bound on generated tokens (converted to ``int``).
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling value forwarded to the model.

    Yields:
        The response text generated so far, growing with each chunk.
    """
    parts = [f"<|im_start|>system\n{system_message}<|im_end|>\n"]
    parts.extend(
        f"<|im_start|>{role}\n{text}<|im_end|>\n"
        for role, text in _history_turns(history)
    )
    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    prompt = "".join(parts)

    response_text = ""
    for chunk in llm(
        prompt,
        # UI sliders may hand over a float; the token limit must be integral.
        max_tokens=int(max_tokens),
        temperature=temperature,
        top_p=top_p,
        top_k=20,
        repeat_penalty=1.1,
        # Stop as soon as the model starts emitting another turn marker.
        stop=["<|im_end|>", "<|im_start|>"],
        stream=True,
    ):
        response_text += chunk["choices"][0]["text"]
        yield response_text
|
|
| |
# UI layout: a heading, a sidebar holding the login button and the generation
# controls, and a chat interface wired to `respond`.
with gr.Blocks() as demo:
    gr.Markdown("# 🛠️ Minecraft Modding Log Analyzer")

    with gr.Sidebar():
        gr.LoginButton()
        gr.Markdown("---")
        system_msg = gr.Textbox(
            value="You are an Elite Minecraft Modder who fixes Fabric and Forge crash logs.",
            label="System Prompt",
            lines=4,
        )
        # NOTE(review): the model is loaded with n_ctx=512, so slider values
        # above that presumably cannot be honoured in full — consider lowering
        # the maximum or raising n_ctx.
        tokens = gr.Slider(128, 2048, value=512, label="Max Tokens")
        temp = gr.Slider(0.1, 1.0, value=0.3, label="Temp")
        top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-P")

    # The sidebar controls are passed to `respond` after (message, history),
    # in the order listed here.
    gr.ChatInterface(
        respond,
        additional_inputs=[system_msg, tokens, temp, top_p],
    )


if __name__ == "__main__":
    demo.launch()