"""Gradio chat UI for a local Mistral-7B-Instruct GGUF model via llama-cpp-python."""

import os
from pathlib import Path

import gradio as gr
import requests
from llama_cpp import Llama

MODEL_URL = (
    "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
    "/resolve/main/mistral-7b-instruct-v0.1.Q4_0.gguf"
)
MODEL_PATH = "mistral-7b.Q4_0.gguf"


def download_model():
    """Download the GGUF model file if it is not already present.

    Streams into a ".part" temp file and renames on success, so an
    interrupted download never leaves a truncated model that would be
    mistaken for a complete one on the next run.
    """
    target = Path(MODEL_PATH)
    if target.exists():
        return
    print("Downloading model...")
    tmp = target.with_suffix(target.suffix + ".part")
    with requests.get(MODEL_URL, stream=True, timeout=60) as r:
        # BUG FIX: without this, a 4xx/5xx response body (an HTML error
        # page) would be silently saved as the model file.
        r.raise_for_status()
        with open(tmp, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    tmp.rename(target)
    print("Model ready.")


download_model()

# Load the model once at startup; this single instance serves every request.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,
    n_batch=128,
    n_threads=os.cpu_count(),
    use_mlock=True,
    use_mmap=True,
    chat_format="mistral-instruct",
)


def format_chat(chat_history):
    """Convert [(user, assistant), ...] history pairs to chat-completion messages."""
    messages = []
    for user_text, assistant_text in chat_history:
        messages.append({"role": "user", "content": user_text})
        messages.append({"role": "assistant", "content": assistant_text})
    return messages


def respond(message, chat_history):
    """Generate a reply for *message* and append the turn to the history.

    Returns (cleared_textbox_value, chatbot_value, state_value) — three
    values, matching the three output components wired to msg.submit below.
    """
    messages = format_chat(chat_history)
    messages.append({"role": "user", "content": message})
    output = llm.create_chat_completion(
        messages,
        max_tokens=300,
        temperature=0.7,
        # BUG FIX: the original stop list began with an empty string, which is
        # not a usable stop sequence (almost certainly a stripped "</s>").
        stop=["</s>", "<|endoftext|>"],
    )
    response = output["choices"][0]["message"]["content"]
    chat_history.append((message, response))
    # BUG FIX: msg.submit declares three outputs ([msg, chatbot, state]) but
    # the original returned only two values, which errors at runtime in Gradio.
    return "", chat_history, chat_history


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## Mistral-7B-Instruct Chat (Aria CPU)")
    # NOTE(review): the original passed emoji strings as avatar_images, but
    # Gradio expects image file paths/URLs there — omitted to avoid broken
    # avatar rendering. Restore with real image paths if avatars are wanted.
    chatbot = gr.Chatbot(show_label=False)
    msg = gr.Textbox(placeholder="Ask something smart...", label="You")
    clear = gr.Button("Clear")
    state = gr.State([])

    msg.submit(respond, [msg, state], [msg, chatbot, state])
    clear.click(lambda: ([], []), None, [chatbot, state])

demo.launch()