# mistral-chat / app.py
# (Hugging Face Space file header: R-TA's "Update app.py", commit d9de866 verified)
import os
import gradio as gr
from pathlib import Path
from llama_cpp import Llama
import requests
# Quantized (Q4_0) Mistral-7B-Instruct v0.1 in GGUF format, hosted by TheBloke.
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_0.gguf"
# Local filename the model is downloaded to (relative to the app's working directory).
MODEL_PATH = "mistral-7b.Q4_0.gguf"
# πŸ” Download if needed
def download_model():
    """Download the GGUF model to MODEL_PATH if it is not already on disk.

    Streams the response to a temporary ``.part`` file and renames it into
    place only after the download completes, so an interrupted or failed
    download cannot leave a truncated file that the ``exists()`` check
    would later mistake for a complete model.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
    """
    if Path(MODEL_PATH).exists():
        return
    print("⬇️ Downloading model...")
    tmp_path = MODEL_PATH + ".part"
    with requests.get(MODEL_URL, stream=True, timeout=60) as r:
        # Bug fix: without this, a 4xx/5xx response body (an HTML error
        # page) was silently written to disk as the "model".
        r.raise_for_status()
        with open(tmp_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    os.replace(tmp_path, MODEL_PATH)  # atomic rename into the final path
    print("βœ… Model ready.")
# Ensure the model file is on disk before constructing the handle below
# (no-op when MODEL_PATH already exists).
download_model()
# πŸš€ Load LLM
# CPU-only inference: thread count matches the machine's core count.
# NOTE(review): mlock/mmap flags are passed straight through to llama.cpp —
# use_mlock may fail on hosts with a low RLIMIT_MEMLOCK; confirm on the target Space.
llm = Llama(
model_path=MODEL_PATH,
n_ctx=4096,
n_batch=128,
n_threads=os.cpu_count(),
use_mlock=True,
use_mmap=True,
chat_format="mistral-instruct"
)
# πŸ’¬ Smart prompt formatting
def format_chat(chat_history):
    """Flatten (question, answer) pairs into an OpenAI-style message list.

    Each history entry becomes two dicts: a ``user`` message followed by
    an ``assistant`` message, preserving conversation order.
    """
    return [
        {"role": role, "content": text}
        for question, answer in chat_history
        for role, text in (("user", question), ("assistant", answer))
    ]
# πŸ”„ Main logic
def respond(message, chat_history):
    """Generate a model reply for *message* and append it to the history.

    Args:
        message: the user's new input from the textbox.
        chat_history: list of (user, assistant) pairs (mutated in place).

    Returns:
        A 3-tuple matching the outputs wired in ``msg.submit``
        ([msg, chatbot, state]): the cleared textbox value, the updated
        chat pairs for display, and the same list for the state store.
    """
    messages = format_chat(chat_history)
    messages.append({"role": "user", "content": message})
    output = llm.create_chat_completion(
        messages,
        max_tokens=300,
        temperature=0.7,
        stop=["</s>", "<|endoftext|>"]
    )
    response = output["choices"][0]["message"]["content"]
    chat_history.append((message, response))
    # Bug fix: the submit wiring targets three outputs [msg, chatbot, state],
    # but this function previously returned only two values, which makes
    # Gradio raise at runtime. Return the history for both chatbot and state.
    return "", chat_history, chat_history
# πŸ–ΌοΈ Custom UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "<h1 style='text-align: center;'>πŸ€– Mistral-7B-Instruct Chat (Aria CPU)</h1>"
    )
    # NOTE(review): Gradio documents avatar_images as image filepaths/URLs;
    # emoji strings may not render as avatars — confirm on the installed version.
    chatbot = gr.Chatbot(show_label=False, avatar_images=("πŸ€–", "πŸ§‘β€πŸ’»"))
    msg = gr.Textbox(placeholder="Ask something smart...", label="You")
    clear = gr.Button("🧹 Clear")

    # Conversation history as (user, assistant) pairs, shared across events.
    state = gr.State([])

    # Enter in the textbox sends the message through respond().
    msg.submit(respond, [msg, state], [msg, chatbot, state])
    # Clear resets both the visible chat and the stored history.
    clear.click(lambda: ([], []), None, [chatbot, state])

demo.launch()