jsn / app.py
drdudddd's picture
Update app.py
58298f5 verified
Raw
History Blame Contribute Delete
1.94 kB
import os
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Hugging Face Hub repository and GGUF file holding the quantized model weights.
MODEL_REPO = "Qwen/Qwen3-4B-Instruct-GGUF"
MODEL_FILE = "Qwen3-4B-Instruct-Q4_K_M.gguf"

print("Loading model...")

# Fetch the weights from the Hub and get the local file path to load from.
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
)

# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to 1 so the subtraction below cannot raise TypeError.
_cpu_total = os.cpu_count() or 1

# Single module-level model instance used by the chat handler.
llm = Llama(
    model_path=model_path,
    n_ctx=1024,
    n_batch=512,
    # Leave one core free for the web server, but never request fewer than 1.
    n_threads=max(1, _cpu_total - 1),
    verbose=False,
)
# System prompts keyed by persona name; the keys populate the persona
# dropdown and the selected value is sent as the system message.
PERSONAS = dict(
    Assistant="You are a helpful AI assistant.",
    Programmer="You are an expert software engineer.",
    Writer="You are a creative writer.",
    Teacher="You explain concepts clearly and step-by-step.",
)
# Tokens left free for the model's reply when older history is trimmed.
_REPLY_RESERVE = 256
# Rough per-message allowance for the role markers a chat template adds.
# NOTE(review): this is an estimate, not the exact template overhead.
_MESSAGE_OVERHEAD = 8


def _history_to_messages(history):
    """Flatten chat history into a list of ``{"role", "content"}`` dicts.

    Accepts entries that are either ``(user, assistant)`` pairs or dicts that
    already carry ``role`` and ``content`` keys. Entries with empty or
    non-string content are skipped.
    """
    flattened = []
    for entry in history or []:
        if isinstance(entry, dict):
            turns = [(entry.get("role"), entry.get("content"))]
        else:
            user_text, assistant_text = entry
            turns = [("user", user_text), ("assistant", assistant_text)]
        for role, content in turns:
            if role not in ("user", "assistant"):
                continue
            if isinstance(content, str) and content:
                flattened.append({"role": role, "content": content})
    return flattened


def _message_cost(text):
    """Estimate how many context tokens one message with *text* consumes."""
    token_count = len(llm.tokenize(text.encode("utf-8"), add_bos=False))
    return token_count + _MESSAGE_OVERHEAD


def _trim_history(turns, used):
    """Keep the most recent *turns* that fit in the remaining prompt budget.

    *used* is the estimated token cost already committed (system prompt plus
    the new user message). Older turns are dropped first.
    """
    budget = llm.n_ctx() - _REPLY_RESERVE
    kept = []
    for turn in reversed(turns):
        used += _message_cost(turn["content"])
        if used > budget:
            break
        kept.append(turn)
    kept.reverse()
    # Do not start the retained history with a reply whose question was cut.
    if kept and kept[0]["role"] == "assistant":
        del kept[0]
    return kept


def chat(message, history, persona):
    """Generate the assistant's reply to *message*.

    Args:
        message: The new user message.
        history: Previous conversation, as ``(user, assistant)`` pairs or
            role/content dicts; may be empty or ``None``.
        persona: Key into ``PERSONAS`` selecting the system prompt. An unknown
            or missing persona falls back to the first entry of ``PERSONAS``.

    Returns:
        The text content of the first completion choice.
    """
    system_prompt = PERSONAS.get(persona) or next(iter(PERSONAS.values()))

    # The whole conversation is re-sent on every turn, so without trimming the
    # prompt eventually outgrows the model's context window.
    fixed_cost = _message_cost(system_prompt) + _message_cost(message)
    messages = [
        {"role": "system", "content": system_prompt},
        *_trim_history(_history_to_messages(history), fixed_cost),
        {"role": "user", "content": message},
    ]

    output = llm.create_chat_completion(
        messages=messages,
        temperature=0.8,
        top_p=0.95,
        max_tokens=1024,
        repeat_penalty=1.1,
    )
    return output["choices"][0]["message"]["content"]
# Build the chat UI: persona selector, conversation view, input box and a
# clear button.
with gr.Blocks() as demo:
    gr.Markdown("# Qwen 4B Chat")
    persona = gr.Dropdown(
        choices=list(PERSONAS.keys()),
        value="Assistant",
        label="Persona",
    )
    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(
        placeholder="Type a message...",
    )
    clear = gr.Button("Clear")

    def respond(message, history, persona):
        """Handle a submitted message.

        Returns a pair ``(new textbox value, updated history)``; the textbox
        is always cleared. Blank submissions leave the history unchanged and
        do not invoke the model.
        """
        history = history or []
        if not message or not message.strip():
            return "", history
        answer = chat(message, history, persona)
        # History is stored as (user, assistant) pairs.
        # NOTE(review): confirm the installed Gradio version's Chatbot still
        # accepts pair-style history.
        history.append((message, answer))
        return "", history

    # Pressing Enter in the textbox sends the message and refreshes the chat.
    msg.submit(
        respond,
        [msg, chatbot, persona],
        [msg, chatbot],
    )
    # The clear button resets the conversation to an empty history.
    clear.click(
        lambda: [],
        outputs=chatbot,
    )

demo.launch()