import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
# ----------------------------
# Model configuration
# ----------------------------
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

# Half precision only when a GPU is available; CPU float16 is slow/unsupported.
_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

# Tokenizer and model are loaded once at import time (usual HF Spaces pattern).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    device_map="auto",   # let accelerate place layers on the available device(s)
    torch_dtype=_DTYPE,
)
# ----------------------------
# System prompt for CBE marking
# ----------------------------
# Seed message that pins the model to strict marking-scheme behaviour;
# prepended to every new conversation in chat().
SYSTEM_MESSAGE = {
    "role": "system",
    "content": (
        "You are a Kenyan national exam marker. You understand English and "
        "Kiswahili. Use ONLY the provided marking scheme. Do NOT invent marks."
    ),
}
# ----------------------------
# Chat function
# ----------------------------
def _history_pairs(messages):
    """Convert role/content dicts into [user, assistant] pairs for gr.Chatbot.

    The system message is skipped.  A trailing user message with no reply yet
    (defensive: should not occur in the normal flow) gets an empty slot.
    """
    pairs = []
    pending_user = None
    for m in messages:
        role = m.get("role")
        if role == "user":
            pending_user = m.get("content", "")
        elif role == "assistant":
            pairs.append([pending_user, m.get("content", "")])
            pending_user = None
    if pending_user is not None:
        pairs.append([pending_user, None])
    return pairs


def chat(user_input, messages):
    """Run one chat turn against the Qwen model.

    Parameters
    ----------
    user_input : str
        Text typed by the user.
    messages : list[dict] | None
        Running conversation as {"role", "content"} dicts (held in gr.State).

    Returns
    -------
    tuple[list, list]
        (chatbot history as [user, assistant] pairs, updated messages list),
        matching the ``outputs=[chatbot, state]`` wiring in the UI.
    """
    if not messages:
        messages = [SYSTEM_MESSAGE]
    # Normalise contents to strings so apply_chat_template never sees None.
    messages = [
        {"role": m["role"], "content": str(m["content"])}
        for m in messages
    ]
    # Add user input
    messages.append({"role": "user", "content": str(user_input)})
    # Create Qwen prompt
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Generate response
    outputs = model.generate(
        **inputs,
        max_new_tokens=384,  # safe for 3B
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )
    # Decode only the newly generated tokens, not the echoed prompt.
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True
    )
    # Append assistant response
    messages.append({"role": "assistant", "content": response})
    # BUG FIX: the original returned (messages, ""); with
    # outputs=[chatbot, state] that fed raw role dicts to the (pair-style)
    # Chatbot, which cannot render them, and reset the State to "" each turn,
    # losing the conversation.  Return (pairs, messages) so the Chatbot gets
    # renderable pairs and the State keeps the full message list.
    return _history_pairs(messages), messages
# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks() as demo:
    # FIX: the original heading contained mojibake ("π€ ... β") from a bad
    # encoding round-trip; restore the intended robot emoji and en dash.
    gr.Markdown("## 🤖 Qwen 2.5 – 3B Chatbot (English & Kiswahili)")
    chatbot = gr.Chatbot()  # pair-style history (old Gradio safe)
    msg = gr.Textbox(label="Your message / Ujumbe wako", autofocus=True)
    state = gr.State([])  # keeps the role/content message dicts across turns
    # Submit wires chat's two return values to [chatbot, state].
    msg.submit(
        chat,
        inputs=[msg, state],
        outputs=[chatbot, state]
    )
# ----------------------------
# Launch safely on HF Spaces
# ----------------------------
if __name__ == "__main__":
    # Bind to all interfaces on 7860, the port HF Spaces expects;
    # show_error surfaces tracebacks in the UI for easier debugging.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )