"""Gradio chatbot for CBE exam marking, built on Qwen 2.5 3B Instruct."""

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# ----------------------------
# Model configuration
# ----------------------------
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    # fp16 only when a GPU is present; fp32 keeps CPU inference numerically safe
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True,
)
model.eval()  # inference only: disables dropout / training-mode behavior

# ----------------------------
# System prompt for CBE marking
# ----------------------------
SYSTEM_MESSAGE = {
    "role": "system",
    "content": (
        "You are a Kenyan national exam marker. "
        "You understand English and Kiswahili. "
        "Use ONLY the provided marking scheme. "
        "Do NOT invent marks."
    ),
}


def _to_pairs(messages):
    """Convert role/content dicts into [user, assistant] pairs for the
    legacy (tuple-based) gr.Chatbot; the system prompt is not displayed."""
    pairs = []
    for m in messages:
        if m["role"] == "user":
            pairs.append([m["content"], None])
        elif m["role"] == "assistant":
            # Attach the reply to the pending user turn when there is one.
            if pairs and pairs[-1][1] is None:
                pairs[-1][1] = m["content"]
            else:
                pairs.append([None, m["content"]])
    return pairs


# ----------------------------
# Chat function
# ----------------------------
def chat(user_input, messages):
    """Run one chat turn against the model.

    Args:
        user_input: Raw text from the textbox.
        messages: Conversation history as a list of role/content dicts
            (empty/None on the first turn; the system prompt is injected then).

    Returns:
        A 3-tuple ``(display_pairs, messages, "")``:
        user/assistant pairs for the Chatbot widget, the updated dict
        history for the State, and an empty string that clears the textbox.
    """
    if not messages:
        messages = [SYSTEM_MESSAGE]

    # Defensive copy with stringified contents so apply_chat_template
    # never receives a non-string payload.
    messages = [{"role": m["role"], "content": str(m["content"])} for m in messages]
    messages.append({"role": "user", "content": str(user_input)})

    # Render the history into Qwen's chat-template prompt format.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # FIX: generate inside inference_mode — the original built an autograd
    # graph for every generation step, wasting memory for no benefit.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=384,  # safe for 3B
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    # Decode only the newly generated tokens (slice off the prompt).
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    messages.append({"role": "assistant", "content": response})

    return _to_pairs(messages), messages, ""


# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Qwen 2.5 – 3B Chatbot (English & Kiswahili)")
    chatbot = gr.Chatbot()  # old Gradio safe
    msg = gr.Textbox(label="Your message / Ujumbe wako", autofocus=True)
    state = gr.State([])  # keeps messages

    # BUG FIX: the original wired chat's (messages, "") return to
    # outputs=[chatbot, state], which (a) fed raw role/content dicts to the
    # legacy tuple-based Chatbot and (b) overwrote the State with "" every
    # turn, wiping the conversation history. chat now returns display pairs,
    # the dict history, and "" to clear the textbox, wired in that order.
    msg.submit(chat, inputs=[msg, state], outputs=[chatbot, state, msg])

# ----------------------------
# Launch safely on HF Spaces
# ----------------------------
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)