# Hugging Face Space: Qwen 2.5 3B chat app.
# (Removed "Spaces / Sleeping" page-status text that was pasted in from the
# HF Spaces web UI — it was not valid Python.)
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# ----------------------------
# Model configuration
# ----------------------------
# NOTE(review): original paste wrapped every line in "| ... | |" artifacts and
# lost indentation; this restores valid Python with identical behavior.
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME, trust_remote_code=True
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",  # place layers on GPU automatically when available
    # fp16 only makes sense on CUDA; fall back to fp32 on CPU
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True,
)
# ----------------------------
# System prompt for CBE marking
# ----------------------------
# Prepended to every fresh conversation; constrains the model to act as a
# bilingual (English/Kiswahili) exam marker bound to the supplied scheme.
SYSTEM_MESSAGE = {
    "role": "system",
    "content": (
        "You are a Kenyan national exam marker. "
        "You understand English and Kiswahili. "
        "Use ONLY the provided marking scheme. "
        "Do NOT invent marks."
    ),
}
# ----------------------------
# Chat function
# ----------------------------
def chat(user_input, messages):
    """Run one chat turn and return (display_pairs, updated_messages).

    Args:
        user_input: The user's new message (any value; coerced to ``str``).
        messages: Prior conversation as a list of ``{"role", "content"}``
            dicts, or a falsy value on the first turn.

    Returns:
        A 2-tuple of:
        - list of ``[user, assistant]`` pairs for the legacy ``gr.Chatbot``
          display (system message excluded),
        - the full message-dict list, to be stored back into ``gr.State``.

    BUG FIX vs. original: the original returned ``(messages, "")``, which
    (a) fed raw message dicts to the pair-based Chatbot widget and
    (b) stored ``""`` as the new state, so the falsy state reset the
    conversation to just the system prompt on every turn.
    """
    if not messages:
        messages = [SYSTEM_MESSAGE]

    # Normalize: ensure every content field is a plain string.
    messages = [
        {"role": m["role"], "content": str(m["content"])}
        for m in messages
    ]
    messages.append({"role": "user", "content": str(user_input)})

    # Build the Qwen chat prompt from the full history.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate the reply; no gradients needed at inference time.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=384,  # safe budget for a 3B model
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    # Decode only the newly generated tokens (skip the echoed prompt).
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True,
    )
    messages.append({"role": "assistant", "content": response})

    # Convert message dicts into (user, assistant) pairs for gr.Chatbot.
    pairs = []
    for m in messages:
        if m["role"] == "user":
            pairs.append([m["content"], None])
        elif m["role"] == "assistant" and pairs:
            pairs[-1][1] = m["content"]

    return pairs, messages
# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Qwen 2.5 – 3B Chatbot (English & Kiswahili)")

    # Default (pair-based) Chatbot — compatible with older Gradio versions.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your message / Ujumbe wako", autofocus=True)
    # Persistent conversation history as a list of message dicts.
    state = gr.State([])

    # On submit: chat() returns (display pairs, updated message state).
    msg.submit(
        chat,
        inputs=[msg, state],
        outputs=[chatbot, state],
    )
# ----------------------------
# Launch safely on HF Spaces
# ----------------------------
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (required on Spaces)
        server_port=7860,       # the port HF Spaces exposes
        show_error=True,        # surface tracebacks in the UI for debugging
    )