from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def predict(message, history):
    # Build conversation context from the (user, bot) pairs that Gradio's
    # default history format passes in
    chat_history = ""
    for human, ai in history:
        chat_history += f"User: {human}\nBot: {ai}\n"
    chat_history += f"User: {message}\nBot:"

    # Tokenize with tokenizer(...) rather than .encode() so the attention
    # mask is passed to generate() as well
    inputs = tokenizer(chat_history, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_length=1000,  # note: counts prompt tokens plus the reply
        pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
        do_sample=True,
        top_p=0.9,
        top_k=50,
    )

    # The decoded output repeats the whole prompt, so keep only the last reply
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    bot_reply = response.split("Bot:")[-1]
    # The model may keep writing the next "User:" turn on its own; cut that off
    return bot_reply.split("User:")[0].strip()

# Use only universally supported args
gr.ChatInterface(
    fn=predict,
    title="💬 My Chatbot",
    description="A simple CPU-friendly chatbot using Qwen/Qwen3-0.6B.",
    examples=["Hello!", "What's your name?", "Tell me a fun fact."],
).launch()
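
# --- Optional variant: prompt via the tokenizer's chat template (sketch) ---
# Qwen3-0.6B is an instruction-tuned chat model, so building the prompt with
# tokenizer.apply_chat_template() usually produces better replies than the
# handwritten "User:/Bot:" format above. This is a hedged sketch, not a
# drop-in requirement: the function name and max_new_tokens=256 are
# illustrative assumptions, and enable_thinking is a Qwen3-specific
# template kwarg. To try it, pass fn=predict_with_template to ChatInterface.
def predict_with_template(message, history):
    # Rebuild the conversation as role/content messages
    messages = []
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": message})

    # Format the conversation the way the model was trained to see it
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # Qwen3-specific: skip the <think>...</think> block
    )
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,  # unlike max_length, counts only generated tokens
        do_sample=True,
        top_p=0.9,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens, skipping the echoed prompt
    reply = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    return reply.strip()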