"""Gradio chat UI for Qwen3-0.6B that strips <think>...</think> blocks from replies."""

import re

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


@torch.no_grad()  # no gradients are needed while loading weights
def load_model():
    """Load the Qwen3-0.6B tokenizer and model.

    Returns:
        tuple: (tokenizer, model) on success, (None, None) if loading fails.
    """
    print("Loading Qwen3-0.6B model...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            "Qwen/Qwen3-0.6B", trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen3-0.6B",
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        print("Qwen3-0.6B model loaded successfully!")
        return tokenizer, model
    except Exception as e:
        print(f"Error loading Qwen3-0.6B: {e}")
        return None, None


# Load the model once at module import; downstream code tolerates (None, None).
tokenizer, model = load_model()


def remove_think_tags(text):
    """Remove <think>...</think> reasoning blocks from *text*.

    FIX: the original pattern was a bare lazy ``.*?`` which matches only
    empty strings and therefore removed nothing.  The tag delimiters must
    appear in the pattern; re.DOTALL lets ``.`` span newlines inside the
    think block.
    """
    cleaned_text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    return cleaned_text.strip()


def generate_response(message, history, temperature=0.7, max_length=256):
    """Generate one assistant reply with Qwen3-0.6B.

    Args:
        message: current user message.
        history: list of [user, assistant] pairs from the Chatbot widget.
        temperature: sampling temperature; values <= 0.1 use greedy decoding.
        max_length: maximum number of new tokens to generate.

    Returns:
        The think-tag-cleaned assistant reply, or a warning string on failure.
    """
    if tokenizer is None or model is None:
        return "⚠️ Model is not loaded properly. Please check the console logs."

    try:
        # Convert Chatbot history into the chat-template messages format.
        messages = []
        for human_msg, assistant_msg in history:
            messages.extend([
                {"role": "user", "content": human_msg},
                {"role": "assistant", "content": assistant_msg},
            ])
        # Add the current message.
        messages.append({"role": "user", "content": message})

        # Build model inputs via the tokenizer's chat template.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=temperature,
                do_sample=temperature > 0.1,  # greedy at very low temperature
                top_p=0.9,
                repetition_penalty=1.1,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens (everything after the prompt).
        response = tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[-1]:],
            skip_special_tokens=True,
        )

        # Clean think tags from the response.
        clean_response = remove_think_tags(response)
        if clean_response:
            return clean_response
        return "I'm here to help! What would you like to know?"
    except Exception as e:
        print(f"Generation error: {e}")
        return f"⚠️ Error generating response: {str(e)}"


def chat_interface(message, history, temperature, max_length):
    """Main chat callback: append the new exchange and clear the textbox."""
    if not message or not message.strip():
        return "", history or []

    bot_response = generate_response(message, history or [], temperature, max_length)
    new_history = (history or []) + [[message, bot_response]]
    return "", new_history


def clear_chat():
    """Clear the chat history."""
    return []


def retry_last_response(history, temperature, max_length):
    """Regenerate the assistant reply for the most recent user message."""
    if not history:
        return history

    # Drop the last exchange and re-ask its user message.
    last_conversation = history[:-1]
    last_user_message = history[-1][0]

    bot_response = generate_response(
        last_user_message, last_conversation, temperature, max_length
    )
    return last_conversation + [[last_user_message, bot_response]]


# Custom CSS: wide chat column on the left, settings column on the right.
custom_css = """
.gradio-container {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    min-height: 100vh;
    padding: 20px;
}
.main-container {
    max-width: 1400px;
    margin: 0 auto;
    background: white;
    border-radius: 20px;
    box-shadow: 0 20px 40px rgba(0,0,0,0.1);
    overflow: hidden;
}
.header {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 30px;
    text-align: center;
}
.header h1 { margin: 0; font-size: 2.5em; font-weight: 700; }
.header p { margin: 10px 0 0 0; opacity: 0.9; font-size: 1.2em; }
.content { display: flex; min-height: 700px; gap: 0; }
/* Chat column - 80% width */
.chat-column {
    flex: 4; /* Increased from 3 to 4 for more space */
    display: flex;
    flex-direction: column;
    min-width: 0; /* Allow shrinking */
}
/* Control column - 20% width */
.control-column {
    flex: 1; /* Reduced to take less space */
    background: #f8f9fa;
    padding: 20px;
    border-left: 1px solid #e1e5e9;
    min-width: 280px; /* Minimum width for controls */
    max-width: 320px; /* Maximum width for controls */
}
.chatbot-container { flex: 1; display: flex; flex-direction: column; min-height: 600px; }
#chatbot {
    flex: 1;
    min-height: 550px !important;
    max-height: 550px !important;
    border: none !important;
    background: white !important;
    padding: 25px !important;
    margin: 0 !important;
    overflow-y: auto !important;
}
#chatbot .message {
    padding: 18px 22px !important;
    margin: 12px 0 !important;
    border-radius: 18px !important;
    max-width: 85% !important;
    line-height: 1.5 !important;
    font-size: 15px !important;
}
#chatbot .user-message {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
    margin-left: auto !important;
    border: none !important;
}
#chatbot .bot-message {
    background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%) !important;
    color: #2d3748 !important;
    margin-right: auto !important;
    border: 1px solid #e2e8f0 !important;
}
.input-container { background: #f8f9fa; padding: 25px; border-top: 1px solid #e1e5e9; }
.control-panel {
    background: white;
    padding: 20px;
    border-radius: 15px;
    margin-bottom: 20px;
    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
    border: 1px solid #eef2f7;
}
.model-info {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 20px;
    border-radius: 15px;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
}
.gr-button {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    border-radius: 12px !important;
    padding: 14px 28px !important;
    font-weight: 600 !important;
    margin: 5px !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3) !important;
}
.gr-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4) !important;
}
.clear-btn {
    background: linear-gradient(135deg, #ff6b6b 0%, #ee5a24 100%) !important;
    box-shadow: 0 4px 12px rgba(255, 107, 107, 0.3) !important;
}
.retry-btn {
    background: linear-gradient(135deg, #00b894 0%, #00a085 100%) !important;
    box-shadow: 0 4px 12px rgba(0, 184, 148, 0.3) !important;
}
.textbox {
    border-radius: 15px !important;
    border: 2px solid #e2e8f0 !important;
    padding: 18px !important;
    font-size: 16px !important;
    background: white !important;
    box-shadow: 0 2px 8px rgba(0,0,0,0.05) !important;
    min-height: 80px !important;
}
.textbox:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1), 0 2px 8px rgba(0,0,0,0.05) !important;
}
.examples-panel { background: white; padding: 25px; border-top: 1px solid #e1e5e9; }
.control-section { margin-bottom: 20px; }
.control-section:last-child { margin-bottom: 0; }
.slider-container { margin: 15px 0; }
.feature-list { list-style: none; padding: 0; margin: 10px 0 0 0; }
.feature-list li { padding: 6px 0; position: relative; padding-left: 20px; font-size: 14px; }
.feature-list li::before {
    content: '✓';
    position: absolute;
    left: 0;
    color: #48bb78;
    font-weight: bold;
}
.status-indicator {
    display: inline-block;
    width: 10px;
    height: 10px;
    border-radius: 50%;
    margin-right: 8px;
}
.status-online { background: #48bb78; box-shadow: 0 0 8px #48bb78; }
@media (max-width: 1024px) {
    .content { flex-direction: column; }
    .control-column { max-width: none; border-left: none; border-top: 1px solid #e1e5e9; }
    .chat-column { min-height: 500px; }
}
"""

# Create the Gradio interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    with gr.Column(elem_classes="main-container"):
        # Header
        with gr.Column(elem_classes="header"):
            gr.Markdown("# 🤖 Qwen3-0.6B Chatbot")
            gr.Markdown(
                "Chat with Alibaba's advanced Qwen3-0.6B model - "
                "Think tags automatically removed!"
            )

        with gr.Row(elem_classes="content"):
            # Left Column - Chat (80% width)
            with gr.Column(elem_classes="chat-column"):
                with gr.Column(elem_classes="chatbot-container"):
                    chatbot = gr.Chatbot(
                        # FIX: the greeting belongs to the assistant slot of the
                        # [user, assistant] pair; it was reversed originally.
                        value=[[None, "Hello! How can I assist you today? 😊"]],
                        label="",
                        elem_id="chatbot",
                        show_copy_button=True,
                        avatar_images=("👤", "🤖"),
                        height=550,
                        container=True,
                        show_label=False,
                    )

                with gr.Column(elem_classes="input-container"):
                    with gr.Row():
                        msg = gr.Textbox(
                            label="",
                            placeholder=(
                                "💭 Type your message here... "
                                "(Press Enter to send, Shift+Enter for new line)"
                            ),
                            lines=3,
                            scale=4,
                            container=False,
                            show_label=False,
                        )
                        with gr.Column(scale=1):
                            submit_btn = gr.Button("Send 🚀", size="lg")
                    with gr.Row():
                        clear_btn = gr.Button(
                            "🗑️ Clear Chat", elem_classes="clear-btn", size="sm"
                        )
                        retry_btn = gr.Button(
                            "🔄 Retry Last", elem_classes="retry-btn", size="sm"
                        )
                    # NOTE(review): the original HTML markup was stripped by a
                    # paste mangle; reconstructed as a minimal hint line.
                    gr.HTML(
                        "<div style='text-align:center; color:#718096; "
                        "font-size:13px;'>Press Enter to send &bull; "
                        "Shift+Enter for new line</div>"
                    )

            # Right Column - Controls (20% width)
            with gr.Column(elem_classes="control-column"):
                with gr.Column(elem_classes="control-panel"):
                    gr.Markdown("### ⚙️ Settings")

                    with gr.Column(elem_classes="control-section"):
                        gr.Markdown("**🎛️ Temperature**")
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.5,
                            value=0.7,
                            step=0.1,
                            label="",
                            show_label=False,
                        )
                        gr.Markdown(
                            "<small>Lower = more predictable<br>"
                            "Higher = more creative</small>"
                        )

                    with gr.Column(elem_classes="control-section"):
                        gr.Markdown("**📏 Max Length**")
                        max_length = gr.Slider(
                            minimum=50,
                            maximum=1000,
                            value=256,
                            step=50,
                            label="",
                            show_label=False,
                        )
                        gr.Markdown("<small>Response length in tokens</small>")

                with gr.Column(elem_classes="model-info"):
                    gr.Markdown("### ℹ️ Model Info")
                    if tokenizer and model:
                        gr.Markdown(
                            "**Status:** Online\n\n"
                            "**Model:** Qwen3-0.6B ✅\n\n"
                            "**Think Tags:** Auto-removed ✅"
                        )
                    else:
                        gr.Markdown(
                            "**Status:** Loading\n\n"
                            "Check console for details"
                        )

        # Examples Section
        with gr.Column(elem_classes="examples-panel"):
            gr.Markdown("### 💡 Try These Examples")
            gr.Examples(
                examples=[
                    "Explain quantum computing in simple terms",
                    "Write a short poem about artificial intelligence",
                    "What are the benefits of renewable energy?",
                    "How do I learn programming effectively?",
                    "Tell me an interesting fact about space exploration",
                    "Help me plan a healthy weekly meal plan",
                ],
                inputs=msg,
                label="Click any example to start chatting!",
                examples_per_page=6,
            )

    # Event handlers
    submit_event = msg.submit(
        chat_interface,
        inputs=[msg, chatbot, temperature, max_length],
        outputs=[msg, chatbot],
    )
    submit_btn.click(
        chat_interface,
        inputs=[msg, chatbot, temperature, max_length],
        outputs=[msg, chatbot],
    )
    clear_btn.click(clear_chat, outputs=[chatbot])
    retry_btn.click(
        retry_last_response,
        inputs=[chatbot, temperature, max_length],
        outputs=[chatbot],
    )


if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        share=False,
        show_error=True,
    )