import gradio as gr
from transformers import pipeline
import torch
import spaces

# Initialize the text-generation pipeline once at module import so the model
# is loaded a single time and shared across all requests.
print("Loading VibeThinker model...")
pipe = pipeline(
    "text-generation",
    model="WeiboAI/VibeThinker-1.5B",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
print("Model loaded successfully!")


@spaces.GPU
def respond(message, history):
    """Generate a pseudo-streaming response for the chatbot.

    Args:
        message: The user's current message (str).
        history: Previous conversation turns. With ``type="messages"`` Gradio
            passes openai-style dicts (``{"role": ..., "content": ...}``);
            legacy ``[user, assistant]`` pairs are also accepted.

    Yields:
        str: The assistant response, progressively longer on each yield so
        the UI renders it as a stream.
    """
    # Normalize history into the chat-messages format expected by the
    # pipeline's chat templating. The original unpacked every turn as a
    # (user, assistant) pair, but gr.ChatInterface(type="messages") passes
    # role/content dicts — unpacking a dict yields its *keys*, corrupting
    # the conversation. Handle both shapes.
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # The pipeline returns a list with one result dict per input. The
    # original iterated that list and indexed each dict with [0], which
    # raises. Generate once, then stream the finished text to the UI.
    outputs = pipe(
        messages,
        max_new_tokens=4096,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        return_full_text=False,
    )
    generated_text = outputs[0]["generated_text"]

    # With return_full_text=False this is normally a plain string, but some
    # pipeline versions return the full message list; handle both.
    if isinstance(generated_text, list):
        assistant_response = generated_text[-1]["content"]
    else:
        assistant_response = generated_text

    # Pseudo-streaming: yield progressively longer prefixes so Gradio
    # renders the reply incrementally (generation itself is not streamed).
    partial = ""
    for char in assistant_response:
        partial += char
        yield partial


# Build the Gradio interface.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css="""
    .header-link { text-decoration: none; color: inherit; }
    .header-link:hover { text-decoration: underline; }
    """,
) as demo:
    gr.Markdown(
        """
        # 💭 VibeThinker Chatbot

        Chat with [WeiboAI/VibeThinker-1.5B](https://huggingface.co/WeiboAI/VibeThinker-1.5B) - a powerful conversational AI model.

        Built with anycoder
        """
    )

    chatbot = gr.ChatInterface(
        fn=respond,
        type="messages",
        title="",
        # NOTE: the original source had a raw newline inside this string
        # literal (a syntax error); rejoined into a single valid literal.
        description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
        examples=[
            "What is the meaning of life?",
            "Explain quantum computing in simple terms",
            "Write a short poem about artificial intelligence",
            "How can I improve my productivity?",
        ],
        cache_examples=False,
    )

    gr.Markdown(
        """
        ### About VibeThinker
        VibeThinker is a 1.5B parameter conversational AI model designed for engaging and thoughtful conversations.
        The model uses temperature sampling (0.6) for balanced creativity and coherence.

        **Powered by ZeroGPU** for efficient GPU resource allocation.
        """
    )

if __name__ == "__main__":
    demo.launch()