Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,095 Bytes
5e1305b be1c6d2 5e1305b be1c6d2 5e1305b be1c6d2 5e1305b be1c6d2 5e1305b be1c6d2 5e1305b f96aa87 5e1305b f96aa87 5e1305b be1c6d2 5e1305b f96aa87 5e1305b f96aa87 5e1305b be1c6d2 5e1305b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
from transformers import pipeline
import torch
import spaces
def _build_pipeline():
    """Construct the shared VibeThinker text-generation pipeline.

    Loaded once at import time so every request reuses the same model
    instance; `device_map="auto"` lets accelerate place the weights.
    """
    print("Loading VibeThinker model...")
    text_gen = pipeline(
        "text-generation",
        model="WeiboAI/VibeThinker-1.5B",
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    print("Model loaded successfully!")
    return text_gen


# Module-level pipeline shared by all chat requests.
pipe = _build_pipeline()
@spaces.GPU
def respond(message, history):
    """Generate a (simulated) streaming response for the chatbot.

    Args:
        message: The user's current message (str).
        history: Prior conversation. Because the UI uses
            ``gr.ChatInterface(type="messages")``, this is a list of
            ``{"role": ..., "content": ...}`` dicts — NOT ``[user, assistant]``
            pairs. The original tuple-unpacking loop therefore bound the dict
            *keys* ("role"/"content") instead of the message texts.

    Yields:
        str: The assistant response accumulated so far, one character at a
        time (Gradio re-renders the growing string, simulating streaming).
    """
    # History is already in the role/content format chat pipelines expect;
    # copy it so we never mutate Gradio's own history list.
    messages = [{"role": m["role"], "content": m["content"]} for m in history]
    messages.append({"role": "user", "content": message})

    # A chat-formatted input yields a single result list of the form
    # [{"generated_text": ...}]. The original code iterated over that list
    # and indexed each dict with [0], which raises KeyError: 0 — index the
    # result list directly instead. (streamer=None was a no-op and is gone;
    # no true token streaming is wired up here.)
    outputs = pipe(
        messages,
        max_new_tokens=4096,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        return_full_text=False,
    )
    generated_text = outputs[0]["generated_text"]

    # Depending on the transformers version, generated_text may be the new
    # text alone (str) or the full message list; handle both.
    if isinstance(generated_text, list):
        assistant_response = generated_text[-1]["content"]
    else:
        assistant_response = generated_text

    # Simulate streaming by yielding ever-longer prefixes.
    full_response = ""
    for char in assistant_response:
        full_response += char
        yield full_response
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------

_HEADER_MD = """
# 💭 VibeThinker Chatbot
Chat with [WeiboAI/VibeThinker-1.5B](https://huggingface.co/WeiboAI/VibeThinker-1.5B) - a powerful conversational AI model.
<a href="https://huggingface.co/spaces/akhaliq/anycoder" class="header-link">Built with anycoder</a>
"""

_FOOTER_MD = """
### About VibeThinker
VibeThinker is a 1.5B parameter conversational AI model designed for engaging and thoughtful conversations.
The model uses temperature sampling (0.6) for balanced creativity and coherence.
**Powered by ZeroGPU** for efficient GPU resource allocation.
"""

_CSS = """
.header-link { text-decoration: none; color: inherit; }
.header-link:hover { text-decoration: underline; }
"""

with gr.Blocks(theme=gr.themes.Soft(), css=_CSS) as demo:
    gr.Markdown(_HEADER_MD)

    # Chat widget; `respond` receives/returns openai-style message dicts
    # because of type="messages".
    chatbot = gr.ChatInterface(
        fn=respond,
        type="messages",
        title="",
        description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
        examples=[
            "What is the meaning of life?",
            "Explain quantum computing in simple terms",
            "Write a short poem about artificial intelligence",
            "How can I improve my productivity?",
        ],
        cache_examples=False,
    )

    gr.Markdown(_FOOTER_MD)

if __name__ == "__main__":
    demo.launch()