File size: 3,095 Bytes
5e1305b
be1c6d2
5e1305b
be1c6d2
5e1305b
 
be1c6d2
5e1305b
be1c6d2
 
 
 
 
 
5e1305b
 
 
be1c6d2
5e1305b
 
f96aa87
5e1305b
 
 
 
 
 
 
 
 
 
 
 
 
 
f96aa87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e1305b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be1c6d2
5e1305b
f96aa87
5e1305b
 
f96aa87
5e1305b
 
 
 
 
 
 
 
 
be1c6d2
 
5e1305b
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import gradio as gr
from transformers import pipeline
import torch
import spaces


# Initialize the text-generation pipeline once at module import so the model
# is loaded a single time and shared by all requests.
print("Loading VibeThinker model...")
pipe = pipeline(
    "text-generation",
    model="WeiboAI/VibeThinker-1.5B",
    # bfloat16 halves memory vs. float32; requires hardware bf16 support.
    # NOTE(review): newer transformers releases deprecate `torch_dtype` in
    # favor of `dtype` — confirm against the pinned transformers version.
    torch_dtype=torch.bfloat16,
    # Let accelerate place the model on the available GPU(s) automatically.
    device_map="auto"
)
print("Model loaded successfully!")


def _build_messages(message, history):
    """Normalize Gradio chat history into OpenAI-style message dicts.

    Args:
        message: The user's current message (str).
        history: Previous turns. With ``gr.ChatInterface(type="messages")``
            each entry is already a ``{"role": ..., "content": ...}`` dict;
            legacy Gradio passes ``[user_msg, assistant_msg]`` pairs. Both
            formats are accepted.

    Returns:
        A list of ``{"role", "content"}`` dicts ending with the new user turn.
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # type="messages" format: already role/content dicts.
            # (The original code tuple-unpacked these, which yielded the
            # dict *keys* "role"/"content" as the message texts — a bug.)
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy tuple/list format: [user_msg, assistant_msg].
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    return messages


@spaces.GPU
def respond(message, history):
    """Generate a chatbot reply, yielded as progressively longer prefixes.

    Args:
        message: The user's current message.
        history: Previous conversation turns (see ``_build_messages``).

    Yields:
        str: The assistant response accumulated so far, one character at a
        time, so gr.ChatInterface renders it as a stream.
    """
    messages = _build_messages(message, history)

    # The pipeline returns a list with one result dict per input
    # conversation; index it once rather than iterating the list (the
    # original `for output in pipe(...)` treated each result dict as if it
    # were the list, so `output[0]` failed).
    outputs = pipe(
        messages,
        max_new_tokens=4096,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        return_full_text=False,
    )
    generated_text = outputs[0]["generated_text"]

    # With chat-formatted output the pipeline may return the full message
    # list; the last entry is the assistant turn. Otherwise it is plain text.
    if isinstance(generated_text, list):
        assistant_response = generated_text[-1]["content"]
    else:
        assistant_response = generated_text

    # Pseudo-stream: generation is complete, but yielding growing prefixes
    # gives the UI a typewriter effect.
    full_response = ""
    for char in assistant_response:
        full_response += char
        yield full_response


# Create the Gradio interface. Blocks is used (rather than bare
# ChatInterface) so the chat widget can be framed with header/footer
# Markdown and a custom theme/CSS.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css="""
    .header-link { text-decoration: none; color: inherit; }
    .header-link:hover { text-decoration: underline; }
    """
) as demo:
    gr.Markdown(
        """
        # 💭 VibeThinker Chatbot
        Chat with [WeiboAI/VibeThinker-1.5B](https://huggingface.co/WeiboAI/VibeThinker-1.5B) - a powerful conversational AI model.
        
        <a href="https://huggingface.co/spaces/akhaliq/anycoder" class="header-link">Built with anycoder</a>
        """
    )
    
    # type="messages" makes Gradio pass history as role/content dicts
    # (OpenAI style) to the `respond` generator.
    chatbot = gr.ChatInterface(
        fn=respond,
        type="messages",
        title="",
        description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
        examples=[
            "What is the meaning of life?",
            "Explain quantum computing in simple terms",
            "Write a short poem about artificial intelligence",
            "How can I improve my productivity?",
        ],
        # Examples would otherwise be generated at startup, consuming GPU
        # time before any user interacts.
        cache_examples=False,
    )
    
    gr.Markdown(
        """
        ### About VibeThinker
        VibeThinker is a 1.5B parameter conversational AI model designed for engaging and thoughtful conversations.
        The model uses temperature sampling (0.6) for balanced creativity and coherence.
        
        **Powered by ZeroGPU** for efficient GPU resource allocation.
        """
    )

# Launch only when run as a script (Spaces executes the file directly).
if __name__ == "__main__":
    demo.launch()