vibethinker

Running on Zero

akhaliq (HF Staff) committed 30 days ago

Commit

f96aa87

verified ·

1 Parent(s): 30e360b

Update Gradio app with multiple files

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,21 +16,9 @@ print("Model loaded successfully!")
 @spaces.GPU
-def generate_response(messages):
-    """Generate response using the pipeline."""
-    response = pipe(
-        messages,
-        max_new_tokens=4096,
-        do_sample=True,
-        temperature=0.6,
-        top_p=0.95
-    )
-    return response[0]["generated_text"][-1]["content"]
 def respond(message, history):
     """
-    Generate response for the chatbot.
     Args:
         message: The user's current message
@@ -45,10 +33,30 @@ def respond(message, history):
     # Add current message
     messages.append({"role": "user", "content": message})
-    # Generate response
-    response = generate_response(messages)
-    return response
 # Create the Gradio interface
@@ -74,8 +82,10 @@ with gr.Blocks(
         title="",
         description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
         examples=[
             "Explain quantum computing in simple terms",
             "Write a short poem about artificial intelligence",
         ],
         cache_examples=False,
     )

 @spaces.GPU
 def respond(message, history):
     """
+    Generate streaming response for the chatbot.
     Args:
         message: The user's current message
     # Add current message
     messages.append({"role": "user", "content": message})
+    # Generate response with streaming
+    full_response = ""
+    for output in pipe(
+        messages,
+        max_new_tokens=4096,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.95,
+        return_full_text=False,
+        streamer=None
+    ):
+        # Get the generated text
+        generated_text = output[0]["generated_text"]
+        # Extract only the assistant's response
+        if isinstance(generated_text, list):
+            assistant_response = generated_text[-1]["content"]
+        else:
+            assistant_response = generated_text
+        # Stream character by character
+        for char in assistant_response[len(full_response):]:
+            full_response += char
+            yield full_response
 # Create the Gradio interface
         title="",
         description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
         examples=[
+            "What is the meaning of life?",
             "Explain quantum computing in simple terms",
             "Write a short poem about artificial intelligence",
+            "How can I improve my productivity?",
         ],
         cache_examples=False,
     )