Spaces:
Sleeping
Sleeping
File size: 1,129 Bytes
ce8a258 da57d79 ce8a258 da57d79 ce8a258 da57d79 82a3302 da57d79 82a3302 da57d79 82a3302 da57d79 74a2c8d 82a3302 74a2c8d ce8a258 6ebd9f9 d7f7cd2 ec8cb7d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import gradio as gr
from huggingface_hub import InferenceClient
# Client for Hugging Face's hosted inference API, pointed at the
# Zephyr-7B-beta chat model (network access happens at call time).
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
client = InferenceClient(MODEL_ID)
def respond(message, history):
    """Stream a chat reply for *message*, given the prior conversation.

    Parameters:
        message: The latest user message (str).
        history: Prior turns as OpenAI-style {"role", "content"} dicts,
            or None/empty — gr.ChatInterface(type="messages") supplies this.

    Yields:
        The accumulated assistant reply so far (str), so the UI can
        render the response progressively as tokens arrive.
    """
    messages = [{"role": "system", "content": "You are a friendly chatbot."}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    # Accumulate the reply token by token.
    response = ""
    # FIX: the loop variable was named `message`, shadowing (and
    # clobbering) the user-message parameter — use `chunk` instead.
    for chunk in client.chat_completion(
        messages,
        max_tokens=150,
        temperature=0.7,
        top_p=0.9,
        stream=True  # Enable streaming
    ):
        # FIX: streaming deltas can carry content=None (e.g. the final
        # or role-only chunk); coerce to "" so `+=` never raises TypeError.
        token = chunk['choices'][0]['delta']['content'] or ""
        response += token
        # Yield the running text so the interface updates live.
        yield response
# Wire the streaming generator into a chat UI. type="messages" makes
# Gradio pass history as a list of {"role", "content"} dicts, matching
# what `respond` expects.
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    title="Streaming Chatbot",
    description=(
        "This chatbot streams responses as they are generated "
        "for a more dynamic experience!"
    ),
)

chatbot.launch()
|