# Deployed from anycoder by akhaliq (HF Staff) — revision 284c90a (verified).
import gradio as gr
from huggingface_hub import InferenceClient
# Zephyr-7B: a lightweight, capable chat model served via the HF Inference API.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
client = InferenceClient(MODEL_ID)
def build_messages(message, history):
    """Build an OpenAI-style message list from a Gradio chat history.

    Args:
        message: The newest user message (str).
        history: Prior turns. Supports both Gradio formats:
            - legacy "tuples": a list of [user_text, assistant_text] pairs;
            - "messages": a list of {"role": ..., "content": ...} dicts.

    Returns:
        list[dict]: role/content dicts starting with the system prompt and
        ending with the new user message.
    """
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for turn in history:
        if isinstance(turn, dict):
            # Modern `type="messages"` history: already role/content dicts.
            if turn.get("content"):
                messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy pair format: (user_text, assistant_text); either side
            # may be empty/None (e.g. a pending assistant reply).
            user_text, assistant_text = turn[0], turn[1]
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if assistant_text:
                messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})
    return messages


def respond(message, history):
    """Stream the LLM's reply, yielding the accumulated text after each token.

    Yields partial strings so the Gradio UI renders a live "typing" effect.
    """
    messages = build_messages(message, history)
    response = ""
    for chunk in client.chat_completion(
        messages, max_tokens=512, stream=True, temperature=0.7
    ):
        token = chunk.choices[0].delta.content
        # delta.content can be None on keep-alive/terminal chunks — skip those.
        if token:
            response += token
            yield response
# Assemble the Gradio UI: a soft-themed page with a banner link and a
# streaming chat interface wired to `respond`.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "### [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)"
    )
    starter_prompts = [
        "Tell me a joke",
        "Write a python script",
        "What is the capital of France?",
    ]
    gr.ChatInterface(
        respond,
        title="50-Line Chatbot",
        description="A streaming AI assistant built with Gradio and Hugging Face Inference API.",
        examples=starter_prompts,
    )

if __name__ == "__main__":
    demo.launch()