Spaces:
Sleeping
Sleeping
File size: 1,085 Bytes
089dc6e 033d225 089dc6e 033d225 089dc6e 033d225 089dc6e 033d225 089dc6e 033d225 089dc6e 033d225 089dc6e 033d225 089dc6e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import os
import gradio as gr
from openai import OpenAI
# OpenAI-compatible client pointed at the dedicated inference endpoint.
# The API key is read from the HUGGING_FACE_API_KEY environment variable.
client = OpenAI(
    api_key=os.environ.get("HUGGING_FACE_API_KEY"),
    base_url="https://k0b11x1cc5f1ygmu.us-east4.gcp.endpoints.huggingface.cloud/v1/",
)
def chat_with_streaming(message, history):
    """Stream a chat reply, yielding the response text accumulated so far.

    Args:
        message: The latest user message to send.
        history: Earlier conversation turns; each item is indexed by
            "role" and "content".

    Yields:
        The cumulative assistant response after every non-empty content
        delta received from the streaming completion.
    """
    # Forward only the role/content pair of each earlier turn, then append
    # the new user message.
    messages = [{"role": msg["role"], "content": msg["content"]} for msg in history]
    messages.append({"role": "user", "content": message})

    # Request the completion per call so each chat turn gets its own stream.
    chat_completion = client.chat.completions.create(
        model="qwen3-1-7b-gwo",
        messages=messages,
        max_tokens=150,
        temperature=0.7,
        stream=True,  # Enable streaming
    )

    response = ""
    for chunk in chat_completion:
        # A streamed chunk may carry an empty `choices` list (for example a
        # usage-only chunk); indexing choices[0] on it would raise IndexError
        # and abort the stream, so skip such chunks.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text:
            response += delta_text
            yield response  # Send partial response to Gradio
# Build the chat UI around the streaming generator; history is exchanged
# in the "messages" format.
demo = gr.ChatInterface(
    title="Streaming Chat with Inference Endpoints",
    type="messages",
    fn=chat_with_streaming,
)

# Start the Gradio app.
demo.launch()
|