# Hugging Face Space: streaming chat demo (page-status residue removed)
import os

import gradio as gr
from openai import OpenAI

# OpenAI-compatible client pointed at a Hugging Face Inference Endpoint.
# NOTE(review): os.getenv returns None if HUGGING_FACE_API_KEY is unset,
# which surfaces as an auth error only at request time — set it before launch.
client = OpenAI(
    base_url="https://k0b11x1cc5f1ygmu.us-east4.gcp.endpoints.huggingface.cloud/v1/",
    api_key=os.getenv("HUGGING_FACE_API_KEY"),
)
def chat_with_streaming(message, history):
    """Stream a chat completion for *message*, yielding the growing reply.

    Args:
        message: The user's latest input string.
        history: Prior turns in Gradio "messages" format — a list of
            dicts with "role" and "content" keys.

    Yields:
        The accumulated assistant response so far (Gradio re-renders
        each partial yield as streaming output).
    """
    # Strip any extra Gradio metadata keys; the API accepts role/content only.
    messages = [{"role": msg["role"], "content": msg["content"]} for msg in history]
    messages.append({"role": "user", "content": message})

    # Streaming request; the endpoint returns incremental delta chunks.
    chat_completion = client.chat.completions.create(
        model="qwen3-1-7b-gwo",
        messages=messages,
        max_tokens=150,
        temperature=0.7,
        stream=True,  # Enable streaming
    )

    response = ""
    for chunk in chat_completion:
        # Some OpenAI-compatible endpoints emit keep-alive chunks with an
        # empty `choices` list; indexing those blindly raises IndexError.
        if chunk.choices and chunk.choices[0].delta.content:
            response += chunk.choices[0].delta.content
            yield response  # Send partial response to Gradio
# Create the streaming chat UI; type="messages" matches the dict-based
# history format consumed by chat_with_streaming.
demo = gr.ChatInterface(
    fn=chat_with_streaming,
    type="messages",
    title="Streaming Chat with Inference Endpoints",
)

demo.launch()