import gradio as gr
from openai import OpenAI
# OpenAI-compatible client pointed at a local vLLM server.
# vLLM does not validate API keys, so any placeholder token works.
client = OpenAI(base_url="http://localhost:8004/v1", api_key="token-not-needed")
def _history_to_openai(history):
    """Convert Gradio tuple-format history into OpenAI chat message dicts.

    Each *history* entry is a ``[user, assistant]`` pair. Entries whose
    slot is ``None`` (e.g. a still-pending assistant turn) are skipped —
    the original code stringified them, sending the literal ``"None"``
    to the model.

    Parameters
    ----------
    history : list
        Gradio chat history as ``[user, assistant]`` pairs.

    Returns
    -------
    list[dict]
        Messages with ``role``/``content`` keys, in conversation order.
    """
    messages = []
    for pair in history:
        if len(pair) < 2:
            continue
        user_turn, assistant_turn = pair[0], pair[1]
        if user_turn is not None:
            messages.append({"role": "user", "content": str(user_turn)})
        if assistant_turn is not None:
            messages.append({"role": "assistant", "content": str(assistant_turn)})
    return messages


def predict(message, history):
    """Stream a chat completion for *message*, given the prior *history*.

    Parameters
    ----------
    message : str
        The user's current input.
    history : list
        Gradio tuple-format chat history (``[user, assistant]`` pairs).

    Yields
    ------
    str
        The assistant reply accumulated so far; Gradio re-renders the
        chat bubble on each yield to produce the streaming effect.
    """
    messages = _history_to_openai(history)
    messages.append({"role": "user", "content": message})

    # Streaming request against the module-level vLLM client.
    response = client.chat.completions.create(
        model="Qwen/Qwen3-30B-A3B-Instruct-2507",
        messages=messages,
        temperature=0.7,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        # Some chunks (e.g. the final one) carry no content delta.
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message
# Canned prompts shown beneath the chat box.
_EXAMPLE_PROMPTS = [
    "What is the capital of France?",
    "Write a Python function for quicksort.",
]

# Chat UI wired to the streaming predict() generator. The 'type'
# argument is deliberately omitted, so Gradio uses its default
# (tuple-format) history representation.
demo = gr.ChatInterface(
    fn=predict,
    title="Qwen3 vLLM Chat",
    description="Interface for Qwen/Qwen3-30B-A3B-Instruct-2507 running on vLLM",
    examples=_EXAMPLE_PROMPTS,
)
if __name__ == "__main__":
    # Bind on all interfaces and expose a public Gradio share link.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)