# readctrl/code/interface/vllm_app.py
# Uploaded by shahidul034 (commit 1db7196, verified) via the upload-large-folder tool.
import gradio as gr
from openai import OpenAI
# Initialize the client
# OpenAI-compatible client pointed at a local vLLM server.
# NOTE(review): vLLM's OpenAI-compatible endpoint does not validate the key by
# default, so a placeholder token satisfies the SDK's required argument.
client = OpenAI(
    base_url="http://localhost:8004/v1",  # local vLLM OpenAI-compatible endpoint
    api_key="token-not-needed",
)
def predict(message, history):
    """Stream an assistant reply for *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The user's current input.
    history : list
        Gradio tuple-format history: a list of (user_text, assistant_text)
        pairs (the ChatInterface below is built without ``type="messages"``).

    Yields
    ------
    str
        The progressively accumulated assistant reply, re-yielded on every
        streamed token so Gradio can render it incrementally.
    """
    history_openai_format = []
    # Manually rebuild the history to ensure only clean user/assistant
    # strings are sent to the server; skip malformed entries defensively.
    for pair in history:
        # pair[0] is the user turn, pair[1] is the assistant turn.
        if len(pair) >= 2:
            history_openai_format.append({"role": "user", "content": str(pair[0])})
            history_openai_format.append({"role": "assistant", "content": str(pair[1])})
    # Add the current message as the final user turn.
    history_openai_format.append({"role": "user", "content": message})
    # Create the streaming completion request.
    response = client.chat.completions.create(
        model="Qwen/Qwen3-30B-A3B-Instruct-2507",
        messages=history_openai_format,
        temperature=0.7,
        stream=True
    )
    partial_message = ""
    for chunk in response:
        # FIX: streamed chunks can arrive with an empty `choices` list (e.g.
        # a trailing usage chunk from vLLM/OpenAI) — guard before indexing
        # to avoid an IndexError that would abort the stream mid-reply.
        if chunk.choices and chunk.choices[0].delta.content is not None:
            partial_message += chunk.choices[0].delta.content
            yield partial_message
# Launch the Gradio ChatInterface without the 'type' argument
demo = gr.ChatInterface(
fn=predict,
title="Qwen3 vLLM Chat",
description="Interface for Qwen/Qwen3-30B-A3B-Instruct-2507 running on vLLM",
examples=["What is the capital of France?", "Write a Python function for quicksort."]
)
if __name__ == "__main__":
    # Bind on all interfaces at port 7860; share=True additionally creates a
    # temporary public Gradio tunnel URL for remote access.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)