| | import gradio as gr |
| | from openai import OpenAI |
| |
|
| | |
# OpenAI-compatible client pointed at a locally hosted vLLM server.
# The API key is a placeholder: vLLM does not check it unless started
# with an --api-key flag.
client = OpenAI(
    base_url="http://localhost:8004/v1",
    api_key="token-not-needed",
)
| |
|
def predict(message, history):
    """Stream a chat completion for *message* given the gradio chat *history*.

    Args:
        message: The new user message (string).
        history: Prior conversation as provided by ``gr.ChatInterface`` —
            either a list of ``(user, assistant)`` pairs (tuples format) or a
            list of ``{"role": ..., "content": ...}`` dicts (messages format).

    Yields:
        The accumulated assistant reply after each streamed token, so the UI
        updates progressively.
    """
    history_openai_format = []
    for turn in history:
        if isinstance(turn, dict):
            # Messages-format history (gradio type="messages"): pass roles through.
            if turn.get("content") is not None:
                history_openai_format.append(
                    {"role": turn["role"], "content": str(turn["content"])}
                )
        elif len(turn) >= 2:
            user_msg, assistant_msg = turn[0], turn[1]
            history_openai_format.append({"role": "user", "content": str(user_msg)})
            # The assistant slot is None while a reply is pending or after an
            # error; skip it rather than sending the literal string "None".
            if assistant_msg is not None:
                history_openai_format.append(
                    {"role": "assistant", "content": str(assistant_msg)}
                )

    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model="Qwen/Qwen3-30B-A3B-Instruct-2507",
        messages=history_openai_format,
        temperature=0.7,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        # Some servers emit chunks with an empty choices list (e.g. a final
        # usage chunk); indexing choices[0] unguarded would raise IndexError.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message
| |
|
| | |
# Chat UI wired to predict(); gradio calls predict(message, history) and
# renders each yielded partial string as a streaming response.
demo = gr.ChatInterface(
    fn=predict,
    title="Qwen3 vLLM Chat",
    description="Interface for Qwen/Qwen3-30B-A3B-Instruct-2507 running on vLLM",
    examples=["What is the capital of France?", "Write a Python function for quicksort."]
)
| |
|
if __name__ == "__main__":
    # Bind on all interfaces for LAN access.
    # NOTE(review): share=True also creates a public gradio.live tunnel —
    # confirm that exposing this endpoint to the internet is intended.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)