# qwen3-5 / app.py — Gradio chat Space for the Qwen3.5 model collection.
# Commit e0ae8e2 ("Fix Gradio ChatInterface compatibility"), by Hrant.
from __future__ import annotations
import os
from typing import Any
import gradio as gr
import requests
from huggingface_hub import InferenceClient
# Hugging Face collections API endpoint for the official Qwen3.5 collection;
# fetched once at startup to populate the model dropdown.
COLLECTION_API = "https://huggingface.co/api/collections/Qwen/qwen35"
def fetch_qwen35_models() -> list[dict[str, Any]]:
    """Fetch the Qwen3.5 model collection from the Hugging Face API.

    Returns:
        A list of ``{"id": <repo id>, "live_providers": [<name>, ...]}`` dicts,
        where ``live_providers`` holds the sorted, de-duplicated names of
        inference providers whose provider *and* model status are both "live".
        Falls back to a static list if the collection API is unavailable.
    """
    try:
        response = requests.get(COLLECTION_API, timeout=30)
        response.raise_for_status()
        payload = response.json()
    # Narrowed from a bare `except Exception`: network/HTTP failures raise
    # requests.RequestException, and malformed JSON raises a ValueError
    # subclass — anything else is a programming error and should surface.
    except (requests.RequestException, ValueError):
        # Minimal fallback for resilience if HF collection API is transiently unavailable.
        return [
            {"id": "Qwen/Qwen3.5-35B-A3B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-27B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-9B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-4B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-2B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-0.8B", "live_providers": ["unknown"]},
        ]
    models: list[dict[str, Any]] = []
    for item in payload.get("items", []):
        # The collection can also contain datasets/papers; keep models only.
        if item.get("type") != "model":
            continue
        model_id = item.get("id")
        if not model_id:
            continue
        # Keep only fully-live providers; skip entries with a missing provider
        # name (the old code would have recorded the literal string "None").
        providers = {
            str(provider["provider"])
            for provider in item.get("availableInferenceProviders", []) or []
            if provider.get("provider") is not None
            and provider.get("providerStatus") == "live"
            and provider.get("modelStatus") == "live"
        }
        models.append(
            {
                "id": model_id,
                "live_providers": sorted(providers),
            }
        )
    return models
# Resolve the collection once at import time so the UI widgets below can be
# built with a static list of choices.
MODEL_INFO = fetch_qwen35_models()
MODEL_IDS = [entry["id"] for entry in MODEL_INFO]
# Fall back to the flagship repo id if the collection came back empty.
DEFAULT_MODEL = "Qwen/Qwen3.5-35B-A3B" if not MODEL_IDS else MODEL_IDS[0]
# Map repo id -> list of live provider names, for the dropdown's status note.
PROVIDER_LOOKUP = {entry["id"]: entry.get("live_providers", []) for entry in MODEL_INFO}
def provider_note(model_id: str) -> str:
    """Return a one-line status note about live inference providers for *model_id*."""
    live = PROVIDER_LOOKUP.get(model_id, [])
    # Guard clause: no providers means the model can't be served right now.
    if not live:
        return "No live provider listed by HF for this model right now. Try another model."
    return f"Live inference providers: {', '.join(live)}"
def generate_reply(
    message: str,
    history: list[tuple[str, str]] | list[dict[str, Any]],
    model_id: str,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    """Chat callback for gr.ChatInterface: call the selected model via HF inference.

    Args:
        message: The user's newest message.
        history: Prior turns. Gradio passes either legacy ``(user, assistant)``
            tuples or modern ``{"role": ..., "content": ...}`` dicts depending
            on the ChatInterface format; both are accepted here.
        model_id: HF repo id of the model to query.
        system_prompt: Optional system prompt (ignored if blank).
        max_new_tokens / temperature / top_p: Sampling parameters.

    Returns:
        The assistant's reply, or a human-readable error message on failure
        (never raises — errors are surfaced in the chat window).
    """
    token = os.getenv("HF_TOKEN")
    client = InferenceClient(token=token, timeout=120)
    messages: list[dict[str, str]] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})
    for turn in history:
        if isinstance(turn, dict):
            # "messages" format: already role/content shaped; keep only
            # user/assistant turns with non-empty content.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and content:
                messages.append({"role": role, "content": str(content)})
        else:
            # Legacy "tuples" format: one (user, assistant) pair per turn.
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    try:
        result = client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
        )
        reply = result.choices[0].message.content
        if isinstance(reply, str):
            return reply
        return str(reply)
    except Exception as exc:
        # Boundary handler: report the failure in-chat rather than crashing
        # the Space (models without a live provider fail here routinely).
        return (
            f"Model call failed for `{model_id}`.\n\n"
            f"Details: {exc}\n\n"
            "Try another model from the dropdown. Some models may not currently have a live provider."
        )
# --- UI layout --------------------------------------------------------------
# NOTE: component instantiation order inside the gr.Blocks context determines
# on-screen layout, so statement order here is significant.
with gr.Blocks(title="Qwen3.5 Chat") as demo:
    gr.Markdown("# Qwen3.5 Chat")
    gr.Markdown(
        "Select a model from the official Qwen3.5 collection and chat. "
        "This Space uses Hugging Face Inference providers via `HF_TOKEN`."
    )
    # Model picker, populated from the collection fetched at import time.
    model_dd = gr.Dropdown(
        choices=MODEL_IDS,
        value=DEFAULT_MODEL,
        label="Qwen3.5 Model",
        allow_custom_value=False,
    )
    # Live-provider status line for the currently selected model.
    provider_md = gr.Markdown(provider_note(DEFAULT_MODEL))
    # Sampling controls, collapsed by default.
    with gr.Accordion("Generation Settings", open=False):
        system_prompt = gr.Textbox(
            label="System prompt",
            value="You are a helpful assistant.",
            lines=2,
        )
        max_new_tokens = gr.Slider(
            label="Max new tokens",
            minimum=64,
            maximum=4096,
            step=32,
            value=1024,
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=2.0,
            step=0.05,
            value=0.7,
        )
        top_p = gr.Slider(
            label="Top-p",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.9,
        )
    # Refresh the provider note whenever the model selection changes.
    model_dd.change(fn=provider_note, inputs=model_dd, outputs=provider_md)
    # additional_inputs are passed positionally to generate_reply after
    # (message, history) — keep this list in sync with its signature.
    gr.ChatInterface(
        fn=generate_reply,
        additional_inputs=[model_dd, system_prompt, max_new_tokens, temperature, top_p],
    )
# Queue concurrent requests (bounded backlog) and start the server.
demo.queue(max_size=32).launch()