# qwen3-5 / app.py — Gradio chat Space for the Qwen3.5 model collection.
# Commit e0ae8e2 ("Fix Gradio ChatInterface compatibility"), by Hrant.
from __future__ import annotations
import os
from typing import Any
import gradio as gr
import requests
from huggingface_hub import InferenceClient
# Hugging Face collections API endpoint for the official Qwen3.5 collection;
# fetched once at startup to populate the model dropdown.
COLLECTION_API = "https://huggingface.co/api/collections/Qwen/qwen35"
def fetch_qwen35_models() -> list[dict[str, Any]]:
    """Fetch the Qwen3.5 model collection from the Hugging Face API.

    Returns:
        A list of ``{"id": <repo id>, "live_providers": [<name>, ...]}`` dicts,
        where ``live_providers`` holds the sorted, de-duplicated names of
        inference providers whose provider *and* model status are both "live".
        Falls back to a static list if the collection API is unavailable.
    """
    try:
        response = requests.get(COLLECTION_API, timeout=30)
        response.raise_for_status()
        payload = response.json()
    # Narrowed from a bare `except Exception`: network/HTTP failures raise
    # requests.RequestException, and malformed JSON raises a ValueError
    # subclass — anything else is a programming error and should surface.
    except (requests.RequestException, ValueError):
        # Minimal fallback for resilience if HF collection API is transiently unavailable.
        return [
            {"id": "Qwen/Qwen3.5-35B-A3B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-27B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-9B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-4B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-2B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-0.8B", "live_providers": ["unknown"]},
        ]
    models: list[dict[str, Any]] = []
    for item in payload.get("items", []):
        # The collection can also contain datasets/papers; keep models only.
        if item.get("type") != "model":
            continue
        model_id = item.get("id")
        if not model_id:
            continue
        # Keep only fully-live providers; skip entries with a missing provider
        # name (the old code would have recorded the literal string "None").
        providers = {
            str(provider["provider"])
            for provider in item.get("availableInferenceProviders", []) or []
            if provider.get("provider") is not None
            and provider.get("providerStatus") == "live"
            and provider.get("modelStatus") == "live"
        }
        models.append(
            {
                "id": model_id,
                "live_providers": sorted(providers),
            }
        )
    return models
# Resolve the collection once at import time so the UI widgets below can be
# built with a static list of choices.
MODEL_INFO = fetch_qwen35_models()
MODEL_IDS = [entry["id"] for entry in MODEL_INFO]
# Fall back to the flagship repo id if the collection came back empty.
DEFAULT_MODEL = "Qwen/Qwen3.5-35B-A3B" if not MODEL_IDS else MODEL_IDS[0]
# Map repo id -> list of live provider names, for the dropdown's status note.
PROVIDER_LOOKUP = {entry["id"]: entry.get("live_providers", []) for entry in MODEL_INFO}
def provider_note(model_id: str) -> str:
    """Return a one-line status note about live inference providers for *model_id*."""
    live = PROVIDER_LOOKUP.get(model_id, [])
    # Guard clause: no providers means the model can't be served right now.
    if not live:
        return "No live provider listed by HF for this model right now. Try another model."
    return f"Live inference providers: {', '.join(live)}"
def generate_reply(
    message: str,
    history: list[tuple[str, str]] | list[dict[str, Any]],
    model_id: str,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    """Chat callback for gr.ChatInterface: call the selected model via HF inference.

    Args:
        message: The user's newest message.
        history: Prior turns. Gradio passes either legacy ``(user, assistant)``
            tuples or modern ``{"role": ..., "content": ...}`` dicts depending
            on the ChatInterface format; both are accepted here.
        model_id: HF repo id of the model to query.
        system_prompt: Optional system prompt (ignored if blank).
        max_new_tokens / temperature / top_p: Sampling parameters.

    Returns:
        The assistant's reply, or a human-readable error message on failure
        (never raises — errors are surfaced in the chat window).
    """
    token = os.getenv("HF_TOKEN")
    client = InferenceClient(token=token, timeout=120)
    messages: list[dict[str, str]] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})
    for turn in history:
        if isinstance(turn, dict):
            # "messages" format: already role/content shaped; keep only
            # user/assistant turns with non-empty content.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and content:
                messages.append({"role": role, "content": str(content)})
        else:
            # Legacy "tuples" format: one (user, assistant) pair per turn.
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    try:
        result = client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
        )
        reply = result.choices[0].message.content
        if isinstance(reply, str):
            return reply
        return str(reply)
    except Exception as exc:
        # Boundary handler: report the failure in-chat rather than crashing
        # the Space (models without a live provider fail here routinely).
        return (
            f"Model call failed for `{model_id}`.\n\n"
            f"Details: {exc}\n\n"
            "Try another model from the dropdown. Some models may not currently have a live provider."
        )
# --- UI layout --------------------------------------------------------------
# NOTE: component instantiation order inside the gr.Blocks context determines
# on-screen layout, so statement order here is significant.
with gr.Blocks(title="Qwen3.5 Chat") as demo:
    gr.Markdown("# Qwen3.5 Chat")
    gr.Markdown(
        "Select a model from the official Qwen3.5 collection and chat. "
        "This Space uses Hugging Face Inference providers via `HF_TOKEN`."
    )
    # Model picker, populated from the collection fetched at import time.
    model_dd = gr.Dropdown(
        choices=MODEL_IDS,
        value=DEFAULT_MODEL,
        label="Qwen3.5 Model",
        allow_custom_value=False,
    )
    # Live-provider status line for the currently selected model.
    provider_md = gr.Markdown(provider_note(DEFAULT_MODEL))
    # Sampling controls, collapsed by default.
    with gr.Accordion("Generation Settings", open=False):
        system_prompt = gr.Textbox(
            label="System prompt",
            value="You are a helpful assistant.",
            lines=2,
        )
        max_new_tokens = gr.Slider(
            label="Max new tokens",
            minimum=64,
            maximum=4096,
            step=32,
            value=1024,
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=2.0,
            step=0.05,
            value=0.7,
        )
        top_p = gr.Slider(
            label="Top-p",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.9,
        )
    # Refresh the provider note whenever the model selection changes.
    model_dd.change(fn=provider_note, inputs=model_dd, outputs=provider_md)
    # additional_inputs are passed positionally to generate_reply after
    # (message, history) — keep this list in sync with its signature.
    gr.ChatInterface(
        fn=generate_reply,
        additional_inputs=[model_dd, system_prompt, max_new_tokens, temperature, top_p],
    )
# Queue concurrent requests (bounded backlog) and start the server.
demo.queue(max_size=32).launch()