| | from __future__ import annotations |
| |
|
| | import os |
| | from typing import Any |
| |
|
| | import gradio as gr |
| | import requests |
| | from huggingface_hub import InferenceClient |
| |
|
| | COLLECTION_API = "https://huggingface.co/api/collections/Qwen/qwen35" |
| |
|
| |
|
def fetch_qwen35_models() -> list[dict[str, Any]]:
    """Fetch the models of the official Qwen3.5 collection from the HF API.

    Returns:
        A list of dicts, one per model, each with:
          - "id": the model repo id (e.g. "Qwen/Qwen3.5-27B")
          - "live_providers": sorted, de-duplicated names of inference
            providers that HF reports as "live" for both the provider
            itself and this specific model.

    Falls back to a hard-coded snapshot of the collection (providers marked
    "unknown") when the API is unreachable or returns a non-JSON body, so
    the UI still renders without network access.
    """
    try:
        response = requests.get(COLLECTION_API, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Only network/HTTP failures and JSON-decode errors trigger the
        # fallback; anything else is a bug and should surface loudly.
        return [
            {"id": "Qwen/Qwen3.5-35B-A3B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-27B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-9B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-4B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-2B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-0.8B", "live_providers": ["unknown"]},
        ]

    models: list[dict[str, Any]] = []
    for item in payload.get("items", []):
        # Collections can also contain papers/datasets/spaces — keep models only.
        if item.get("type") != "model":
            continue
        model_id = item.get("id")
        if not model_id:
            continue

        # A provider counts as usable only when both the provider and the
        # model's deployment on it are reported "live".
        providers = {
            str(provider.get("provider"))
            for provider in item.get("availableInferenceProviders", []) or []
            if provider.get("providerStatus") == "live"
            and provider.get("modelStatus") == "live"
        }
        models.append(
            {
                "id": model_id,
                "live_providers": sorted(providers),
            }
        )
    return models
| |
|
| |
|
# Snapshot of the collection, taken once at import time.
MODEL_INFO = fetch_qwen35_models()

# Derived views: ordered id list for the dropdown, and id -> live-provider
# lookup for the status line under it.
MODEL_IDS: list[str] = []
PROVIDER_LOOKUP: dict[str, list[str]] = {}
for entry in MODEL_INFO:
    MODEL_IDS.append(entry["id"])
    PROVIDER_LOOKUP[entry["id"]] = entry.get("live_providers", [])

DEFAULT_MODEL = MODEL_IDS[0] if MODEL_IDS else "Qwen/Qwen3.5-35B-A3B"
| |
|
| |
|
def provider_note(model_id: str) -> str:
    """Return a Markdown status line listing live providers for *model_id*."""
    live = PROVIDER_LOOKUP.get(model_id, [])
    if not live:
        return "No live provider listed by HF for this model right now. Try another model."
    return f"Live inference providers: {', '.join(live)}"
| |
|
| |
|
def generate_reply(
    message: str,
    history: list[tuple[str, str]],
    model_id: str,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    """Run one chat turn against *model_id* via HF inference providers.

    Builds an OpenAI-style message list from the optional system prompt,
    the prior (user, assistant) history pairs, and the new user message,
    then calls the chat-completion endpoint. On any failure a readable
    Markdown error string is returned instead of raising, so the chat UI
    never crashes.
    """
    client = InferenceClient(token=os.getenv("HF_TOKEN"), timeout=120)

    # Assemble the conversation in chat-completion format.
    system_text = system_prompt.strip()
    messages: list[dict[str, str]] = []
    if system_text:
        messages.append({"role": "system", "content": system_text})
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    try:
        result = client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
        )
        reply = result.choices[0].message.content
    except Exception as exc:  # surface any provider/transport error in the chat
        return (
            f"Model call failed for `{model_id}`.\n\n"
            f"Details: {exc}\n\n"
            "Try another model from the dropdown. Some models may not currently have a live provider."
        )
    # Some providers may return structured content; coerce to str for Gradio.
    return reply if isinstance(reply, str) else str(reply)
| |
|
| |
|
# UI layout: model picker with a live-provider status line, collapsible
# generation settings, and a ChatInterface wired to generate_reply.
# NOTE: components must be created inside the Blocks context; order here
# determines on-page order.
with gr.Blocks(title="Qwen3.5 Chat") as demo:
    gr.Markdown("# Qwen3.5 Chat")
    gr.Markdown(
        "Select a model from the official Qwen3.5 collection and chat. "
        "This Space uses Hugging Face Inference providers via `HF_TOKEN`."
    )

    # Model selector limited to the ids fetched from the collection.
    model_dd = gr.Dropdown(
        choices=MODEL_IDS,
        value=DEFAULT_MODEL,
        label="Qwen3.5 Model",
        allow_custom_value=False,
    )
    # Status line under the dropdown; updated whenever the model changes.
    provider_md = gr.Markdown(provider_note(DEFAULT_MODEL))

    # Sampling controls, collapsed by default; passed to generate_reply
    # as additional ChatInterface inputs.
    with gr.Accordion("Generation Settings", open=False):
        system_prompt = gr.Textbox(
            label="System prompt",
            value="You are a helpful assistant.",
            lines=2,
        )
        max_new_tokens = gr.Slider(
            label="Max new tokens",
            minimum=64,
            maximum=4096,
            step=32,
            value=1024,
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=2.0,
            step=0.05,
            value=0.7,
        )
        top_p = gr.Slider(
            label="Top-p",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.9,
        )

    # Refresh the provider note when a different model is selected.
    model_dd.change(fn=provider_note, inputs=model_dd, outputs=provider_md)

    # Chat widget; history + message are supplied by Gradio, the extra
    # inputs below map onto generate_reply's trailing parameters in order.
    gr.ChatInterface(
        fn=generate_reply,
        additional_inputs=[model_dd, system_prompt, max_new_tokens, temperature, top_p],
    )

# Queue bounds concurrent requests so the Space degrades gracefully under load.
demo.queue(max_size=32).launch()
| |
|