Hrant committed on
Commit
2a8aa9e
·
verified ·
1 Parent(s): d9d7711

Deploy Qwen3.5 chat app with model selector

Browse files
Files changed (3) hide show
  1. README.md +6 -7
  2. app.py +160 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,12 +1,11 @@
1
  ---
2
- title: Qwen3 5
3
- emoji: 🌍
4
- colorFrom: indigo
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 6.8.0
8
  app_file: app.py
9
- pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
+ title: Qwen3.5
 
 
 
3
  sdk: gradio
 
4
  app_file: app.py
 
5
  ---
6
 
7
+ # Qwen3.5
8
+
9
+ Chat with models from the official Qwen 3.5 Hugging Face collection.
10
+
11
+ This Space expects an `HF_TOKEN` secret for inference calls.
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Any
5
+
6
+ import gradio as gr
7
+ import requests
8
+ from huggingface_hub import InferenceClient
9
+
# HF collections API endpoint listing the official Qwen3.5 model collection.
COLLECTION_API = "https://huggingface.co/api/collections/Qwen/qwen35"
11
+
12
+
13
def fetch_qwen35_models() -> list[dict[str, Any]]:
    """Return the models in the official Qwen3.5 HF collection.

    Each entry has the shape ``{"id": <repo id>, "live_providers": [...]}``
    where ``live_providers`` is a sorted, de-duplicated list of inference
    providers that report "live" status for that model.

    Falls back to a hard-coded model list when the collection API is
    unreachable or returns malformed JSON, so the app can still start.
    """
    try:
        response = requests.get(COLLECTION_API, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Narrowed from a bare ``except Exception``: only network/HTTP
        # failures (RequestException) and JSON-decoding failures
        # (Response.json() raises a ValueError subclass) trigger the
        # fallback; programming errors still surface. The fallback keeps
        # the app usable if the HF collection API is transiently down.
        return [
            {"id": "Qwen/Qwen3.5-35B-A3B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-27B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-9B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-4B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-2B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-0.8B", "live_providers": ["unknown"]},
        ]

    models: list[dict[str, Any]] = []
    for item in payload.get("items", []):
        # Collections mix models, datasets, papers, etc. — keep models only.
        if item.get("type") != "model":
            continue
        model_id = item.get("id")
        if not model_id:
            continue

        # A provider is usable only when both the provider itself and this
        # specific model deployment report "live".
        providers = {
            str(provider.get("provider"))
            for provider in item.get("availableInferenceProviders", []) or []
            if provider.get("providerStatus") == "live"
            and provider.get("modelStatus") == "live"
        }
        models.append({"id": model_id, "live_providers": sorted(providers)})
    return models
49
+
50
+
51
+ MODEL_INFO = fetch_qwen35_models()
52
+ MODEL_IDS = [x["id"] for x in MODEL_INFO]
53
+ DEFAULT_MODEL = MODEL_IDS[0] if MODEL_IDS else "Qwen/Qwen3.5-35B-A3B"
54
+
55
+ PROVIDER_LOOKUP = {x["id"]: x.get("live_providers", []) for x in MODEL_INFO}
56
+
57
+
58
def provider_note(model_id: str) -> str:
    """Return a Markdown status line listing live providers for *model_id*."""
    live = PROVIDER_LOOKUP.get(model_id, [])
    if not live:
        return "No live provider listed by HF for this model right now. Try another model."
    return f"Live inference providers: {', '.join(live)}"
63
+
64
+
65
def generate_reply(
    message: str,
    history: list[tuple[str, str]],
    model_id: str,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    """Run one chat-completion turn against *model_id* via HF inference.

    Parameters mirror ``gr.ChatInterface`` with ``additional_inputs``:
    *message* is the new user turn; *history* holds prior turns, either as
    ``(user, assistant)`` tuples or (if the interface is later switched to
    ``type="messages"``) as role/content dicts — both are accepted.

    Returns the assistant reply, or a human-readable error string on
    failure so the chat UI never crashes on a provider error.
    """
    token = os.getenv("HF_TOKEN")  # optional; anonymous calls may be rate-limited
    client = InferenceClient(token=token, timeout=120)

    messages = _build_messages(message, history, system_prompt)

    try:
        result = client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
        )
        reply = result.choices[0].message.content
        # Some providers return structured content; coerce to str for the UI.
        return reply if isinstance(reply, str) else str(reply)
    except Exception as exc:
        # Deliberate broad catch: surface any provider/network failure as a
        # chat message instead of crashing the Gradio worker.
        return (
            f"Model call failed for `{model_id}`.\n\n"
            f"Details: {exc}\n\n"
            "Try another model from the dropdown. Some models may not currently have a live provider."
        )


def _build_messages(
    message: str,
    history: list[Any],
    system_prompt: str,
) -> list[dict[str, str]]:
    """Convert Gradio chat history into OpenAI-style chat messages.

    Accepts both tuple-style history (``[(user, assistant), ...]``) and
    messages-style history (``[{"role": ..., "content": ...}, ...]``), and
    tolerates a ``None``/empty system prompt.
    """
    messages: list[dict[str, str]] = []
    # Guard against None: Gradio may pass an empty/cleared textbox value.
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    for turn in history or []:
        if isinstance(turn, dict):
            # messages format: forward role/content when both are present.
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
        else:
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})
    return messages
107
+
108
+
109
# --- UI definition -----------------------------------------------------------
# Builds the Gradio app: model dropdown + provider status line, collapsible
# generation settings, and a ChatInterface wired to generate_reply.
with gr.Blocks(title="Qwen3.5 Chat") as demo:
    gr.Markdown("# Qwen3.5 Chat")
    gr.Markdown(
        "Select a model from the official Qwen3.5 collection and chat. "
        "This Space uses Hugging Face Inference providers via `HF_TOKEN`."
    )

    # Model picker limited to collection models discovered at import time.
    model_dd = gr.Dropdown(
        choices=MODEL_IDS,
        value=DEFAULT_MODEL,
        label="Qwen3.5 Model",
        allow_custom_value=False,
    )
    # Shows which inference providers are live for the selected model.
    provider_md = gr.Markdown(provider_note(DEFAULT_MODEL))

    with gr.Accordion("Generation Settings", open=False):
        system_prompt = gr.Textbox(
            label="System prompt",
            value="You are a helpful assistant.",
            lines=2,
        )
        max_new_tokens = gr.Slider(
            label="Max new tokens",
            minimum=64,
            maximum=4096,
            step=32,
            value=1024,
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=2.0,
            step=0.05,
            value=0.7,
        )
        top_p = gr.Slider(
            label="Top-p",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.9,
        )

    # Refresh the provider status line whenever the model selection changes.
    model_dd.change(fn=provider_note, inputs=model_dd, outputs=provider_md)

    # type="tuples" matches generate_reply's history: list[tuple[str, str]].
    # NOTE(review): tuple-style history is deprecated in newer Gradio
    # releases — confirm against the pinned gradio version before upgrading.
    gr.ChatInterface(
        fn=generate_reply,
        additional_inputs=[model_dd, system_prompt, max_new_tokens, temperature, top_p],
        type="tuples",
    )

# Queue bounds concurrent requests so slow inference calls don't pile up.
demo.queue(max_size=32).launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.44.0
2
+ huggingface_hub>=0.24.0
3
+ requests>=2.32.3