Hrant committed on
Commit
2a8aa9e
·
verified ·
1 Parent(s): d9d7711

Deploy Qwen3.5 chat app with model selector

Browse files
Files changed (3) hide show
  1. README.md +6 -7
  2. app.py +160 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,12 +1,11 @@
1
  ---
2
- title: Qwen3 5
3
- emoji: 🌍
4
- colorFrom: indigo
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 6.8.0
8
  app_file: app.py
9
- pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
+ title: Qwen3.5
 
 
 
3
  sdk: gradio
 
4
  app_file: app.py
 
5
  ---
6
 
7
+ # Qwen3.5
8
+
9
+ Chat with models from the official Qwen 3.5 Hugging Face collection.
10
+
11
+ This Space expects an `HF_TOKEN` secret for inference calls.
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Any
5
+
6
+ import gradio as gr
7
+ import requests
8
+ from huggingface_hub import InferenceClient
9
+
# HF collections API endpoint listing the official Qwen3.5 model collection.
COLLECTION_API = "https://huggingface.co/api/collections/Qwen/qwen35"
11
+
12
+
13
def fetch_qwen35_models() -> list[dict[str, Any]]:
    """Return the models in the official Qwen3.5 HF collection.

    Each entry has the shape ``{"id": <repo id>, "live_providers": [...]}``
    where ``live_providers`` is a sorted, de-duplicated list of inference
    providers that report "live" status for that model.

    Falls back to a hard-coded model list when the collection API is
    unreachable or returns malformed JSON, so the app can still start.
    """
    try:
        response = requests.get(COLLECTION_API, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Narrowed from a bare ``except Exception``: only network/HTTP
        # failures (RequestException) and JSON-decoding failures
        # (Response.json() raises a ValueError subclass) trigger the
        # fallback; programming errors still surface. The fallback keeps
        # the app usable if the HF collection API is transiently down.
        return [
            {"id": "Qwen/Qwen3.5-35B-A3B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-27B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-9B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-4B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-2B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-0.8B", "live_providers": ["unknown"]},
        ]

    models: list[dict[str, Any]] = []
    for item in payload.get("items", []):
        # Collections mix models, datasets, papers, etc. — keep models only.
        if item.get("type") != "model":
            continue
        model_id = item.get("id")
        if not model_id:
            continue

        # A provider is usable only when both the provider itself and this
        # specific model deployment report "live".
        providers = {
            str(provider.get("provider"))
            for provider in item.get("availableInferenceProviders", []) or []
            if provider.get("providerStatus") == "live"
            and provider.get("modelStatus") == "live"
        }
        models.append({"id": model_id, "live_providers": sorted(providers)})
    return models
49
+
50
+
51
+ MODEL_INFO = fetch_qwen35_models()
52
+ MODEL_IDS = [x["id"] for x in MODEL_INFO]
53
+ DEFAULT_MODEL = MODEL_IDS[0] if MODEL_IDS else "Qwen/Qwen3.5-35B-A3B"
54
+
55
+ PROVIDER_LOOKUP = {x["id"]: x.get("live_providers", []) for x in MODEL_INFO}
56
+
57
+
58
def provider_note(model_id: str) -> str:
    """Return a Markdown status line listing live providers for *model_id*."""
    live = PROVIDER_LOOKUP.get(model_id, [])
    if not live:
        return "No live provider listed by HF for this model right now. Try another model."
    return f"Live inference providers: {', '.join(live)}"
63
+
64
+
65
def generate_reply(
    message: str,
    history: list[tuple[str, str]],
    model_id: str,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    """Run one chat-completion turn against *model_id* via HF inference.

    Parameters mirror ``gr.ChatInterface`` with ``additional_inputs``:
    *message* is the new user turn; *history* holds prior turns, either as
    ``(user, assistant)`` tuples or (if the interface is later switched to
    ``type="messages"``) as role/content dicts — both are accepted.

    Returns the assistant reply, or a human-readable error string on
    failure so the chat UI never crashes on a provider error.
    """
    token = os.getenv("HF_TOKEN")  # optional; anonymous calls may be rate-limited
    client = InferenceClient(token=token, timeout=120)

    messages = _build_messages(message, history, system_prompt)

    try:
        result = client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
        )
        reply = result.choices[0].message.content
        # Some providers return structured content; coerce to str for the UI.
        return reply if isinstance(reply, str) else str(reply)
    except Exception as exc:
        # Deliberate broad catch: surface any provider/network failure as a
        # chat message instead of crashing the Gradio worker.
        return (
            f"Model call failed for `{model_id}`.\n\n"
            f"Details: {exc}\n\n"
            "Try another model from the dropdown. Some models may not currently have a live provider."
        )


def _build_messages(
    message: str,
    history: list[Any],
    system_prompt: str,
) -> list[dict[str, str]]:
    """Convert Gradio chat history into OpenAI-style chat messages.

    Accepts both tuple-style history (``[(user, assistant), ...]``) and
    messages-style history (``[{"role": ..., "content": ...}, ...]``), and
    tolerates a ``None``/empty system prompt.
    """
    messages: list[dict[str, str]] = []
    # Guard against None: Gradio may pass an empty/cleared textbox value.
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    for turn in history or []:
        if isinstance(turn, dict):
            # messages format: forward role/content when both are present.
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
        else:
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})
    return messages
107
+
108
+
109
# --- UI definition -----------------------------------------------------------
# Builds the Gradio app: model dropdown + provider status line, collapsible
# generation settings, and a ChatInterface wired to generate_reply.
with gr.Blocks(title="Qwen3.5 Chat") as demo:
    gr.Markdown("# Qwen3.5 Chat")
    gr.Markdown(
        "Select a model from the official Qwen3.5 collection and chat. "
        "This Space uses Hugging Face Inference providers via `HF_TOKEN`."
    )

    # Model picker limited to collection models discovered at import time.
    model_dd = gr.Dropdown(
        choices=MODEL_IDS,
        value=DEFAULT_MODEL,
        label="Qwen3.5 Model",
        allow_custom_value=False,
    )
    # Shows which inference providers are live for the selected model.
    provider_md = gr.Markdown(provider_note(DEFAULT_MODEL))

    with gr.Accordion("Generation Settings", open=False):
        system_prompt = gr.Textbox(
            label="System prompt",
            value="You are a helpful assistant.",
            lines=2,
        )
        max_new_tokens = gr.Slider(
            label="Max new tokens",
            minimum=64,
            maximum=4096,
            step=32,
            value=1024,
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=2.0,
            step=0.05,
            value=0.7,
        )
        top_p = gr.Slider(
            label="Top-p",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.9,
        )

    # Refresh the provider status line whenever the model selection changes.
    model_dd.change(fn=provider_note, inputs=model_dd, outputs=provider_md)

    # type="tuples" matches generate_reply's history: list[tuple[str, str]].
    # NOTE(review): tuple-style history is deprecated in newer Gradio
    # releases — confirm against the pinned gradio version before upgrading.
    gr.ChatInterface(
        fn=generate_reply,
        additional_inputs=[model_dd, system_prompt, max_new_tokens, temperature, top_p],
        type="tuples",
    )

# Queue bounds concurrent requests so slow inference calls don't pile up.
demo.queue(max_size=32).launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.44.0
2
+ huggingface_hub>=0.24.0
3
+ requests>=2.32.3