Ngixdev committed on
Commit
13d1862
·
verified ·
1 Parent(s): c16b401

Initial commit: Qwen3.5-9B API interface

Browse files
Files changed (3) hide show
  1. README.md +62 -6
  2. app.py +317 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,12 +1,68 @@
1
  ---
2
- title: Qwen Api
3
- emoji: 👀
4
- colorFrom: purple
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 6.9.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Qwen API
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.29.0
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
  ---
12
 
13
+ # Qwen3.5-9B Uncensored API Interface
14
+
15
+ API interface for [HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive](https://huggingface.co/HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive).
16
+
17
+ ## Features
18
+
19
+ - 9B parameters with 262K context window
20
+ - Fully uncensored (0/465 refusals)
21
+ - Multimodal capable (text, image, video)
22
+ - Supports 201 languages
23
+
24
+ ## API Usage
25
+
26
+ ### Python
27
+
28
+ ```python
29
+ from gradio_client import Client
30
+
31
+ client = Client("Ngixdev/qwen-api")
32
+
33
+ result = client.predict(
34
+ prompt="Your question here",
35
+ system_prompt="You are a helpful assistant",
36
+ temperature=0.7,
37
+ top_p=0.8,
38
+ max_tokens=2048,
39
+ api_name="/api_generate"
40
+ )
41
+ print(result)
42
+ ```
43
+
44
+ ### cURL
45
+
46
+ ```bash
47
+ curl -X POST https://ngixdev-qwen-api.hf.space/api/api_generate \
48
+ -H "Content-Type: application/json" \
49
+ -d '{
50
+ "data": [
51
+ "Your question here",
52
+ "You are a helpful assistant",
53
+ 0.7,
54
+ 0.8,
55
+ 2048
56
+ ]
57
+ }'
58
+ ```
59
+
60
+ ## Parameters
61
+
62
+ | Parameter | Type | Default | Description |
63
+ |-----------|------|---------|-------------|
64
+ | prompt | string | required | User prompt/question |
65
+ | system_prompt | string | "" | System instruction |
66
+ | temperature | float | 0.7 | Sampling temperature (0.0-2.0) |
67
+ | top_p | float | 0.8 | Nucleus sampling (0.0-1.0) |
68
+ | max_tokens | int | 2048 | Maximum tokens to generate |
app.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import InferenceClient
3
+
4
+ MODEL_ID = "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive"
5
+
6
+ client = InferenceClient()
7
+
8
+ DEFAULT_PARAMS = {
9
+ "temperature": 0.7,
10
+ "top_p": 0.8,
11
+ "top_k": 20,
12
+ "max_tokens": 2048,
13
+ }
14
+
15
+
16
def generate_response(
    message: str,
    history: list,
    system_prompt: str = "",
    temperature: float = 0.7,
    top_p: float = 0.8,
    top_k: int = 20,
    max_tokens: int = 2048,
) -> str:
    """Return one complete (non-streamed) chat reply for ``message``.

    Builds a chat transcript from the optional system prompt, the prior
    turns in ``history`` and the new user message, then sends it to
    ``client.chat_completion`` for ``MODEL_ID``.

    Args:
        message: The new user message. Blank or ``None`` is rejected.
        history: Prior turns as ``(user_msg, assistant_msg)`` pairs; falsy
            entries within a pair are skipped. ``None`` is treated as no
            history.
        system_prompt: Optional system instruction; ignored when blank or
            ``None``.
        temperature: Sampling temperature forwarded to the backend.
        top_p: Nucleus-sampling value forwarded to the backend.
        top_k: Accepted so the signature matches the UI controls, but NOT
            forwarded to ``chat_completion``.
            NOTE(review): confirm whether the backend supports a top-k
            option before wiring it through.
        max_tokens: Generation cap; coerced to ``int`` before the call.

    Returns:
        The generated text, or a string of the form ``"Error: ..."`` when
        the input is blank or the backend call fails. This function reports
        failures in-band instead of raising.
    """
    if not (message or "").strip():
        return "Error: message must not be empty"

    messages = []

    # `or ""` keeps a None system prompt from raising before the try block.
    if (system_prompt or "").strip():
        messages.append({"role": "system", "content": system_prompt})

    for user_msg, assistant_msg in history or []:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    try:
        response = client.chat_completion(
            model=MODEL_ID,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            # UI sliders / JSON callers may deliver 2048.0 rather than 2048.
            max_tokens=int(max_tokens),
        )
        return response.choices[0].message.content
    except Exception as e:
        # Top-level boundary for the UI: surface the failure as text.
        return f"Error: {str(e)}"
49
+
50
+
51
def generate_stream(
    message: str,
    history: list,
    system_prompt: str = "",
    temperature: float = 0.7,
    top_p: float = 0.8,
    top_k: int = 20,
    max_tokens: int = 2048,
):
    """Stream a chat reply for ``message``, yielding the text so far.

    Builds a chat transcript from the optional system prompt, the prior
    turns in ``history`` and the new user message, then calls
    ``client.chat_completion(..., stream=True)`` for ``MODEL_ID``.

    Args:
        message: The new user message. Blank or ``None`` is rejected.
        history: Prior turns as ``(user_msg, assistant_msg)`` pairs; falsy
            entries within a pair are skipped. ``None`` is treated as no
            history.
        system_prompt: Optional system instruction; ignored when blank or
            ``None``.
        temperature: Sampling temperature forwarded to the backend.
        top_p: Nucleus-sampling value forwarded to the backend.
        top_k: Accepted so the signature matches the UI controls, but NOT
            forwarded to ``chat_completion``.
            NOTE(review): confirm whether the backend supports a top-k
            option before wiring it through.
        max_tokens: Generation cap; coerced to ``int`` before the call.

    Yields:
        The accumulated reply after each non-empty content delta. On blank
        input or a backend failure, a single ``"Error: ..."`` string is
        yielded instead of raising.
    """
    if not (message or "").strip():
        yield "Error: message must not be empty"
        return

    messages = []

    # `or ""` keeps a None system prompt from raising before the try block.
    if (system_prompt or "").strip():
        messages.append({"role": "system", "content": system_prompt})

    for user_msg, assistant_msg in history or []:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    try:
        stream = client.chat_completion(
            model=MODEL_ID,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            # UI sliders / JSON callers may deliver 2048.0 rather than 2048.
            max_tokens=int(max_tokens),
            stream=True,
        )

        partial_message = ""
        for chunk in stream:
            # A chunk with an empty `choices` list would make `choices[0]`
            # raise IndexError and replace the partial reply with an error.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                partial_message += piece
                yield partial_message
    except Exception as e:
        # Top-level boundary for the UI: surface the failure as text.
        yield f"Error: {str(e)}"
90
+
91
+
92
def api_generate(
    prompt: str,
    system_prompt: str = "",
    temperature: float = 0.7,
    top_p: float = 0.8,
    max_tokens: int = 2048,
) -> dict:
    """
    API endpoint for single-turn text generation.

    Sends the optional system prompt plus ``prompt`` to
    ``client.chat_completion`` for ``MODEL_ID`` and wraps the outcome in a
    dictionary. Failures are reported in the dictionary, not raised.

    Args:
        prompt: The user prompt/question. Blank or ``None`` is rejected
            without contacting the backend.
        system_prompt: Optional system instruction; ignored when blank or
            ``None``.
        temperature: Sampling temperature (0.0-2.0)
        top_p: Nucleus sampling parameter (0.0-1.0)
        max_tokens: Maximum tokens to generate; coerced to ``int``.

    Returns:
        On success: ``{"response": <text>, "status": "success"}``.
        On failure: ``{"response": None, "status": "error", "error": <msg>}``.
    """
    if not (prompt or "").strip():
        return {
            "response": None,
            "status": "error",
            "error": "prompt must not be empty",
        }

    messages = []

    # `or ""` lets JSON callers pass null for the system prompt and still
    # get a structured reply instead of an AttributeError.
    if (system_prompt or "").strip():
        messages.append({"role": "system", "content": system_prompt})

    messages.append({"role": "user", "content": prompt})

    try:
        response = client.chat_completion(
            model=MODEL_ID,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            # JSON callers may send 2048.0 rather than 2048.
            max_tokens=int(max_tokens),
        )
        return {"response": response.choices[0].message.content, "status": "success"}
    except Exception as e:
        # API boundary: always answer with the documented error shape.
        return {"response": None, "status": "error", "error": str(e)}
130
+
131
+
132
+ with gr.Blocks(title="Qwen3.5-9B Uncensored API", theme=gr.themes.Soft()) as demo:
133
+ gr.Markdown(
134
+ """
135
+ # 🤖 Qwen3.5-9B Uncensored API Interface
136
+
137
+ Powered by [HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive](https://huggingface.co/HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive)
138
+
139
+ **Features:**
140
+ - 9B parameters with 262K context window
141
+ - Fully uncensored (0/465 refusals)
142
+ - Multimodal capable (text, image, video)
143
+ - Supports 201 languages
144
+
145
+ Use the chat interface below or access via API.
146
+ """
147
+ )
148
+
149
+ with gr.Tab("💬 Chat"):
150
+ chatbot = gr.Chatbot(height=500, label="Conversation")
151
+
152
+ with gr.Row():
153
+ msg = gr.Textbox(
154
+ label="Message",
155
+ placeholder="Type your message here...",
156
+ scale=4,
157
+ lines=2,
158
+ )
159
+ submit_btn = gr.Button("Send", variant="primary", scale=1)
160
+
161
+ with gr.Accordion("⚙️ Settings", open=False):
162
+ system_prompt = gr.Textbox(
163
+ label="System Prompt",
164
+ placeholder="Optional: Set behavior/personality for the model",
165
+ lines=3,
166
+ )
167
+ with gr.Row():
168
+ temperature = gr.Slider(
169
+ minimum=0.0,
170
+ maximum=2.0,
171
+ value=0.7,
172
+ step=0.1,
173
+ label="Temperature",
174
+ )
175
+ top_p = gr.Slider(
176
+ minimum=0.0,
177
+ maximum=1.0,
178
+ value=0.8,
179
+ step=0.05,
180
+ label="Top P",
181
+ )
182
+ with gr.Row():
183
+ top_k = gr.Slider(
184
+ minimum=1,
185
+ maximum=100,
186
+ value=20,
187
+ step=1,
188
+ label="Top K",
189
+ )
190
+ max_tokens = gr.Slider(
191
+ minimum=64,
192
+ maximum=8192,
193
+ value=2048,
194
+ step=64,
195
+ label="Max Tokens",
196
+ )
197
+
198
+ clear_btn = gr.Button("🗑️ Clear Chat")
199
+
200
def user_submit(message, history):
    """Queue the user's message as a new, unanswered chat turn.

    Args:
        message: Text taken from the input box.
        history: Current chat turns as ``[user_msg, assistant_msg]`` pairs;
            ``None`` is treated as an empty conversation.

    Returns:
        A ``("", new_history)`` tuple: the empty string for the input box,
        and a new list with ``[message, None]`` appended. The ``history``
        argument itself is not mutated.
    """
    # `or []` avoids a TypeError when no conversation exists yet.
    return "", list(history or []) + [[message, None]]
202
+
203
def bot_response(history, system_prompt, temperature, top_p, top_k, max_tokens):
    """Stream the assistant's answer into the last chat turn.

    Takes the user message from the final turn of ``history``, passes it
    with the earlier turns and the sampling settings to
    ``generate_stream``, and yields ``history`` after each partial result
    with the final turn's assistant slot set to the text so far.

    Args:
        history: Chat turns as ``[user_msg, assistant_msg]`` pairs; the
            last pair holds the pending user message.
        system_prompt: Passed through to ``generate_stream``.
        temperature: Passed through to ``generate_stream``.
        top_p: Passed through to ``generate_stream``.
        top_k: Passed through to ``generate_stream``.
        max_tokens: Passed through to ``generate_stream``.

    Yields:
        The same ``history`` list, updated in place, once per partial
        reply. Nothing is yielded when ``history`` is empty or ``None``.
    """
    if not history:
        # A value returned from a generator is discarded, so return bare.
        return

    *earlier_turns, pending_turn = history
    message = pending_turn[0]

    for partial in generate_stream(
        message,
        earlier_turns,
        system_prompt,
        temperature,
        top_p,
        top_k,
        max_tokens,
    ):
        # Replace the turn instead of assigning into it, so an immutable
        # (tuple) turn does not raise.
        history[-1] = [message, partial]
        yield history
223
+
224
+ msg.submit(
225
+ user_submit,
226
+ [msg, chatbot],
227
+ [msg, chatbot]
228
+ ).then(
229
+ bot_response,
230
+ [chatbot, system_prompt, temperature, top_p, top_k, max_tokens],
231
+ chatbot,
232
+ )
233
+
234
+ submit_btn.click(
235
+ user_submit,
236
+ [msg, chatbot],
237
+ [msg, chatbot]
238
+ ).then(
239
+ bot_response,
240
+ [chatbot, system_prompt, temperature, top_p, top_k, max_tokens],
241
+ chatbot,
242
+ )
243
+
244
+ clear_btn.click(lambda: [], None, chatbot)
245
+
246
+ with gr.Tab("🔌 API"):
247
+ gr.Markdown(
248
+ """
249
+ ## API Usage
250
+
251
+ This Space provides a REST API for programmatic access.
252
+
253
+ ### Python Example
254
+
255
+ ```python
256
+ from gradio_client import Client
257
+
258
+ client = Client("Ngixdev/qwen-api")
259
+
260
+ result = client.predict(
261
+ prompt="Explain quantum computing in simple terms",
262
+ system_prompt="You are a helpful assistant",
263
+ temperature=0.7,
264
+ top_p=0.8,
265
+ max_tokens=2048,
266
+ api_name="/api_generate"
267
+ )
268
+ print(result)
269
+ ```
270
+
271
+ ### cURL Example
272
+
273
+ ```bash
274
+ curl -X POST https://ngixdev-qwen-api.hf.space/api/api_generate \\
275
+ -H "Content-Type: application/json" \\
276
+ -d '{
277
+ "data": [
278
+ "Explain quantum computing",
279
+ "You are a helpful assistant",
280
+ 0.7,
281
+ 0.8,
282
+ 2048
283
+ ]
284
+ }'
285
+ ```
286
+ """
287
+ )
288
+
289
+ with gr.Row():
290
+ with gr.Column():
291
+ api_prompt = gr.Textbox(
292
+ label="Prompt",
293
+ placeholder="Enter your prompt here...",
294
+ lines=4,
295
+ )
296
+ api_system = gr.Textbox(
297
+ label="System Prompt (Optional)",
298
+ placeholder="Set behavior/personality...",
299
+ lines=2,
300
+ )
301
+ with gr.Row():
302
+ api_temp = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
303
+ api_top_p = gr.Slider(0.0, 1.0, 0.8, step=0.05, label="Top P")
304
+ api_max_tokens = gr.Slider(64, 8192, 2048, step=64, label="Max Tokens")
305
+ api_submit = gr.Button("Generate", variant="primary")
306
+
307
+ with gr.Column():
308
+ api_output = gr.JSON(label="API Response")
309
+
310
+ api_submit.click(
311
+ api_generate,
312
+ [api_prompt, api_system, api_temp, api_top_p, api_max_tokens],
313
+ api_output,
314
+ api_name="api_generate",
315
+ )
316
+
317
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio>=4.0.0
2
+ huggingface_hub>=0.20.0