beyoru commited on
Commit
8e33e20
·
verified ·
1 Parent(s): b0da11a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +233 -42
app.py CHANGED
@@ -1,11 +1,14 @@
1
  import os
 
2
  import torch
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
4
  from threading import Thread
5
  import gradio as gr
6
 
7
  MODEL_NAME = os.getenv('MODEL_ID')
8
  TOKEN = os.getenv('TOKEN')
 
9
 
10
  print("Loading model...")
11
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True, token=TOKEN)
@@ -18,28 +21,132 @@ model = AutoModelForCausalLM.from_pretrained(
18
  )
19
  print("Model loaded.")
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def playground(
22
  message,
23
  history,
24
  system_prompt,
 
25
  max_new_tokens,
26
  temperature,
27
  repetition_penalty,
28
  top_k,
29
- top_p
 
30
  ):
31
  if not isinstance(message, str) or not message.strip():
32
  yield ""
33
  return
34
 
35
- # Build conversation với system prompt
36
  conversation = []
37
 
38
- # Thêm system prompt nếu có
39
  if system_prompt and system_prompt.strip():
40
  conversation.append({"role": "system", "content": system_prompt.strip()})
41
 
42
- # Thêm lịch sử chat
43
  for user_msg, bot_msg in history:
44
  conversation.append({"role": "user", "content": user_msg})
45
  if bot_msg:
@@ -47,56 +154,130 @@ def playground(
47
 
48
  conversation.append({"role": "user", "content": message})
49
 
50
- if hasattr(tokenizer, "apply_chat_template"):
51
- prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
52
- else:
53
- prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in conversation]) + "\nassistant:"
54
-
55
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
56
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
57
-
58
- generation_kwargs = dict(
59
- **inputs,
60
- streamer=streamer,
61
- max_new_tokens=int(max_new_tokens),
62
- temperature=float(temperature),
63
- top_k=int(top_k) if top_k > 0 else None,
64
- top_p=float(top_p),
65
- repetition_penalty=float(repetition_penalty),
66
- do_sample=True if temperature > 0 else False,
67
- pad_token_id=tokenizer.eos_token_id
68
- )
69
-
70
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
71
- thread.start()
72
-
73
  generated_text = ""
74
- for new_text in streamer:
75
- generated_text += new_text
76
- yield generated_text
77
 
78
- thread.join()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  with gr.Blocks(fill_height=True, fill_width=True) as app:
81
  with gr.Sidebar():
82
- gr.Markdown("## Playground by UltimaX Intelligence")
83
  gr.HTML("""
84
  Runs <b><a href="https://huggingface.co/beyoru/Qwen3-0.9B-A0.6B" target="_blank">
85
- beyoru/Qwen3-0.9B-A0.6B</a></b> via <b>Hugging Face Transformers</b>.<br><br>
86
- <b>Support me at:</b>.<br><br>
87
  <a href="https://www.buymeacoffee.com/ductransa0g" target="_blank">
88
  <img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" width="150px">
89
  </a>
90
  """)
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  gr.Markdown("---")
93
  gr.Markdown("## System Prompt")
94
  system_prompt = gr.Textbox(
95
  label="System Prompt",
96
- placeholder="Enter custom system instructions here (optional)...",
97
  lines=4,
98
- value="You are a helpful AI assistant.",
99
- info="AI role custome"
100
  )
101
 
102
  gr.Markdown("---")
@@ -109,16 +290,26 @@ with gr.Blocks(fill_height=True, fill_width=True) as app:
109
 
110
  gr.ChatInterface(
111
  fn=playground,
112
- additional_inputs=[system_prompt, max_new_tokens, temperature, repetition_penalty, top_k, top_p],
 
 
 
 
 
 
 
 
 
113
  chatbot=gr.Chatbot(
114
- label="Qwen3-0.9B-A0.6B",
115
  show_copy_button=True,
116
  allow_tags=["think"],
117
  ),
118
  examples=[
119
- ["Hello who are you?"],
120
- ["How to solve 2x+1=3."],
121
- ["Example python code for async"]
 
122
  ],
123
  cache_examples=False,
124
  show_api=False
 
1
  import os
2
+ import json
3
  import torch
4
+ import requests
5
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
6
  from threading import Thread
7
  import gradio as gr
8
 
9
  MODEL_NAME = os.getenv('MODEL_ID')
10
  TOKEN = os.getenv('TOKEN')
11
+ MCP_URL = "https://beyoru-clone-tools.hf.space/gradio_api/mcp/"
12
 
13
  print("Loading model...")
14
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True, token=TOKEN)
 
21
  )
22
  print("Model loaded.")
23
 
24
# Define MCP tools schema
# OpenAI-style function schemas advertised to the model through
# tokenizer.apply_chat_template(tools=TOOLS); the model responds with a
# <tool_call> block naming one of these functions.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "clone_tools_Web_Search",
            "description": "Run a DuckDuckGo-backed search across text, news, images, videos, or books.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "The search query"},
                    "max_results": {"type": "number", "description": "Number of results to return (1-20)", "default": 5},
                    "search_type": {"type": "string", "enum": ["text", "news", "images", "videos", "books"], "default": "text"}
                },
                "required": ["query"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "clone_tools_Web_Fetch",
            "description": "Fetch a webpage and return clean Markdown, raw HTML, or a list of links.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {"type": "string", "description": "The absolute URL to fetch"},
                    "max_chars": {"type": "number", "description": "Maximum characters to return (0 = no limit)", "default": 0},
                    "mode": {"type": "string", "enum": ["markdown", "html", "url_scraper"], "default": "markdown"}
                },
                "required": ["url"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "clone_tools_Code_Interpreter",
            "description": "Execute Python code and return the output.",
            "parameters": {
                "type": "object",
                "properties": {
                    "code": {"type": "string", "description": "Python source code to run"}
                },
                "required": ["code"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "clone_tools_Generate_Image",
            "description": "Generate an image from a text prompt via Hugging Face inference.",
            "parameters": {
                "type": "object",
                "properties": {
                    "prompt": {"type": "string", "description": "Text description of the image to generate"},
                    "model_id": {"type": "string", "default": "black-forest-labs/FLUX.1-dev"},
                    "steps": {"type": "number", "default": 30},
                    "width": {"type": "number", "default": 1024},
                    "height": {"type": "number", "default": 1024}
                },
                "required": ["prompt"]
            }
        }
    }
]
91
+
92
def call_mcp_tool(tool_name, parameters):
    """Invoke a remote MCP tool over HTTP and return its decoded JSON reply.

    Best-effort by design: every failure (bad name, network error, non-2xx
    status, invalid JSON body) is reported as an {"error": ...} dict instead
    of raising, so the chat loop keeps streaming.

    NOTE(review): assumes the space exposes one plain POST endpoint per tool
    under MCP_URL — confirm against the hosting space's actual MCP API.
    """
    try:
        # Remote endpoints are named without the local "clone_tools_" prefix.
        endpoint = MCP_URL + tool_name.replace("clone_tools_", "")
        reply = requests.post(endpoint, json=parameters, timeout=30)
        reply.raise_for_status()
        return reply.json()
    except Exception as exc:
        return {"error": str(exc)}
104
+
105
def process_tool_calls(tool_calls):
    """Execute a batch of parsed tool calls against the MCP endpoint.

    Args:
        tool_calls: Iterable of dicts shaped like ``{"name": str,
            "arguments": dict | JSON str, "id": str (optional)}``.
            Non-dict entries are skipped silently.

    Returns:
        list[dict]: One ``role="tool"`` message per executed call, with the
        MCP response JSON-serialized into ``"content"``.
    """
    results = []
    for tool_call in tool_calls:
        if not isinstance(tool_call, dict):
            continue  # ignore malformed entries rather than crash the chat
        func_name = tool_call.get("name")
        func_args = tool_call.get("arguments", {})

        # Arguments may arrive as a JSON string; decode when possible, and on
        # malformed JSON pass the raw string through (was a bare `except:`,
        # which also hid KeyboardInterrupt/SystemExit).
        if isinstance(func_args, str):
            try:
                func_args = json.loads(func_args)
            except json.JSONDecodeError:
                pass

        result = call_mcp_tool(func_name, func_args)
        results.append({
            "tool_call_id": tool_call.get("id", "call_0"),
            "role": "tool",
            "name": func_name,
            "content": json.dumps(result, ensure_ascii=False)
        })
    return results
127
+
128
def playground(
    message,
    history,
    system_prompt,
    enable_tools,
    max_new_tokens,
    temperature,
    repetition_penalty,
    top_k,
    top_p,
    max_tool_iterations
):
    """Stream a chat completion, optionally looping through MCP tool calls.

    Generator used by gr.ChatInterface: yields the growing assistant text as
    tokens arrive. When ``enable_tools`` is set and the model emits a
    ``<tool_call>`` block, the call is executed via the MCP endpoint, its
    result is appended to the conversation, and generation runs again — up
    to ``max_tool_iterations`` rounds.

    Args:
        message: Current user message (non-string/blank input yields "").
        history: Prior chat turns. NOTE(review): assumes tuple-style history
            ``[(user, bot), ...]`` — confirm the ChatInterface is not using
            the "messages" format, which passes dicts instead.
        system_prompt: Optional system instruction prepended to the chat.
        enable_tools: Whether to advertise TOOLS to the chat template.
        max_new_tokens / temperature / repetition_penalty / top_k / top_p:
            Standard generation controls.
        max_tool_iterations: Maximum tool-calling rounds before giving up.

    Yields:
        str: The partial (then final) assistant response text.
    """
    import re  # stdlib; hoisted here so it is not re-imported every loop pass

    if not isinstance(message, str) or not message.strip():
        yield ""
        return

    # Rebuild the full conversation: system prompt, prior turns, new message.
    conversation = []
    if system_prompt and system_prompt.strip():
        conversation.append({"role": "system", "content": system_prompt.strip()})
    for user_msg, bot_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        if bot_msg:
            conversation.append({"role": "assistant", "content": bot_msg})
    conversation.append({"role": "user", "content": message})

    generated_text = ""
    for _ in range(int(max_tool_iterations)):
        # Advertise the tool schemas only when tool use is enabled and the
        # tokenizer's chat template supports them; otherwise render plain.
        template_kwargs = {"tokenize": False, "add_generation_prompt": True}
        if enable_tools and hasattr(tokenizer, "apply_chat_template"):
            template_kwargs["tools"] = TOOLS
        prompt = tokenizer.apply_chat_template(conversation, **template_kwargs)

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

        generation_kwargs = dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_k=int(top_k) if top_k > 0 else None,  # None disables top-k filtering
            top_p=float(top_p),
            repetition_penalty=float(repetition_penalty),
            do_sample=temperature > 0,  # greedy decoding at temperature 0
            pad_token_id=tokenizer.eos_token_id
        )

        # generate() blocks, so run it in a thread while we drain the streamer.
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()

        current_output = ""
        for new_text in streamer:
            current_output += new_text
            generated_text = current_output
            yield generated_text
        thread.join()

        # Look for a Qwen-style <tool_call>{...}</tool_call> block in the output.
        # Malformed JSON is treated as plain text (was a bare `except: pass`).
        tool_calls = None
        if enable_tools and "<tool_call>" in current_output:
            tool_match = re.search(r'<tool_call>(.*?)</tool_call>', current_output, re.DOTALL)
            if tool_match:
                try:
                    tool_calls = json.loads(tool_match.group(1))
                except json.JSONDecodeError:
                    tool_calls = None

        if not tool_calls:
            break  # final answer produced; stop the tool loop

        if not isinstance(tool_calls, list):
            tool_calls = [tool_calls]

        generated_text += "\n\n🔧 **Executing tools...**\n"
        yield generated_text

        tool_results = process_tool_calls(tool_calls)

        # Record the assistant turn (with its tool calls) and the tool outputs
        # so the next generation round can condition on them.
        conversation.append({
            "role": "assistant",
            "content": current_output,
            "tool_calls": tool_calls
        })
        for result in tool_results:
            conversation.append(result)
            generated_text += f"\n✓ {result['name']}: {result['content'][:200]}...\n"
            yield generated_text

        generated_text += "\n**Processing results...**\n\n"
        yield generated_text

        # Start the next round with a clean display buffer.
        generated_text = ""
247
 
248
  with gr.Blocks(fill_height=True, fill_width=True) as app:
249
  with gr.Sidebar():
250
+ gr.Markdown("## Playground with MCP Tools")
251
  gr.HTML("""
252
  Runs <b><a href="https://huggingface.co/beyoru/Qwen3-0.9B-A0.6B" target="_blank">
253
+ beyoru/Qwen3-0.9B-A0.6B</a></b> with <b>MCP Tools Integration</b>.<br><br>
254
+ <b>Support me at:</b><br><br>
255
  <a href="https://www.buymeacoffee.com/ductransa0g" target="_blank">
256
  <img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" width="150px">
257
  </a>
258
  """)
259
 
260
+ gr.Markdown("---")
261
+ gr.Markdown("## Tools Settings")
262
+ enable_tools = gr.Checkbox(
263
+ label="Enable MCP Tools",
264
+ value=True,
265
+ info="Allow model to call external tools (search, code, images)"
266
+ )
267
+ max_tool_iterations = gr.Slider(
268
+ 1, 5, value=3, step=1,
269
+ label="Max Tool Iterations",
270
+ info="Maximum number of tool calling rounds"
271
+ )
272
+
273
  gr.Markdown("---")
274
  gr.Markdown("## System Prompt")
275
  system_prompt = gr.Textbox(
276
  label="System Prompt",
277
+ placeholder="Enter custom system instructions...",
278
  lines=4,
279
+ value="You are a helpful AI assistant with access to tools for web search, code execution, and image generation. Use tools when needed to provide accurate and helpful responses.",
280
+ info="AI role and behavior"
281
  )
282
 
283
  gr.Markdown("---")
 
290
 
291
  gr.ChatInterface(
292
  fn=playground,
293
+ additional_inputs=[
294
+ system_prompt,
295
+ enable_tools,
296
+ max_new_tokens,
297
+ temperature,
298
+ repetition_penalty,
299
+ top_k,
300
+ top_p,
301
+ max_tool_iterations
302
+ ],
303
  chatbot=gr.Chatbot(
304
+ label="Qwen3-0.9B-A0.6B with MCP Tools",
305
  show_copy_button=True,
306
  allow_tags=["think"],
307
  ),
308
  examples=[
309
+ ["Search for the latest news about AI"],
310
+ ["Calculate the fibonacci sequence up to 10 using code"],
311
+ ["Generate an image of a cute robot"],
312
+ ["What's the weather like today?"]
313
  ],
314
  cache_examples=False,
315
  show_api=False