nacho committed on
Commit
3f9fa4b
·
1 Parent(s): 57be122

feat: 支持通过 Prompt 魔改拦截模拟工具调用 (Function Calling)

Browse files
Files changed (1) hide show
  1. main.py +174 -7
main.py CHANGED
@@ -83,6 +83,7 @@ class ChatCompletionRequest(BaseModel):
83
  stream: bool = False
84
  temperature: Optional[float] = None
85
  max_tokens: Optional[int] = None
 
86
 
87
 
88
  def verify_api_key(authorization: Optional[str] = Header(None)) -> str:
@@ -139,6 +140,10 @@ async def chat_completions(
139
  raise HTTPException(status_code=400, detail="No messages provided")
140
 
141
  prompt = request.messages[-1].content
 
 
 
 
142
 
143
  model = request.model
144
 
@@ -151,15 +156,38 @@ async def chat_completions(
151
  async def stream_with_cleanup():
152
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
153
  try:
 
 
 
 
154
  async for chunk_data in browser.stream_message(prompt, timeout=120, model=model):
155
  chunk_type = chunk_data.get("type", "content")
156
  chunk_text = chunk_data.get("chunk", "")
157
 
158
- delta = {}
159
  if chunk_type == "thinking":
160
- delta["reasoning_content"] = chunk_text
161
  else:
162
- delta["content"] = chunk_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  data = {
165
  "id": chunk_id,
@@ -176,6 +204,39 @@ async def chat_completions(
176
  }
177
  yield f"data: {json.dumps(data)}\n\n"
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  final_data = {
180
  "id": chunk_id,
181
  "object": "chat.completion.chunk",
@@ -215,6 +276,29 @@ async def chat_completions(
215
  message_data = {"role": "assistant", "content": content}
216
  if reasoning_content:
217
  message_data["reasoning_content"] = reasoning_content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
  return {
220
  "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
@@ -225,7 +309,7 @@ async def chat_completions(
225
  {
226
  "index": 0,
227
  "message": message_data,
228
- "finish_reason": "stop",
229
  }
230
  ],
231
  "usage": {
@@ -339,6 +423,11 @@ async def admin_chat(request: Request, admin_key: str = Header(...)):
339
  raise HTTPException(status_code=400, detail="No messages provided")
340
 
341
  prompt = req.messages[-1].content
 
 
 
 
 
342
  model = req.model
343
  account = await manager.acquire()
344
 
@@ -349,15 +438,37 @@ async def admin_chat(request: Request, admin_key: str = Header(...)):
349
  async def stream_with_cleanup():
350
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
351
  try:
 
 
 
 
352
  async for chunk_data in browser.stream_message(prompt, timeout=120, model=model):
353
  chunk_type = chunk_data.get("type", "content")
354
  chunk_text = chunk_data.get("chunk", "")
355
 
356
- delta = {}
357
  if chunk_type == "thinking":
358
- delta["reasoning_content"] = chunk_text
359
  else:
360
- delta["content"] = chunk_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
  data = {
363
  "id": chunk_id,
@@ -367,6 +478,39 @@ async def admin_chat(request: Request, admin_key: str = Header(...)):
367
  "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
368
  }
369
  yield f"data: {json.dumps(data)}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  yield f"data: {json.dumps({'id': chunk_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"
371
  yield "data: [DONE]\n\n"
372
  except Exception as e:
@@ -388,6 +532,29 @@ async def admin_chat(request: Request, admin_key: str = Header(...)):
388
  message_data = {"role": "assistant", "content": content}
389
  if reasoning_content:
390
  message_data["reasoning_content"] = reasoning_content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
 
392
  return {
393
  "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
 
83
  stream: bool = False
84
  temperature: Optional[float] = None
85
  max_tokens: Optional[int] = None
86
+ tools: Optional[list[dict]] = None
87
 
88
 
89
  def verify_api_key(authorization: Optional[str] = Header(None)) -> str:
 
140
  raise HTTPException(status_code=400, detail="No messages provided")
141
 
142
  prompt = request.messages[-1].content
143
+
144
+ if request.tools:
145
+ tool_desc = json.dumps(request.tools, ensure_ascii=False)
146
+ prompt += f"\n\n[SYSTEM INSTRUCTION: You have access to the following tools:\n{tool_desc}\nIf you must use a tool to fulfill the request, output ONLY a JSON block wrapped in <tool_call>...</tool_call> tags, like:\n<tool_call>{{\"name\": \"tool_name\", \"arguments\": {{\"arg1\": \"value\"}} }}</tool_call>\nDo NOT output any other text if you are calling a tool.]"
147
 
148
  model = request.model
149
 
 
156
  async def stream_with_cleanup():
157
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
158
  try:
159
+ is_tool_call = False
160
+ not_tool_call = False
161
+ content_buffer = ""
162
+
163
  async for chunk_data in browser.stream_message(prompt, timeout=120, model=model):
164
  chunk_type = chunk_data.get("type", "content")
165
  chunk_text = chunk_data.get("chunk", "")
166
 
 
167
  if chunk_type == "thinking":
168
+ delta = {"reasoning_content": chunk_text}
169
  else:
170
+ if request.tools and not is_tool_call and not not_tool_call:
171
+ content_buffer += chunk_text
172
+ # Wait until we have enough characters to decide
173
+ if len(content_buffer) < 12:
174
+ if not "<tool_call>".startswith(content_buffer):
175
+ not_tool_call = True
176
+ delta = {"content": content_buffer}
177
+ else:
178
+ continue # keep buffering
179
+ else:
180
+ if content_buffer.startswith("<tool_call>"):
181
+ is_tool_call = True
182
+ continue # buffer the whole tool call
183
+ else:
184
+ not_tool_call = True
185
+ delta = {"content": content_buffer}
186
+ elif request.tools and is_tool_call:
187
+ content_buffer += chunk_text
188
+ continue # buffer until stream ends
189
+ else:
190
+ delta = {"content": chunk_text}
191
 
192
  data = {
193
  "id": chunk_id,
 
204
  }
205
  yield f"data: {json.dumps(data)}\n\n"
206
 
207
+ if is_tool_call:
208
+ # Process buffered tool call at the end
209
+ import re
210
+ m = re.search(r'<tool_call>(.*?)</tool_call>', content_buffer, re.DOTALL)
211
+ if m:
212
+ try:
213
+ tcall = json.loads(m.group(1))
214
+ t_name = tcall.get("name", "")
215
+ t_args = json.dumps(tcall.get("arguments", {}))
216
+ delta = {
217
+ "tool_calls": [
218
+ {
219
+ "index": 0,
220
+ "id": f"call_{uuid.uuid4().hex[:8]}",
221
+ "type": "function",
222
+ "function": {
223
+ "name": t_name,
224
+ "arguments": t_args
225
+ }
226
+ }
227
+ ]
228
+ }
229
+ data = {
230
+ "id": chunk_id,
231
+ "object": "chat.completion.chunk",
232
+ "created": int(time.time()),
233
+ "model": request.model,
234
+ "choices": [{"index": 0, "delta": delta, "finish_reason": "tool_calls"}]
235
+ }
236
+ yield f"data: {json.dumps(data)}\n\n"
237
+ except Exception as e:
238
+ logger.error("Failed to parse tool call: %s", e)
239
+
240
  final_data = {
241
  "id": chunk_id,
242
  "object": "chat.completion.chunk",
 
276
  message_data = {"role": "assistant", "content": content}
277
  if reasoning_content:
278
  message_data["reasoning_content"] = reasoning_content
279
+
280
+ finish_reason = "stop"
281
+
282
+ if request.tools and "<tool_call>" in content:
283
+ import re
284
+ m = re.search(r'<tool_call>(.*?)</tool_call>', content, re.DOTALL)
285
+ if m:
286
+ try:
287
+ tcall = json.loads(m.group(1))
288
+ message_data["content"] = None
289
+ message_data["tool_calls"] = [
290
+ {
291
+ "id": f"call_{uuid.uuid4().hex[:8]}",
292
+ "type": "function",
293
+ "function": {
294
+ "name": tcall.get("name", ""),
295
+ "arguments": json.dumps(tcall.get("arguments", {}))
296
+ }
297
+ }
298
+ ]
299
+ finish_reason = "tool_calls"
300
+ except Exception as e:
301
+ logger.error("Failed to parse non-stream tool call: %s", e)
302
 
303
  return {
304
  "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
 
309
  {
310
  "index": 0,
311
  "message": message_data,
312
+ "finish_reason": finish_reason,
313
  }
314
  ],
315
  "usage": {
 
423
  raise HTTPException(status_code=400, detail="No messages provided")
424
 
425
  prompt = req.messages[-1].content
426
+
427
+ if req.tools:
428
+ tool_desc = json.dumps(req.tools, ensure_ascii=False)
429
+ prompt += f"\n\n[SYSTEM INSTRUCTION: You have access to the following tools:\n{tool_desc}\nIf you must use a tool to fulfill the request, output ONLY a JSON block wrapped in <tool_call>...</tool_call> tags, like:\n<tool_call>{{\"name\": \"tool_name\", \"arguments\": {{\"arg1\": \"value\"}} }}</tool_call>\nDo NOT output any other text if you are calling a tool.]"
430
+
431
  model = req.model
432
  account = await manager.acquire()
433
 
 
438
  async def stream_with_cleanup():
439
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
440
  try:
441
+ is_tool_call = False
442
+ not_tool_call = False
443
+ content_buffer = ""
444
+
445
  async for chunk_data in browser.stream_message(prompt, timeout=120, model=model):
446
  chunk_type = chunk_data.get("type", "content")
447
  chunk_text = chunk_data.get("chunk", "")
448
 
 
449
  if chunk_type == "thinking":
450
+ delta = {"reasoning_content": chunk_text}
451
  else:
452
+ if req.tools and not is_tool_call and not not_tool_call:
453
+ content_buffer += chunk_text
454
+ if len(content_buffer) < 12:
455
+ if not "<tool_call>".startswith(content_buffer):
456
+ not_tool_call = True
457
+ delta = {"content": content_buffer}
458
+ else:
459
+ continue
460
+ else:
461
+ if content_buffer.startswith("<tool_call>"):
462
+ is_tool_call = True
463
+ continue
464
+ else:
465
+ not_tool_call = True
466
+ delta = {"content": content_buffer}
467
+ elif req.tools and is_tool_call:
468
+ content_buffer += chunk_text
469
+ continue
470
+ else:
471
+ delta = {"content": chunk_text}
472
 
473
  data = {
474
  "id": chunk_id,
 
478
  "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
479
  }
480
  yield f"data: {json.dumps(data)}\n\n"
481
+
482
+ if is_tool_call:
483
+ import re
484
+ m = re.search(r'<tool_call>(.*?)</tool_call>', content_buffer, re.DOTALL)
485
+ if m:
486
+ try:
487
+ tcall = json.loads(m.group(1))
488
+ t_name = tcall.get("name", "")
489
+ t_args = json.dumps(tcall.get("arguments", {}))
490
+ delta = {
491
+ "tool_calls": [
492
+ {
493
+ "index": 0,
494
+ "id": f"call_{uuid.uuid4().hex[:8]}",
495
+ "type": "function",
496
+ "function": {
497
+ "name": t_name,
498
+ "arguments": t_args
499
+ }
500
+ }
501
+ ]
502
+ }
503
+ data = {
504
+ "id": chunk_id,
505
+ "object": "chat.completion.chunk",
506
+ "created": int(time.time()),
507
+ "model": req.model,
508
+ "choices": [{"index": 0, "delta": delta, "finish_reason": "tool_calls"}]
509
+ }
510
+ yield f"data: {json.dumps(data)}\n\n"
511
+ except Exception as e:
512
+ logger.error("Failed to parse admin stream tool call: %s", e)
513
+
514
  yield f"data: {json.dumps({'id': chunk_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"
515
  yield "data: [DONE]\n\n"
516
  except Exception as e:
 
532
  message_data = {"role": "assistant", "content": content}
533
  if reasoning_content:
534
  message_data["reasoning_content"] = reasoning_content
535
+
536
+ finish_reason = "stop"
537
+
538
+ if req.tools and "<tool_call>" in content:
539
+ import re
540
+ m = re.search(r'<tool_call>(.*?)</tool_call>', content, re.DOTALL)
541
+ if m:
542
+ try:
543
+ tcall = json.loads(m.group(1))
544
+ message_data["content"] = None
545
+ message_data["tool_calls"] = [
546
+ {
547
+ "id": f"call_{uuid.uuid4().hex[:8]}",
548
+ "type": "function",
549
+ "function": {
550
+ "name": tcall.get("name", ""),
551
+ "arguments": json.dumps(tcall.get("arguments", {}))
552
+ }
553
+ }
554
+ ]
555
+ finish_reason = "tool_calls"
556
+ except Exception as e:
557
+ logger.error("Failed to parse admin non-stream tool call: %s", e)
558
 
559
  return {
560
  "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",