Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -48,11 +48,11 @@ print(f"📊 MCP Services: {len(MCP_SERVICES)} services, {len(MCP_TOOLS)} tools"
|
|
| 48 |
# HuggingFace Inference API 实际限制约 8000-16000 tokens
|
| 49 |
# 为了安全,设置更低的限制
|
| 50 |
MAX_TOTAL_TOKENS = 6000 # 总上下文限制
|
| 51 |
-
MAX_TOOL_RESULT_CHARS =
|
| 52 |
MAX_HISTORY_CHARS = 500 # 单条历史消息最大字符数
|
| 53 |
MAX_HISTORY_TURNS = 2 # 最大历史轮数
|
| 54 |
-
MAX_TOOL_ITERATIONS =
|
| 55 |
-
MAX_OUTPUT_TOKENS =
|
| 56 |
|
| 57 |
def estimate_tokens(text):
|
| 58 |
"""估算文本 token 数量(粗略:1 token ≈ 2 字符)"""
|
|
@@ -193,7 +193,8 @@ def chatbot_response(message, history):
|
|
| 193 |
|
| 194 |
tool_calls_log = []
|
| 195 |
|
| 196 |
-
# LLM 调用循环(
|
|
|
|
| 197 |
for iteration in range(MAX_TOOL_ITERATIONS):
|
| 198 |
response = client.chat.completions.create(
|
| 199 |
model="Qwen/Qwen3-32B:groq",
|
|
@@ -217,21 +218,18 @@ def chatbot_response(message, history):
|
|
| 217 |
# 调用 MCP 工具
|
| 218 |
tool_result = call_mcp_tool(tool_name, tool_args)
|
| 219 |
|
| 220 |
-
#
|
| 221 |
result_str = json.dumps(tool_result, ensure_ascii=False)
|
| 222 |
|
| 223 |
-
# 截断到安全长度 (800字符 ≈ 400 tokens)
|
| 224 |
if len(result_str) > MAX_TOOL_RESULT_CHARS:
|
| 225 |
if isinstance(tool_result, dict) and "text" in tool_result:
|
| 226 |
-
# 如果是文本格式
|
| 227 |
truncated_text = truncate_text(tool_result["text"], MAX_TOOL_RESULT_CHARS - 50)
|
| 228 |
tool_result_truncated = {"text": truncated_text, "_truncated": True}
|
| 229 |
elif isinstance(tool_result, dict):
|
| 230 |
-
# JSON 格式,保留关键字段
|
| 231 |
truncated = {}
|
| 232 |
char_count = 0
|
| 233 |
-
for k, v in list(tool_result.items())[:
|
| 234 |
-
v_str = str(v)[:
|
| 235 |
truncated[k] = v_str
|
| 236 |
char_count += len(k) + len(v_str)
|
| 237 |
if char_count > MAX_TOOL_RESULT_CHARS:
|
|
@@ -255,6 +253,8 @@ def chatbot_response(message, history):
|
|
| 255 |
|
| 256 |
continue
|
| 257 |
else:
|
|
|
|
|
|
|
| 258 |
break
|
| 259 |
|
| 260 |
# 构建响应前缀(简化版)
|
|
@@ -299,20 +299,26 @@ def chatbot_response(message, history):
|
|
| 299 |
# 流式输出最终答案
|
| 300 |
yield response_prefix
|
| 301 |
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
stream=
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
except Exception as e:
|
| 318 |
import traceback
|
|
|
|
| 48 |
# HuggingFace Inference API 实际限制约 8000-16000 tokens
|
| 49 |
# 为了安全,设置更低的限制
|
| 50 |
MAX_TOTAL_TOKENS = 6000 # 总上下文限制
|
| 51 |
+
MAX_TOOL_RESULT_CHARS = 1500 # 工具返回最大字符数 (增加到1500)
|
| 52 |
MAX_HISTORY_CHARS = 500 # 单条历史消息最大字符数
|
| 53 |
MAX_HISTORY_TURNS = 2 # 最大历史轮数
|
| 54 |
+
MAX_TOOL_ITERATIONS = 6 # 最大工具调用轮数 (增加到6,支持多工具调用)
|
| 55 |
+
MAX_OUTPUT_TOKENS = 2000 # 最大输出 tokens (增加到2000)
|
| 56 |
|
| 57 |
def estimate_tokens(text):
|
| 58 |
"""估算文本 token 数量(粗略:1 token ≈ 2 字符)"""
|
|
|
|
| 193 |
|
| 194 |
tool_calls_log = []
|
| 195 |
|
| 196 |
+
# LLM 调用循环(支持多轮工具调用)
|
| 197 |
+
final_response_content = None
|
| 198 |
for iteration in range(MAX_TOOL_ITERATIONS):
|
| 199 |
response = client.chat.completions.create(
|
| 200 |
model="Qwen/Qwen3-32B:groq",
|
|
|
|
| 218 |
# 调用 MCP 工具
|
| 219 |
tool_result = call_mcp_tool(tool_name, tool_args)
|
| 220 |
|
| 221 |
+
# 限制返回结果大小
|
| 222 |
result_str = json.dumps(tool_result, ensure_ascii=False)
|
| 223 |
|
|
|
|
| 224 |
if len(result_str) > MAX_TOOL_RESULT_CHARS:
|
| 225 |
if isinstance(tool_result, dict) and "text" in tool_result:
|
|
|
|
| 226 |
truncated_text = truncate_text(tool_result["text"], MAX_TOOL_RESULT_CHARS - 50)
|
| 227 |
tool_result_truncated = {"text": truncated_text, "_truncated": True}
|
| 228 |
elif isinstance(tool_result, dict):
|
|
|
|
| 229 |
truncated = {}
|
| 230 |
char_count = 0
|
| 231 |
+
for k, v in list(tool_result.items())[:8]: # 保留前8个字段
|
| 232 |
+
v_str = str(v)[:300] # 每个值最多300字符
|
| 233 |
truncated[k] = v_str
|
| 234 |
char_count += len(k) + len(v_str)
|
| 235 |
if char_count > MAX_TOOL_RESULT_CHARS:
|
|
|
|
| 253 |
|
| 254 |
continue
|
| 255 |
else:
|
| 256 |
+
# 没有更多工具调用,保存最终答案
|
| 257 |
+
final_response_content = choice.message.content
|
| 258 |
break
|
| 259 |
|
| 260 |
# 构建响应前缀(简化版)
|
|
|
|
| 299 |
# 流式输出最终答案
|
| 300 |
yield response_prefix
|
| 301 |
|
| 302 |
+
# If the tool loop already produced a final answer, emit it in a single yield (this path is not streamed)
|
| 303 |
+
if final_response_content:
|
| 304 |
+
# 已经从循环中获得了最终答案,直接输出
|
| 305 |
+
yield response_prefix + final_response_content
|
| 306 |
+
else:
|
| 307 |
+
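# No usable final answer (iteration cap reached, or the model's final content was empty/None): make one more call and stream the model's summary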
|
| 308 |
+
stream = client.chat.completions.create(
|
| 309 |
+
model="Qwen/Qwen3-32B:groq",
|
| 310 |
+
messages=messages,
|
| 311 |
+
tools=None, # 不再允许调用工具
|
| 312 |
+
max_tokens=MAX_OUTPUT_TOKENS,
|
| 313 |
+
temperature=0.5,
|
| 314 |
+
stream=True
|
| 315 |
+
)
|
| 316 |
+
|
| 317 |
+
accumulated_text = ""
|
| 318 |
+
for chunk in stream:
|
| 319 |
+
if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
|
| 320 |
+
accumulated_text += chunk.choices[0].delta.content
|
| 321 |
+
yield response_prefix + accumulated_text
|
| 322 |
|
| 323 |
except Exception as e:
|
| 324 |
import traceback
|