web2api / core /api /react.py
ohmyapi's picture
feat: align hosted Space deployment with latest upstream
77169b4
"""
ReAct 模块:解析 LLM 纯文本输出(Thought/Action/Action Input),转换为 function_call 格式。
适用于不支持 function calling 的 LLM。提示词借鉴 Dify ReAct 结构与表述,保持行式格式。
"""
import json
import re
from typing import Any
# 复用 function_call 的工具描述格式化
from core.api.function_call import format_tools_for_prompt
# 固定 ReAct 提示词(借鉴 Dify ReAct 结构与表述,保持行式格式以兼容 parse_react_output)
REACT_PROMPT_FIXED = r"""Respond to the human as helpfully and accurately as possible.
You have access to the following tools (listed below under "## Available tools").
Use the following format:
Question: the input question you must answer
Thought: consider what you know and what to do next
Action: the tool name (exactly one of the tools listed below)
Action Input: a single-line JSON object as the tool input
Observation: the result of the action (injected by the system — do NOT output this yourself)
... (repeat Thought / Action / Action Input as needed; after each, the system adds Observation)
Thought: I know the final answer
Final Answer: your final response to the human
Provide only ONE action per response. Valid "Action" values: a tool name from the list, or (when done) output "Final Answer" / "最终答案" instead of Action + Action Input.
Rules:
- After "Action Input: {...}" you must STOP and wait for Observation. Do not add any text, code, or explanation after the JSON line.
- Action Input must be a single-line valid JSON. All double quotes `"` in JSON values must be escaped as `\"`. Do not output "Observation" yourself.
- Format is: Thought → Action → Action Input (or Final Answer when done). Then the system replies with Observation.
Begin. Always respond with a valid Thought then Action then Action Input (or Final Answer). Use tools when necessary; respond with Final Answer when appropriate.
"""
def format_react_prompt(
tools: list[dict[str, Any]],
tools_text: str | None = None,
) -> str:
"""用固定 ReAct 提示词构建系统前缀,并拼接可用工具列表。"""
if tools_text is None:
tools_text = format_tools_for_prompt(tools)
return REACT_PROMPT_FIXED + "\n\n---\n\n## Available tools\n\n" + tools_text + "\n"
def parse_react_output(text: str) -> dict[str, Any] | None:
"""
解析行式 ReAct 输出 (Thought / Action / Action Input)。
返回 {"type": "final_answer", "content": str} 或
{"type": "tool_call", "tool": str, "params": dict} 或 None(解析失败)。
注意:优先解析 Action,若同时存在 Action 与 Final Answer,则返回 tool_call,
以便正确下发 tool_calls 给客户端执行。
"""
if not text or not text.strip():
return None
# 1. 优先提取 Action + Action Input(若存在则返回 tool_call,避免被 Final Answer 抢先)
action_match = re.search(r"^\s*Action[::]\s*(\w+)", text, re.MULTILINE)
if action_match:
tool_name = action_match.group(1).strip()
# 2. 提取 Action Input(单行 JSON 或简单多行)
input_match = re.search(r"Action Input[::]\s*(\{[^\n]+\})", text)
json_str: str | None = None
if input_match:
json_str = input_match.group(1).strip()
else:
# 多行 JSON:从 Action Input 到下一关键字
start_m = re.search(r"Action Input[::]\s*", text)
if start_m:
rest = text[start_m.end() :]
end_m = re.search(
r"\n\s*(?:Thought|Action|Observation|Final)", rest, re.I
)
raw = rest[: end_m.start()].strip() if end_m else rest.strip()
if raw.startswith("{") and "}" in raw:
depth = 0
for i, c in enumerate(raw):
if c == "{":
depth += 1
elif c == "}":
depth -= 1
if depth == 0:
json_str = raw[: i + 1]
break
if not json_str:
return {
"type": "tool_call",
"tool": tool_name,
"params": {},
"parse_error": "no_action_input",
}
try:
params = json.loads(json_str)
except json.JSONDecodeError as e:
return {
"type": "tool_call",
"tool": tool_name,
"params": {},
"parse_error": str(e),
}
return {"type": "tool_call", "tool": tool_name, "params": params}
# 3. 无 Action 时,检查 Final Answer
m = re.search(
r"(?:Final Answer|最终答案)[::]\s*(.*)",
text,
re.DOTALL | re.I,
)
if m:
content = m.group(1).strip()
return {"type": "final_answer", "content": content}
return None
def react_output_to_tool_calls(parsed: dict[str, Any]) -> list[dict[str, Any]]:
"""
将 parse_react_output 的 tool_call 结果转为 function_call 的 tool_calls_list 格式。
供 build_tool_calls_response / build_tool_calls_chunk 使用。
"""
if parsed.get("type") != "tool_call":
return []
return [
{
"name": parsed.get("tool", ""),
"arguments": parsed.get("params", {}),
}
]
def format_react_final_answer_content(text: str) -> str:
"""
若 text 为 ReAct 的 Thought + Final Answer 格式,则将 Thought 用 <think> 包裹,
便于客户端识别为思考内容;否则返回原文本。
"""
if not text or not text.strip():
return text
# 匹配 Thought: ... 与 Final Answer: / 最终答案: ...
thought_m = re.search(
r"Thought[::]\s*(.+?)(?=\s*(?:Final Answer|最终答案)[::]|\Z)",
text,
re.DOTALL | re.I,
)
answer_m = re.search(
r"(?:Final Answer|最终答案)[::]\s*(.*)",
text,
re.DOTALL | re.I,
)
if thought_m and answer_m:
thought = (thought_m.group(1) or "").strip()
answer = (answer_m.group(1) or "").strip()
return f"<think>{thought}</think>\n\n{answer}"
return text
def extract_thought_so_far(buffer: str) -> tuple[str | None, bool]:
"""
从流式 buffer 中增量解析当前 Thought 内容(Thought: 到 Action:/Final Answer:/结尾)。
返回 (thought_content, thought_ended)。
- thought_content: 当前可确定的 Thought 正文(不含 "Thought:" 前缀),未出现 Thought: 则为 None。
- thought_ended: 是否已出现 Action: 或 Final Answer:,即 Thought 段已结束。
"""
content = buffer.lstrip()
if not content:
return (None, False)
# 必须已有 Thought:
thought_start = re.search(r"Thought[::]\s*", content, re.I)
if not thought_start:
return (None, False)
start = thought_start.end()
rest = content[start:]
# 先找完整结尾:Action: 或 Final Answer:(一出现就截断,不要求后面已有工具名)
action_m = re.search(r"Action[::]\s*", rest, re.I)
final_m = re.search(r"(?:Final Answer|最终答案)[::]\s*", rest, re.I)
end_pos: int | None = None
if action_m and (final_m is None or action_m.start() <= final_m.start()):
end_pos = action_m.start()
if final_m and (end_pos is None or final_m.start() < end_pos):
end_pos = final_m.start()
if end_pos is not None:
thought_content = rest[:end_pos].rstrip()
return (thought_content, True)
# 未出现完整关键字时,去掉末尾「可能是关键字前缀」的片段,避免把 "\nAc"、"tion:"、"r:"、" [完整回答]" 等当 thought 流式发出
thought_content = rest.rstrip()
for kw in ("Action:", "Final Answer:", "最终答案:"):
for i in range(len(kw), 0, -1):
if thought_content.lower().endswith(kw[:i].lower()):
thought_content = thought_content[:-i].rstrip()
break
# 再剥 "Final Answer:" 的尾部片段(流式时先收到 "Answer:"、"r:" 等),避免 [完整回答] 被算进 think
for suffix in (
" Final Answer:",
" Final Answer",
" Answer:",
" Answer",
"Answer:",
"Answer",
"nswer:",
"nswer",
"swer:",
"swer",
"wer:",
"wer",
"er:",
"er",
"r:",
"r",
):
if thought_content.endswith(suffix):
thought_content = thought_content[: -len(suffix)].rstrip()
break
return (thought_content, False)
def detect_react_mode(buffer: str) -> bool | None:
"""
判断 buffer 是否为 ReAct 工具调用模式(规范格式:Thought:/Action:/Action Input:)。
仅当出现该格式时才识别为 ReAct;未按规范返回一律视为纯文本。
None=尚未确定,True=ReAct 工具调用,False=普通文本或 Final Answer。
"""
stripped = buffer.lstrip()
if re.search(r"^\s*Action[::]\s*\w+", stripped, re.MULTILINE):
return True
if re.search(r"(?:Final Answer|最终答案)[::]", stripped, re.I):
return False
# 流式可能只传 Thought/Action 的前半段(如 "Th"、"Tho"),视为尚未确定,继续缓冲
lower = stripped.lower()
if lower and ("thought:".startswith(lower) or "action:".startswith(lower)):
return None
# 若 buffer 中已出现 Thought:,可能为前导语 + Thought 格式(第二轮常见),保持 None 等待 Action
if re.search(r"Thought[::]\s*", stripped, re.I):
return None
# 未按规范:首行不是 Thought:/Action: 开头则视为纯文本
if stripped and not re.match(r"^\s*(?:Thought|Action)[::]", stripped, re.I):
return False
return None