Spaces:

ohmyapi
/

web2api

Paused

File size: 9,948 Bytes

77169b4

"""
ReAct 模块：解析 LLM 纯文本输出（Thought/Action/Action Input），转换为 function_call 格式。
适用于不支持 function calling 的 LLM。提示词借鉴 Dify ReAct 结构与表述，保持行式格式。
"""

import json
import re
from typing import Any

# 复用 function_call 的工具描述格式化
from core.api.function_call import format_tools_for_prompt

# Fixed ReAct system prompt. Its structure and wording are borrowed from Dify's
# ReAct prompt, and it deliberately keeps a line-oriented layout
# (Thought / Action / Action Input / Final Answer, one per line) so that the
# model's reply stays compatible with parse_react_output.
# The literal is a raw string, so the `\"` in the rules section reaches the
# model as a backslash followed by a double quote.
REACT_PROMPT_FIXED = r"""Respond to the human as helpfully and accurately as possible.

You have access to the following tools (listed below under "## Available tools").

Use the following format:

Question: the input question you must answer
Thought: consider what you know and what to do next
Action: the tool name (exactly one of the tools listed below)
Action Input: a single-line JSON object as the tool input
Observation: the result of the action (injected by the system — do NOT output this yourself)
... (repeat Thought / Action / Action Input as needed; after each, the system adds Observation)
Thought: I know the final answer
Final Answer: your final response to the human

Provide only ONE action per response. Valid "Action" values: a tool name from the list, or (when done) output "Final Answer" / "最终答案" instead of Action + Action Input.

Rules:
- After "Action Input: {...}" you must STOP and wait for Observation. Do not add any text, code, or explanation after the JSON line.
- Action Input must be a single-line valid JSON. All double quotes `"` in JSON values must be escaped as `\"`. Do not output "Observation" yourself.
- Format is: Thought → Action → Action Input (or Final Answer when done). Then the system replies with Observation.

Begin. Always respond with a valid Thought then Action then Action Input (or Final Answer). Use tools when necessary; respond with Final Answer when appropriate.
"""


def format_react_prompt(
    tools: list[dict[str, Any]],
    tools_text: str | None = None,
) -> str:
    """Build the system prefix: the fixed ReAct prompt followed by the tool list.

    Args:
        tools: Tool definitions; only used when ``tools_text`` is ``None``, in
            which case they are rendered with ``format_tools_for_prompt``.
        tools_text: Pre-rendered tool description. When given it is used
            verbatim and ``tools`` is not consulted.

    Returns:
        ``REACT_PROMPT_FIXED``, a separator with the "## Available tools"
        heading, the tool description, and a trailing newline.
    """
    rendered_tools = (
        format_tools_for_prompt(tools) if tools_text is None else tools_text
    )
    pieces = (
        REACT_PROMPT_FIXED,
        "\n\n---\n\n## Available tools\n\n",
        rendered_tools,
        "\n",
    )
    return "".join(pieces)


def parse_react_output(text: str) -> dict[str, Any] | None:
    """
    解析行式 ReAct 输出 (Thought / Action / Action Input)。
    返回 {"type": "final_answer", "content": str} 或
         {"type": "tool_call", "tool": str, "params": dict} 或 None（解析失败）。
    注意：优先解析 Action，若同时存在 Action 与 Final Answer，则返回 tool_call，
    以便正确下发 tool_calls 给客户端执行。
    """
    if not text or not text.strip():
        return None

    # 1. 优先提取 Action + Action Input（若存在则返回 tool_call，避免被 Final Answer 抢先）
    action_match = re.search(r"^\s*Action[:：]\s*(\w+)", text, re.MULTILINE)
    if action_match:
        tool_name = action_match.group(1).strip()

        # 2. 提取 Action Input（单行 JSON 或简单多行）
        input_match = re.search(r"Action Input[:：]\s*(\{[^\n]+\})", text)
        json_str: str | None = None
        if input_match:
            json_str = input_match.group(1).strip()
        else:
            # 多行 JSON：从 Action Input 到下一关键字
            start_m = re.search(r"Action Input[:：]\s*", text)
            if start_m:
                rest = text[start_m.end() :]
                end_m = re.search(
                    r"\n\s*(?:Thought|Action|Observation|Final)", rest, re.I
                )
                raw = rest[: end_m.start()].strip() if end_m else rest.strip()
                if raw.startswith("{") and "}" in raw:
                    depth = 0
                    for i, c in enumerate(raw):
                        if c == "{":
                            depth += 1
                        elif c == "}":
                            depth -= 1
                            if depth == 0:
                                json_str = raw[: i + 1]
                                break

        if not json_str:
            return {
                "type": "tool_call",
                "tool": tool_name,
                "params": {},
                "parse_error": "no_action_input",
            }

        try:
            params = json.loads(json_str)
        except json.JSONDecodeError as e:
            return {
                "type": "tool_call",
                "tool": tool_name,
                "params": {},
                "parse_error": str(e),
            }

        return {"type": "tool_call", "tool": tool_name, "params": params}

    # 3. 无 Action 时，检查 Final Answer
    m = re.search(
        r"(?:Final Answer|最终答案)[:：]\s*(.*)",
        text,
        re.DOTALL | re.I,
    )
    if m:
        content = m.group(1).strip()
        return {"type": "final_answer", "content": content}

    return None


def react_output_to_tool_calls(parsed: dict[str, Any]) -> list[dict[str, Any]]:
    """Convert a ``parse_react_output`` result into a tool-calls list.

    Args:
        parsed: Dict produced by ``parse_react_output``.

    Returns:
        A one-element list ``[{"name": ..., "arguments": ...}]`` when
        ``parsed["type"]`` is ``"tool_call"`` (missing ``tool`` / ``params``
        default to ``""`` / ``{}``); an empty list for any other type.
    """
    if parsed.get("type") == "tool_call":
        call = {
            "name": parsed.get("tool", ""),
            "arguments": parsed.get("params", {}),
        }
        return [call]
    return []


def format_react_final_answer_content(text: str) -> str:
    """Wrap the Thought part of a "Thought + Final Answer" reply in ``<think>``.

    Args:
        text: Model output that may follow the ReAct final-answer layout.

    Returns:
        ``"<think>{thought}</think>\\n\\n{answer}"`` when both a ``Thought:``
        section and a ``Final Answer:`` / ``最终答案:`` section are found;
        otherwise *text* unchanged (including empty or blank input).
    """
    if not (text and text.strip()):
        return text

    flags = re.DOTALL | re.I

    answer_hit = re.search(r"(?:Final Answer|最终答案)[:：]\s*(.*)", text, flags)
    if answer_hit is None:
        return text

    # The thought runs from "Thought:" up to the final-answer marker.
    thought_hit = re.search(
        r"Thought[:：]\s*(.+?)(?=\s*(?:Final Answer|最终答案)[:：]|\Z)",
        text,
        flags,
    )
    if thought_hit is None:
        return text

    reasoning = (thought_hit.group(1) or "").strip()
    reply = (answer_hit.group(1) or "").strip()
    return "<think>" + reasoning + "</think>\n\n" + reply


def extract_thought_so_far(buffer: str) -> tuple[str | None, bool]:
    """
    从流式 buffer 中增量解析当前 Thought 内容（Thought: 到 Action:/Final Answer:/结尾）。
    返回 (thought_content, thought_ended)。
    - thought_content: 当前可确定的 Thought 正文（不含 "Thought:" 前缀），未出现 Thought: 则为 None。
    - thought_ended: 是否已出现 Action: 或 Final Answer:，即 Thought 段已结束。
    """
    content = buffer.lstrip()
    if not content:
        return (None, False)
    # 必须已有 Thought:
    thought_start = re.search(r"Thought[:：]\s*", content, re.I)
    if not thought_start:
        return (None, False)
    start = thought_start.end()
    rest = content[start:]
    # 先找完整结尾：Action: 或 Final Answer:（一出现就截断，不要求后面已有工具名）
    action_m = re.search(r"Action[:：]\s*", rest, re.I)
    final_m = re.search(r"(?:Final Answer|最终答案)[:：]\s*", rest, re.I)
    end_pos: int | None = None
    if action_m and (final_m is None or action_m.start() <= final_m.start()):
        end_pos = action_m.start()
    if final_m and (end_pos is None or final_m.start() < end_pos):
        end_pos = final_m.start()
    if end_pos is not None:
        thought_content = rest[:end_pos].rstrip()
        return (thought_content, True)
    # 未出现完整关键字时，去掉末尾「可能是关键字前缀」的片段，避免把 "\nAc"、"tion:"、"r:"、" [完整回答]" 等当 thought 流式发出
    thought_content = rest.rstrip()
    for kw in ("Action:", "Final Answer:", "最终答案:"):
        for i in range(len(kw), 0, -1):
            if thought_content.lower().endswith(kw[:i].lower()):
                thought_content = thought_content[:-i].rstrip()
                break
    # 再剥 "Final Answer:" 的尾部片段（流式时先收到 "Answer:"、"r:" 等），避免 [完整回答] 被算进 think
    for suffix in (
        " Final Answer:",
        " Final Answer",
        " Answer:",
        " Answer",
        "Answer:",
        "Answer",
        "nswer:",
        "nswer",
        "swer:",
        "swer",
        "wer:",
        "wer",
        "er:",
        "er",
        "r:",
        "r",
    ):
        if thought_content.endswith(suffix):
            thought_content = thought_content[: -len(suffix)].rstrip()
            break
    return (thought_content, False)


def detect_react_mode(buffer: str) -> bool | None:
    """
    判断 buffer 是否为 ReAct 工具调用模式（规范格式：Thought:/Action:/Action Input:）。
    仅当出现该格式时才识别为 ReAct；未按规范返回一律视为纯文本。
    None=尚未确定，True=ReAct 工具调用，False=普通文本或 Final Answer。
    """
    stripped = buffer.lstrip()
    if re.search(r"^\s*Action[:：]\s*\w+", stripped, re.MULTILINE):
        return True
    if re.search(r"(?:Final Answer|最终答案)[:：]", stripped, re.I):
        return False
    # 流式可能只传 Thought/Action 的前半段（如 "Th"、"Tho"），视为尚未确定，继续缓冲
    lower = stripped.lower()
    if lower and ("thought:".startswith(lower) or "action:".startswith(lower)):
        return None
    # 若 buffer 中已出现 Thought:，可能为前导语 + Thought 格式（第二轮常见），保持 None 等待 Action
    if re.search(r"Thought[:：]\s*", stripped, re.I):
        return None
    # 未按规范：首行不是 Thought:/Action: 开头则视为纯文本
    if stripped and not re.match(r"^\s*(?:Thought|Action)[:：]", stripped, re.I):
        return False
    return None