File size: 9,948 Bytes
77169b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
"""
ReAct 模块:解析 LLM 纯文本输出(Thought/Action/Action Input),转换为 function_call 格式。
适用于不支持 function calling 的 LLM。提示词借鉴 Dify ReAct 结构与表述,保持行式格式。
"""

import json
import re
from typing import Any

# 复用 function_call 的工具描述格式化
from core.api.function_call import format_tools_for_prompt

# Fixed ReAct prompt. Its structure and wording borrow from Dify's ReAct prompt;
# the line-oriented layout is kept so that replies stay compatible with
# parse_react_output. The tool list is not part of this constant.
REACT_PROMPT_FIXED = r"""Respond to the human as helpfully and accurately as possible.

You have access to the following tools (listed below under "## Available tools").

Use the following format:

Question: the input question you must answer
Thought: consider what you know and what to do next
Action: the tool name (exactly one of the tools listed below)
Action Input: a single-line JSON object as the tool input
Observation: the result of the action (injected by the system — do NOT output this yourself)
... (repeat Thought / Action / Action Input as needed; after each, the system adds Observation)
Thought: I know the final answer
Final Answer: your final response to the human

Provide only ONE action per response. Valid "Action" values: a tool name from the list, or (when done) output "Final Answer" / "最终答案" instead of Action + Action Input.

Rules:
- After "Action Input: {...}" you must STOP and wait for Observation. Do not add any text, code, or explanation after the JSON line.
- Action Input must be a single-line valid JSON. All double quotes `"` in JSON values must be escaped as `\"`. Do not output "Observation" yourself.
- Format is: Thought → Action → Action Input (or Final Answer when done). Then the system replies with Observation.

Begin. Always respond with a valid Thought then Action then Action Input (or Final Answer). Use tools when necessary; respond with Final Answer when appropriate.
"""


def format_react_prompt(
    tools: list[dict[str, Any]],
    tools_text: str | None = None,
) -> str:
    """Build the ReAct system prefix: the fixed prompt followed by the tool list.

    Args:
        tools: Tool definitions; only rendered (via ``format_tools_for_prompt``)
            when ``tools_text`` is not supplied.
        tools_text: Pre-rendered tool description. When given, it is used as-is
            and ``tools`` is ignored.

    Returns:
        The fixed prompt, a separator, an "Available tools" heading and the
        tool description, terminated by a newline.
    """
    rendered_tools = (
        format_tools_for_prompt(tools) if tools_text is None else tools_text
    )
    sections = (
        REACT_PROMPT_FIXED,
        "\n\n---\n\n## Available tools\n\n",
        rendered_tools,
        "\n",
    )
    return "".join(sections)


def parse_react_output(text: str) -> dict[str, Any] | None:
    """
    解析行式 ReAct 输出 (Thought / Action / Action Input)。
    返回 {"type": "final_answer", "content": str} 或
         {"type": "tool_call", "tool": str, "params": dict} 或 None(解析失败)。
    注意:优先解析 Action,若同时存在 Action 与 Final Answer,则返回 tool_call,
    以便正确下发 tool_calls 给客户端执行。
    """
    if not text or not text.strip():
        return None

    # 1. 优先提取 Action + Action Input(若存在则返回 tool_call,避免被 Final Answer 抢先)
    action_match = re.search(r"^\s*Action[::]\s*(\w+)", text, re.MULTILINE)
    if action_match:
        tool_name = action_match.group(1).strip()

        # 2. 提取 Action Input(单行 JSON 或简单多行)
        input_match = re.search(r"Action Input[::]\s*(\{[^\n]+\})", text)
        json_str: str | None = None
        if input_match:
            json_str = input_match.group(1).strip()
        else:
            # 多行 JSON:从 Action Input 到下一关键字
            start_m = re.search(r"Action Input[::]\s*", text)
            if start_m:
                rest = text[start_m.end() :]
                end_m = re.search(
                    r"\n\s*(?:Thought|Action|Observation|Final)", rest, re.I
                )
                raw = rest[: end_m.start()].strip() if end_m else rest.strip()
                if raw.startswith("{") and "}" in raw:
                    depth = 0
                    for i, c in enumerate(raw):
                        if c == "{":
                            depth += 1
                        elif c == "}":
                            depth -= 1
                            if depth == 0:
                                json_str = raw[: i + 1]
                                break

        if not json_str:
            return {
                "type": "tool_call",
                "tool": tool_name,
                "params": {},
                "parse_error": "no_action_input",
            }

        try:
            params = json.loads(json_str)
        except json.JSONDecodeError as e:
            return {
                "type": "tool_call",
                "tool": tool_name,
                "params": {},
                "parse_error": str(e),
            }

        return {"type": "tool_call", "tool": tool_name, "params": params}

    # 3. 无 Action 时,检查 Final Answer
    m = re.search(
        r"(?:Final Answer|最终答案)[::]\s*(.*)",
        text,
        re.DOTALL | re.I,
    )
    if m:
        content = m.group(1).strip()
        return {"type": "final_answer", "content": content}

    return None


def react_output_to_tool_calls(parsed: dict[str, Any]) -> list[dict[str, Any]]:
    """Convert a parsed ReAct tool call into the function-call tool_calls list.

    Args:
        parsed: A result dict as produced by ``parse_react_output``.

    Returns:
        A one-element list ``[{"name": ..., "arguments": ...}]`` when
        ``parsed["type"]`` is ``"tool_call"`` (missing ``tool`` / ``params``
        default to ``""`` / ``{}``); an empty list for any other type.
    """
    if parsed.get("type") == "tool_call":
        call = {
            "name": parsed.get("tool", ""),
            "arguments": parsed.get("params", {}),
        }
        return [call]
    return []


def format_react_final_answer_content(text: str) -> str:
    """Wrap the Thought of a ``Thought + Final Answer`` reply in ``<think>`` tags.

    Args:
        text: Raw LLM output.

    Returns:
        ``"<think>{thought}</think>\\n\\n{answer}"`` when ``text`` contains both
        a ``Thought:`` section and a ``Final Answer:`` / ``最终答案:`` section.
        If the Thought section is empty, only the answer is returned (there is
        nothing to mark as thinking content). In every other case, including
        empty or blank input, ``text`` is returned unchanged.
    """
    if not text or not text.strip():
        return text

    # The thought group is ``.*?`` (not ``.+?``): with an empty Thought a
    # mandatory character would force the group to run past the look-ahead and
    # swallow the whole "Final Answer: ..." text into the <think> block.
    thought_m = re.search(
        r"Thought[::]\s*(.*?)(?=\s*(?:Final Answer|最终答案)[::]|\Z)",
        text,
        re.DOTALL | re.I,
    )
    answer_m = re.search(
        r"(?:Final Answer|最终答案)[::]\s*(.*)",
        text,
        re.DOTALL | re.I,
    )
    if not (thought_m and answer_m):
        return text

    thought = thought_m.group(1).strip()
    answer = answer_m.group(1).strip()
    if not thought:
        return answer
    return f"<think>{thought}</think>\n\n{answer}"


def extract_thought_so_far(buffer: str) -> tuple[str | None, bool]:
    """
    从流式 buffer 中增量解析当前 Thought 内容(Thought: 到 Action:/Final Answer:/结尾)。
    返回 (thought_content, thought_ended)。
    - thought_content: 当前可确定的 Thought 正文(不含 "Thought:" 前缀),未出现 Thought: 则为 None。
    - thought_ended: 是否已出现 Action: 或 Final Answer:,即 Thought 段已结束。
    """
    content = buffer.lstrip()
    if not content:
        return (None, False)
    # 必须已有 Thought:
    thought_start = re.search(r"Thought[::]\s*", content, re.I)
    if not thought_start:
        return (None, False)
    start = thought_start.end()
    rest = content[start:]
    # 先找完整结尾:Action: 或 Final Answer:(一出现就截断,不要求后面已有工具名)
    action_m = re.search(r"Action[::]\s*", rest, re.I)
    final_m = re.search(r"(?:Final Answer|最终答案)[::]\s*", rest, re.I)
    end_pos: int | None = None
    if action_m and (final_m is None or action_m.start() <= final_m.start()):
        end_pos = action_m.start()
    if final_m and (end_pos is None or final_m.start() < end_pos):
        end_pos = final_m.start()
    if end_pos is not None:
        thought_content = rest[:end_pos].rstrip()
        return (thought_content, True)
    # 未出现完整关键字时,去掉末尾「可能是关键字前缀」的片段,避免把 "\nAc"、"tion:"、"r:"、" [完整回答]" 等当 thought 流式发出
    thought_content = rest.rstrip()
    for kw in ("Action:", "Final Answer:", "最终答案:"):
        for i in range(len(kw), 0, -1):
            if thought_content.lower().endswith(kw[:i].lower()):
                thought_content = thought_content[:-i].rstrip()
                break
    # 再剥 "Final Answer:" 的尾部片段(流式时先收到 "Answer:"、"r:" 等),避免 [完整回答] 被算进 think
    for suffix in (
        " Final Answer:",
        " Final Answer",
        " Answer:",
        " Answer",
        "Answer:",
        "Answer",
        "nswer:",
        "nswer",
        "swer:",
        "swer",
        "wer:",
        "wer",
        "er:",
        "er",
        "r:",
        "r",
    ):
        if thought_content.endswith(suffix):
            thought_content = thought_content[: -len(suffix)].rstrip()
            break
    return (thought_content, False)


def detect_react_mode(buffer: str) -> bool | None:
    """
    判断 buffer 是否为 ReAct 工具调用模式(规范格式:Thought:/Action:/Action Input:)。
    仅当出现该格式时才识别为 ReAct;未按规范返回一律视为纯文本。
    None=尚未确定,True=ReAct 工具调用,False=普通文本或 Final Answer。
    """
    stripped = buffer.lstrip()
    if re.search(r"^\s*Action[::]\s*\w+", stripped, re.MULTILINE):
        return True
    if re.search(r"(?:Final Answer|最终答案)[::]", stripped, re.I):
        return False
    # 流式可能只传 Thought/Action 的前半段(如 "Th"、"Tho"),视为尚未确定,继续缓冲
    lower = stripped.lower()
    if lower and ("thought:".startswith(lower) or "action:".startswith(lower)):
        return None
    # 若 buffer 中已出现 Thought:,可能为前导语 + Thought 格式(第二轮常见),保持 None 等待 Action
    if re.search(r"Thought[::]\s*", stripped, re.I):
        return None
    # 未按规范:首行不是 Thought:/Action: 开头则视为纯文本
    if stripped and not re.match(r"^\s*(?:Thought|Action)[::]", stripped, re.I):
        return False
    return None