Commit
·
758564e
1
Parent(s):
061585b
Updated versions of logging and some improvements of nodes configuration, adding gpt-5-mini
Browse files- data/images.jpeg +0 -0
- src/agent.py +10 -3
- src/config.py +9 -75
- src/nodes.py +137 -9
- src/prompts/prompts.py +24 -19
- src/state.py +1 -0
- src/tools/code_interpreter.py +1 -1
- src/tools/tools.py +1 -1
- src/tools/web_crawler.py +0 -0
- src/utils/utils.py +7 -1
- src/workflow_test.ipynb +41 -119
data/images.jpeg
ADDED
|
src/agent.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
from nodes import (query_input, complexity_assessor, planner, agent, simple_executor, critic_evaluator, replanner, enhanced_finalizer)
|
| 3 |
from state import AgentState
|
| 4 |
from langgraph.graph import StateGraph, END
|
| 5 |
-
from nodes import should_continue, should_use_planning, should_replan
|
| 6 |
from langgraph.checkpoint.memory import MemorySaver
|
| 7 |
from config import DEBUGGING_TOOL_NODE
|
| 8 |
|
|
@@ -13,11 +13,13 @@ def build_workflow(checkpointer=None) -> StateGraph[AgentState]:
|
|
| 13 |
builder.add_node("PLANNING", planner)
|
| 14 |
builder.add_node("AGENT", agent)
|
| 15 |
builder.add_node("TOOLS", DEBUGGING_TOOL_NODE)
|
|
|
|
| 16 |
builder.add_node("FINALIZER", enhanced_finalizer)
|
| 17 |
builder.add_node("SIMPLE_EXECUTOR", simple_executor)
|
| 18 |
builder.add_node("CRITIC", critic_evaluator)
|
| 19 |
builder.add_node("REPLANNER", replanner)
|
| 20 |
|
|
|
|
| 21 |
builder.set_entry_point("INPUT")
|
| 22 |
builder.add_edge("INPUT", "COMPLEXITY_ASSESSOR")
|
| 23 |
|
|
@@ -26,8 +28,13 @@ def build_workflow(checkpointer=None) -> StateGraph[AgentState]:
|
|
| 26 |
should_use_planning,
|
| 27 |
{"simple_executor": "SIMPLE_EXECUTOR", "planner": "PLANNING"},
|
| 28 |
)
|
| 29 |
-
builder.
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
builder.add_edge("PLANNING", "AGENT")
|
| 33 |
builder.add_conditional_edges(
|
|
|
|
| 2 |
from nodes import (query_input, complexity_assessor, planner, agent, simple_executor, critic_evaluator, replanner, enhanced_finalizer)
|
| 3 |
from state import AgentState
|
| 4 |
from langgraph.graph import StateGraph, END
|
| 5 |
+
from nodes import should_continue, should_use_planning, should_replan, should_use_tools_simple_executor
|
| 6 |
from langgraph.checkpoint.memory import MemorySaver
|
| 7 |
from config import DEBUGGING_TOOL_NODE
|
| 8 |
|
|
|
|
| 13 |
builder.add_node("PLANNING", planner)
|
| 14 |
builder.add_node("AGENT", agent)
|
| 15 |
builder.add_node("TOOLS", DEBUGGING_TOOL_NODE)
|
| 16 |
+
builder.add_node("TOOLS_SIMPLE", DEBUGGING_TOOL_NODE)
|
| 17 |
builder.add_node("FINALIZER", enhanced_finalizer)
|
| 18 |
builder.add_node("SIMPLE_EXECUTOR", simple_executor)
|
| 19 |
builder.add_node("CRITIC", critic_evaluator)
|
| 20 |
builder.add_node("REPLANNER", replanner)
|
| 21 |
|
| 22 |
+
|
| 23 |
builder.set_entry_point("INPUT")
|
| 24 |
builder.add_edge("INPUT", "COMPLEXITY_ASSESSOR")
|
| 25 |
|
|
|
|
| 28 |
should_use_planning,
|
| 29 |
{"simple_executor": "SIMPLE_EXECUTOR", "planner": "PLANNING"},
|
| 30 |
)
|
| 31 |
+
builder.add_conditional_edges(
|
| 32 |
+
"SIMPLE_EXECUTOR",
|
| 33 |
+
should_use_tools_simple_executor,
|
| 34 |
+
{"tools": "TOOLS_SIMPLE", "final_answer": "FINALIZER"},
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
builder.add_edge("TOOLS_SIMPLE", "FINALIZER")
|
| 38 |
|
| 39 |
builder.add_edge("PLANNING", "AGENT")
|
| 40 |
builder.add_conditional_edges(
|
src/config.py
CHANGED
|
@@ -11,85 +11,19 @@ config = {"configurable": {"thread_id": "1"}, "recursion_limit" : 50}
|
|
| 11 |
TOOLS = [download_file_from_url, web_search,
|
| 12 |
arxiv_search, wiki_search, add, subtract, multiply, divide,
|
| 13 |
power, analyze_excel_file, analyze_csv_file, analyze_docx_file,
|
| 14 |
-
analyze_pdf_file, analyze_txt_file,
|
| 15 |
vision_qa_gemma, safe_code_run]
|
| 16 |
|
| 17 |
-
class DebuggingToolNode(ToolNode):
|
| 18 |
-
def __init__(self, tools):
|
| 19 |
-
super().__init__(tools)
|
| 20 |
-
|
| 21 |
-
def __call__(self, state):
|
| 22 |
-
log_stage("TOOL NODE", subtitle="Dispatching tool calls", icon="🛠️")
|
| 23 |
-
|
| 24 |
-
messages = state.get("messages", [])
|
| 25 |
-
last_message = messages[-1] if messages else None
|
| 26 |
-
|
| 27 |
-
if not last_message or not hasattr(last_message, "tool_calls"):
|
| 28 |
-
log_stage("TOOL ERROR", subtitle="No tool calls found", icon="❌")
|
| 29 |
-
return state
|
| 30 |
-
|
| 31 |
-
tool_calls = last_message.tool_calls
|
| 32 |
-
log_stage("TOOL DISPATCH", subtitle=f"Executing {len(tool_calls)} tool(s)", icon="🔧")
|
| 33 |
-
for call in tool_calls:
|
| 34 |
-
print(f" - {call['name']}: {call['args']}")
|
| 35 |
-
|
| 36 |
-
try:
|
| 37 |
-
# Выполняем инструменты
|
| 38 |
-
result = super().__call__(state)
|
| 39 |
-
|
| 40 |
-
# Проверяем результаты
|
| 41 |
-
new_messages = result.get("messages", [])
|
| 42 |
-
tool_messages = [msg for msg in new_messages[len(messages):]
|
| 43 |
-
if isinstance(msg, ToolMessage)]
|
| 44 |
-
|
| 45 |
-
log_stage("TOOL RESULTS", subtitle=f"Got {len(tool_messages)} responses", icon="📨")
|
| 46 |
-
|
| 47 |
-
# Логируем результаты
|
| 48 |
-
for msg in tool_messages:
|
| 49 |
-
content_preview = msg.content[:100] + "..." if len(msg.content) > 100 else msg.content
|
| 50 |
-
print(f" - {msg.name}: {content_preview}")
|
| 51 |
-
|
| 52 |
-
# Автоматически добавляем сигнал завершения шага после успешного выполнения инструментов
|
| 53 |
-
if tool_messages:
|
| 54 |
-
current_step = state.get("current_step", 0)
|
| 55 |
-
plan = state.get("plan")
|
| 56 |
-
|
| 57 |
-
if plan and current_step < len(plan.steps):
|
| 58 |
-
step_completion_msg = AIMessage(
|
| 59 |
-
content=f"STEP COMPLETE: Successfully executed {len(tool_messages)} tool(s) for step {plan.steps[current_step].id}"
|
| 60 |
-
)
|
| 61 |
-
result["messages"] = result["messages"] + [step_completion_msg]
|
| 62 |
-
log_stage("STEP COMPLETION", subtitle=f"Step {current_step + 1} marked complete", icon="✅")
|
| 63 |
-
|
| 64 |
-
# Продвигаем к следующему шагу
|
| 65 |
-
result["current_step"] = current_step + 1
|
| 66 |
-
result["reasoning_done"] = False # Сброс для следующего шага
|
| 67 |
-
|
| 68 |
-
return result
|
| 69 |
-
|
| 70 |
-
except Exception as exc:
|
| 71 |
-
log_stage("TOOL ERROR", subtitle=f"{type(exc).__name__}: {exc}", icon="❌")
|
| 72 |
-
print(f"Full error: {repr(exc)}")
|
| 73 |
-
|
| 74 |
-
# Создаем ToolMessage для каждого failed tool call
|
| 75 |
-
error_messages = []
|
| 76 |
-
for call in tool_calls:
|
| 77 |
-
error_msg = ToolMessage(
|
| 78 |
-
content=f"ERROR: {type(exc).__name__}: {exc}",
|
| 79 |
-
tool_call_id=call.get("id") or "unknown_call",
|
| 80 |
-
name=call.get("name", "unknown_tool"),
|
| 81 |
-
)
|
| 82 |
-
error_messages.append(error_msg)
|
| 83 |
-
|
| 84 |
-
return {"messages": messages + error_messages}
|
| 85 |
-
|
| 86 |
|
| 87 |
TOOL_NODE = ToolNode(TOOLS)
|
| 88 |
-
DEBUGGING_TOOL_NODE =
|
| 89 |
-
|
| 90 |
-
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.25
|
| 91 |
-
|
| 92 |
-
planner_llm =
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
|
| 95 |
|
|
|
|
| 11 |
TOOLS = [download_file_from_url, web_search,
|
| 12 |
arxiv_search, wiki_search, add, subtract, multiply, divide,
|
| 13 |
power, analyze_excel_file, analyze_csv_file, analyze_docx_file,
|
| 14 |
+
analyze_pdf_file, analyze_txt_file,
|
| 15 |
vision_qa_gemma, safe_code_run]
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
TOOL_NODE = ToolNode(TOOLS)
|
| 19 |
+
DEBUGGING_TOOL_NODE = TOOL_NODE
|
| 20 |
+
|
| 21 |
+
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7) #default 0.25
|
| 22 |
+
llm_deterministic = ChatOpenAI(model="gpt-5-mini", temperature=0.05)
|
| 23 |
+
planner_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1).with_structured_output(PlannerPlan)
|
| 24 |
+
llm_criticist = ChatOpenAI(model="gpt-5-mini", temperature=0.3)
|
| 25 |
+
llm_with_tools = llm_deterministic.bind_tools(TOOLS)
|
| 26 |
+
llm_reasoning = ChatOpenAI(model="gpt-5-mini", temperature=0.3)
|
| 27 |
|
| 28 |
|
| 29 |
|
src/nodes.py
CHANGED
|
@@ -13,7 +13,7 @@ from prompts.prompts import (
|
|
| 13 |
CRITIC_PROMPT,
|
| 14 |
)
|
| 15 |
|
| 16 |
-
from config import
|
| 17 |
from schemas import PlannerPlan, ComplexityLevel, CritiqueFeedback, ExecutionReport, ToolExecution
|
| 18 |
|
| 19 |
from utils.utils import (
|
|
@@ -113,6 +113,8 @@ def agent(state: AgentState) -> AgentState:
|
|
| 113 |
current_step = state.get("current_step", 0)
|
| 114 |
reasoning_done = state.get("reasoning_done", False)
|
| 115 |
plan: Optional[PlannerPlan] = state.get("plan")
|
|
|
|
|
|
|
| 116 |
#steps = state["plan"].steps
|
| 117 |
|
| 118 |
"""
|
|
@@ -175,6 +177,15 @@ def agent(state: AgentState) -> AgentState:
|
|
| 175 |
file_contents = state.get("file_contents", {})
|
| 176 |
file_list = ", ".join(file_contents.keys()) if file_contents else "none provided"
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
system_message = SystemMessage(
|
| 179 |
content=SYSTEM_EXECUTOR_PROMPT.format(
|
| 180 |
plan_summary=plan.summary,
|
|
@@ -197,7 +208,7 @@ def agent(state: AgentState) -> AgentState:
|
|
| 197 |
)
|
| 198 |
)
|
| 199 |
stack = [system_message] + state["messages"] + [instruction]
|
| 200 |
-
reasoning_response =
|
| 201 |
log_stage("REASONING", subtitle=f"{current_step_info.id}", icon="🧠")
|
| 202 |
print(reasoning_response.content)
|
| 203 |
|
|
@@ -224,12 +235,13 @@ def agent(state: AgentState) -> AgentState:
|
|
| 224 |
Explain what you need to do and why, then end your response.
|
| 225 |
|
| 226 |
REASONING IS IMPERATIVE BEFORE ANY TOOL CALLS.
|
|
|
|
| 227 |
"""
|
| 228 |
|
| 229 |
sys_msg = SystemMessage(content = reasoning_prompt)
|
| 230 |
stack = [sys_msg] + state["messages"]
|
| 231 |
|
| 232 |
-
step =
|
| 233 |
print("=== REASONING STEP ===")
|
| 234 |
print(step.content)
|
| 235 |
|
|
@@ -254,6 +266,7 @@ def agent(state: AgentState) -> AgentState:
|
|
| 254 |
# Используем модель С инструментами для выполнения
|
| 255 |
step = llm_with_tools.invoke(stack)
|
| 256 |
print("=== TOOL EXECUTION ===")
|
|
|
|
| 257 |
print(f"Tool calls: {step.tool_calls}")
|
| 258 |
|
| 259 |
return {
|
|
@@ -265,10 +278,17 @@ def agent(state: AgentState) -> AgentState:
|
|
| 265 |
def should_continue(state : AgentState) -> bool:
|
| 266 |
|
| 267 |
last_message = state["messages"][-1]
|
|
|
|
| 268 |
reasoning_done = state.get("reasoning_done", False)
|
| 269 |
plan = state.get("plan", None)
|
| 270 |
current_step = state.get("current_step", 0)
|
| 271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
#ПРИОРИТЕТ 1: Если есть tool_calls - выполняем их
|
| 273 |
if hasattr(last_message, "tool_calls") and last_message.tool_calls:
|
| 274 |
return "tools"
|
|
@@ -368,7 +388,7 @@ def enhanced_finalizer(state: AgentState) -> AgentState:
|
|
| 368 |
Be thorough but concise. This report will be evaluated by a critic for quality assurance.
|
| 369 |
"""
|
| 370 |
|
| 371 |
-
report_llm =
|
| 372 |
|
| 373 |
execution_report = report_llm.invoke([
|
| 374 |
SystemMessage(content=report_generator_prompt),
|
|
@@ -406,12 +426,26 @@ def simple_executor(state: AgentState) -> AgentState:
|
|
| 406 |
SystemMessage(content=simple_prompt),
|
| 407 |
HumanMessage(content=state['query'])
|
| 408 |
])
|
|
|
|
|
|
|
| 409 |
|
| 410 |
return {
|
| 411 |
"messages": state["messages"] + [response],
|
| 412 |
"final_answer": response.content
|
| 413 |
}
|
| 414 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
|
| 416 |
def should_use_planning(state: AgentState) -> str:
|
| 417 |
"""Route based on complexity assessment."""
|
|
@@ -428,7 +462,7 @@ def critic_evaluator(state: AgentState) -> AgentState:
|
|
| 428 |
print("=== ENHANCED ANSWER CRITIQUE ===")
|
| 429 |
|
| 430 |
report = state.get("execution_report")
|
| 431 |
-
critic_llm =
|
| 432 |
|
| 433 |
critique_prompt = CRITIC_PROMPT.format(
|
| 434 |
query=report.query_summary,
|
|
@@ -495,7 +529,7 @@ def should_replan(state: AgentState) -> str:
|
|
| 495 |
|
| 496 |
return "end"
|
| 497 |
|
| 498 |
-
def
|
| 499 |
"""Create a revised plan based on critic feedback."""
|
| 500 |
print("=== REPLANNING ===")
|
| 501 |
|
|
@@ -541,7 +575,11 @@ def replanner(state: AgentState) -> AgentState:
|
|
| 541 |
essential_messages.append(msg)
|
| 542 |
|
| 543 |
print(f"Cleaned message history: {len(current_messages)} -> {len(essential_messages)} messages")
|
| 544 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 545 |
return {
|
| 546 |
"plan": revised_plan,
|
| 547 |
"current_step": 0,
|
|
@@ -550,12 +588,102 @@ def replanner(state: AgentState) -> AgentState:
|
|
| 550 |
"execution_report": None
|
| 551 |
}
|
| 552 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
|
| 554 |
def complexity_assessor(state: AgentState) -> AgentState:
|
| 555 |
"""Assess query complexity and determine if planning is needed."""
|
| 556 |
print("=== COMPLEXITY ASSESSMENT ===")
|
| 557 |
|
| 558 |
-
complexity_llm =
|
| 559 |
|
| 560 |
assessment_message = [
|
| 561 |
SystemMessage(content=COMPLEXITY_ASSESSOR_PROMPT.strip()),
|
|
@@ -571,4 +699,4 @@ def complexity_assessor(state: AgentState) -> AgentState:
|
|
| 571 |
return {
|
| 572 |
"complexity_assessment": assessment,
|
| 573 |
"messages": state["messages"] + assessment_message
|
| 574 |
-
}
|
|
|
|
| 13 |
CRITIC_PROMPT,
|
| 14 |
)
|
| 15 |
|
| 16 |
+
from config import llm_reasoning, TOOLS, planner_llm, llm_with_tools, llm_deterministic, llm_criticist
|
| 17 |
from schemas import PlannerPlan, ComplexityLevel, CritiqueFeedback, ExecutionReport, ToolExecution
|
| 18 |
|
| 19 |
from utils.utils import (
|
|
|
|
| 113 |
current_step = state.get("current_step", 0)
|
| 114 |
reasoning_done = state.get("reasoning_done", False)
|
| 115 |
plan: Optional[PlannerPlan] = state.get("plan")
|
| 116 |
+
previous_tool_results = state.get("previous_tool_results", {})
|
| 117 |
+
|
| 118 |
#steps = state["plan"].steps
|
| 119 |
|
| 120 |
"""
|
|
|
|
| 177 |
file_contents = state.get("file_contents", {})
|
| 178 |
file_list = ", ".join(file_contents.keys()) if file_contents else "none provided"
|
| 179 |
|
| 180 |
+
# Добавляем информацию о предыдущих результатах (UPDATE)
|
| 181 |
+
previous_results_context = ""
|
| 182 |
+
if previous_tool_results:
|
| 183 |
+
previous_results_context = f"\n\nPREVIOUS CALCULATION RESULTS:\n"
|
| 184 |
+
for tool_call_id, result in previous_tool_results.items():
|
| 185 |
+
previous_results_context += f"- {tool_call_id}: {result}\n"
|
| 186 |
+
previous_results_context += "You can reference these results in your calculations.\n"
|
| 187 |
+
|
| 188 |
+
|
| 189 |
system_message = SystemMessage(
|
| 190 |
content=SYSTEM_EXECUTOR_PROMPT.format(
|
| 191 |
plan_summary=plan.summary,
|
|
|
|
| 208 |
)
|
| 209 |
)
|
| 210 |
stack = [system_message] + state["messages"] + [instruction]
|
| 211 |
+
reasoning_response = llm_reasoning.invoke(stack) #default llm
|
| 212 |
log_stage("REASONING", subtitle=f"{current_step_info.id}", icon="🧠")
|
| 213 |
print(reasoning_response.content)
|
| 214 |
|
|
|
|
| 235 |
Explain what you need to do and why, then end your response.
|
| 236 |
|
| 237 |
REASONING IS IMPERATIVE BEFORE ANY TOOL CALLS.
|
| 238 |
+
FOR MORE COMPLEX UNDERSTANDING -> USE RESULTS AND INSIGHTS FROM PREVIOUS STEPS.
|
| 239 |
"""
|
| 240 |
|
| 241 |
sys_msg = SystemMessage(content = reasoning_prompt)
|
| 242 |
stack = [sys_msg] + state["messages"]
|
| 243 |
|
| 244 |
+
step = llm_reasoning.invoke(stack)
|
| 245 |
print("=== REASONING STEP ===")
|
| 246 |
print(step.content)
|
| 247 |
|
|
|
|
| 266 |
# Используем модель С инструментами для выполнения
|
| 267 |
step = llm_with_tools.invoke(stack)
|
| 268 |
print("=== TOOL EXECUTION ===")
|
| 269 |
+
print(step)
|
| 270 |
print(f"Tool calls: {step.tool_calls}")
|
| 271 |
|
| 272 |
return {
|
|
|
|
| 278 |
def should_continue(state : AgentState) -> bool:
|
| 279 |
|
| 280 |
last_message = state["messages"][-1]
|
| 281 |
+
print(f"=== LAST MESSAGE WAS: {last_message} ===")
|
| 282 |
reasoning_done = state.get("reasoning_done", False)
|
| 283 |
plan = state.get("plan", None)
|
| 284 |
current_step = state.get("current_step", 0)
|
| 285 |
|
| 286 |
+
print(f"=== SHOULD_CONTINUE DEBUG ===")
|
| 287 |
+
print(f"Current step: {current_step}")
|
| 288 |
+
print(f"Plan steps: {len(plan.steps) if plan else 0}")
|
| 289 |
+
print(f"Reasoning done: {reasoning_done}")
|
| 290 |
+
print(f"Last message type: {type(last_message).__name__}")
|
| 291 |
+
|
| 292 |
#ПРИОРИТЕТ 1: Если есть tool_calls - выполняем их
|
| 293 |
if hasattr(last_message, "tool_calls") and last_message.tool_calls:
|
| 294 |
return "tools"
|
|
|
|
| 388 |
Be thorough but concise. This report will be evaluated by a critic for quality assurance.
|
| 389 |
"""
|
| 390 |
|
| 391 |
+
report_llm = llm_deterministic.with_structured_output(ExecutionReport)
|
| 392 |
|
| 393 |
execution_report = report_llm.invoke([
|
| 394 |
SystemMessage(content=report_generator_prompt),
|
|
|
|
| 426 |
SystemMessage(content=simple_prompt),
|
| 427 |
HumanMessage(content=state['query'])
|
| 428 |
])
|
| 429 |
+
|
| 430 |
+
print("Response generated for simple query.")
|
| 431 |
|
| 432 |
return {
|
| 433 |
"messages": state["messages"] + [response],
|
| 434 |
"final_answer": response.content
|
| 435 |
}
|
| 436 |
|
| 437 |
+
def should_use_tools_simple_executor(state: AgentState) -> str:
|
| 438 |
+
"""Decide whether to use tools or answer directly in simple executor."""
|
| 439 |
+
last_message = state["messages"][-1]
|
| 440 |
+
|
| 441 |
+
if hasattr(last_message, "tool_calls") and last_message.tool_calls:
|
| 442 |
+
return "tools"
|
| 443 |
+
|
| 444 |
+
if hasattr(last_message, "content") and "<FINAL_ANSWER>" in last_message.content:
|
| 445 |
+
return "final_answer"
|
| 446 |
+
|
| 447 |
+
return "final_answer"
|
| 448 |
+
|
| 449 |
|
| 450 |
def should_use_planning(state: AgentState) -> str:
|
| 451 |
"""Route based on complexity assessment."""
|
|
|
|
| 462 |
print("=== ENHANCED ANSWER CRITIQUE ===")
|
| 463 |
|
| 464 |
report = state.get("execution_report")
|
| 465 |
+
critic_llm = llm_criticist.with_structured_output(CritiqueFeedback)
|
| 466 |
|
| 467 |
critique_prompt = CRITIC_PROMPT.format(
|
| 468 |
query=report.query_summary,
|
|
|
|
| 529 |
|
| 530 |
return "end"
|
| 531 |
|
| 532 |
+
def replanner_old(state: AgentState) -> AgentState:
|
| 533 |
"""Create a revised plan based on critic feedback."""
|
| 534 |
print("=== REPLANNING ===")
|
| 535 |
|
|
|
|
| 575 |
essential_messages.append(msg)
|
| 576 |
|
| 577 |
print(f"Cleaned message history: {len(current_messages)} -> {len(essential_messages)} messages")
|
| 578 |
+
print("=== ESSENTIAL MESSAGES ===")
|
| 579 |
+
print(essential_messages)
|
| 580 |
+
print("=== AGENT STATE ===")
|
| 581 |
+
print(state["messages"])
|
| 582 |
+
|
| 583 |
return {
|
| 584 |
"plan": revised_plan,
|
| 585 |
"current_step": 0,
|
|
|
|
| 588 |
"execution_report": None
|
| 589 |
}
|
| 590 |
|
| 591 |
+
def replanner(state: AgentState) -> AgentState:
|
| 592 |
+
"""Create a revised plan based on critic feedback."""
|
| 593 |
+
print("=== REPLANNING ===")
|
| 594 |
+
|
| 595 |
+
critique = state["critique_feedback"]
|
| 596 |
+
previous_plan = state.get("plan")
|
| 597 |
+
|
| 598 |
+
replan_prompt = f"""
|
| 599 |
+
{SYSTEM_PROMPT_PLANNER}
|
| 600 |
+
|
| 601 |
+
REPLANNING CONTEXT:
|
| 602 |
+
Original Query: {state['query']}
|
| 603 |
+
Previous Plan: {previous_plan if previous_plan else {}}
|
| 604 |
+
|
| 605 |
+
CRITIC FEEDBACK:
|
| 606 |
+
- Quality Score: {critique.quality_score}/10
|
| 607 |
+
- Issues Found: {critique.errors_found}
|
| 608 |
+
- Missing Elements: {critique.missing_elements}
|
| 609 |
+
- Improvement Suggestions: {critique.suggested_improvements}
|
| 610 |
+
- Specific Instructions: {critique.replan_instructions}
|
| 611 |
+
|
| 612 |
+
Create a REVISED plan that addresses these issues. Focus on fixing the identified problems.
|
| 613 |
+
"""
|
| 614 |
+
|
| 615 |
+
revised_plan = planner_llm.invoke([
|
| 616 |
+
SystemMessage(content=replan_prompt),
|
| 617 |
+
HumanMessage(content="Create a revised plan based on the feedback.")
|
| 618 |
+
])
|
| 619 |
+
|
| 620 |
+
print("Plan revised based on critic feedback")
|
| 621 |
+
|
| 622 |
+
# ИСПРАВЛЕНИЕ: Сохраняем важные результаты инструментов
|
| 623 |
+
current_messages = state.get("messages", [])
|
| 624 |
+
|
| 625 |
+
# Находим полезные результаты инструментов
|
| 626 |
+
preserved_messages = []
|
| 627 |
+
tool_results = {}
|
| 628 |
+
|
| 629 |
+
for i, msg in enumerate(current_messages):
|
| 630 |
+
# Сохраняем системные сообщения и пользовательские запросы
|
| 631 |
+
if isinstance(msg, (SystemMessage, HumanMessage)):
|
| 632 |
+
# Фильтруем только исходные запросы, не промпты планировщика
|
| 633 |
+
if (isinstance(msg, HumanMessage) or
|
| 634 |
+
("complexity" in msg.content.lower() and "assessor" in msg.content.lower())):
|
| 635 |
+
preserved_messages.append(msg)
|
| 636 |
+
|
| 637 |
+
# Сохраняем успешные результаты инструментов
|
| 638 |
+
elif isinstance(msg, ToolMessage) and msg.content and msg.content.strip():
|
| 639 |
+
# Проверяем, что это полезный результат
|
| 640 |
+
try:
|
| 641 |
+
# Если результат можно преобразовать в число - это вычисление
|
| 642 |
+
float(msg.content.strip())
|
| 643 |
+
preserved_messages.append(msg)
|
| 644 |
+
tool_results[msg.tool_call_id] = msg.content
|
| 645 |
+
|
| 646 |
+
# Также нужно сохранить соответствующий AIMessage с tool_call
|
| 647 |
+
for j in range(i-1, -1, -1):
|
| 648 |
+
if (isinstance(current_messages[j], AIMessage) and
|
| 649 |
+
hasattr(current_messages[j], 'tool_calls') and
|
| 650 |
+
current_messages[j].tool_calls):
|
| 651 |
+
for tool_call in current_messages[j].tool_calls:
|
| 652 |
+
if tool_call['id'] == msg.tool_call_id:
|
| 653 |
+
if current_messages[j] not in preserved_messages:
|
| 654 |
+
preserved_messages.insert(-1, current_messages[j])
|
| 655 |
+
break
|
| 656 |
+
break
|
| 657 |
+
except (ValueError, AttributeError):
|
| 658 |
+
# Если не число, но содержательный результат, тоже сохраняем
|
| 659 |
+
if len(msg.content.strip()) > 1: # Минимальная длина для сохранения
|
| 660 |
+
preserved_messages.append(msg)
|
| 661 |
+
|
| 662 |
+
print(f"Preserved {len(tool_results)} tool results")
|
| 663 |
+
print(f"Cleaned message history: {len(current_messages)} -> {len(preserved_messages)} messages")
|
| 664 |
+
|
| 665 |
+
# Добавляем контекст о доступных результатах
|
| 666 |
+
if tool_results:
|
| 667 |
+
context_msg = HumanMessage(
|
| 668 |
+
content=f"Previous calculation results available: {tool_results}"
|
| 669 |
+
)
|
| 670 |
+
preserved_messages.append(context_msg)
|
| 671 |
+
|
| 672 |
+
return {
|
| 673 |
+
"plan": revised_plan,
|
| 674 |
+
"current_step": 0,
|
| 675 |
+
"reasoning_done": False,
|
| 676 |
+
"messages": preserved_messages,
|
| 677 |
+
"execution_report": None,
|
| 678 |
+
# Сохраняем важную информацию о предыдущих вычислениях
|
| 679 |
+
"previous_tool_results": tool_results
|
| 680 |
+
}
|
| 681 |
|
| 682 |
def complexity_assessor(state: AgentState) -> AgentState:
|
| 683 |
"""Assess query complexity and determine if planning is needed."""
|
| 684 |
print("=== COMPLEXITY ASSESSMENT ===")
|
| 685 |
|
| 686 |
+
complexity_llm = llm_deterministic.with_structured_output(ComplexityLevel)
|
| 687 |
|
| 688 |
assessment_message = [
|
| 689 |
SystemMessage(content=COMPLEXITY_ASSESSOR_PROMPT.strip()),
|
|
|
|
| 699 |
return {
|
| 700 |
"complexity_assessment": assessment,
|
| 701 |
"messages": state["messages"] + assessment_message
|
| 702 |
+
}
|
src/prompts/prompts.py
CHANGED
|
@@ -15,38 +15,39 @@ TASK BREAKDOWN EXAMPLES:
|
|
| 15 |
Example 1: "Analyze sales data and calculate growth rates"
|
| 16 |
{{
|
| 17 |
"steps": [
|
| 18 |
-
{{"id": "s1", "goal": "Load and examine the sales data file", "tool": "
|
| 19 |
-
{{"id": "s2", "goal": "Calculate monthly growth rates using Python", "tool": "
|
| 20 |
-
{{"id": "s3", "goal": "Generate summary statistics and trends", "tool": "
|
| 21 |
]
|
| 22 |
-
}}
|
| 23 |
|
| 24 |
Example 2: "Research recent AI developments and summarize key trends"
|
| 25 |
{{
|
| 26 |
"steps": [
|
| 27 |
{{"id": "s1", "goal": "Search for recent AI news and developments", "tool": "web_search"}},
|
| 28 |
-
{{"id": "s2", "goal": "
|
| 29 |
-
{{"id": "s3", "goal": "
|
|
|
|
| 30 |
]
|
| 31 |
}}
|
| 32 |
|
| 33 |
Example 3: "Compare performance metrics between two datasets"
|
| 34 |
{{
|
| 35 |
"steps": [
|
| 36 |
-
{{"id": "s1", "goal": "Load first dataset and examine structure", "tool": "
|
| 37 |
-
{{"id": "s2", "goal": "Load second dataset and examine structure", "tool": "
|
| 38 |
-
{{"id": "s3", "goal": "Calculate statistical metrics for both datasets using code", "tool": "
|
| 39 |
-
{{"id": "s4", "goal": "Perform statistical comparison and significance testing", "tool": "
|
| 40 |
]
|
| 41 |
}}
|
| 42 |
|
| 43 |
Example 4: "Create a budget analysis from expense data"
|
| 44 |
{{
|
| 45 |
"steps": [
|
| 46 |
-
{{"id": "s1", "goal": "Load expense data and validate format", "tool": "
|
| 47 |
-
{{"id": "s2", "goal": "Calculate category totals and percentages using code", "tool": "
|
| 48 |
-
{{"id": "s3", "goal": "Generate budget variance analysis and projections", "tool": "
|
| 49 |
-
{{"id": "s4", "goal": "Create visualization of spending patterns", "tool": "
|
| 50 |
]
|
| 51 |
}}
|
| 52 |
|
|
@@ -69,7 +70,7 @@ Return a single JSON object with this structure:
|
|
| 69 |
}}
|
| 70 |
|
| 71 |
Ground rules:
|
| 72 |
-
- Prefer 2-4 steps for most tasks. Single steps only for truly trivial queries.
|
| 73 |
- Break down complex tasks into logical components - don't try to solve everything at once
|
| 74 |
- Use tool names exactly as listed. If no tool is needed, set "tool": null.
|
| 75 |
- Never assume files or URLs exist—plan to search/download before analysing.
|
|
@@ -94,10 +95,10 @@ Available tools: {tool_catalogue}
|
|
| 94 |
Known local files: {file_list}
|
| 95 |
|
| 96 |
CRITICAL COMPUTATION RULE: You MUST use tools for ANY numerical calculation, counting, or mathematical operation. This includes:
|
| 97 |
-
- Simple arithmetic (use
|
| 98 |
-
- Data analysis and statistics (use
|
| 99 |
-
- Counting items, rows, or occurrences (use
|
| 100 |
-
- Percentage calculations (use
|
| 101 |
- Any mathematical transformation or formula application
|
| 102 |
|
| 103 |
NEVER perform manual calculations or provide estimated numbers.
|
|
@@ -121,6 +122,8 @@ COMPLEXITY LEVELS:
|
|
| 121 |
1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use
|
| 122 |
- Examples: "What is photosynthesis?", "Define machine learning", "What's the capital of France?"
|
| 123 |
- NOTE: Simple math like "2+2" still requires calculator tool but counts as SIMPLE
|
|
|
|
|
|
|
| 124 |
|
| 125 |
2. MODERATE: Questions requiring 2-4 tool calls or basic multi-step analysis
|
| 126 |
- Examples: "Search for recent news about AI", "Analyze this CSV file for trends", "Calculate ROI from this data"
|
|
@@ -130,6 +133,8 @@ COMPLEXITY LEVELS:
|
|
| 130 |
- Examples: "Research market trends and create investment strategy", "Analyze multiple data sources and predict outcomes"
|
| 131 |
- "Build comprehensive report from various inputs", "Multi-stage data processing with validation"
|
| 132 |
|
|
|
|
|
|
|
| 133 |
ASSESSMENT CRITERIA:
|
| 134 |
- Number of distinct steps likely needed (1 = Simple, 2-4 = Moderate, 5+ = Complex)
|
| 135 |
- Tool complexity and dependencies between steps
|
|
|
|
| 15 |
Example 1: "Analyze sales data and calculate growth rates"
|
| 16 |
{{
|
| 17 |
"steps": [
|
| 18 |
+
{{"id": "s1", "goal": "Load and examine the sales data file", "tool": "analyze_(csv, docx, pdf etc.)_file"}},
|
| 19 |
+
{{"id": "s2", "goal": "Calculate monthly growth rates using Python", "tool": "safe_code_run"}},
|
| 20 |
+
{{"id": "s3", "goal": "Generate summary statistics and trends", "tool": "safe_code_run"}}
|
| 21 |
]
|
| 22 |
+
}}
|
| 23 |
|
| 24 |
Example 2: "Research recent AI developments and summarize key trends"
|
| 25 |
{{
|
| 26 |
"steps": [
|
| 27 |
{{"id": "s1", "goal": "Search for recent AI news and developments", "tool": "web_search"}},
|
| 28 |
+
{{"id": "s2", "goal": "Download relevant articles", "tool": "download_file_from_url"}},
|
| 29 |
+
{{"id": "s3", "goal": "Extract and organize key information from articles", "tool": "analyze_(csv, docx, pdf etc.)_file"}},
|
| 30 |
+
{{"id": "s4", "goal": "Analyze and synthesize key trends from gathered information", "tool": null}}
|
| 31 |
]
|
| 32 |
}}
|
| 33 |
|
| 34 |
Example 3: "Compare performance metrics between two datasets"
|
| 35 |
{{
|
| 36 |
"steps": [
|
| 37 |
+
{{"id": "s1", "goal": "Load first dataset and examine structure", "tool": "analyze_(csv, docx, pdf etc.)_file"}},
|
| 38 |
+
{{"id": "s2", "goal": "Load second dataset and examine structure", "tool": "analyze_(csv, docx, pdf etc.)_file"}},
|
| 39 |
+
{{"id": "s3", "goal": "Calculate statistical metrics for both datasets using code", "tool": "safe_code_run"}},
|
| 40 |
+
{{"id": "s4", "goal": "Perform statistical comparison and significance testing", "tool": "safe_code_run"}}
|
| 41 |
]
|
| 42 |
}}
|
| 43 |
|
| 44 |
Example 4: "Create a budget analysis from expense data"
|
| 45 |
{{
|
| 46 |
"steps": [
|
| 47 |
+
{{"id": "s1", "goal": "Load expense data and validate format", "tool": "analyze_(csv, docx, pdf etc.)_file"}},
|
| 48 |
+
{{"id": "s2", "goal": "Calculate category totals and percentages using code", "tool": "safe_code_run"}},
|
| 49 |
+
{{"id": "s3", "goal": "Generate budget variance analysis and projections", "tool": "safe_code_run"}},
|
| 50 |
+
{{"id": "s4", "goal": "Create visualization of spending patterns", "tool": "safe_code_run"}}
|
| 51 |
]
|
| 52 |
}}
|
| 53 |
|
|
|
|
| 70 |
}}
|
| 71 |
|
| 72 |
Ground rules:
|
| 73 |
+
- Prefer 2-4 steps for most tasks. Single steps only for truly trivial queries. Calculation tasks must use tools always.
|
| 74 |
- Break down complex tasks into logical components - don't try to solve everything at once
|
| 75 |
- Use tool names exactly as listed. If no tool is needed, set "tool": null.
|
| 76 |
- Never assume files or URLs exist—plan to search/download before analysing.
|
|
|
|
| 95 |
Known local files: {file_list}
|
| 96 |
|
| 97 |
CRITICAL COMPUTATION RULE: You MUST use tools for ANY numerical calculation, counting, or mathematical operation. This includes:
|
| 98 |
+
- Simple arithmetic (use tools add, subtract, multiply, divide, power)
|
| 99 |
+
- Data analysis and statistics (use safe_code_run)
|
| 100 |
+
- Counting items, rows, or occurrences (use safe_code_run)
|
| 101 |
+
- Percentage calculations (use add, subtract, multiply, divide, power, or safe_code_run)
|
| 102 |
- Any mathematical transformation or formula application
|
| 103 |
|
| 104 |
NEVER perform manual calculations or provide estimated numbers.
|
|
|
|
| 122 |
1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use
|
| 123 |
- Examples: "What is photosynthesis?", "Define machine learning", "What's the capital of France?"
|
| 124 |
- NOTE: Simple math like "2+2" still requires calculator tool but counts as SIMPLE
|
| 125 |
+
|
| 126 |
+
- ALSO: Logical reasoning or explanation tasks that do not require tools count as SIMPLE.
|
| 127 |
|
| 128 |
2. MODERATE: Questions requiring 2-4 tool calls or basic multi-step analysis
|
| 129 |
- Examples: "Search for recent news about AI", "Analyze this CSV file for trends", "Calculate ROI from this data"
|
|
|
|
| 133 |
- Examples: "Research market trends and create investment strategy", "Analyze multiple data sources and predict outcomes"
|
| 134 |
- "Build comprehensive report from various inputs", "Multi-stage data processing with validation"
|
| 135 |
|
| 136 |
+
MOST OF THE LOGICAL TASKS ARE SIMPLE, UNLESS THEY REQUIRE TOOLS.
|
| 137 |
+
|
| 138 |
ASSESSMENT CRITERIA:
|
| 139 |
- Number of distinct steps likely needed (1 = Simple, 2-4 = Moderate, 5+ = Complex)
|
| 140 |
- Tool complexity and dependencies between steps
|
src/state.py
CHANGED
|
@@ -19,4 +19,5 @@ class AgentState(MessagesState):
|
|
| 19 |
iteration_count :int
|
| 20 |
max_iterations: int
|
| 21 |
execution_report : ExecutionReport
|
|
|
|
| 22 |
|
|
|
|
| 19 |
iteration_count :int
|
| 20 |
max_iterations: int
|
| 21 |
execution_report : ExecutionReport
|
| 22 |
+
previous_tool_results: Dict[str, str] # NEW FIELD for storing results
|
| 23 |
|
src/tools/code_interpreter.py
CHANGED
|
@@ -91,7 +91,7 @@ def _child_exec(payload: Dict[str, Any], queue: mp.Queue):
|
|
| 91 |
safe_names = [
|
| 92 |
"abs","all","any","bool","dict","float","int","len","list","max","min",
|
| 93 |
"range","str","sum","print","enumerate","zip","map","filter","sorted",
|
| 94 |
-
"reversed","complex","pow","divmod"
|
| 95 |
]
|
| 96 |
safe_builtins = {n: getattr(builtins, n) for n in safe_names}
|
| 97 |
|
|
|
|
| 91 |
safe_names = [
|
| 92 |
"abs","all","any","bool","dict","float","int","len","list","max","min",
|
| 93 |
"range","str","sum","print","enumerate","zip","map","filter","sorted",
|
| 94 |
+
"reversed","complex","pow","divmod", "round", "next", "set", "tuple", "type", "isinstance", "issubclass",
|
| 95 |
]
|
| 96 |
safe_builtins = {n: getattr(builtins, n) for n in safe_names}
|
| 97 |
|
src/tools/tools.py
CHANGED
|
@@ -101,7 +101,7 @@ def preprocess_files(files: List[str]) -> Dict[str, Dict[str, Any]]:
|
|
| 101 |
info["suggested_tool"] = "analyze_txt_file"
|
| 102 |
elif file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']:
|
| 103 |
info["type"] = "image"
|
| 104 |
-
info["suggested_tool"] = "
|
| 105 |
else:
|
| 106 |
info["type"] = "unknown"
|
| 107 |
info["suggested_tool"] = "analyze_txt_file (fallback)"
|
|
|
|
| 101 |
info["suggested_tool"] = "analyze_txt_file"
|
| 102 |
elif file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']:
|
| 103 |
info["type"] = "image"
|
| 104 |
+
info["suggested_tool"] = "vision_qa_gemma"
|
| 105 |
else:
|
| 106 |
info["type"] = "unknown"
|
| 107 |
info["suggested_tool"] = "analyze_txt_file (fallback)"
|
src/tools/web_crawler.py
ADDED
|
File without changes
|
src/utils/utils.py
CHANGED
|
@@ -155,4 +155,10 @@ def complexity_assessor(state: AgentState) -> AgentState:
|
|
| 155 |
return {
|
| 156 |
"complexity_assessment": assessment,
|
| 157 |
"messages": state["messages"] + assessment_message
|
| 158 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
return {
|
| 156 |
"complexity_assessment": assessment,
|
| 157 |
"messages": state["messages"] + assessment_message
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def trim(s: str, max_len: int = 10_000) -> str:
|
| 162 |
+
if s and len(s) > max_len:
|
| 163 |
+
return s[:max_len] + "... [truncated]"
|
| 164 |
+
return s
|
src/workflow_test.ipynb
CHANGED
|
@@ -16,7 +16,8 @@
|
|
| 16 |
],
|
| 17 |
"source": [
|
| 18 |
"from agent import build_workflow\n",
|
| 19 |
-
"from config import config"
|
|
|
|
| 20 |
]
|
| 21 |
},
|
| 22 |
{
|
|
@@ -43,131 +44,44 @@
|
|
| 43 |
"💡 ════════════════════\n",
|
| 44 |
" • files: none provided\n",
|
| 45 |
"=== COMPLEXITY ASSESSMENT ===\n",
|
| 46 |
-
"Complexity:
|
| 47 |
-
"Needs planning:
|
| 48 |
-
"Reasoning:
|
| 49 |
-
"
|
| 50 |
-
"
|
| 51 |
-
"🧭 PLANNING \n",
|
| 52 |
-
"🧭 ════════════════════\n",
|
| 53 |
-
"\n",
|
| 54 |
-
"🧭 ════════════════════\n",
|
| 55 |
-
"🧭 PLANNER OUTPUT \n",
|
| 56 |
-
"🧭 ════════════════════\n",
|
| 57 |
-
"Task type: info\n",
|
| 58 |
-
"Summary: I will perform a web search to gather information about Nikita Miroshnichenko, including his background as a student at UNIL and any working experience at EPFL.\n",
|
| 59 |
-
"Steps:\n",
|
| 60 |
-
" s1 → Search for information about Nikita Miroshnichenko to confirm his background and work experience.\n",
|
| 61 |
-
" tool: web_search\n",
|
| 62 |
-
" inputs: Nikita Miroshnichenko UNIL EPFL\n",
|
| 63 |
-
" expected: Find relevant information confirming his student status and any work experience at EPFL.\n",
|
| 64 |
-
" on_fail: replan\n",
|
| 65 |
-
"Answer guidelines: Provide a concise summary based on the information found, including citations if applicable.\n",
|
| 66 |
-
"\n",
|
| 67 |
-
"🤖 ════════════════════\n",
|
| 68 |
-
"🤖 EXECUTION \n",
|
| 69 |
-
"🤖 ════════════════════\n",
|
| 70 |
-
"🤖 Step 1/1: Search for information about Nikita Miroshnichenko to confirm his background and work experience.\n",
|
| 71 |
-
" • step_id: s1\n",
|
| 72 |
-
" • tool: web_search\n",
|
| 73 |
-
" • expected: Find relevant information confirming his student status and any work experience at EPFL.\n",
|
| 74 |
-
"\n",
|
| 75 |
-
"🧠 ════════════════════\n",
|
| 76 |
-
"🧠 REASONING \n",
|
| 77 |
-
"🧠 ════════════════════\n",
|
| 78 |
-
"🧠 s1\n",
|
| 79 |
-
"<REASONING> The query requires gathering information about Nikita Miroshnichenko, specifically his background as a student at UNIL and any work experience at EPFL. This involves performing a web search to find relevant details about him, which will help in writing a short summary. The first step will be to use the web_search tool to collect this information. The expected outcome is to obtain sufficient data to confirm his educational background and work experience, which will then allow for the creation of a summary. Since this is a research task that requires external information, it is classified as a moderate complexity task. </REASONING>\n",
|
| 80 |
-
"=== REASONING STEP ===\n",
|
| 81 |
-
"{\n",
|
| 82 |
-
" \"task_type\": \"info\",\n",
|
| 83 |
-
" \"summary\": \"The plan involves searching for information about Nikita Miroshnichenko to confirm his background as a student at UNIL and any work experience at EPFL.\",\n",
|
| 84 |
-
" \"assumptions\": [\"Nikita Miroshnichenko is a student at UNIL\", \"There may be publicly available information regarding his work experience at EPFL\"],\n",
|
| 85 |
-
" \"steps\": [\n",
|
| 86 |
-
" {\n",
|
| 87 |
-
" \"id\": \"s1\",\n",
|
| 88 |
-
" \"goal\": \"Search for information about Nikita Miroshnichenko to confirm his background and work experience.\",\n",
|
| 89 |
-
" \"tool\": \"web_search\",\n",
|
| 90 |
-
" \"inputs\": \"Nikita Miroshnichenko UNIL EPFL\",\n",
|
| 91 |
-
" \"expected_result\": \"Find relevant information confirming his student status and any work experience at EPFL.\",\n",
|
| 92 |
-
" \"on_fail\": \"replan\"\n",
|
| 93 |
-
" }\n",
|
| 94 |
-
" ],\n",
|
| 95 |
-
" \"answer_guidelines\": \"Provide a summary of the findings, including citations for any sources used.\"\n",
|
| 96 |
-
"}\n",
|
| 97 |
-
"\n",
|
| 98 |
-
"🤖 ════════════════════\n",
|
| 99 |
-
"🤖 EXECUTION \n",
|
| 100 |
-
"🤖 ════════════════════\n",
|
| 101 |
-
"🤖 Step 1/1: Search for information about Nikita Miroshnichenko to confirm his background and work experience.\n",
|
| 102 |
-
" • step_id: s1\n",
|
| 103 |
-
" • tool: web_search\n",
|
| 104 |
-
" • expected: Find relevant information confirming his student status and any work experience at EPFL.\n",
|
| 105 |
-
"=== TOOL EXECUTION ===\n",
|
| 106 |
-
"Tool calls: [{'name': 'web_search', 'args': {'query': 'Nikita Miroshnichenko UNIL EPFL'}, 'id': 'call_TJN5zTZWXac12m0so0FrKpOr', 'type': 'tool_call'}]\n"
|
| 107 |
-
]
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"name": "stderr",
|
| 111 |
-
"output_type": "stream",
|
| 112 |
-
"text": [
|
| 113 |
-
"d:\\ankelodon_multiagent_system\\src\\tools\\tools.py:228: LangChainDeprecationWarning: The class `TavilySearchResults` was deprecated in LangChain 0.3.25 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-tavily package and should be used instead. To use it run `pip install -U :class:`~langchain-tavily` and import as `from :class:`~langchain_tavily import TavilySearch``.\n",
|
| 114 |
-
" raw_results = TavilySearchResults(max_results=max_results).invoke(query)\n"
|
| 115 |
-
]
|
| 116 |
-
},
|
| 117 |
-
{
|
| 118 |
-
"name": "stdout",
|
| 119 |
-
"output_type": "stream",
|
| 120 |
-
"text": [
|
| 121 |
-
"\n",
|
| 122 |
-
"✅ ════════════════════\n",
|
| 123 |
-
"✅ PLAN COMPLETE \n",
|
| 124 |
-
"✅ ════════════════════\n",
|
| 125 |
-
"✅ All steps executed\n",
|
| 126 |
"=== GENERATING EXECUTION REPORT ===\n",
|
| 127 |
"Report generated - Confidence: high\n",
|
| 128 |
-
"Key findings:
|
| 129 |
-
"Data sources:
|
| 130 |
-
"query_summary='The
|
| 131 |
"=== ENHANCED ANSWER CRITIQUE ===\n",
|
| 132 |
-
"Quality Score:
|
| 133 |
"Complete: True\n",
|
| 134 |
"Accurate: True\n",
|
|
|
|
| 135 |
"=== REPLAN DECISION ===\n",
|
| 136 |
"Iteration: 1/10\n",
|
| 137 |
-
"Quality score:
|
| 138 |
"Needs replanning: False\n",
|
| 139 |
"Quality acceptable, ending execution\n"
|
| 140 |
]
|
| 141 |
}
|
| 142 |
],
|
| 143 |
"source": [
|
| 144 |
-
"query = \"
|
| 145 |
"result = graph.invoke({\"query\" : query, \"current_step\": 0, \"reasoning_done\": False, \"files\" : [], \"files_contents\" : {}, \"iteration_count\" : 0, \"max_iterations\" : 10, \"plan\" : None} , config = config)"
|
| 146 |
]
|
| 147 |
},
|
| 148 |
{
|
| 149 |
"cell_type": "code",
|
| 150 |
-
"execution_count":
|
| 151 |
"metadata": {},
|
| 152 |
"outputs": [
|
| 153 |
{
|
| 154 |
"name": "stdout",
|
| 155 |
"output_type": "stream",
|
| 156 |
"text": [
|
| 157 |
-
"FINAL ANSWER:
|
| 158 |
-
"\n",
|
| 159 |
-
"SUMMARY:\n",
|
| 160 |
-
"The user requested information about Nikita Miroshnichenko, a student from UNIL, and inquired about his working experience at EPFL.\n",
|
| 161 |
-
"\n",
|
| 162 |
-
"KEY FINDINGS:\n",
|
| 163 |
-
"• Nikita Miroshnichenko is a student at UNIL.\n",
|
| 164 |
-
"• He has been associated with EPFL, confirming his work experience there.\n",
|
| 165 |
-
"\n",
|
| 166 |
-
"SOURCES:\n",
|
| 167 |
-
"• https://topline.com/people/nikita-miroshnichenko-182776498\n",
|
| 168 |
-
"\n",
|
| 169 |
-
"LIMITATIONS:\n",
|
| 170 |
-
"• The information retrieved is based on available online sources, which may not be exhaustive or fully up-to-date.\n"
|
| 171 |
]
|
| 172 |
}
|
| 173 |
],
|
|
@@ -177,34 +91,29 @@
|
|
| 177 |
},
|
| 178 |
{
|
| 179 |
"cell_type": "code",
|
| 180 |
-
"execution_count":
|
| 181 |
"metadata": {},
|
| 182 |
"outputs": [
|
| 183 |
{
|
| 184 |
"data": {
|
| 185 |
"text/plain": [
|
| 186 |
-
"{'messages': [SystemMessage(content='You are a COMPLEXITY ASSESSOR for a multi-tool agent system.\\nYour job is to analyze user queries and determine their complexity level and processing requirements.\\n\\nCOMPLEXITY LEVELS:\\n1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use\\n - Examples: \"What is
|
| 187 |
-
" HumanMessage(content='Query:
|
| 188 |
-
"
|
| 189 |
-
"
|
| 190 |
-
"
|
| 191 |
-
"
|
| 192 |
-
"
|
| 193 |
-
"
|
| 194 |
-
" 'query': 'Find info about Nikita Miroshnichenko, its a student from UNIL, and write a short summary about him. Is it true that he has a working experience at EPFL?',\n",
|
| 195 |
-
" 'final_answer': 'FINAL ANSWER: Nikita Miroshnichenko is a student at UNIL and has confirmed working experience at EPFL.\\n\\nSUMMARY:\\nThe user requested information about Nikita Miroshnichenko, a student from UNIL, and inquired about his working experience at EPFL.\\n\\nKEY FINDINGS:\\n• Nikita Miroshnichenko is a student at UNIL.\\n• He has been associated with EPFL, confirming his work experience there.\\n\\nSOURCES:\\n• https://topline.com/people/nikita-miroshnichenko-182776498\\n\\nLIMITATIONS:\\n• The information retrieved is based on available online sources, which may not be exhaustive or fully up-to-date.',\n",
|
| 196 |
-
" 'plan': PlannerPlan(task_type='info', summary='I will perform a web search to gather information about Nikita Miroshnichenko, including his background as a student at UNIL and any working experience at EPFL.', assumptions=[], steps=[PlanStep(id='s1', goal='Search for information about Nikita Miroshnichenko to confirm his background and work experience.', tool='web_search', inputs='Nikita Miroshnichenko UNIL EPFL', expected_result='Find relevant information confirming his student status and any work experience at EPFL.', on_fail='replan')], answer_guidelines='Provide a concise summary based on the information found, including citations if applicable.'),\n",
|
| 197 |
-
" 'complexity_assessment': ComplexityLevel(level='complex', reasoning='This query involves multiple steps: first, gathering information about Nikita Miroshnichenko, which may require searching through various sources; second, verifying his affiliation with UNIL and any working experience at EPFL; and third, synthesizing this information into a coherent summary. The need to cross-reference information adds to the complexity, as it requires careful reasoning to ensure accuracy.', needs_planning=True, suggested_approach='Begin by searching for Nikita Miroshnichenko on academic and professional platforms to gather relevant information. Verify his student status at UNIL and check for any records of employment or internships at EPFL. Compile the findings into a concise summary.'),\n",
|
| 198 |
-
" 'current_step': 1,\n",
|
| 199 |
" 'reasoning_done': False,\n",
|
| 200 |
" 'files': [],\n",
|
| 201 |
-
" 'critique_feedback': CritiqueFeedback(quality_score=
|
| 202 |
" 'iteration_count': 1,\n",
|
| 203 |
" 'max_iterations': 10,\n",
|
| 204 |
-
" 'execution_report': ExecutionReport(query_summary='The
|
| 205 |
]
|
| 206 |
},
|
| 207 |
-
"execution_count":
|
| 208 |
"metadata": {},
|
| 209 |
"output_type": "execute_result"
|
| 210 |
}
|
|
@@ -212,6 +121,19 @@
|
|
| 212 |
"source": [
|
| 213 |
"result"
|
| 214 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
}
|
| 216 |
],
|
| 217 |
"metadata": {
|
|
|
|
| 16 |
],
|
| 17 |
"source": [
|
| 18 |
"from agent import build_workflow\n",
|
| 19 |
+
"from config import config\n",
|
| 20 |
+
"from tools.code_interpreter import safe_code_run"
|
| 21 |
]
|
| 22 |
},
|
| 23 |
{
|
|
|
|
| 44 |
"💡 ════════════════════\n",
|
| 45 |
" • files: none provided\n",
|
| 46 |
"=== COMPLEXITY ASSESSMENT ===\n",
|
| 47 |
+
"Complexity: simple\n",
|
| 48 |
+
"Needs planning: False\n",
|
| 49 |
+
"Reasoning: Initial state: 200 coins, all but 30 are face-up → 30 face-down (tails). He takes 30 coins; if x of those were face-down, the remaining pile has 30−x face-down. Flipping the 30-coin pile turns its face-down count into 30−x as well, so both piles have equal face-down coins. The larger pile was observed to have 14 face-down coins, so his pile also has 14 face-down coins. His reward is 2 gold coins per face-down coin in his pile: 14×2 = 28.\n",
|
| 50 |
+
"=== SIMPLE EXECUTION ===\n",
|
| 51 |
+
"Response generated for simple query.\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
"=== GENERATING EXECUTION REPORT ===\n",
|
| 53 |
"Report generated - Confidence: high\n",
|
| 54 |
+
"Key findings: 5\n",
|
| 55 |
+
"Data sources: 2\n",
|
| 56 |
+
"query_summary='Riddle: 200 coins where all but 30 are face-up (so 30 face-down). The adventurer removed 30 coins as his pile (unknown orientations), flipped each coin in that pile, then observed the larger pile contained 14 face-down coins. Determine whether he succeeded and how many coins he won (2 gold per face-down coin in his pile).' approach_used=\"Algebraic reasoning with simple variables: let x = number of face-down coins initially in the 30-coin pile removed. Use conservation of face-down coins to compute final counts after flipping, then compute reward = 2 * (final face-downs in adventurer's pile).\" tools_executed=[] key_findings=['Total coins = 200. Initially face-down coins = 30 (since all but 30 are face-up).', 'Let x = number of face-down coins among the 30 coins removed. After flipping those 30 coins, that pile has (30 - x) face-down coins.', 'The remaining (larger) pile has initial face-down coins = 30 - x, which matches (30 - x) after the flip, so both piles have equal face-down counts.', 'Given the larger pile was observed to have 14 face-down coins, 30 - x = 14 => x = 16.', \"Therefore the adventurer's pile also has 14 face-down coins after flipping, yielding a reward of 14 * 2 = 28 coins.\"] data_sources=['Problem statement provided in the query', 'Basic arithmetic/algebra reasoning'] assumptions_made=[\"Interpretation 'all but 30 are face-up' means exactly 30 coins are face-down initially.\", \"The adventurer's pile is the 30-coin pile he removed and flipped; the 'larger pile' refers to the remaining 170-coin pile.\", \"Reward is exactly 2 gold coins per face-down coin in the adventurer's pile, as stated.\"] confidence_level='high' limitations=['Solution depends on standard interpretation of the riddle wording; if alternative interpretations were intended, results could differ.', 'No external references were used; reasoning is self-contained.'] final_answer='28'\n",
|
| 57 |
"=== ENHANCED ANSWER CRITIQUE ===\n",
|
| 58 |
+
"Quality Score: 8/10\n",
|
| 59 |
"Complete: True\n",
|
| 60 |
"Accurate: True\n",
|
| 61 |
+
"Issues found: [\"Minor imprecise phrasing: the report mentions 'conservation of face-down coins' which is misleading — flipping changes the total number of face-down coins. The correct point is that after flipping the removed 30 coins, the removed pile ends up with (30 - x) face-down coins, which equals the remaining pile's unchanged count of (30 - x).\", 'No explicit, single-sentence statement answering the two parts of the query (Did he succeed? How many gold coins did he win?). The final numeric answer is present but the success statement is implicit.']\n",
|
| 62 |
"=== REPLAN DECISION ===\n",
|
| 63 |
"Iteration: 1/10\n",
|
| 64 |
+
"Quality score: 8\n",
|
| 65 |
"Needs replanning: False\n",
|
| 66 |
"Quality acceptable, ending execution\n"
|
| 67 |
]
|
| 68 |
}
|
| 69 |
],
|
| 70 |
"source": [
|
| 71 |
+
"query = \"Here's a fun riddle that I'd like you to try.\\n\\nAn adventurer exploring an ancient tomb came across a horde of gold coins, all neatly stacked in columns. As he reached to scoop them into his backpack, a mysterious voice filled the room. \\\"You have fallen for my trap adventurer,\\\" the voice began, and suddenly the doorway to the chamber was sealed by a heavy rolling disk of stone. The adventurer tried to move the stone disk but was unable to budge the heavy stone. Trapped, he was startled when the voice again spoke. \\n\\n\\\"If you solve my riddle, I will reward you with a portion of my riches, but if you are not clever, you will never leave this treasure chamber. Before you are 200 gold coins. I pose a challenge to you, adventurer. Within these stacks of coins, all but 30 are face-up. You must divide the coins into two piles, one is yours, and one is mine. You may place as many coins as you like in either pile. You may flip any coins over, but you may not balance any coins on their edges. For every face-down coin in your pile, you will be rewarded with two gold coins. But be warned, if both piles do not contain the same number of face-down coins, the door will remain sealed for all eternity!\\\"\\n\\nThe adventurer smiled, as this would be an easy task. All he had to do was flip over every coin so it was face down, and he would win the entire treasure! As he moved to the columns of coins, however, the light suddenly faded, and he was left in total darkness. The adventurer reached forward and picked up one of the coins, and was shocked when he realized that both sides felt almost the same. Without the light, he was unable to determine which side of the coin was heads and which side was tails. He carefully replaced the coin in its original orientation and tried to think of a way to solve the puzzle. Finally, out of desperation, the adventurer removed 30 coins to create his pile. 
He then carefully flipped over each coin in his pile, so its orientation was inverted from its original state.\\n\\n\\\"I've finished,\\\" he said, and the lights returned. Looking at the two piles, he noticed that the larger pile contained 14 face-down coins.\\n\\nWhat was the outcome for the adventurer? If he failed the challenge, please respond with \\\"The adventurer died.\\\" Otherwise, please provide the number of coins the adventurer won at the conclusion of the riddle. If the adventurer won any coins, provide your response as the number of coins, with no other text.\"\n",
|
| 72 |
"result = graph.invoke({\"query\" : query, \"current_step\": 0, \"reasoning_done\": False, \"files\" : [], \"files_contents\" : {}, \"iteration_count\" : 0, \"max_iterations\" : 10, \"plan\" : None} , config = config)"
|
| 73 |
]
|
| 74 |
},
|
| 75 |
{
|
| 76 |
"cell_type": "code",
|
| 77 |
+
"execution_count": 4,
|
| 78 |
"metadata": {},
|
| 79 |
"outputs": [
|
| 80 |
{
|
| 81 |
"name": "stdout",
|
| 82 |
"output_type": "stream",
|
| 83 |
"text": [
|
| 84 |
+
"FINAL ANSWER: 28\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
]
|
| 86 |
}
|
| 87 |
],
|
|
|
|
| 91 |
},
|
| 92 |
{
|
| 93 |
"cell_type": "code",
|
| 94 |
+
"execution_count": 5,
|
| 95 |
"metadata": {},
|
| 96 |
"outputs": [
|
| 97 |
{
|
| 98 |
"data": {
|
| 99 |
"text/plain": [
|
| 100 |
+
"{'messages': [SystemMessage(content='You are a COMPLEXITY ASSESSOR for a multi-tool agent system.\\nYour job is to analyze user queries and determine their complexity level and processing requirements.\\n\\nCOMPLEXITY LEVELS:\\n1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use\\n - Examples: \"What is photosynthesis?\", \"Define machine learning\", \"What\\'s the capital of France?\"\\n - NOTE: Simple math like \"2+2\" still requires calculator tool but counts as SIMPLE\\n\\n !ALSO: It can be a logical reasoning or explanation task that does not require tools.\\n \\n2. MODERATE: Questions requiring 2-4 tool calls or basic multi-step analysis\\n - Examples: \"Search for recent news about AI\", \"Analyze this CSV file for trends\", \"Calculate ROI from this data\"\\n - \"Compare two datasets\", \"Summarize multiple documents\"\\n \\n3. COMPLEX: Multi-step problems requiring planning, multiple tools, and sophisticated reasoning\\n - Examples: \"Research market trends and create investment strategy\", \"Analyze multiple data sources and predict outcomes\"\\n - \"Build comprehensive report from various inputs\", \"Multi-stage data processing with validation\"\\n\\nMOST OF THE LOGICAL TASKS ARE SIMPLE, UNLESS THEY REQUIRE TOOLS.\\n\\nASSESSMENT CRITERIA:\\n- Number of distinct steps likely needed (1 = Simple, 2-4 = Moderate, 5+ = Complex)\\n- Tool complexity and dependencies between steps\\n- Data processing requirements and validation needs\\n- Need for intermediate reasoning and synthesis\\n- Risk of failure without proper step-by-step planning\\n- Presence of calculations (automatically requires tool usage)\\n\\nSPECIAL CONSIDERATIONS:\\n- Any calculation/counting task requires tools (affects complexity assessment)\\n- File analysis tasks usually need multiple steps (load + analyze + calculate)\\n- Research tasks typically need search + fetch + synthesis steps\\n- Comparison tasks need separate analysis steps for each 
item being compared\\n\\nRULES:\\n- SIMPLE queries may bypass planning for non-calculation tasks\\n- MODERATE queries benefit from lightweight planning\\n- COMPLEX queries require full planning with fallbacks\\n- When in doubt, err toward higher complexity\\n- Calculation tasks are never truly \"simple\" due to mandatory tool usage\\n\\nAnalyze the query and respond with your assessment.', additional_kwargs={}, response_metadata={}, id='26e46b0a-44ea-4f15-be78-dd9e141ac21b'),\n",
|
| 101 |
+
" HumanMessage(content='Query: Here\\'s a fun riddle that I\\'d like you to try.\\n\\nAn adventurer exploring an ancient tomb came across a horde of gold coins, all neatly stacked in columns. As he reached to scoop them into his backpack, a mysterious voice filled the room. \"You have fallen for my trap adventurer,\" the voice began, and suddenly the doorway to the chamber was sealed by a heavy rolling disk of stone. The adventurer tried to move the stone disk but was unable to budge the heavy stone. Trapped, he was startled when the voice again spoke. \\n\\n\"If you solve my riddle, I will reward you with a portion of my riches, but if you are not clever, you will never leave this treasure chamber. Before you are 200 gold coins. I pose a challenge to you, adventurer. Within these stacks of coins, all but 30 are face-up. You must divide the coins into two piles, one is yours, and one is mine. You may place as many coins as you like in either pile. You may flip any coins over, but you may not balance any coins on their edges. For every face-down coin in your pile, you will be rewarded with two gold coins. But be warned, if both piles do not contain the same number of face-down coins, the door will remain sealed for all eternity!\"\\n\\nThe adventurer smiled, as this would be an easy task. All he had to do was flip over every coin so it was face down, and he would win the entire treasure! As he moved to the columns of coins, however, the light suddenly faded, and he was left in total darkness. The adventurer reached forward and picked up one of the coins, and was shocked when he realized that both sides felt almost the same. Without the light, he was unable to determine which side of the coin was heads and which side was tails. He carefully replaced the coin in its original orientation and tried to think of a way to solve the puzzle. Finally, out of desperation, the adventurer removed 30 coins to create his pile. 
He then carefully flipped over each coin in his pile, so its orientation was inverted from its original state.\\n\\n\"I\\'ve finished,\" he said, and the lights returned. Looking at the two piles, he noticed that the larger pile contained 14 face-down coins.\\n\\nWhat was the outcome for the adventurer? If he failed the challenge, please respond with \"The adventurer died.\" Otherwise, please provide the number of coins the adventurer won at the conclusion of the riddle. If the adventurer won any coins, provide your response as the number of coins, with no other text.', additional_kwargs={}, response_metadata={}, id='85bfac92-7cdb-48f0-b211-9593a6dfc851'),\n",
|
| 102 |
+
" AIMessage(content='28', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 970, 'prompt_tokens': 2706, 'total_tokens': 3676, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 960, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-5-mini-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CIbxZ2RCMWUu7YmfpqFUpa2eMG39g', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--fd1c06e8-0aa2-49db-8693-33ac60a1b382-0', usage_metadata={'input_tokens': 2706, 'output_tokens': 970, 'total_tokens': 3676, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 960}})],\n",
|
| 103 |
+
" 'query': 'Here\\'s a fun riddle that I\\'d like you to try.\\n\\nAn adventurer exploring an ancient tomb came across a horde of gold coins, all neatly stacked in columns. As he reached to scoop them into his backpack, a mysterious voice filled the room. \"You have fallen for my trap adventurer,\" the voice began, and suddenly the doorway to the chamber was sealed by a heavy rolling disk of stone. The adventurer tried to move the stone disk but was unable to budge the heavy stone. Trapped, he was startled when the voice again spoke. \\n\\n\"If you solve my riddle, I will reward you with a portion of my riches, but if you are not clever, you will never leave this treasure chamber. Before you are 200 gold coins. I pose a challenge to you, adventurer. Within these stacks of coins, all but 30 are face-up. You must divide the coins into two piles, one is yours, and one is mine. You may place as many coins as you like in either pile. You may flip any coins over, but you may not balance any coins on their edges. For every face-down coin in your pile, you will be rewarded with two gold coins. But be warned, if both piles do not contain the same number of face-down coins, the door will remain sealed for all eternity!\"\\n\\nThe adventurer smiled, as this would be an easy task. All he had to do was flip over every coin so it was face down, and he would win the entire treasure! As he moved to the columns of coins, however, the light suddenly faded, and he was left in total darkness. The adventurer reached forward and picked up one of the coins, and was shocked when he realized that both sides felt almost the same. Without the light, he was unable to determine which side of the coin was heads and which side was tails. He carefully replaced the coin in its original orientation and tried to think of a way to solve the puzzle. Finally, out of desperation, the adventurer removed 30 coins to create his pile. 
He then carefully flipped over each coin in his pile, so its orientation was inverted from its original state.\\n\\n\"I\\'ve finished,\" he said, and the lights returned. Looking at the two piles, he noticed that the larger pile contained 14 face-down coins.\\n\\nWhat was the outcome for the adventurer? If he failed the challenge, please respond with \"The adventurer died.\" Otherwise, please provide the number of coins the adventurer won at the conclusion of the riddle. If the adventurer won any coins, provide your response as the number of coins, with no other text.',\n",
|
| 104 |
+
" 'final_answer': 'FINAL ANSWER: 28',\n",
|
| 105 |
+
" 'plan': None,\n",
|
| 106 |
+
" 'complexity_assessment': ComplexityLevel(level='simple', reasoning='Initial state: 200 coins, all but 30 are face-up → 30 face-down (tails). He takes 30 coins; if x of those were face-down, the remaining pile has 30−x face-down. Flipping the 30-coin pile turns its face-down count into 30−x as well, so both piles have equal face-down coins. The larger pile was observed to have 14 face-down coins, so his pile also has 14 face-down coins. His reward is 2 gold coins per face-down coin in his pile: 14×2 = 28.', needs_planning=False, suggested_approach='Use simple counting: compute initial number of face-down coins (30), let x be face-down among the 30 taken, show flipping yields equal counts, then multiply the final face-down count in his pile by 2 to get the reward.'),\n",
|
| 107 |
+
" 'current_step': 0,\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
" 'reasoning_done': False,\n",
|
| 109 |
" 'files': [],\n",
|
| 110 |
+
" 'critique_feedback': CritiqueFeedback(quality_score=8, is_complete=True, is_accurate=True, missing_elements=[], errors_found=[\"Minor imprecise phrasing: the report mentions 'conservation of face-down coins' which is misleading — flipping changes the total number of face-down coins. The correct point is that after flipping the removed 30 coins, the removed pile ends up with (30 - x) face-down coins, which equals the remaining pile's unchanged count of (30 - x).\", 'No explicit, single-sentence statement answering the two parts of the query (Did he succeed? How many gold coins did he win?). The final numeric answer is present but the success statement is implicit.'], suggested_improvements=[\"Clarify the flipping logic and avoid the word 'conservation' for face-down coins; explicitly show how removed-pile face-down count transforms from x to (30 - x).\", \"Add an explicit conclusion sentence: e.g., 'Yes — he succeeded; his pile has 14 face-down coins after flipping, so he wins 14 * 2 = 28 gold.'\", 'Add a brief sanity check (optional): pick an example x (like x = 16) and show counts before and after flipping to demonstrate consistency.', 'If following strict tooling policies, note that no external tools were required for this simple algebraic reasoning. If automated-tool usage is mandatory in your environment, run a quick symbolic/numeric check with the chosen tool and cite it.'], needs_replanning=False, replan_instructions=None),\n",
|
| 111 |
" 'iteration_count': 1,\n",
|
| 112 |
" 'max_iterations': 10,\n",
|
| 113 |
+
" 'execution_report': ExecutionReport(query_summary='Riddle: 200 coins where all but 30 are face-up (so 30 face-down). The adventurer removed 30 coins as his pile (unknown orientations), flipped each coin in that pile, then observed the larger pile contained 14 face-down coins. Determine whether he succeeded and how many coins he won (2 gold per face-down coin in his pile).', approach_used=\"Algebraic reasoning with simple variables: let x = number of face-down coins initially in the 30-coin pile removed. Use conservation of face-down coins to compute final counts after flipping, then compute reward = 2 * (final face-downs in adventurer's pile).\", tools_executed=[], key_findings=['Total coins = 200. Initially face-down coins = 30 (since all but 30 are face-up).', 'Let x = number of face-down coins among the 30 coins removed. After flipping those 30 coins, that pile has (30 - x) face-down coins.', 'The remaining (larger) pile has initial face-down coins = 30 - x, which matches (30 - x) after the flip, so both piles have equal face-down counts.', 'Given the larger pile was observed to have 14 face-down coins, 30 - x = 14 => x = 16.', \"Therefore the adventurer's pile also has 14 face-down coins after flipping, yielding a reward of 14 * 2 = 28 coins.\"], data_sources=['Problem statement provided in the query', 'Basic arithmetic/algebra reasoning'], assumptions_made=[\"Interpretation 'all but 30 are face-up' means exactly 30 coins are face-down initially.\", \"The adventurer's pile is the 30-coin pile he removed and flipped; the 'larger pile' refers to the remaining 170-coin pile.\", \"Reward is exactly 2 gold coins per face-down coin in the adventurer's pile, as stated.\"], confidence_level='high', limitations=['Solution depends on standard interpretation of the riddle wording; if alternative interpretations were intended, results could differ.', 'No external references were used; reasoning is self-contained.'], final_answer='28')}"
|
| 114 |
]
|
| 115 |
},
|
| 116 |
+
"execution_count": 5,
|
| 117 |
"metadata": {},
|
| 118 |
"output_type": "execute_result"
|
| 119 |
}
|
|
|
|
| 121 |
"source": [
|
| 122 |
"result"
|
| 123 |
]
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"cell_type": "code",
|
| 127 |
+
"execution_count": 6,
|
| 128 |
+
"metadata": {},
|
| 129 |
+
"outputs": [],
|
| 130 |
+
"source": [
|
| 131 |
+
"#TO-DO\n",
|
| 132 |
+
"#1. Check routing with REPLANNER -> it may invent nonexistent tools\n",
|
| 133 |
+
"#2. Add crawling tool\n",
|
| 134 |
+
"#3. Enhance the description of the coder tool and state more clearly in the prompt how important it is to emit output via print(), return, or result/_\n",
|
| 135 |
+
"#4. Soften the critic (make its evaluation less harsh)"
|
| 136 |
+
]
|
| 137 |
}
|
| 138 |
],
|
| 139 |
"metadata": {
|