KaiserShultz committed on
Commit
758564e
·
1 Parent(s): 061585b

Updated versions of logging and some improvements of nodes configuration, adding gpt-5-mini

Browse files
data/images.jpeg ADDED
src/agent.py CHANGED
@@ -2,7 +2,7 @@
2
  from nodes import (query_input, complexity_assessor, planner, agent, simple_executor, critic_evaluator, replanner, enhanced_finalizer)
3
  from state import AgentState
4
  from langgraph.graph import StateGraph, END
5
- from nodes import should_continue, should_use_planning, should_replan
6
  from langgraph.checkpoint.memory import MemorySaver
7
  from config import DEBUGGING_TOOL_NODE
8
 
@@ -13,11 +13,13 @@ def build_workflow(checkpointer=None) -> StateGraph[AgentState]:
13
  builder.add_node("PLANNING", planner)
14
  builder.add_node("AGENT", agent)
15
  builder.add_node("TOOLS", DEBUGGING_TOOL_NODE)
 
16
  builder.add_node("FINALIZER", enhanced_finalizer)
17
  builder.add_node("SIMPLE_EXECUTOR", simple_executor)
18
  builder.add_node("CRITIC", critic_evaluator)
19
  builder.add_node("REPLANNER", replanner)
20
 
 
21
  builder.set_entry_point("INPUT")
22
  builder.add_edge("INPUT", "COMPLEXITY_ASSESSOR")
23
 
@@ -26,8 +28,13 @@ def build_workflow(checkpointer=None) -> StateGraph[AgentState]:
26
  should_use_planning,
27
  {"simple_executor": "SIMPLE_EXECUTOR", "planner": "PLANNING"},
28
  )
29
- builder.add_edge("SIMPLE_EXECUTOR", "FINALIZER")
30
-
 
 
 
 
 
31
 
32
  builder.add_edge("PLANNING", "AGENT")
33
  builder.add_conditional_edges(
 
2
  from nodes import (query_input, complexity_assessor, planner, agent, simple_executor, critic_evaluator, replanner, enhanced_finalizer)
3
  from state import AgentState
4
  from langgraph.graph import StateGraph, END
5
+ from nodes import should_continue, should_use_planning, should_replan, should_use_tools_simple_executor
6
  from langgraph.checkpoint.memory import MemorySaver
7
  from config import DEBUGGING_TOOL_NODE
8
 
 
13
  builder.add_node("PLANNING", planner)
14
  builder.add_node("AGENT", agent)
15
  builder.add_node("TOOLS", DEBUGGING_TOOL_NODE)
16
+ builder.add_node("TOOLS_SIMPLE", DEBUGGING_TOOL_NODE)
17
  builder.add_node("FINALIZER", enhanced_finalizer)
18
  builder.add_node("SIMPLE_EXECUTOR", simple_executor)
19
  builder.add_node("CRITIC", critic_evaluator)
20
  builder.add_node("REPLANNER", replanner)
21
 
22
+
23
  builder.set_entry_point("INPUT")
24
  builder.add_edge("INPUT", "COMPLEXITY_ASSESSOR")
25
 
 
28
  should_use_planning,
29
  {"simple_executor": "SIMPLE_EXECUTOR", "planner": "PLANNING"},
30
  )
31
+ builder.add_conditional_edges(
32
+ "SIMPLE_EXECUTOR",
33
+ should_use_tools_simple_executor,
34
+ {"tools": "TOOLS_SIMPLE", "final_answer": "FINALIZER"},
35
+ )
36
+
37
+ builder.add_edge("TOOLS_SIMPLE", "FINALIZER")
38
 
39
  builder.add_edge("PLANNING", "AGENT")
40
  builder.add_conditional_edges(
src/config.py CHANGED
@@ -11,85 +11,19 @@ config = {"configurable": {"thread_id": "1"}, "recursion_limit" : 50}
11
  TOOLS = [download_file_from_url, web_search,
12
  arxiv_search, wiki_search, add, subtract, multiply, divide,
13
  power, analyze_excel_file, analyze_csv_file, analyze_docx_file,
14
- analyze_pdf_file, analyze_txt_file, analyze_image_file,
15
  vision_qa_gemma, safe_code_run]
16
 
17
- class DebuggingToolNode(ToolNode):
18
- def __init__(self, tools):
19
- super().__init__(tools)
20
-
21
- def __call__(self, state):
22
- log_stage("TOOL NODE", subtitle="Dispatching tool calls", icon="🛠️")
23
-
24
- messages = state.get("messages", [])
25
- last_message = messages[-1] if messages else None
26
-
27
- if not last_message or not hasattr(last_message, "tool_calls"):
28
- log_stage("TOOL ERROR", subtitle="No tool calls found", icon="❌")
29
- return state
30
-
31
- tool_calls = last_message.tool_calls
32
- log_stage("TOOL DISPATCH", subtitle=f"Executing {len(tool_calls)} tool(s)", icon="🔧")
33
- for call in tool_calls:
34
- print(f" - {call['name']}: {call['args']}")
35
-
36
- try:
37
- # Выполняем инструменты
38
- result = super().__call__(state)
39
-
40
- # Проверяем результаты
41
- new_messages = result.get("messages", [])
42
- tool_messages = [msg for msg in new_messages[len(messages):]
43
- if isinstance(msg, ToolMessage)]
44
-
45
- log_stage("TOOL RESULTS", subtitle=f"Got {len(tool_messages)} responses", icon="📨")
46
-
47
- # Логируем результаты
48
- for msg in tool_messages:
49
- content_preview = msg.content[:100] + "..." if len(msg.content) > 100 else msg.content
50
- print(f" - {msg.name}: {content_preview}")
51
-
52
- # Автоматически добавляем сигнал завершения шага после успешного выполнения инструментов
53
- if tool_messages:
54
- current_step = state.get("current_step", 0)
55
- plan = state.get("plan")
56
-
57
- if plan and current_step < len(plan.steps):
58
- step_completion_msg = AIMessage(
59
- content=f"STEP COMPLETE: Successfully executed {len(tool_messages)} tool(s) for step {plan.steps[current_step].id}"
60
- )
61
- result["messages"] = result["messages"] + [step_completion_msg]
62
- log_stage("STEP COMPLETION", subtitle=f"Step {current_step + 1} marked complete", icon="✅")
63
-
64
- # Продвигаем к следующему шагу
65
- result["current_step"] = current_step + 1
66
- result["reasoning_done"] = False # Сброс для следующего шага
67
-
68
- return result
69
-
70
- except Exception as exc:
71
- log_stage("TOOL ERROR", subtitle=f"{type(exc).__name__}: {exc}", icon="❌")
72
- print(f"Full error: {repr(exc)}")
73
-
74
- # Создаем ToolMessage для каждого failed tool call
75
- error_messages = []
76
- for call in tool_calls:
77
- error_msg = ToolMessage(
78
- content=f"ERROR: {type(exc).__name__}: {exc}",
79
- tool_call_id=call.get("id") or "unknown_call",
80
- name=call.get("name", "unknown_tool"),
81
- )
82
- error_messages.append(error_msg)
83
-
84
- return {"messages": messages + error_messages}
85
-
86
 
87
  TOOL_NODE = ToolNode(TOOLS)
88
- DEBUGGING_TOOL_NODE = DebuggingToolNode(TOOLS)
89
-
90
- llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.25)
91
- llm_with_tools = llm.bind_tools(TOOLS)
92
- planner_llm = llm.with_structured_output(PlannerPlan)
 
 
 
93
 
94
 
95
 
 
11
  TOOLS = [download_file_from_url, web_search,
12
  arxiv_search, wiki_search, add, subtract, multiply, divide,
13
  power, analyze_excel_file, analyze_csv_file, analyze_docx_file,
14
+ analyze_pdf_file, analyze_txt_file,
15
  vision_qa_gemma, safe_code_run]
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  TOOL_NODE = ToolNode(TOOLS)
19
+ DEBUGGING_TOOL_NODE = TOOL_NODE
20
+
21
+ llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7) #default 0.25
22
+ llm_deterministic = ChatOpenAI(model="gpt-5-mini", temperature=0.05)
23
+ planner_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1).with_structured_output(PlannerPlan)
24
+ llm_criticist = ChatOpenAI(model="gpt-5-mini", temperature=0.3)
25
+ llm_with_tools = llm_deterministic.bind_tools(TOOLS)
26
+ llm_reasoning = ChatOpenAI(model="gpt-5-mini", temperature=0.3)
27
 
28
 
29
 
src/nodes.py CHANGED
@@ -13,7 +13,7 @@ from prompts.prompts import (
13
  CRITIC_PROMPT,
14
  )
15
 
16
- from config import llm, TOOLS, planner_llm, llm_with_tools
17
  from schemas import PlannerPlan, ComplexityLevel, CritiqueFeedback, ExecutionReport, ToolExecution
18
 
19
  from utils.utils import (
@@ -113,6 +113,8 @@ def agent(state: AgentState) -> AgentState:
113
  current_step = state.get("current_step", 0)
114
  reasoning_done = state.get("reasoning_done", False)
115
  plan: Optional[PlannerPlan] = state.get("plan")
 
 
116
  #steps = state["plan"].steps
117
 
118
  """
@@ -175,6 +177,15 @@ def agent(state: AgentState) -> AgentState:
175
  file_contents = state.get("file_contents", {})
176
  file_list = ", ".join(file_contents.keys()) if file_contents else "none provided"
177
 
 
 
 
 
 
 
 
 
 
178
  system_message = SystemMessage(
179
  content=SYSTEM_EXECUTOR_PROMPT.format(
180
  plan_summary=plan.summary,
@@ -197,7 +208,7 @@ def agent(state: AgentState) -> AgentState:
197
  )
198
  )
199
  stack = [system_message] + state["messages"] + [instruction]
200
- reasoning_response = llm.invoke(stack)
201
  log_stage("REASONING", subtitle=f"{current_step_info.id}", icon="🧠")
202
  print(reasoning_response.content)
203
 
@@ -224,12 +235,13 @@ def agent(state: AgentState) -> AgentState:
224
  Explain what you need to do and why, then end your response.
225
 
226
  REASONING IS IMPERATIVE BEFORE ANY TOOL CALLS.
 
227
  """
228
 
229
  sys_msg = SystemMessage(content = reasoning_prompt)
230
  stack = [sys_msg] + state["messages"]
231
 
232
- step = llm.invoke(stack)
233
  print("=== REASONING STEP ===")
234
  print(step.content)
235
 
@@ -254,6 +266,7 @@ def agent(state: AgentState) -> AgentState:
254
  # Используем модель С инструментами для выполнения
255
  step = llm_with_tools.invoke(stack)
256
  print("=== TOOL EXECUTION ===")
 
257
  print(f"Tool calls: {step.tool_calls}")
258
 
259
  return {
@@ -265,10 +278,17 @@ def agent(state: AgentState) -> AgentState:
265
  def should_continue(state : AgentState) -> bool:
266
 
267
  last_message = state["messages"][-1]
 
268
  reasoning_done = state.get("reasoning_done", False)
269
  plan = state.get("plan", None)
270
  current_step = state.get("current_step", 0)
271
 
 
 
 
 
 
 
272
  #ПРИОРИТЕТ 1: Если есть tool_calls - выполняем их
273
  if hasattr(last_message, "tool_calls") and last_message.tool_calls:
274
  return "tools"
@@ -368,7 +388,7 @@ def enhanced_finalizer(state: AgentState) -> AgentState:
368
  Be thorough but concise. This report will be evaluated by a critic for quality assurance.
369
  """
370
 
371
- report_llm = llm.with_structured_output(ExecutionReport)
372
 
373
  execution_report = report_llm.invoke([
374
  SystemMessage(content=report_generator_prompt),
@@ -406,12 +426,26 @@ def simple_executor(state: AgentState) -> AgentState:
406
  SystemMessage(content=simple_prompt),
407
  HumanMessage(content=state['query'])
408
  ])
 
 
409
 
410
  return {
411
  "messages": state["messages"] + [response],
412
  "final_answer": response.content
413
  }
414
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
  def should_use_planning(state: AgentState) -> str:
417
  """Route based on complexity assessment."""
@@ -428,7 +462,7 @@ def critic_evaluator(state: AgentState) -> AgentState:
428
  print("=== ENHANCED ANSWER CRITIQUE ===")
429
 
430
  report = state.get("execution_report")
431
- critic_llm = llm.with_structured_output(CritiqueFeedback)
432
 
433
  critique_prompt = CRITIC_PROMPT.format(
434
  query=report.query_summary,
@@ -495,7 +529,7 @@ def should_replan(state: AgentState) -> str:
495
 
496
  return "end"
497
 
498
- def replanner(state: AgentState) -> AgentState:
499
  """Create a revised plan based on critic feedback."""
500
  print("=== REPLANNING ===")
501
 
@@ -541,7 +575,11 @@ def replanner(state: AgentState) -> AgentState:
541
  essential_messages.append(msg)
542
 
543
  print(f"Cleaned message history: {len(current_messages)} -> {len(essential_messages)} messages")
544
-
 
 
 
 
545
  return {
546
  "plan": revised_plan,
547
  "current_step": 0,
@@ -550,12 +588,102 @@ def replanner(state: AgentState) -> AgentState:
550
  "execution_report": None
551
  }
552
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
 
554
  def complexity_assessor(state: AgentState) -> AgentState:
555
  """Assess query complexity and determine if planning is needed."""
556
  print("=== COMPLEXITY ASSESSMENT ===")
557
 
558
- complexity_llm = llm.with_structured_output(ComplexityLevel)
559
 
560
  assessment_message = [
561
  SystemMessage(content=COMPLEXITY_ASSESSOR_PROMPT.strip()),
@@ -571,4 +699,4 @@ def complexity_assessor(state: AgentState) -> AgentState:
571
  return {
572
  "complexity_assessment": assessment,
573
  "messages": state["messages"] + assessment_message
574
- }
 
13
  CRITIC_PROMPT,
14
  )
15
 
16
+ from config import llm_reasoning, TOOLS, planner_llm, llm_with_tools, llm_deterministic, llm_criticist
17
  from schemas import PlannerPlan, ComplexityLevel, CritiqueFeedback, ExecutionReport, ToolExecution
18
 
19
  from utils.utils import (
 
113
  current_step = state.get("current_step", 0)
114
  reasoning_done = state.get("reasoning_done", False)
115
  plan: Optional[PlannerPlan] = state.get("plan")
116
+ previous_tool_results = state.get("previous_tool_results", {})
117
+
118
  #steps = state["plan"].steps
119
 
120
  """
 
177
  file_contents = state.get("file_contents", {})
178
  file_list = ", ".join(file_contents.keys()) if file_contents else "none provided"
179
 
180
+ # Добавляем информацию о предыдущих результатах (UPDATE)
181
+ previous_results_context = ""
182
+ if previous_tool_results:
183
+ previous_results_context = f"\n\nPREVIOUS CALCULATION RESULTS:\n"
184
+ for tool_call_id, result in previous_tool_results.items():
185
+ previous_results_context += f"- {tool_call_id}: {result}\n"
186
+ previous_results_context += "You can reference these results in your calculations.\n"
187
+
188
+
189
  system_message = SystemMessage(
190
  content=SYSTEM_EXECUTOR_PROMPT.format(
191
  plan_summary=plan.summary,
 
208
  )
209
  )
210
  stack = [system_message] + state["messages"] + [instruction]
211
+ reasoning_response = llm_reasoning.invoke(stack) #default llm
212
  log_stage("REASONING", subtitle=f"{current_step_info.id}", icon="🧠")
213
  print(reasoning_response.content)
214
 
 
235
  Explain what you need to do and why, then end your response.
236
 
237
  REASONING IS IMPERATIVE BEFORE ANY TOOL CALLS.
238
+ FOR MORE COMPLEX UNDERSTANDING -> USE RESULTS AND INSIGHTS FROM PREVIOUS STEPS.
239
  """
240
 
241
  sys_msg = SystemMessage(content = reasoning_prompt)
242
  stack = [sys_msg] + state["messages"]
243
 
244
+ step = llm_reasoning.invoke(stack)
245
  print("=== REASONING STEP ===")
246
  print(step.content)
247
 
 
266
  # Используем модель С инструментами для выполнения
267
  step = llm_with_tools.invoke(stack)
268
  print("=== TOOL EXECUTION ===")
269
+ print(step)
270
  print(f"Tool calls: {step.tool_calls}")
271
 
272
  return {
 
278
  def should_continue(state : AgentState) -> bool:
279
 
280
  last_message = state["messages"][-1]
281
+ print(f"=== LAST MESSAGE WAS: {last_message} ===")
282
  reasoning_done = state.get("reasoning_done", False)
283
  plan = state.get("plan", None)
284
  current_step = state.get("current_step", 0)
285
 
286
+ print(f"=== SHOULD_CONTINUE DEBUG ===")
287
+ print(f"Current step: {current_step}")
288
+ print(f"Plan steps: {len(plan.steps) if plan else 0}")
289
+ print(f"Reasoning done: {reasoning_done}")
290
+ print(f"Last message type: {type(last_message).__name__}")
291
+
292
  #ПРИОРИТЕТ 1: Если есть tool_calls - выполняем их
293
  if hasattr(last_message, "tool_calls") and last_message.tool_calls:
294
  return "tools"
 
388
  Be thorough but concise. This report will be evaluated by a critic for quality assurance.
389
  """
390
 
391
+ report_llm = llm_deterministic.with_structured_output(ExecutionReport)
392
 
393
  execution_report = report_llm.invoke([
394
  SystemMessage(content=report_generator_prompt),
 
426
  SystemMessage(content=simple_prompt),
427
  HumanMessage(content=state['query'])
428
  ])
429
+
430
+ print("Response generated for simple query.")
431
 
432
  return {
433
  "messages": state["messages"] + [response],
434
  "final_answer": response.content
435
  }
436
 
437
+ def should_use_tools_simple_executor(state: AgentState) -> str:
438
+ """Decide whether to use tools or answer directly in simple executor."""
439
+ last_message = state["messages"][-1]
440
+
441
+ if hasattr(last_message, "tool_calls") and last_message.tool_calls:
442
+ return "tools"
443
+
444
+ if hasattr(last_message, "content") and "<FINAL_ANSWER>" in last_message.content:
445
+ return "final_answer"
446
+
447
+ return "final_answer"
448
+
449
 
450
  def should_use_planning(state: AgentState) -> str:
451
  """Route based on complexity assessment."""
 
462
  print("=== ENHANCED ANSWER CRITIQUE ===")
463
 
464
  report = state.get("execution_report")
465
+ critic_llm = llm_criticist.with_structured_output(CritiqueFeedback)
466
 
467
  critique_prompt = CRITIC_PROMPT.format(
468
  query=report.query_summary,
 
529
 
530
  return "end"
531
 
532
+ def replanner_old(state: AgentState) -> AgentState:
533
  """Create a revised plan based on critic feedback."""
534
  print("=== REPLANNING ===")
535
 
 
575
  essential_messages.append(msg)
576
 
577
  print(f"Cleaned message history: {len(current_messages)} -> {len(essential_messages)} messages")
578
+ print("=== ESSENTIAL MESSAGES ===")
579
+ print(essential_messages)
580
+ print("=== AGENT STATE ===")
581
+ print(state["messages"])
582
+
583
  return {
584
  "plan": revised_plan,
585
  "current_step": 0,
 
588
  "execution_report": None
589
  }
590
 
591
+ def replanner(state: AgentState) -> AgentState:
592
+ """Create a revised plan based on critic feedback."""
593
+ print("=== REPLANNING ===")
594
+
595
+ critique = state["critique_feedback"]
596
+ previous_plan = state.get("plan")
597
+
598
+ replan_prompt = f"""
599
+ {SYSTEM_PROMPT_PLANNER}
600
+
601
+ REPLANNING CONTEXT:
602
+ Original Query: {state['query']}
603
+ Previous Plan: {previous_plan if previous_plan else {}}
604
+
605
+ CRITIC FEEDBACK:
606
+ - Quality Score: {critique.quality_score}/10
607
+ - Issues Found: {critique.errors_found}
608
+ - Missing Elements: {critique.missing_elements}
609
+ - Improvement Suggestions: {critique.suggested_improvements}
610
+ - Specific Instructions: {critique.replan_instructions}
611
+
612
+ Create a REVISED plan that addresses these issues. Focus on fixing the identified problems.
613
+ """
614
+
615
+ revised_plan = planner_llm.invoke([
616
+ SystemMessage(content=replan_prompt),
617
+ HumanMessage(content="Create a revised plan based on the feedback.")
618
+ ])
619
+
620
+ print("Plan revised based on critic feedback")
621
+
622
+ # ИСПРАВЛЕНИЕ: Сохраняем важные результаты инструментов
623
+ current_messages = state.get("messages", [])
624
+
625
+ # Находим полезные результаты инструментов
626
+ preserved_messages = []
627
+ tool_results = {}
628
+
629
+ for i, msg in enumerate(current_messages):
630
+ # Сохраняем системные сообщения и пользовательские запросы
631
+ if isinstance(msg, (SystemMessage, HumanMessage)):
632
+ # Фильтруем только исходные запросы, не промпты планировщика
633
+ if (isinstance(msg, HumanMessage) or
634
+ ("complexity" in msg.content.lower() and "assessor" in msg.content.lower())):
635
+ preserved_messages.append(msg)
636
+
637
+ # Сохраняем успешные результаты инструментов
638
+ elif isinstance(msg, ToolMessage) and msg.content and msg.content.strip():
639
+ # Проверяем, что это полезный результат
640
+ try:
641
+ # Если результат можно преобразовать в число - это вычисление
642
+ float(msg.content.strip())
643
+ preserved_messages.append(msg)
644
+ tool_results[msg.tool_call_id] = msg.content
645
+
646
+ # Также нужно сохранить соответствующий AIMessage с tool_call
647
+ for j in range(i-1, -1, -1):
648
+ if (isinstance(current_messages[j], AIMessage) and
649
+ hasattr(current_messages[j], 'tool_calls') and
650
+ current_messages[j].tool_calls):
651
+ for tool_call in current_messages[j].tool_calls:
652
+ if tool_call['id'] == msg.tool_call_id:
653
+ if current_messages[j] not in preserved_messages:
654
+ preserved_messages.insert(-1, current_messages[j])
655
+ break
656
+ break
657
+ except (ValueError, AttributeError):
658
+ # Если не число, но содержательный результат, тоже сохраняем
659
+ if len(msg.content.strip()) > 1: # Минимальная длина для сохранения
660
+ preserved_messages.append(msg)
661
+
662
+ print(f"Preserved {len(tool_results)} tool results")
663
+ print(f"Cleaned message history: {len(current_messages)} -> {len(preserved_messages)} messages")
664
+
665
+ # Добавляем контекст о доступных результатах
666
+ if tool_results:
667
+ context_msg = HumanMessage(
668
+ content=f"Previous calculation results available: {tool_results}"
669
+ )
670
+ preserved_messages.append(context_msg)
671
+
672
+ return {
673
+ "plan": revised_plan,
674
+ "current_step": 0,
675
+ "reasoning_done": False,
676
+ "messages": preserved_messages,
677
+ "execution_report": None,
678
+ # Сохраняем важную информацию о предыдущих вычислениях
679
+ "previous_tool_results": tool_results
680
+ }
681
 
682
  def complexity_assessor(state: AgentState) -> AgentState:
683
  """Assess query complexity and determine if planning is needed."""
684
  print("=== COMPLEXITY ASSESSMENT ===")
685
 
686
+ complexity_llm = llm_deterministic.with_structured_output(ComplexityLevel)
687
 
688
  assessment_message = [
689
  SystemMessage(content=COMPLEXITY_ASSESSOR_PROMPT.strip()),
 
699
  return {
700
  "complexity_assessment": assessment,
701
  "messages": state["messages"] + assessment_message
702
+ }
src/prompts/prompts.py CHANGED
@@ -15,38 +15,39 @@ TASK BREAKDOWN EXAMPLES:
15
  Example 1: "Analyze sales data and calculate growth rates"
16
  {{
17
  "steps": [
18
- {{"id": "s1", "goal": "Load and examine the sales data file", "tool": "file_reader"}},
19
- {{"id": "s2", "goal": "Calculate monthly growth rates using Python", "tool": "code_executor"}},
20
- {{"id": "s3", "goal": "Generate summary statistics and trends", "tool": "code_executor"}}
21
  ]
22
- }}
23
 
24
  Example 2: "Research recent AI developments and summarize key trends"
25
  {{
26
  "steps": [
27
  {{"id": "s1", "goal": "Search for recent AI news and developments", "tool": "web_search"}},
28
- {{"id": "s2", "goal": "Fetch detailed content from top 3-5 relevant articles", "tool": "web_fetch"}},
29
- {{"id": "s3", "goal": "Analyze and synthesize key trends from gathered information", "tool": null}}
 
30
  ]
31
  }}
32
 
33
  Example 3: "Compare performance metrics between two datasets"
34
  {{
35
  "steps": [
36
- {{"id": "s1", "goal": "Load first dataset and examine structure", "tool": "file_reader"}},
37
- {{"id": "s2", "goal": "Load second dataset and examine structure", "tool": "file_reader"}},
38
- {{"id": "s3", "goal": "Calculate statistical metrics for both datasets using code", "tool": "code_executor"}},
39
- {{"id": "s4", "goal": "Perform statistical comparison and significance testing", "tool": "code_executor"}}
40
  ]
41
  }}
42
 
43
  Example 4: "Create a budget analysis from expense data"
44
  {{
45
  "steps": [
46
- {{"id": "s1", "goal": "Load expense data and validate format", "tool": "file_reader"}},
47
- {{"id": "s2", "goal": "Calculate category totals and percentages using code", "tool": "code_executor"}},
48
- {{"id": "s3", "goal": "Generate budget variance analysis and projections", "tool": "code_executor"}},
49
- {{"id": "s4", "goal": "Create visualization of spending patterns", "tool": "code_executor"}}
50
  ]
51
  }}
52
 
@@ -69,7 +70,7 @@ Return a single JSON object with this structure:
69
  }}
70
 
71
  Ground rules:
72
- - Prefer 2-4 steps for most tasks. Single steps only for truly trivial queries.
73
  - Break down complex tasks into logical components - don't try to solve everything at once
74
  - Use tool names exactly as listed. If no tool is needed, set "tool": null.
75
  - Never assume files or URLs exist—plan to search/download before analysing.
@@ -94,10 +95,10 @@ Available tools: {tool_catalogue}
94
  Known local files: {file_list}
95
 
96
  CRITICAL COMPUTATION RULE: You MUST use tools for ANY numerical calculation, counting, or mathematical operation. This includes:
97
- - Simple arithmetic (use calculator tool)
98
- - Data analysis and statistics (use code execution)
99
- - Counting items, rows, or occurrences (use code)
100
- - Percentage calculations (use calculator/code)
101
  - Any mathematical transformation or formula application
102
 
103
  NEVER perform manual calculations or provide estimated numbers.
@@ -121,6 +122,8 @@ COMPLEXITY LEVELS:
121
  1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use
122
  - Examples: "What is photosynthesis?", "Define machine learning", "What's the capital of France?"
123
  - NOTE: Simple math like "2+2" still requires calculator tool but counts as SIMPLE
 
 
124
 
125
  2. MODERATE: Questions requiring 2-4 tool calls or basic multi-step analysis
126
  - Examples: "Search for recent news about AI", "Analyze this CSV file for trends", "Calculate ROI from this data"
@@ -130,6 +133,8 @@ COMPLEXITY LEVELS:
130
  - Examples: "Research market trends and create investment strategy", "Analyze multiple data sources and predict outcomes"
131
  - "Build comprehensive report from various inputs", "Multi-stage data processing with validation"
132
 
 
 
133
  ASSESSMENT CRITERIA:
134
  - Number of distinct steps likely needed (1 = Simple, 2-4 = Moderate, 5+ = Complex)
135
  - Tool complexity and dependencies between steps
 
15
  Example 1: "Analyze sales data and calculate growth rates"
16
  {{
17
  "steps": [
18
+ {{"id": "s1", "goal": "Load and examine the sales data file", "tool": "analyze_(csv, docx, pdf etc.)_file"}},
19
+ {{"id": "s2", "goal": "Calculate monthly growth rates using Python", "tool": "safe_code_run"}},
20
+ {{"id": "s3", "goal": "Generate summary statistics and trends", "tool": "safe_code_run"}}
21
  ]
22
+ }}
23
 
24
  Example 2: "Research recent AI developments and summarize key trends"
25
  {{
26
  "steps": [
27
  {{"id": "s1", "goal": "Search for recent AI news and developments", "tool": "web_search"}},
28
+ {{"id": "s2", "goal": "Download relevant articles", "tool": "download_file_from_url"}},
29
+ {{"id": "s3", "goal": "Extract and organize key information from articles", "tool": "analyze_(csv, docx, pdf etc.)_file"}},
30
+ {{"id": "s4", "goal": "Analyze and synthesize key trends from gathered information", "tool": null}}
31
  ]
32
  }}
33
 
34
  Example 3: "Compare performance metrics between two datasets"
35
  {{
36
  "steps": [
37
+ {{"id": "s1", "goal": "Load first dataset and examine structure", "tool": "analyze_(csv, docx, pdf etc.)_file"}},
38
+ {{"id": "s2", "goal": "Load second dataset and examine structure", "tool": "analyze_(csv, docx, pdf etc.)_file"}},
39
+ {{"id": "s3", "goal": "Calculate statistical metrics for both datasets using code", "tool": "safe_code_run"}},
40
+ {{"id": "s4", "goal": "Perform statistical comparison and significance testing", "tool": "safe_code_run"}}
41
  ]
42
  }}
43
 
44
  Example 4: "Create a budget analysis from expense data"
45
  {{
46
  "steps": [
47
+ {{"id": "s1", "goal": "Load expense data and validate format", "tool": "analyze_(csv, docx, pdf etc.)_file"}},
48
+ {{"id": "s2", "goal": "Calculate category totals and percentages using code", "tool": "safe_code_run"}},
49
+ {{"id": "s3", "goal": "Generate budget variance analysis and projections", "tool": "safe_code_run"}},
50
+ {{"id": "s4", "goal": "Create visualization of spending patterns", "tool": "safe_code_run"}}
51
  ]
52
  }}
53
 
 
70
  }}
71
 
72
  Ground rules:
73
+ - Prefer 2-4 steps for most tasks. Single steps only for truly trivial queries. Calculation tasks must use tools always.
74
  - Break down complex tasks into logical components - don't try to solve everything at once
75
  - Use tool names exactly as listed. If no tool is needed, set "tool": null.
76
  - Never assume files or URLs exist—plan to search/download before analysing.
 
95
  Known local files: {file_list}
96
 
97
  CRITICAL COMPUTATION RULE: You MUST use tools for ANY numerical calculation, counting, or mathematical operation. This includes:
98
+ - Simple arithmetic (use tools add, subtract, multiply, divide, power)
99
+ - Data analysis and statistics (use safe_code_run)
100
+ - Counting items, rows, or occurrences (use safe_code_run)
101
+ - Percentage calculations (use add, subtract, multiply, divide, power/safe_code_run)
102
  - Any mathematical transformation or formula application
103
 
104
  NEVER perform manual calculations or provide estimated numbers.
 
122
  1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use
123
  - Examples: "What is photosynthesis?", "Define machine learning", "What's the capital of France?"
124
  - NOTE: Simple math like "2+2" still requires calculator tool but counts as SIMPLE
125
+
126
+ !ALSO: It can be a logical reasoning or explanation task that does not require tools.
127
 
128
  2. MODERATE: Questions requiring 2-4 tool calls or basic multi-step analysis
129
  - Examples: "Search for recent news about AI", "Analyze this CSV file for trends", "Calculate ROI from this data"
 
133
  - Examples: "Research market trends and create investment strategy", "Analyze multiple data sources and predict outcomes"
134
  - "Build comprehensive report from various inputs", "Multi-stage data processing with validation"
135
 
136
+ MOST OF THE LOGICAL TASKS ARE SIMPLE, UNLESS THEY REQUIRE TOOLS.
137
+
138
  ASSESSMENT CRITERIA:
139
  - Number of distinct steps likely needed (1 = Simple, 2-4 = Moderate, 5+ = Complex)
140
  - Tool complexity and dependencies between steps
src/state.py CHANGED
@@ -19,4 +19,5 @@ class AgentState(MessagesState):
19
  iteration_count :int
20
  max_iterations: int
21
  execution_report : ExecutionReport
 
22
 
 
19
  iteration_count :int
20
  max_iterations: int
21
  execution_report : ExecutionReport
22
+ previous_tool_results: Dict[str, str] # НОВОЕ ПОЛЕ для сохранения результатов
23
 
src/tools/code_interpreter.py CHANGED
@@ -91,7 +91,7 @@ def _child_exec(payload: Dict[str, Any], queue: mp.Queue):
91
  safe_names = [
92
  "abs","all","any","bool","dict","float","int","len","list","max","min",
93
  "range","str","sum","print","enumerate","zip","map","filter","sorted",
94
- "reversed","complex","pow","divmod"
95
  ]
96
  safe_builtins = {n: getattr(builtins, n) for n in safe_names}
97
 
 
91
  safe_names = [
92
  "abs","all","any","bool","dict","float","int","len","list","max","min",
93
  "range","str","sum","print","enumerate","zip","map","filter","sorted",
94
+ "reversed","complex","pow","divmod", "round", "next", "set", "tuple", "type", "isinstance", "issubclass",
95
  ]
96
  safe_builtins = {n: getattr(builtins, n) for n in safe_names}
97
 
src/tools/tools.py CHANGED
@@ -101,7 +101,7 @@ def preprocess_files(files: List[str]) -> Dict[str, Dict[str, Any]]:
101
  info["suggested_tool"] = "analyze_txt_file"
102
  elif file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']:
103
  info["type"] = "image"
104
- info["suggested_tool"] = "if its about image itself : analyze_image_file, if its aboutrt content or qa : vision_qa_gemma ONLY"
105
  else:
106
  info["type"] = "unknown"
107
  info["suggested_tool"] = "analyze_txt_file (fallback)"
 
101
  info["suggested_tool"] = "analyze_txt_file"
102
  elif file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']:
103
  info["type"] = "image"
104
+ info["suggested_tool"] = "vision_qa_gemma"
105
  else:
106
  info["type"] = "unknown"
107
  info["suggested_tool"] = "analyze_txt_file (fallback)"
src/tools/web_crawler.py ADDED
File without changes
src/utils/utils.py CHANGED
@@ -155,4 +155,10 @@ def complexity_assessor(state: AgentState) -> AgentState:
155
  return {
156
  "complexity_assessment": assessment,
157
  "messages": state["messages"] + assessment_message
158
- }
 
 
 
 
 
 
 
155
  return {
156
  "complexity_assessment": assessment,
157
  "messages": state["messages"] + assessment_message
158
+ }
159
+
160
+
161
+ def trim(s: str, max_len: int = 10_000) -> str:
162
+ if s and len(s) > max_len:
163
+ return s[:max_len] + "... [truncated]"
164
+ return s
src/workflow_test.ipynb CHANGED
@@ -16,7 +16,8 @@
16
  ],
17
  "source": [
18
  "from agent import build_workflow\n",
19
- "from config import config"
 
20
  ]
21
  },
22
  {
@@ -43,131 +44,44 @@
43
  "💡 ════════════════════\n",
44
  " • files: none provided\n",
45
  "=== COMPLEXITY ASSESSMENT ===\n",
46
- "Complexity: complex\n",
47
- "Needs planning: True\n",
48
- "Reasoning: This query involves multiple steps: first, gathering information about Nikita Miroshnichenko, which may require searching through various sources; second, verifying his affiliation with UNIL and any working experience at EPFL; and third, synthesizing this information into a coherent summary. The need to cross-reference information adds to the complexity, as it requires careful reasoning to ensure accuracy.\n",
49
- "\n",
50
- "🧭 ════════════════════\n",
51
- "🧭 PLANNING \n",
52
- "🧭 ════════════════════\n",
53
- "\n",
54
- "🧭 ════════════════════\n",
55
- "🧭 PLANNER OUTPUT \n",
56
- "🧭 ════════════════════\n",
57
- "Task type: info\n",
58
- "Summary: I will perform a web search to gather information about Nikita Miroshnichenko, including his background as a student at UNIL and any working experience at EPFL.\n",
59
- "Steps:\n",
60
- " s1 → Search for information about Nikita Miroshnichenko to confirm his background and work experience.\n",
61
- " tool: web_search\n",
62
- " inputs: Nikita Miroshnichenko UNIL EPFL\n",
63
- " expected: Find relevant information confirming his student status and any work experience at EPFL.\n",
64
- " on_fail: replan\n",
65
- "Answer guidelines: Provide a concise summary based on the information found, including citations if applicable.\n",
66
- "\n",
67
- "🤖 ════════════════════\n",
68
- "🤖 EXECUTION \n",
69
- "🤖 ════════════════════\n",
70
- "🤖 Step 1/1: Search for information about Nikita Miroshnichenko to confirm his background and work experience.\n",
71
- " • step_id: s1\n",
72
- " • tool: web_search\n",
73
- " • expected: Find relevant information confirming his student status and any work experience at EPFL.\n",
74
- "\n",
75
- "🧠 ════════════════════\n",
76
- "🧠 REASONING \n",
77
- "🧠 ════════════════════\n",
78
- "🧠 s1\n",
79
- "<REASONING> The query requires gathering information about Nikita Miroshnichenko, specifically his background as a student at UNIL and any work experience at EPFL. This involves performing a web search to find relevant details about him, which will help in writing a short summary. The first step will be to use the web_search tool to collect this information. The expected outcome is to obtain sufficient data to confirm his educational background and work experience, which will then allow for the creation of a summary. Since this is a research task that requires external information, it is classified as a moderate complexity task. </REASONING>\n",
80
- "=== REASONING STEP ===\n",
81
- "{\n",
82
- " \"task_type\": \"info\",\n",
83
- " \"summary\": \"The plan involves searching for information about Nikita Miroshnichenko to confirm his background as a student at UNIL and any work experience at EPFL.\",\n",
84
- " \"assumptions\": [\"Nikita Miroshnichenko is a student at UNIL\", \"There may be publicly available information regarding his work experience at EPFL\"],\n",
85
- " \"steps\": [\n",
86
- " {\n",
87
- " \"id\": \"s1\",\n",
88
- " \"goal\": \"Search for information about Nikita Miroshnichenko to confirm his background and work experience.\",\n",
89
- " \"tool\": \"web_search\",\n",
90
- " \"inputs\": \"Nikita Miroshnichenko UNIL EPFL\",\n",
91
- " \"expected_result\": \"Find relevant information confirming his student status and any work experience at EPFL.\",\n",
92
- " \"on_fail\": \"replan\"\n",
93
- " }\n",
94
- " ],\n",
95
- " \"answer_guidelines\": \"Provide a summary of the findings, including citations for any sources used.\"\n",
96
- "}\n",
97
- "\n",
98
- "🤖 ════════════════════\n",
99
- "🤖 EXECUTION \n",
100
- "🤖 ════════════════════\n",
101
- "🤖 Step 1/1: Search for information about Nikita Miroshnichenko to confirm his background and work experience.\n",
102
- " • step_id: s1\n",
103
- " • tool: web_search\n",
104
- " • expected: Find relevant information confirming his student status and any work experience at EPFL.\n",
105
- "=== TOOL EXECUTION ===\n",
106
- "Tool calls: [{'name': 'web_search', 'args': {'query': 'Nikita Miroshnichenko UNIL EPFL'}, 'id': 'call_TJN5zTZWXac12m0so0FrKpOr', 'type': 'tool_call'}]\n"
107
- ]
108
- },
109
- {
110
- "name": "stderr",
111
- "output_type": "stream",
112
- "text": [
113
- "d:\\ankelodon_multiagent_system\\src\\tools\\tools.py:228: LangChainDeprecationWarning: The class `TavilySearchResults` was deprecated in LangChain 0.3.25 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-tavily package and should be used instead. To use it run `pip install -U :class:`~langchain-tavily` and import as `from :class:`~langchain_tavily import TavilySearch``.\n",
114
- " raw_results = TavilySearchResults(max_results=max_results).invoke(query)\n"
115
- ]
116
- },
117
- {
118
- "name": "stdout",
119
- "output_type": "stream",
120
- "text": [
121
- "\n",
122
- "✅ ════════════════════\n",
123
- "✅ PLAN COMPLETE \n",
124
- "✅ ════════════════════\n",
125
- "✅ All steps executed\n",
126
  "=== GENERATING EXECUTION REPORT ===\n",
127
  "Report generated - Confidence: high\n",
128
- "Key findings: 2\n",
129
- "Data sources: 1\n",
130
- "query_summary='The user requested information about Nikita Miroshnichenko, a student from UNIL, and inquired about his working experience at EPFL.' approach_used=\"A web search was conducted to gather relevant information regarding Nikita Miroshnichenko's background as a student at UNIL and any work experience he may have at EPFL.\" tools_executed=[ToolExecution(tool_name='web_search', arguments=\"{'query': 'Nikita Miroshnichenko UNIL EPFL'}\", call_id='call_TJN5zTZWXac12m0so0FrKpOr')] key_findings=['Nikita Miroshnichenko is a student at UNIL.', 'He has been associated with EPFL, confirming his work experience there.'] data_sources=['https://topline.com/people/nikita-miroshnichenko-182776498'] assumptions_made=[] confidence_level='high' limitations=['The information retrieved is based on available online sources, which may not be exhaustive or fully up-to-date.'] final_answer='Nikita Miroshnichenko is a student at UNIL and has confirmed working experience at EPFL.'\n",
131
  "=== ENHANCED ANSWER CRITIQUE ===\n",
132
- "Quality Score: 6/10\n",
133
  "Complete: True\n",
134
  "Accurate: True\n",
 
135
  "=== REPLAN DECISION ===\n",
136
  "Iteration: 1/10\n",
137
- "Quality score: 6\n",
138
  "Needs replanning: False\n",
139
  "Quality acceptable, ending execution\n"
140
  ]
141
  }
142
  ],
143
  "source": [
144
- "query = \"Find info about Nikita Miroshnichenko, its a student from UNIL, and write a short summary about him. Is it true that he has a working experience at EPFL?\"\n",
145
  "result = graph.invoke({\"query\" : query, \"current_step\": 0, \"reasoning_done\": False, \"files\" : [], \"files_contents\" : {}, \"iteration_count\" : 0, \"max_iterations\" : 10, \"plan\" : None} , config = config)"
146
  ]
147
  },
148
  {
149
  "cell_type": "code",
150
- "execution_count": 7,
151
  "metadata": {},
152
  "outputs": [
153
  {
154
  "name": "stdout",
155
  "output_type": "stream",
156
  "text": [
157
- "FINAL ANSWER: Nikita Miroshnichenko is a student at UNIL and has confirmed working experience at EPFL.\n",
158
- "\n",
159
- "SUMMARY:\n",
160
- "The user requested information about Nikita Miroshnichenko, a student from UNIL, and inquired about his working experience at EPFL.\n",
161
- "\n",
162
- "KEY FINDINGS:\n",
163
- "• Nikita Miroshnichenko is a student at UNIL.\n",
164
- "• He has been associated with EPFL, confirming his work experience there.\n",
165
- "\n",
166
- "SOURCES:\n",
167
- "• https://topline.com/people/nikita-miroshnichenko-182776498\n",
168
- "\n",
169
- "LIMITATIONS:\n",
170
- "• The information retrieved is based on available online sources, which may not be exhaustive or fully up-to-date.\n"
171
  ]
172
  }
173
  ],
@@ -177,34 +91,29 @@
177
  },
178
  {
179
  "cell_type": "code",
180
- "execution_count": 8,
181
  "metadata": {},
182
  "outputs": [
183
  {
184
  "data": {
185
  "text/plain": [
186
- "{'messages': [SystemMessage(content='You are a COMPLEXITY ASSESSOR for a multi-tool agent system.\\nYour job is to analyze user queries and determine their complexity level and processing requirements.\\n\\nCOMPLEXITY LEVELS:\\n1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use\\n - Examples: \"What is 2+2?\", \"Define photosynthesis\", \"What\\'s the capital of France?\"\\n \\n2. MODERATE: Questions requiring 1-3 tool calls or basic analysis\\n - Examples: \"Search for recent news about AI\", \"Analyze this CSV file\", \"What\\'s the weather tomorrow?\"\\n \\n3. COMPLEX: Multi-step problems requiring planning, multiple tools, or sophisticated reasoning\\n - Examples: Research tasks, multi-file analysis, calculations with dependencies, creative projects\\n\\nASSESSMENT CRITERIA:\\n- Number of steps likely needed\\n- Tool complexity and dependencies\\n- Data processing requirements\\n- Need for intermediate reasoning\\n- Risk of failure without proper planning\\n\\nRULES:\\n- SIMPLE queries bypass planning entirely\\n- MODERATE queries may use lightweight planning\\n- COMPLEX queries require full planning with fallbacks\\n- When in doubt, err toward higher complexity\\n\\nAnalyze the query and respond with your assessment.', additional_kwargs={}, response_metadata={}, id='11b7e36b-63f4-4dab-b911-19a122ded253'),\n",
187
- " HumanMessage(content='Query: Find info about Nikita Miroshnichenko, its a student from UNIL, and write a short summary about him. Is it true that he has a working experience at EPFL?', additional_kwargs={}, response_metadata={}, id='33ed5c6b-c4af-4080-8531-afa04709ae79'),\n",
188
- " SystemMessage(content='You are the planner of a multi-tool agent. Build a short, realistic plan that the executor can follow.\\n\\nAvailable tools: add, analyze_csv_file, analyze_docx_file, analyze_excel_file, analyze_image_file, analyze_pdf_file, analyze_txt_file, arxiv_search, divide, download_file_from_url, multiply, power, safe_code_run, subtract, vision_qa_gemma, web_search, wiki_search\\nKnown local files: none provided\\nAdditional context: None\\n\\nReturn a single JSON object with this structure:\\n{\\n \"task_type\": \"info|calc|table|doc_qa|image_qa|multi_hop\",\\n \"summary\": \"One sentence on the chosen approach\",\\n \"assumptions\": [\"optional clarifications\"],\\n \"steps\": [\\n {\\n \"id\": \"s1\",\\n \"goal\": \"Action to take and why it helps\",\\n \"tool\": \"tool_name_or_null\",\\n \"inputs\": \"Key parameters or references (files, URLs, prior steps)\",\\n \"expected_result\": \"How you know the step succeeded\",\\n \"on_fail\": \"replan|stop\"\\n }\\n ],\\n \"answer_guidelines\": \"Reminders for the final response (citations, format, units, etc.)\"\\n}\\n\\nGround rules:\\n- Prefer 1–3 steps. Only add a step if it changes the outcome. For complex tasks, up to 5-7 steps is okay.\\n- Use tool names exactly as listed. If no tool is needed, set \"tool\": null.\\n- Never assume files or URLs exist—plan to search/download before analysing.\\n- Skip download steps when the required file is already provided.\\n- Ensure later steps only depend on results created by earlier steps.\\n- If the query is trivial, return an empty steps list and explain the direct answer in \"summary\".', additional_kwargs={}, response_metadata={}, id='a2291408-86bd-4a5a-ad97-88ba7ca26f8a'),\n",
189
- " HumanMessage(content='Find info about Nikita Miroshnichenko, its a student from UNIL, and write a short summary about him. Is it true that he has a working experience at EPFL?', additional_kwargs={}, response_metadata={}, id='02bdaf14-e770-4903-8bd2-ec4bce7070a0'),\n",
190
- " AIMessage(content='{\\n \"task_type\": \"info\",\\n \"summary\": \"The plan involves searching for information about Nikita Miroshnichenko to confirm his background as a student at UNIL and any work experience at EPFL.\",\\n \"assumptions\": [\"Nikita Miroshnichenko is a student at UNIL\", \"There may be publicly available information regarding his work experience at EPFL\"],\\n \"steps\": [\\n {\\n \"id\": \"s1\",\\n \"goal\": \"Search for information about Nikita Miroshnichenko to confirm his background and work experience.\",\\n \"tool\": \"web_search\",\\n \"inputs\": \"Nikita Miroshnichenko UNIL EPFL\",\\n \"expected_result\": \"Find relevant information confirming his student status and any work experience at EPFL.\",\\n \"on_fail\": \"replan\"\\n }\\n ],\\n \"answer_guidelines\": \"Provide a summary of the findings, including citations for any sources used.\"\\n}', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 206, 'prompt_tokens': 1088, 'total_tokens': 1294, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_51db84afab', 'id': 'chatcmpl-CHEdmytbl8Nei62qo9Ti4se6AOc5O', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--bbddcd4a-f737-4033-b789-adcf6c3bacb5-0', usage_metadata={'input_tokens': 1088, 'output_tokens': 206, 'total_tokens': 1294, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}),\n",
191
- " AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_TJN5zTZWXac12m0so0FrKpOr', 'function': {'arguments': '{\"query\":\"Nikita Miroshnichenko UNIL EPFL\"}', 'name': 'web_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 2672, 'total_tokens': 2697, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 1920}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_560af6e559', 'id': 'chatcmpl-CHEdrHAP7W9cDsebqQu7iAIVGl2OF', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--9d9e841f-b6eb-4df1-8695-3cb5d35e83b7-0', tool_calls=[{'name': 'web_search', 'args': {'query': 'Nikita Miroshnichenko UNIL EPFL'}, 'id': 'call_TJN5zTZWXac12m0so0FrKpOr', 'type': 'tool_call'}], usage_metadata={'input_tokens': 2672, 'output_tokens': 25, 'total_tokens': 2697, 'input_token_details': {'audio': 0, 'cache_read': 1920}, 'output_token_details': {'audio': 0, 'reasoning': 0}}),\n",
192
- " ToolMessage(content='{\"query\": \"Nikita Miroshnichenko UNIL EPFL\", \"provider\": \"tavily\", \"items\": [{\"url\": \"https://topline.com/people/nikita-miroshnichenko-182776498\", \"title\": \"Nikita Miroshnichenko - Topline\", \"snippet\": \"###### The International Festival of Engineering Science and Technology in Tunisia I-FEST\\\\u00b2 (Silver medal)\\\\n\\\\n###### Molecular phylogenetic&bioinformatics course\\\\n\\\\n###### Molecular biology&genetics course\\\\n\\\\n###### Python programming course\\\\n\\\\n#### Experience\\\\n\\\\n##### University of Lausanne - UNIL\\\\n\\\\n##### Student Laboratory Assistant\\\\n\\\\n##### EPFL (\\\\u00c9cole polytechnique f\\\\u00e9d\\\\u00e9rale de Lausanne)\\\\n\\\\n##### Laboratory A\\\\u2026\", \"published\": null, \"source\": \"topline.com\"}, {\"url\": \"https://cdn5.f-cdn.com/files/download/223843566/Nikita_Miroshnichenko_technical_CV_2024.pdf\", \"title\": \"[PDF] Nikita Miroshnichenko\", \"snippet\": \"development in this field. If possible, I will be glad to apply my work experience in biotech/neurotech-oriented projects. NOVEMBER 2023 \\\\u2013 PRESENT Research Assistant. Computational Biology and Cancer Genomics Group Department of Computational Biology | UNIL. Lausanne \\\\u2022 Processing raw single-cell RNAseq data, building bioinformatics pipelines for oncology and genomic research. \\\\u2022 Development of a P\\\\u2026\", \"published\": null, \"source\": \"f-cdn.com\"}, {\"url\": \"https://ch.linkedin.com/in/nikita-miroshnichenko\", \"title\": \"Nikita Miroshnichenko \\\\u2013 AI Engineer | Biotech Enthusiast - LinkedIn\", \"snippet\": \"Nikita Miroshnichenko. AI Engineer | Biotech Enthusiast | Researcher | Entrepreneur. 
University of Lausanne - UNIL Taras Shevchenko National University of Kyiv\", \"published\": null, \"source\": \"linkedin.com\"}, {\"url\": \"https://www.transfermarkt.com/nikita-miroshnichenko/profil/spieler/561855\", \"title\": \"Nikita Miroshnichenko - Player profile 25/26 - Transfermarkt\", \"snippet\": \"Transfermarkt\\\\nUEFA Champions League\\\\nPremier League\\\\nLaLiga\\\\nSerie A\\\\nBundesliga\\\\nLigue 1\\\\n\\\\nNikita Miroshnichenko\\\\n\\\\n# #18 Nikita Miroshnichenko\\\\n\\\\nShinnik Yaroslavl\\\\n1.Division1.Division\\\\nRussiaSecond Tier\\\\nNikita Miroshnichenko\\\\n\\\\nfnl.pro\\\\n\\\\n+\\\\nRussia \\\\nSalavat, ... \\\\nRussia Russia\\\\nRussiaRussia U17\\\\n\\\\nLast update: 02.06.2025\\\\n\\\\n## Player data\\\\n\\\\nRussia\\\\nRussia Russia\\\\nShinnik Yaroslavl\\\\n\\\\n## Stats of Nikita Miroshnichenko\\\\n\\\\u2026\", \"published\": null, \"source\": \"transfermarkt.com\"}, {\"url\": \"http://arxiv.org/list/physics.optics/2019-12?skip=125&show=2000\", \"title\": \"Optics Dec 2019 - arXiv\", \"snippet\": \"Cornell University\\\\narxiv logo\\\\n\\\\nHelp | Advanced Search\\\\n\\\\narXiv logo\\\\nCornell University Logo\\\\n\\\\n## quick links\\\\n\\\\n# Optics\\\\n\\\\n## Authors and titles for December 2019\\\\n\\\\narXiv Operational Status \\\\nGet status notifications via\\\\nemail\\\\nor slack\", \"published\": null, \"source\": \"arxiv.org\"}]}', name='web_search', id='cd84b5bb-27ed-495b-942a-70de16013c44', tool_call_id='call_TJN5zTZWXac12m0so0FrKpOr'),\n",
193
- " AIMessage(content='All plan steps completed. <FINAL_ANSWER>', additional_kwargs={}, response_metadata={}, id='3463b080-aa10-4906-bc0c-4d6e49bb8d8a')],\n",
194
- " 'query': 'Find info about Nikita Miroshnichenko, its a student from UNIL, and write a short summary about him. Is it true that he has a working experience at EPFL?',\n",
195
- " 'final_answer': 'FINAL ANSWER: Nikita Miroshnichenko is a student at UNIL and has confirmed working experience at EPFL.\\n\\nSUMMARY:\\nThe user requested information about Nikita Miroshnichenko, a student from UNIL, and inquired about his working experience at EPFL.\\n\\nKEY FINDINGS:\\n• Nikita Miroshnichenko is a student at UNIL.\\n• He has been associated with EPFL, confirming his work experience there.\\n\\nSOURCES:\\n• https://topline.com/people/nikita-miroshnichenko-182776498\\n\\nLIMITATIONS:\\n• The information retrieved is based on available online sources, which may not be exhaustive or fully up-to-date.',\n",
196
- " 'plan': PlannerPlan(task_type='info', summary='I will perform a web search to gather information about Nikita Miroshnichenko, including his background as a student at UNIL and any working experience at EPFL.', assumptions=[], steps=[PlanStep(id='s1', goal='Search for information about Nikita Miroshnichenko to confirm his background and work experience.', tool='web_search', inputs='Nikita Miroshnichenko UNIL EPFL', expected_result='Find relevant information confirming his student status and any work experience at EPFL.', on_fail='replan')], answer_guidelines='Provide a concise summary based on the information found, including citations if applicable.'),\n",
197
- " 'complexity_assessment': ComplexityLevel(level='complex', reasoning='This query involves multiple steps: first, gathering information about Nikita Miroshnichenko, which may require searching through various sources; second, verifying his affiliation with UNIL and any working experience at EPFL; and third, synthesizing this information into a coherent summary. The need to cross-reference information adds to the complexity, as it requires careful reasoning to ensure accuracy.', needs_planning=True, suggested_approach='Begin by searching for Nikita Miroshnichenko on academic and professional platforms to gather relevant information. Verify his student status at UNIL and check for any records of employment or internships at EPFL. Compile the findings into a concise summary.'),\n",
198
- " 'current_step': 1,\n",
199
  " 'reasoning_done': False,\n",
200
  " 'files': [],\n",
201
- " 'critique_feedback': CritiqueFeedback(quality_score=6, is_complete=True, is_accurate=True, missing_elements=['Details about the specific role or position held by Nikita Miroshnichenko at EPFL', 'Information on the duration of his work experience at EPFL', 'Any notable projects or contributions made during his time at EPFL'], errors_found=[], suggested_improvements=[\"Include more specific details about Nikita's role at EPFL to provide a clearer picture of his experience.\", 'Add information about the duration of his work experience to contextualize his involvement.', 'Mention any projects or contributions he made during his time at EPFL to enhance the depth of the report.'], needs_replanning=False, replan_instructions=None),\n",
202
  " 'iteration_count': 1,\n",
203
  " 'max_iterations': 10,\n",
204
- " 'execution_report': ExecutionReport(query_summary='The user requested information about Nikita Miroshnichenko, a student from UNIL, and inquired about his working experience at EPFL.', approach_used=\"A web search was conducted to gather relevant information regarding Nikita Miroshnichenko's background as a student at UNIL and any work experience he may have at EPFL.\", tools_executed=[ToolExecution(tool_name='web_search', arguments=\"{'query': 'Nikita Miroshnichenko UNIL EPFL'}\", call_id='call_TJN5zTZWXac12m0so0FrKpOr')], key_findings=['Nikita Miroshnichenko is a student at UNIL.', 'He has been associated with EPFL, confirming his work experience there.'], data_sources=['https://topline.com/people/nikita-miroshnichenko-182776498'], assumptions_made=[], confidence_level='high', limitations=['The information retrieved is based on available online sources, which may not be exhaustive or fully up-to-date.'], final_answer='Nikita Miroshnichenko is a student at UNIL and has confirmed working experience at EPFL.')}"
205
  ]
206
  },
207
- "execution_count": 8,
208
  "metadata": {},
209
  "output_type": "execute_result"
210
  }
@@ -212,6 +121,19 @@
212
  "source": [
213
  "result"
214
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  }
216
  ],
217
  "metadata": {
 
16
  ],
17
  "source": [
18
  "from agent import build_workflow\n",
19
+ "from config import config\n",
20
+ "from tools.code_interpreter import safe_code_run"
21
  ]
22
  },
23
  {
 
44
  "💡 ════════════════════\n",
45
  " • files: none provided\n",
46
  "=== COMPLEXITY ASSESSMENT ===\n",
47
+ "Complexity: simple\n",
48
+ "Needs planning: False\n",
49
+ "Reasoning: Initial state: 200 coins, all but 30 are face-up 30 face-down (tails). He takes 30 coins; if x of those were face-down, the remaining pile has 30−x face-down. Flipping the 30-coin pile turns its face-down count into 30−x as well, so both piles have equal face-down coins. The larger pile was observed to have 14 face-down coins, so his pile also has 14 face-down coins. His reward is 2 gold coins per face-down coin in his pile: 14×2 = 28.\n",
50
+ "=== SIMPLE EXECUTION ===\n",
51
+ "Response generated for simple query.\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  "=== GENERATING EXECUTION REPORT ===\n",
53
  "Report generated - Confidence: high\n",
54
+ "Key findings: 5\n",
55
+ "Data sources: 2\n",
56
+ "query_summary='Riddle: 200 coins where all but 30 are face-up (so 30 face-down). The adventurer removed 30 coins as his pile (unknown orientations), flipped each coin in that pile, then observed the larger pile contained 14 face-down coins. Determine whether he succeeded and how many coins he won (2 gold per face-down coin in his pile).' approach_used=\"Algebraic reasoning with simple variables: let x = number of face-down coins initially in the 30-coin pile removed. Use conservation of face-down coins to compute final counts after flipping, then compute reward = 2 * (final face-downs in adventurer's pile).\" tools_executed=[] key_findings=['Total coins = 200. Initially face-down coins = 30 (since all but 30 are face-up).', 'Let x = number of face-down coins among the 30 coins removed. After flipping those 30 coins, that pile has (30 - x) face-down coins.', 'The remaining (larger) pile has initial face-down coins = 30 - x, which matches (30 - x) after the flip, so both piles have equal face-down counts.', 'Given the larger pile was observed to have 14 face-down coins, 30 - x = 14 => x = 16.', \"Therefore the adventurer's pile also has 14 face-down coins after flipping, yielding a reward of 14 * 2 = 28 coins.\"] data_sources=['Problem statement provided in the query', 'Basic arithmetic/algebra reasoning'] assumptions_made=[\"Interpretation 'all but 30 are face-up' means exactly 30 coins are face-down initially.\", \"The adventurer's pile is the 30-coin pile he removed and flipped; the 'larger pile' refers to the remaining 170-coin pile.\", \"Reward is exactly 2 gold coins per face-down coin in the adventurer's pile, as stated.\"] confidence_level='high' limitations=['Solution depends on standard interpretation of the riddle wording; if alternative interpretations were intended, results could differ.', 'No external references were used; reasoning is self-contained.'] final_answer='28'\n",
57
  "=== ENHANCED ANSWER CRITIQUE ===\n",
58
+ "Quality Score: 8/10\n",
59
  "Complete: True\n",
60
  "Accurate: True\n",
61
+ "Issues found: [\"Minor imprecise phrasing: the report mentions 'conservation of face-down coins' which is misleading — flipping changes the total number of face-down coins. The correct point is that after flipping the removed 30 coins, the removed pile ends up with (30 - x) face-down coins, which equals the remaining pile's unchanged count of (30 - x).\", 'No explicit, single-sentence statement answering the two parts of the query (Did he succeed? How many gold coins did he win?). The final numeric answer is present but the success statement is implicit.']\n",
62
  "=== REPLAN DECISION ===\n",
63
  "Iteration: 1/10\n",
64
+ "Quality score: 8\n",
65
  "Needs replanning: False\n",
66
  "Quality acceptable, ending execution\n"
67
  ]
68
  }
69
  ],
70
  "source": [
71
+ "query = \"Here's a fun riddle that I'd like you to try.\\n\\nAn adventurer exploring an ancient tomb came across a horde of gold coins, all neatly stacked in columns. As he reached to scoop them into his backpack, a mysterious voice filled the room. \\\"You have fallen for my trap adventurer,\\\" the voice began, and suddenly the doorway to the chamber was sealed by a heavy rolling disk of stone. The adventurer tried to move the stone disk but was unable to budge the heavy stone. Trapped, he was startled when the voice again spoke. \\n\\n\\\"If you solve my riddle, I will reward you with a portion of my riches, but if you are not clever, you will never leave this treasure chamber. Before you are 200 gold coins. I pose a challenge to you, adventurer. Within these stacks of coins, all but 30 are face-up. You must divide the coins into two piles, one is yours, and one is mine. You may place as many coins as you like in either pile. You may flip any coins over, but you may not balance any coins on their edges. For every face-down coin in your pile, you will be rewarded with two gold coins. But be warned, if both piles do not contain the same number of face-down coins, the door will remain sealed for all eternity!\\\"\\n\\nThe adventurer smiled, as this would be an easy task. All he had to do was flip over every coin so it was face down, and he would win the entire treasure! As he moved to the columns of coins, however, the light suddenly faded, and he was left in total darkness. The adventurer reached forward and picked up one of the coins, and was shocked when he realized that both sides felt almost the same. Without the light, he was unable to determine which side of the coin was heads and which side was tails. He carefully replaced the coin in its original orientation and tried to think of a way to solve the puzzle. Finally, out of desperation, the adventurer removed 30 coins to create his pile. 
He then carefully flipped over each coin in his pile, so its orientation was inverted from its original state.\\n\\n\\\"I've finished,\\\" he said, and the lights returned. Looking at the two piles, he noticed that the larger pile contained 14 face-down coins.\\n\\nWhat was the outcome for the adventurer? If he failed the challenge, please respond with \\\"The adventurer died.\\\" Otherwise, please provide the number of coins the adventurer won at the conclusion of the riddle. If the adventurer won any coins, provide your response as the number of coins, with no other text.\"\n",
72
  "result = graph.invoke({\"query\" : query, \"current_step\": 0, \"reasoning_done\": False, \"files\" : [], \"files_contents\" : {}, \"iteration_count\" : 0, \"max_iterations\" : 10, \"plan\" : None} , config = config)"
73
  ]
74
  },
75
  {
76
  "cell_type": "code",
77
+ "execution_count": 4,
78
  "metadata": {},
79
  "outputs": [
80
  {
81
  "name": "stdout",
82
  "output_type": "stream",
83
  "text": [
84
+ "FINAL ANSWER: 28\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  ]
86
  }
87
  ],
 
91
  },
92
  {
93
  "cell_type": "code",
94
+ "execution_count": 5,
95
  "metadata": {},
96
  "outputs": [
97
  {
98
  "data": {
99
  "text/plain": [
100
+ "{'messages': [SystemMessage(content='You are a COMPLEXITY ASSESSOR for a multi-tool agent system.\\nYour job is to analyze user queries and determine their complexity level and processing requirements.\\n\\nCOMPLEXITY LEVELS:\\n1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use\\n - Examples: \"What is photosynthesis?\", \"Define machine learning\", \"What\\'s the capital of France?\"\\n - NOTE: Simple math like \"2+2\" still requires calculator tool but counts as SIMPLE\\n\\n !ALSO: It can be a logical reasoning or explanation task that does not require tools.\\n \\n2. MODERATE: Questions requiring 2-4 tool calls or basic multi-step analysis\\n - Examples: \"Search for recent news about AI\", \"Analyze this CSV file for trends\", \"Calculate ROI from this data\"\\n - \"Compare two datasets\", \"Summarize multiple documents\"\\n \\n3. COMPLEX: Multi-step problems requiring planning, multiple tools, and sophisticated reasoning\\n - Examples: \"Research market trends and create investment strategy\", \"Analyze multiple data sources and predict outcomes\"\\n - \"Build comprehensive report from various inputs\", \"Multi-stage data processing with validation\"\\n\\nMOST OF THE LOGICAL TASKS ARE SIMPLE, UNLESS THEY REQUIRE TOOLS.\\n\\nASSESSMENT CRITERIA:\\n- Number of distinct steps likely needed (1 = Simple, 2-4 = Moderate, 5+ = Complex)\\n- Tool complexity and dependencies between steps\\n- Data processing requirements and validation needs\\n- Need for intermediate reasoning and synthesis\\n- Risk of failure without proper step-by-step planning\\n- Presence of calculations (automatically requires tool usage)\\n\\nSPECIAL CONSIDERATIONS:\\n- Any calculation/counting task requires tools (affects complexity assessment)\\n- File analysis tasks usually need multiple steps (load + analyze + calculate)\\n- Research tasks typically need search + fetch + synthesis steps\\n- Comparison tasks need separate analysis steps for 
each item being compared\\n\\nRULES:\\n- SIMPLE queries may bypass planning for non-calculation tasks\\n- MODERATE queries benefit from lightweight planning\\n- COMPLEX queries require full planning with fallbacks\\n- When in doubt, err toward higher complexity\\n- Calculation tasks are never truly \"simple\" due to mandatory tool usage\\n\\nAnalyze the query and respond with your assessment.', additional_kwargs={}, response_metadata={}, id='26e46b0a-44ea-4f15-be78-dd9e141ac21b'),\n",
101
+ " HumanMessage(content='Query: Here\\'s a fun riddle that I\\'d like you to try.\\n\\nAn adventurer exploring an ancient tomb came across a horde of gold coins, all neatly stacked in columns. As he reached to scoop them into his backpack, a mysterious voice filled the room. \"You have fallen for my trap adventurer,\" the voice began, and suddenly the doorway to the chamber was sealed by a heavy rolling disk of stone. The adventurer tried to move the stone disk but was unable to budge the heavy stone. Trapped, he was startled when the voice again spoke. \\n\\n\"If you solve my riddle, I will reward you with a portion of my riches, but if you are not clever, you will never leave this treasure chamber. Before you are 200 gold coins. I pose a challenge to you, adventurer. Within these stacks of coins, all but 30 are face-up. You must divide the coins into two piles, one is yours, and one is mine. You may place as many coins as you like in either pile. You may flip any coins over, but you may not balance any coins on their edges. For every face-down coin in your pile, you will be rewarded with two gold coins. But be warned, if both piles do not contain the same number of face-down coins, the door will remain sealed for all eternity!\"\\n\\nThe adventurer smiled, as this would be an easy task. All he had to do was flip over every coin so it was face down, and he would win the entire treasure! As he moved to the columns of coins, however, the light suddenly faded, and he was left in total darkness. The adventurer reached forward and picked up one of the coins, and was shocked when he realized that both sides felt almost the same. Without the light, he was unable to determine which side of the coin was heads and which side was tails. He carefully replaced the coin in its original orientation and tried to think of a way to solve the puzzle. Finally, out of desperation, the adventurer removed 30 coins to create his pile. 
He then carefully flipped over each coin in his pile, so its orientation was inverted from its original state.\\n\\n\"I\\'ve finished,\" he said, and the lights returned. Looking at the two piles, he noticed that the larger pile contained 14 face-down coins.\\n\\nWhat was the outcome for the adventurer? If he failed the challenge, please respond with \"The adventurer died.\" Otherwise, please provide the number of coins the adventurer won at the conclusion of the riddle. If the adventurer won any coins, provide your response as the number of coins, with no other text.', additional_kwargs={}, response_metadata={}, id='85bfac92-7cdb-48f0-b211-9593a6dfc851'),\n",
102
+ " AIMessage(content='28', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 970, 'prompt_tokens': 2706, 'total_tokens': 3676, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 960, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-5-mini-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CIbxZ2RCMWUu7YmfpqFUpa2eMG39g', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--fd1c06e8-0aa2-49db-8693-33ac60a1b382-0', usage_metadata={'input_tokens': 2706, 'output_tokens': 970, 'total_tokens': 3676, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 960}})],\n",
103
+ " 'query': 'Here\\'s a fun riddle that I\\'d like you to try.\\n\\nAn adventurer exploring an ancient tomb came across a horde of gold coins, all neatly stacked in columns. As he reached to scoop them into his backpack, a mysterious voice filled the room. \"You have fallen for my trap adventurer,\" the voice began, and suddenly the doorway to the chamber was sealed by a heavy rolling disk of stone. The adventurer tried to move the stone disk but was unable to budge the heavy stone. Trapped, he was startled when the voice again spoke. \\n\\n\"If you solve my riddle, I will reward you with a portion of my riches, but if you are not clever, you will never leave this treasure chamber. Before you are 200 gold coins. I pose a challenge to you, adventurer. Within these stacks of coins, all but 30 are face-up. You must divide the coins into two piles, one is yours, and one is mine. You may place as many coins as you like in either pile. You may flip any coins over, but you may not balance any coins on their edges. For every face-down coin in your pile, you will be rewarded with two gold coins. But be warned, if both piles do not contain the same number of face-down coins, the door will remain sealed for all eternity!\"\\n\\nThe adventurer smiled, as this would be an easy task. All he had to do was flip over every coin so it was face down, and he would win the entire treasure! As he moved to the columns of coins, however, the light suddenly faded, and he was left in total darkness. The adventurer reached forward and picked up one of the coins, and was shocked when he realized that both sides felt almost the same. Without the light, he was unable to determine which side of the coin was heads and which side was tails. He carefully replaced the coin in its original orientation and tried to think of a way to solve the puzzle. Finally, out of desperation, the adventurer removed 30 coins to create his pile. 
He then carefully flipped over each coin in his pile, so its orientation was inverted from its original state.\\n\\n\"I\\'ve finished,\" he said, and the lights returned. Looking at the two piles, he noticed that the larger pile contained 14 face-down coins.\\n\\nWhat was the outcome for the adventurer? If he failed the challenge, please respond with \"The adventurer died.\" Otherwise, please provide the number of coins the adventurer won at the conclusion of the riddle. If the adventurer won any coins, provide your response as the number of coins, with no other text.',\n",
104
+ " 'final_answer': 'FINAL ANSWER: 28',\n",
105
+ " 'plan': None,\n",
106
+ " 'complexity_assessment': ComplexityLevel(level='simple', reasoning='Initial state: 200 coins, all but 30 are face-up 30 face-down (tails). He takes 30 coins; if x of those were face-down, the remaining pile has 30−x face-down. Flipping the 30-coin pile turns its face-down count into 30−x as well, so both piles have equal face-down coins. The larger pile was observed to have 14 face-down coins, so his pile also has 14 face-down coins. His reward is 2 gold coins per face-down coin in his pile: 14×2 = 28.', needs_planning=False, suggested_approach='Use simple counting: compute initial number of face-down coins (30), let x be face-down among the 30 taken, show flipping yields equal counts, then multiply the final face-down count in his pile by 2 to get the reward.'),\n",
107
+ " 'current_step': 0,\n",
 
 
 
 
 
108
  " 'reasoning_done': False,\n",
109
  " 'files': [],\n",
110
+ " 'critique_feedback': CritiqueFeedback(quality_score=8, is_complete=True, is_accurate=True, missing_elements=[], errors_found=[\"Minor imprecise phrasing: the report mentions 'conservation of face-down coins' which is misleading — flipping changes the total number of face-down coins. The correct point is that after flipping the removed 30 coins, the removed pile ends up with (30 - x) face-down coins, which equals the remaining pile's unchanged count of (30 - x).\", 'No explicit, single-sentence statement answering the two parts of the query (Did he succeed? How many gold coins did he win?). The final numeric answer is present but the success statement is implicit.'], suggested_improvements=[\"Clarify the flipping logic and avoid the word 'conservation' for face-down coins; explicitly show how removed-pile face-down count transforms from x to (30 - x).\", \"Add an explicit conclusion sentence: e.g., 'Yes — he succeeded; his pile has 14 face-down coins after flipping, so he wins 14 * 2 = 28 gold.'\", 'Add a brief sanity check (optional): pick an example x (like x = 16) and show counts before and after flipping to demonstrate consistency.', 'If following strict tooling policies, note that no external tools were required for this simple algebraic reasoning. If automated-tool usage is mandatory in your environment, run a quick symbolic/numeric check with the chosen tool and cite it.'], needs_replanning=False, replan_instructions=None),\n",
111
  " 'iteration_count': 1,\n",
112
  " 'max_iterations': 10,\n",
113
+ " 'execution_report': ExecutionReport(query_summary='Riddle: 200 coins where all but 30 are face-up (so 30 face-down). The adventurer removed 30 coins as his pile (unknown orientations), flipped each coin in that pile, then observed the larger pile contained 14 face-down coins. Determine whether he succeeded and how many coins he won (2 gold per face-down coin in his pile).', approach_used=\"Algebraic reasoning with simple variables: let x = number of face-down coins initially in the 30-coin pile removed. Use conservation of face-down coins to compute final counts after flipping, then compute reward = 2 * (final face-downs in adventurer's pile).\", tools_executed=[], key_findings=['Total coins = 200. Initially face-down coins = 30 (since all but 30 are face-up).', 'Let x = number of face-down coins among the 30 coins removed. After flipping those 30 coins, that pile has (30 - x) face-down coins.', 'The remaining (larger) pile has initial face-down coins = 30 - x, which matches (30 - x) after the flip, so both piles have equal face-down counts.', 'Given the larger pile was observed to have 14 face-down coins, 30 - x = 14 => x = 16.', \"Therefore the adventurer's pile also has 14 face-down coins after flipping, yielding a reward of 14 * 2 = 28 coins.\"], data_sources=['Problem statement provided in the query', 'Basic arithmetic/algebra reasoning'], assumptions_made=[\"Interpretation 'all but 30 are face-up' means exactly 30 coins are face-down initially.\", \"The adventurer's pile is the 30-coin pile he removed and flipped; the 'larger pile' refers to the remaining 170-coin pile.\", \"Reward is exactly 2 gold coins per face-down coin in the adventurer's pile, as stated.\"], confidence_level='high', limitations=['Solution depends on standard interpretation of the riddle wording; if alternative interpretations were intended, results could differ.', 'No external references were used; reasoning is self-contained.'], final_answer='28')}"
114
  ]
115
  },
116
+ "execution_count": 5,
117
  "metadata": {},
118
  "output_type": "execute_result"
119
  }
 
121
  "source": [
122
  "result"
123
  ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": 6,
128
+ "metadata": {},
129
+ "outputs": [],
130
+ "source": [
131
+ "#TO-DO\n",
132
+ "#1. Check routing with REPLANNER -> it may invent tools that do not exist\n",
133
+ "#2. Add crawling tool\n",
134
+ "#3. Enhance the description of the coder tool and state more clearly in the prompt how important it is to emit output via print(), return, or result/_\n",
135
+ "#4. Make the critic less strict"
136
+ ]
137
  }
138
  ],
139
  "metadata": {