Spaces:

KaiserShultz
/

Ankelodon_AI_Multi_task_agentic_system

Sleeping

App Files Files Community

Nikita Miroshnichenko committed on Sep 18, 2025

Commit

f988705

unverified ·

2 Parent(s): da9a42e b033223

Merge pull request #1 from Lebaranto/codex/refactor-planner-and-executor-prompts

Browse files

Files changed (4) hide show

src/nodes.py +305 -185
src/prompts/prompts.py +46 -62
src/schemas.py +11 -21
src/utils/utils.py +58 -1

src/nodes.py CHANGED Viewed

@@ -1,161 +1,233 @@
 import os
 from state import AgentState
 from tools.tools import preprocess_files
 from langgraph.prebuilt import ToolNode
-from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
-from prompts.prompts import SYSTEM_PROMPT_PLANNER, SYSTEM_EXECUTOR_PROMPT, COMPLEXITY_ASSESSOR_PROMPT, CRITIC_PROMPT
 from config import llm, TOOLS, planner_llm, llm_with_tools
 from schemas import PlannerPlan, ComplexityLevel, CritiqueFeedback, ExecutionReport, ToolExecution
-from utils.utils import format_final_answer, clean_message_history
-def query_input(state : AgentState) -> AgentState:
-    print("=== USER QUERY TRANSFERED TO AGENT ===")
     files = state.get("files", [])
     if files:
-        print(f"Processing {len(files)} files:")
         file_info = preprocess_files(files)
         for file_path, info in file_info.items():
-            print(f"  - {file_path}: {info['type']} ({info['size']} bytes) -> {info['suggested_tool']}")
         state["file_contents"] = file_info
         file_context = "\n\n=== AVAILABLE FILES FOR ANALYSIS ===\n"
         for file_path, info in file_info.items():
             filename = os.path.basename(file_path)
             file_context += f"File: {filename}\n"
-            file_context += f"  - Type: {info['type']}\n"
             file_context += f"  - Size: {info['size']} bytes\n"
             file_context += f"  - Suggested tool: {info['suggested_tool']}\n"
             if info.get("preview"):
                 file_context += f"  - Preview: {info['preview']}\n"
             file_context += "\n"
-        # Добавляем инструкции по работе с файлами
         file_context += "IMPORTANT: Use the suggested tools to analyze these files before processing their data.\n"
         file_context += "File paths are available in the agent state and can be passed directly to analysis tools.\n"
         original_query = state.get("query", "")
         state["query"] = original_query + file_context
     return state
-def planner(state : AgentState) -> AgentState:
     sys_stack = [
-            SystemMessage(content=SYSTEM_PROMPT_PLANNER.strip()),
-            HumanMessage(content=state["query"]),
-        ]
     plan: PlannerPlan = planner_llm.invoke(sys_stack)
-    print("=== GENERATED PLAN ===")
-    return {"messages" : sys_stack + state["messages"],
-            "plan": plan,
-            "current_step ": 0,
-            "reasoning_done": False}
 def agent(state: AgentState) -> AgentState:
-    """
-    sys_msg = SystemMessage(
-        content=SYSTEM_EXECUTOR_PROMPT.strip().format(
-            plan=json.dumps(state["plan"], indent=2)
-        )
-    )
-    """
     current_step = state.get("current_step", 0)
     reasoning_done = state.get("reasoning_done", False)
-    plan = state.get("plan", {})
-    steps = state["plan"].steps
-    print(f"=== AGENT DEBUG ===")
-    print(f"Current step: {current_step}")
-    print(f"Reasoning done: {reasoning_done}")
-    print(f"Plan exists: {plan is not None}")
-    print(f"Total steps in plan: {len(plan.steps) if plan else 'No plan'}")
-    if not plan or not hasattr(plan, 'steps') or not plan.steps:
-        print("ERROR: No valid plan found!")
         return {
-            "messages": state["messages"] + [AIMessage(content="No valid plan available. <FINAL_ANSWER>")],
-            "reasoning_done": False
         }
     steps = plan.steps
-    if current_step >= len(steps):
-        print("All plan steps completed, moving to finalization")
         return {
-            "messages": state["messages"] + [AIMessage(content="All steps completed. <FINAL_ANSWER>")],
-            "reasoning_done": False
         }
     current_step_info = steps[current_step]
-    print(f"Executing step {current_step + 1}: {current_step_info.description}")
     if not reasoning_done:
-        # ✅ ДОБАВЛЕНО: Специальный контекст для файлов
-        file_context = ""
-        file_contents = state.get("file_contents", {})
-        if file_contents:
-            file_context = "\n\nAVAILABLE FILES IN CURRENT SESSION:\n"
-            for filepath, info in file_contents.items():
-                filename = os.path.basename(filepath)
-                file_context += f"- {filename}: {info['type']} file, suggested tool: {info['suggested_tool']}\n"
-                file_context += f"  Path: {filepath}\n"
-        reasoning_prompt = f"""
-        {SYSTEM_EXECUTOR_PROMPT}
-        CURRENT TASK: You must perform reasoning for step {current_step + 1}.
-        STEP INFO: {current_step_info}\n\n
-        FILE CONTEXT: {file_contents}
-        CRITICAL: You MUST output your reasoning in <REASONING> tags, but DO NOT call any tools yet.
-        Explain what you need to do and why, then end your response.
-        REASONING IS IMPERATIVE BEFORE ANY TOOL CALLS.
-        """
-        sys_msg = SystemMessage(content = reasoning_prompt)
-        stack = [sys_msg] + state["messages"]
-        step = llm.invoke(stack)
-        print("=== REASONING STEP ===")
-        print(step.content)
-        return {
-            "messages" : state["messages"] + [step],
-            "reasoning_done" : True
-        }
-    else:
-        tool_prompt = f"""
-        Now execute the tool for step {current_step + 1}.
-        You have already done the reasoning. Now call the appropriate tool with the correct parameters.
-        Available file paths: {list(state.get("file_contents", {}).keys())}\n
-        IMPORTANT NOTE: IF YOU DECIDED TO USE safe_code_run, MAKE SURE TO FINISH CALCULATIONS WITH print() or saving to a variable NAMED 'result' so that the output can be captured!
-        AVAILABLE TOOLS: {', '.join([tool.name for tool in TOOLS])}
-        """
-        sys_msg = SystemMessage(content=tool_prompt)
-        stack = [sys_msg] + state["messages"]  # Берем последние сообщения включая reasoning
-        # Используем модель С инструментами для выполнения
-        step = llm_with_tools.invoke(stack)
-        print("=== TOOL EXECUTION ===")
-        print(f"Tool calls: {step.tool_calls}")
-        return {
-            "messages": state["messages"] + [step],
-            "current_step": current_step + 1 if step.tool_calls else current_step,
-            "reasoning_done": False  # Сбрасываем для следующего шага
-        }
 def should_continue(state : AgentState) -> bool:
     last_message = state["messages"][-1]
@@ -185,19 +257,45 @@ def should_continue(state : AgentState) -> bool:
 class DebuggingToolNode(ToolNode):
     def __init__(self, tools):
         super().__init__(tools)
     def __call__(self, state):
-        print("=== TOOL EXECUTION STARTED ===")
-        result = super().__call__(state)
-        print("=== TOOL EXECUTION COMPLETED ===")
-        return result
 def enhanced_finalizer(state: AgentState) -> AgentState:
     """Generate comprehensive execution report for critic evaluation."""
-    print("=== GENERATING EXECUTION REPORT ===")
     # Extract tool execution information
     tools_executed = []
     data_sources = []
@@ -222,20 +320,22 @@ def enhanced_finalizer(state: AgentState) -> AgentState:
     plan = state.get("plan")
     approach_used = "Direct execution"
     assumptions_made = []
     if plan:
-        approach_used = f"{plan.task_type} approach with {len(plan.steps)} steps"
         assumptions_made = plan.assumptions
     # Generate structured report (КОСТЫЛЬ ЗДЕСЬ!)
     report_generator_prompt = f"""
     Generate a comprehensive execution report for the following query processing:
     ORIGINAL QUERY: {state['query']}
     EXECUTION CONTEXT:
     - Complexity Level: {state.get('complexity_assessment', {}).level}
-    - Plan Used: {plan if plan else {}}
     - Tools Executed: {tools_executed}
     - Available Files: {list(state.get('file_contents', {}).keys())}
@@ -262,13 +362,18 @@ def enhanced_finalizer(state: AgentState) -> AgentState:
         HumanMessage(content="Generate the execution report.")
     ])
-    print(f"Report generated - Confidence: {execution_report.confidence_level}")
-    print(f"Key findings: {len(execution_report.key_findings)}")
-    print(f"Data sources: {len(execution_report.data_sources)}")
     # Format final answer for user
     formatted_answer = format_final_answer(execution_report, state.get('complexity_assessment', {}))
-    print(execution_report)
     return {
         "execution_report": execution_report,
         "final_answer": formatted_answer
@@ -277,23 +382,25 @@ def enhanced_finalizer(state: AgentState) -> AgentState:
 def simple_executor(state: AgentState) -> AgentState:
     """Handle simple queries directly without planning."""
-    print("=== SIMPLE EXECUTION ===")
     # For simple queries, use the LLM with tools directly
     simple_prompt = f"""
     Answer this simple query directly and efficiently: {state['query']}
-    You have access to tools if needed, but try to answer directly when possible.
-    If you need files, they are available at: {list(state.get('file_contents', {}).keys())}
-    Provide a clear, concise answer.
     """
     response = llm_with_tools.invoke([
         SystemMessage(content=simple_prompt),
         HumanMessage(content=state['query'])
     ])
     return {
         "messages": state["messages"] + [response],
         "final_answer": response.content
@@ -312,8 +419,8 @@ def should_use_planning(state: AgentState) -> str:
 def critic_evaluator(state: AgentState) -> AgentState:
     """Enhanced critic that evaluates execution reports."""
-    print("=== ENHANCED ANSWER CRITIQUE ===")
     report = state.get("execution_report")
     critic_llm = llm.with_structured_output(CritiqueFeedback)
@@ -333,15 +440,22 @@ def critic_evaluator(state: AgentState) -> AgentState:
         HumanMessage(content="Evaluate this execution report thoroughly.")
     ])
-    print(f"Quality Score: {critique.quality_score}/10")
-    print(f"Complete: {critique.is_complete}")
-    print(f"Accurate: {critique.is_accurate}")
     if critique.errors_found:
-        print(f"Issues found: {critique.errors_found}")
     if critique.needs_replanning:
-        print(f"Replanning needed: {critique.replan_instructions}")
     return {
         "critique_feedback": critique,
@@ -355,64 +469,63 @@ def should_replan(state: AgentState) -> str:
     critique = state.get("critique_feedback")
     iteration_count = state.get("iteration_count", 0)
     max_iterations = state.get("max_iterations", 3)
-    print(f"=== REPLAN DECISION ===")
-    print(f"Iteration: {iteration_count}/{max_iterations}")
-    print(f"Quality score: {critique.quality_score if critique else 'N/A'}")
-    print(f"Needs replanning: {critique.needs_replanning if critique else 'N/A'}")
     if not critique:
         return "end"
     # Stop if max iterations reached
     if iteration_count >= max_iterations:
-        print(f"Max iterations ({max_iterations}) reached. Accepting current answer.")
         return "end"
     # Accept if quality is good enough
     if critique.quality_score >= 7 or not critique.needs_replanning:
-        print("Quality acceptable, ending execution")
         return "end"
     # Replan if quality is poor and we haven't exceeded max iterations
     if critique.needs_replanning and iteration_count < max_iterations:
-        print("Replanning due to critic feedback...")
         return "replan"
     return "end"
 def replanner(state: AgentState) -> AgentState:
     """Create a revised plan based on critic feedback."""
-    print("=== REPLANNING ===")
     critique = state["critique_feedback"]
     previous_plan = state.get("plan")
-    replan_prompt = f"""
-    {SYSTEM_PROMPT_PLANNER}
-    REPLANNING CONTEXT:
-    Original Query: {state['query']}
-    Previous Plan: {previous_plan if previous_plan else {}}
-    CRITIC FEEDBACK:
-    - Quality Score: {critique.quality_score}/10
-    - Issues Found: {critique.errors_found}
-    - Missing Elements: {critique.missing_elements}
-    - Improvement Suggestions: {critique.suggested_improvements}
-    - Specific Instructions: {critique.replan_instructions}
-    Create a REVISED plan that addresses these issues. Focus on fixing the identified problems.
-    """
     revised_plan = planner_llm.invoke([
         SystemMessage(content=replan_prompt),
         HumanMessage(content="Create a revised plan based on the feedback.")
     ])
-    print("Plan revised based on critic feedback")
     # Очищаем историю сообщений от неполных tool_calls
     current_messages = state.get("messages", [])
     cleaned_messages = clean_message_history(current_messages)
@@ -427,8 +540,12 @@ def replanner(state: AgentState) -> AgentState:
                 isinstance(msg, HumanMessage)):
                 essential_messages.append(msg)
-    print(f"Cleaned message history: {len(current_messages)} -> {len(essential_messages)} messages")
     return {
         "plan": revised_plan,
         "current_step": 0,
@@ -440,21 +557,24 @@ def replanner(state: AgentState) -> AgentState:
 def complexity_assessor(state: AgentState) -> AgentState:
     """Assess query complexity and determine if planning is needed."""
-    print("=== COMPLEXITY ASSESSMENT ===")
     complexity_llm = llm.with_structured_output(ComplexityLevel)
     assessment_message = [
         SystemMessage(content=COMPLEXITY_ASSESSOR_PROMPT.strip()),
         HumanMessage(content=f"Query: {state['query']}")
     ]
     assessment = complexity_llm.invoke(assessment_message)
-    print(f"Complexity: {assessment.level}")
-    print(f"Needs planning: {assessment.needs_planning}")
-    print(f"Reasoning: {assessment.reasoning}")
     return {
         "complexity_assessment": assessment,
         "messages": state["messages"] + assessment_message

 import os
+from typing import Optional
 from state import AgentState
 from tools.tools import preprocess_files
 from langgraph.prebuilt import ToolNode
+from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage
+from prompts.prompts import (
+    SYSTEM_PROMPT_PLANNER,
+    SYSTEM_EXECUTOR_PROMPT,
+    COMPLEXITY_ASSESSOR_PROMPT,
+    CRITIC_PROMPT,
+)
 from config import llm, TOOLS, planner_llm, llm_with_tools
 from schemas import PlannerPlan, ComplexityLevel, CritiqueFeedback, ExecutionReport, ToolExecution
+from utils.utils import (
+    format_final_answer,
+    clean_message_history,
+    log_stage,
+    log_key_values,
+    display_plan,
+    format_plan_overview,
+)
+def _build_planner_prompt(state: AgentState, extra_context: Optional[str] = None) -> str:
+    tool_catalogue = ", ".join(sorted(tool.name for tool in TOOLS))
+    file_paths = state.get("files", [])
+    file_list = ", ".join(os.path.basename(path) for path in file_paths) if file_paths else "none provided"
+    extra = extra_context.strip() if extra_context else "None"
+    return SYSTEM_PROMPT_PLANNER.format(
+        tool_catalogue=tool_catalogue,
+        file_list=file_list,
+        extra_context=extra,
+    ).strip()
+def query_input(state: AgentState) -> AgentState:
+    log_stage("USER QUERY", icon="💡")
     files = state.get("files", [])
     if files:
+        log_stage("FILE PREPARATION", subtitle=f"Processing {len(files)} file(s)", icon="📁")
         file_info = preprocess_files(files)
         for file_path, info in file_info.items():
+            log_key_values(
+                [
+                    ("path", file_path),
+                    ("type", info["type"]),
+                    ("size", f"{info['size']} bytes"),
+                    ("suggested_tool", info["suggested_tool"]),
+                ]
+            )
         state["file_contents"] = file_info
         file_context = "\n\n=== AVAILABLE FILES FOR ANALYSIS ===\n"
         for file_path, info in file_info.items():
             filename = os.path.basename(file_path)
             file_context += f"File: {filename}\n"
+            file_context += f"  - Type: {info['type']}\n"
             file_context += f"  - Size: {info['size']} bytes\n"
             file_context += f"  - Suggested tool: {info['suggested_tool']}\n"
             if info.get("preview"):
                 file_context += f"  - Preview: {info['preview']}\n"
             file_context += "\n"
         file_context += "IMPORTANT: Use the suggested tools to analyze these files before processing their data.\n"
         file_context += "File paths are available in the agent state and can be passed directly to analysis tools.\n"
         original_query = state.get("query", "")
         state["query"] = original_query + file_context
+    else:
+        log_key_values([("files", "none provided")])
     return state
+def planner(state: AgentState) -> AgentState:
+    log_stage("PLANNING", icon="🧭")
+    planner_prompt = _build_planner_prompt(state)
     sys_stack = [
+        SystemMessage(content=planner_prompt),
+        HumanMessage(content=state["query"]),
+    ]
     plan: PlannerPlan = planner_llm.invoke(sys_stack)
+    display_plan(plan)
+    return {
+        "messages": state["messages"] + sys_stack,
+        "plan": plan,
+        "current_step": 0,
+        "reasoning_done": False,
+    }
 def agent(state: AgentState) -> AgentState:
     current_step = state.get("current_step", 0)
     reasoning_done = state.get("reasoning_done", False)
+    plan: Optional[PlannerPlan] = state.get("plan")
+    if not plan or not hasattr(plan, "steps"):
+        log_stage("PLAN VALIDATION", subtitle="Planner returned no actionable steps", icon="⚠️")
+        warning = AIMessage(content="No valid plan available. <FINAL_ANSWER>")
         return {
+            "messages": state["messages"] + [warning],
+            "reasoning_done": False,
         }
     steps = plan.steps
+    total_steps = len(steps)
+    if total_steps == 0:
+        log_stage("PLAN VALIDATION", subtitle="Plan indicates direct answer", icon="ℹ️")
+        direct = AIMessage(content="Plan has no steps; respond directly. <FINAL_ANSWER>")
+        return {
+            "messages": state["messages"] + [direct],
+            "reasoning_done": False,
+        }
+    if current_step >= total_steps:
+        log_stage("PLAN COMPLETE", subtitle="All steps executed", icon="✅")
+        completion = AIMessage(content="All plan steps completed. <FINAL_ANSWER>")
         return {
+            "messages": state["messages"] + [completion],
+            "reasoning_done": False,
         }
     current_step_info = steps[current_step]
+    log_stage(
+        "EXECUTION",
+        subtitle=f"Step {current_step + 1}/{total_steps}: {current_step_info.goal}",
+        icon="🤖",
+    )
+    log_key_values(
+        [
+            ("step_id", current_step_info.id),
+            ("tool", current_step_info.tool or "none"),
+            ("expected", current_step_info.expected_result),
+        ]
+    )
+    plan_overview = format_plan_overview(plan)
+    tool_catalogue = ", ".join(sorted(tool.name for tool in TOOLS))
+    file_contents = state.get("file_contents", {})
+    file_list = ", ".join(file_contents.keys()) if file_contents else "none provided"
+    system_message = SystemMessage(
+        content=SYSTEM_EXECUTOR_PROMPT.format(
+            plan_summary=plan.summary,
+            plan_overview=plan_overview,
+            current_step_id=current_step_info.id,
+            step_goal=current_step_info.goal,
+            step_tool=current_step_info.tool or "no tool (respond directly)",
+            tool_catalogue=tool_catalogue,
+            file_list=file_list,
+        ).strip()
+    )
     if not reasoning_done:
+        instruction = HumanMessage(
+            content=(
+                "Provide reasoning for this step inside <REASONING>...</REASONING>. "
+                "Do not call any tools yet."
+            )
+        )
+        stack = [system_message] + state["messages"] + [instruction]
+        reasoning_response = llm.invoke(stack)
+        log_stage("REASONING", subtitle=f"{current_step_info.id}", icon="🧠")
+        print(reasoning_response.content)
+        return {
+            "messages": state["messages"] + [reasoning_response],
+            "reasoning_done": True,
+        }
+    available_tools = {tool.name for tool in TOOLS}
+    if current_step_info.tool and current_step_info.tool not in available_tools:
+        log_stage(
+            "TOOL WARNING",
+            subtitle=f"Unknown tool '{current_step_info.tool}' in plan",
+            icon="⚠️",
+        )
+        warning = AIMessage(
+            content=(
+                f"<REASONING>Unable to execute {current_step_info.id}: tool "
+                f"'{current_step_info.tool}' is unavailable. Requesting replanning.</REASONING>"
+            )
+        )
+        print(warning.content)
+        return {
+            "messages": state["messages"] + [warning],
+            "reasoning_done": False,
+        }
+    execution_instruction = HumanMessage(
+        content=(
+            "Execute the planned action now. If a tool is required, call it with the "
+            "correct arguments. After success, respond with STEP COMPLETE. If inputs are "
+            "missing, explain the issue in <REASONING> without new tool calls."
+        )
+    )
+    stack = [system_message] + state["messages"] + [execution_instruction]
+    execution_response = llm_with_tools.invoke(stack)
+    if execution_response.tool_calls:
+        tool_names = ", ".join(call["name"] for call in execution_response.tool_calls)
+        log_stage("TOOL CALL", subtitle=f"{current_step_info.id} → {tool_names}", icon="🛠️")
+        print(execution_response.tool_calls)
+    else:
+        log_stage("EXECUTION OUTPUT", subtitle=current_step_info.id, icon="🛠️")
+        if execution_response.content:
+            print(execution_response.content)
+    advance = False
+    if execution_response.tool_calls:
+        advance = True
+    elif execution_response.content and (
+        "STEP COMPLETE" in execution_response.content or "<FINAL_ANSWER>" in execution_response.content
+    ):
+        advance = True
+    next_step = current_step + 1 if advance and current_step < total_steps else current_step
+    return {
+        "messages": state["messages"] + [execution_response],
+        "current_step": next_step,
+        "reasoning_done": False,
+    }
 def should_continue(state : AgentState) -> bool:
     last_message = state["messages"][-1]
 class DebuggingToolNode(ToolNode):
     def __init__(self, tools):
         super().__init__(tools)
     def __call__(self, state):
+        log_stage("TOOL NODE", subtitle="Dispatching tool calls", icon="🛠️")
+        try:
+            result = super().__call__(state)
+            log_stage("TOOL NODE", subtitle="Tool execution completed", icon="✅")
+            return result
+        except Exception as exc:
+            log_stage("TOOL ERROR", subtitle=f"{type(exc).__name__}: {exc}", icon="❌")
+            messages = state.get("messages", [])
+            last_message = messages[-1] if messages else None
+            tool_calls = getattr(last_message, "tool_calls", []) if last_message else []
+            error_messages = []
+            for call in tool_calls:
+                error_messages.append(
+                    ToolMessage(
+                        content=f"ERROR: {type(exc).__name__}: {exc}",
+                        tool_call_id=call.get("id") or "unknown_call",
+                        name=call.get("name"),
+                    )
+                )
+            if not error_messages:
+                error_messages.append(
+                    ToolMessage(
+                        content=f"ERROR: {type(exc).__name__}: {exc}",
+                        tool_call_id="unknown_call",
+                    )
+                )
+            return {"messages": messages + error_messages}
 def enhanced_finalizer(state: AgentState) -> AgentState:
     """Generate comprehensive execution report for critic evaluation."""
+    log_stage("FINALIZER", subtitle="Compiling execution report", icon="📄")
     # Extract tool execution information
     tools_executed = []
     data_sources = []
     plan = state.get("plan")
     approach_used = "Direct execution"
     assumptions_made = []
+    plan_overview = ""
     if plan:
+        approach_used = f"{plan.task_type} plan – {plan.summary}"
         assumptions_made = plan.assumptions
+        plan_overview = format_plan_overview(plan)
     # Generate structured report (КОСТЫЛЬ ЗДЕСЬ!)
     report_generator_prompt = f"""
     Generate a comprehensive execution report for the following query processing:
     ORIGINAL QUERY: {state['query']}
     EXECUTION CONTEXT:
     - Complexity Level: {state.get('complexity_assessment', {}).level}
+    - Plan Used: {plan_overview if plan_overview else 'direct response'}
     - Tools Executed: {tools_executed}
     - Available Files: {list(state.get('file_contents', {}).keys())}
         HumanMessage(content="Generate the execution report.")
     ])
+    log_key_values(
+        [
+            ("confidence", execution_report.confidence_level),
+            ("findings", str(len(execution_report.key_findings))),
+            ("sources", str(len(execution_report.data_sources))),
+        ]
+    )
     # Format final answer for user
     formatted_answer = format_final_answer(execution_report, state.get('complexity_assessment', {}))
+    log_stage("FINAL ANSWER PREVIEW", icon="📬")
+    print(formatted_answer)
     return {
         "execution_report": execution_report,
         "final_answer": formatted_answer
 def simple_executor(state: AgentState) -> AgentState:
     """Handle simple queries directly without planning."""
+    log_stage("SIMPLE EXECUTION", subtitle="Handling low-complexity query", icon="⚡")
     # For simple queries, use the LLM with tools directly
     simple_prompt = f"""
     Answer this simple query directly and efficiently: {state['query']}
+    Stay factual, cite tools only if you actually call them, and avoid inventing files or URLs.
+    Known files: {list(state.get('file_contents', {}).keys())}
+    If no tool is required, respond immediately with the final answer.
     """
     response = llm_with_tools.invoke([
         SystemMessage(content=simple_prompt),
         HumanMessage(content=state['query'])
     ])
+    log_stage("SIMPLE EXECUTION OUTPUT", icon="📬")
+    print(response.content)
     return {
         "messages": state["messages"] + [response],
         "final_answer": response.content
 def critic_evaluator(state: AgentState) -> AgentState:
     """Enhanced critic that evaluates execution reports."""
+    log_stage("CRITIC", subtitle="Evaluating execution report", icon="🔍")
     report = state.get("execution_report")
     critic_llm = llm.with_structured_output(CritiqueFeedback)
         HumanMessage(content="Evaluate this execution report thoroughly.")
     ])
+    log_key_values(
+        [
+            ("quality", f"{critique.quality_score}/10"),
+            ("complete", str(critique.is_complete)),
+            ("accurate", str(critique.is_accurate)),
+        ]
+    )
     if critique.errors_found:
+        log_stage("CRITIC ISSUES", icon="⚠️")
+        for issue in critique.errors_found:
+            print(f" - {issue}")
     if critique.needs_replanning:
+        log_stage("CRITIC REPLAN", subtitle="Replanning requested", icon="♻️")
+        print(critique.replan_instructions)
     return {
         "critique_feedback": critique,
     critique = state.get("critique_feedback")
     iteration_count = state.get("iteration_count", 0)
     max_iterations = state.get("max_iterations", 3)
+    subtitle = f"Iteration {iteration_count}/{max_iterations}"
+    log_stage("REPLAN DECISION", subtitle=subtitle, icon="🧭")
+    if critique:
+        log_key_values(
+            [
+                ("quality", str(critique.quality_score)),
+                ("needs_replanning", str(critique.needs_replanning)),
+            ]
+        )
     if not critique:
         return "end"
     # Stop if max iterations reached
     if iteration_count >= max_iterations:
+        log_stage("REPLAN DECISION", subtitle="Max iterations reached", icon="🛑")
         return "end"
     # Accept if quality is good enough
     if critique.quality_score >= 7 or not critique.needs_replanning:
+        log_stage("REPLAN DECISION", subtitle="Accepting current answer", icon="✅")
         return "end"
     # Replan if quality is poor and we haven't exceeded max iterations
     if critique.needs_replanning and iteration_count < max_iterations:
+        log_stage("REPLAN DECISION", subtitle="Triggering replanner", icon="♻️")
         return "replan"
     return "end"
 def replanner(state: AgentState) -> AgentState:
     """Create a revised plan based on critic feedback."""
+    log_stage("REPLANNER", subtitle="Adjusting plan based on feedback", icon="♻️")
     critique = state["critique_feedback"]
     previous_plan = state.get("plan")
+    previous_summary = previous_plan.summary if previous_plan else "no previous plan"
+    issues = ", ".join(critique.errors_found) if critique.errors_found else "none"
+    improvements = ", ".join(critique.suggested_improvements) if critique.suggested_improvements else "none"
+    extra_context = (
+        f"Replanning requested by critic. Previous plan summary: {previous_summary}. "
+        f"Critic score: {critique.quality_score}/10. Issues: {issues}. "
+        f"Improvements to address: {improvements}. Specific instructions: "
+        f"{critique.replan_instructions or 'none'}"
+    )
+    replan_prompt = _build_planner_prompt(state, extra_context=extra_context)
     revised_plan = planner_llm.invoke([
         SystemMessage(content=replan_prompt),
         HumanMessage(content="Create a revised plan based on the feedback.")
     ])
+    display_plan(revised_plan)
     # Очищаем историю сообщений от неполных tool_calls
     current_messages = state.get("messages", [])
     cleaned_messages = clean_message_history(current_messages)
                 isinstance(msg, HumanMessage)):
                 essential_messages.append(msg)
+    log_stage(
+        "REPLANNER",
+        subtitle=f"Cleaned history: {len(current_messages)} → {len(essential_messages)}",
+        icon="🧹",
+    )
     return {
         "plan": revised_plan,
         "current_step": 0,
 def complexity_assessor(state: AgentState) -> AgentState:
     """Assess query complexity and determine if planning is needed."""
+    log_stage("COMPLEXITY", subtitle="Assessing task difficulty", icon="📊")
     complexity_llm = llm.with_structured_output(ComplexityLevel)
     assessment_message = [
         SystemMessage(content=COMPLEXITY_ASSESSOR_PROMPT.strip()),
         HumanMessage(content=f"Query: {state['query']}")
     ]
     assessment = complexity_llm.invoke(assessment_message)
+    log_key_values(
+        [
+            ("level", assessment.level),
+            ("needs_planning", str(assessment.needs_planning)),
+            ("reasoning", assessment.reasoning),
+        ]
+    )
     return {
         "complexity_assessment": assessment,
         "messages": state["messages"] + assessment_message

src/prompts/prompts.py CHANGED Viewed

@@ -1,72 +1,56 @@
 SYSTEM_PROMPT_PLANNER = """
-You are the PLANNER of a multi-tool agent (GAIA I–II level). Produce a minimal, reliable plan to solve the user's request using available tools. You DO NOT call tools; output ONLY a JSON plan. Tools are bound via .bind_tools()—use EXACT names.
-CORE RULES:
-- MINIMALITY: 1-3 steps max; chain only essentials (e.g., search → download → analyze).
-- ROUTING: Classify as info (web facts), calc (math on known data), table (CSV/Excel agg), doc_qa (PDF/DOCX/TXT extract), image_qa (IMG OCR/vision), multi_hop (anything cross-modality or research—default for unknowns).
-- PREREQUISITES: For external docs/images (e.g., "paper X", URLs): ALWAYS start with web_search/arxiv_search → download_file_from_url (local path like "paper.pdf") → analyze_*. NEVER assume local files—validate existence implicitly via chain.
-- COST-AWARE: Cheap first: search snippets > full download > compute. No raw files to safe_code_run—extract first.
-- EVIDENCE: Mandate citations/pages for facts; units/rounding explicit in guidelines.
-- FALLBACKS: Every step needs success_criteria; on_fail="replan" (default) or "sN" (jump). Add 1 fallback step if high-risk (e.g., no-results → alt query).
-ROUTING PATTERNS (MANDATORY CHAINS):
-- info: web_search/wiki_search/arxiv_search → cite snippets.
-- calc: If data missing, insert extract step → safe_code_run (e.g., "sum volumes from text").
-- table: analyze_csv_file/analyze_excel_file (preview) → safe_code_run (agg/query).
-- doc_qa: web_search("paper title PDF") → download_file_from_url → analyze_pdf_file/analyze_docx_file (query="vials fluid ml") → safe_code_run if sum needed.
-- image_qa: web_search → download_file_from_url → analyze_image_file/vision_qa_gemma → safe_code_run for chart-to-table.
-- multi_hop: Decompose (e.g., sub-query1: search; sub-query2: extract) → synthesize.
-Output ONLY valid JSON:
-{
   "task_type": "info|calc|table|doc_qa|image_qa|multi_hop",
-  "assumptions": ["..."],  // 0-2 max; e.g., "Paper details vials explicitly"
-  "plan_rationale": "Brief: why route + key tools/chain",  // 1 sentence
-  "steps": [  // 1-3 only
-    {
       "id": "s1",
-      "description": "Precise action + why (e.g., 'web_search for paper PDF to locate source')",
-      "evidence_needed": ["citations","page_numbers","stats_check"],  // 1-3
-      "success_criteria": "e.g., 'Top result has PDF URL; or data extracted'",
-      "on_fail": "replan|sN",  // Default: replan
-      "outputs_to_state": ["e.g., 'pdf_url', 'extracted_text'"]  // For chaining
-    }
   ],
-  "answer_guidelines": {
-    "final_answer_template": "e.g., 'Cumulative volume: X mL (from [cite])'",
-    "citations_required": true,
-    "min_citations": 1,
-    "units_policy": "e.g., 'mL; convert if cm³'",
-    "rounding_policy": "e.g., 'Nearest integer'",
-    "include_artifacts": ["snippets","tables"]  // 0-2
-  }
-}
-CONSTRAINTS:
-- Valid JSON only—no extras. If query trivial (no tools), task_type="info" with 0 steps.
-- Exact tool names: web_search, download_file_from_url, analyze_pdf_file, safe_code_run, etc.
-- For research: If no chain, replan triggers auto-fix.
 """
 SYSTEM_EXECUTOR_PROMPT = """
-ROLE: EXECUTOR of multi-tool agent (GAIA level). You follow the FIXED {plan} EXACTLY—no changes, no new steps. Current step: {current_step_id} ("{step_desc}"). Advance ONE step per response.
-EXECUTION RULES:
-- BEFORE EVERY TOOL: <REASONING> (2-3 sentences: What step? Why this tool? Exact inputs? Expected output?) </REASONING>
-- THEN: Tool call ONLY for this step (exact name/args from plan). NO OTHER OUTPUT.
-- NO TOOLS? Direct output (e.g., "Calc: 5 mL") + set reasoning_done=True.
-- Check state for priors (e.g., if s2 needs pdf_url from s1, wait/replan if missing).
-- On fail (bad output): <REASONING>Assess + on_fail action</REASONING> then tool or stop.
-- END STEP: If success, output "STEP COMPLETE: {outputs_to_state}" to advance.
-RESOURCE CHAIN (MANDATORY IF NEEDED):
-- External doc? Use plan's search→download before analyze.
-- NEVER guess paths—use state["files"] or replan.
-OUTPUT FORMAT: <REASONING>...</REASONING> [tool call or direct] [STEP COMPLETE if done]. NO JSON/PLANS/MARKDOWN.
-FAILSAFE: If unclear, <REASONING>Replan needed</REASONING> and stop.
-DO NOT FORGET TO ADD <FINAL_ANSWER> IF YOU THINK IT'S TIME TO ANSWER THE USER AND YOU HAVE ALL THE DATA FOR EXACT ANSWER.
 """

 # Planner system-prompt template.
 # Placeholders: {tool_catalogue}, {file_list}, {extra_context}. The JSON example
 # writes its literal braces doubled ({{ and }}), which is what brace-style
 # formatting requires for them to come out as single braces.
 # NOTE(review): presumably rendered with str.format-style substitution by
 # _build_planner_prompt in nodes.py — confirm against that helper.
 # The keys in the JSON example mirror the PlannerPlan / PlanStep fields in schemas.py.
 SYSTEM_PROMPT_PLANNER = """
+You are the planner of a multi-tool agent. Build a short, realistic plan that the executor can follow.
+Available tools: {tool_catalogue}
+Known local files: {file_list}
+Additional context: {extra_context}
+Return a single JSON object with this structure:
+{{
   "task_type": "info|calc|table|doc_qa|image_qa|multi_hop",
+  "summary": "One sentence on the chosen approach",
+  "assumptions": ["optional clarifications"],
+  "steps": [
+    {{
       "id": "s1",
+      "goal": "Action to take and why it helps",
+      "tool": "tool_name_or_null",
+      "inputs": "Key parameters or references (files, URLs, prior steps)",
+      "expected_result": "How you know the step succeeded",
+      "on_fail": "replan|stop"
+    }}
   ],
+  "answer_guidelines": "Reminders for the final response (citations, format, units, etc.)"
+}}
+Ground rules:
+- Prefer 1–3 steps. Only add a step if it changes the outcome.
+- Use tool names exactly as listed. If no tool is needed, set "tool": null.
+- Never assume files or URLs exist—plan to search/download before analysing.
+- Skip download steps when the required file is already provided.
+- Ensure later steps only depend on results created by earlier steps.
+- If the query is trivial, return an empty steps list and explain the direct answer in "summary".
 """
 # Executor system-prompt template.
 # Placeholders: {plan_summary}, {plan_overview}, {current_step_id}, {step_goal},
 # {step_tool}, {tool_catalogue}, {file_list}.
 # NOTE(review): presumably rendered with str.format-style substitution; if so, any
 # literal brace added to this text must be doubled — confirm against the caller.
 SYSTEM_EXECUTOR_PROMPT = """
+You are the executor of a grounded multi-tool agent.
+Plan summary: {plan_summary}
+Step map:
+{plan_overview}
+Current focus: {current_step_id} — {step_goal}
+Suggested tool: {step_tool}
+Available tools: {tool_catalogue}
+Known local files: {file_list}
+Execution rules:
+1. Stay aligned with the plan—no new steps or speculative actions.
+2. Before every tool call, respond with <REASONING>…</REASONING> explaining the step, chosen tool, inputs, and expected outcome.
+3. Call at most one tool per turn. After a successful step, state "STEP COMPLETE".
+4. If required inputs are missing (e.g., file not downloaded), explain the issue in <REASONING> and wait for replanning.
+5. Never invent file paths, URLs, or results. When unsure, request replanning instead of guessing.
+6. If no tool is needed, answer directly after the reasoning.
 """

src/schemas.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from typing import Any, Dict, List, Optional, Literal, Iterable
-from pydantic import BaseModel, Field, ValidationError
 class ComplexityLevel(BaseModel):
@@ -21,32 +21,22 @@ class CritiqueFeedback(BaseModel):
 TaskType = Literal["info", "calc", "table", "doc_qa", "image_qa", "multi_hop"]
-EvidenceTag = Literal["citations", "page_numbers", "figure_captions", "stats_check", "unit_check"]
 class PlanStep(BaseModel):
-    id: str
-    description: str
-    #tool: Optional[str] = Field(default=None, description="Exact tool name or null for reasoning step")
-    #args_hint: Dict[str, Any] = Field(default_factory=dict)
-    evidence_needed: List[EvidenceTag] = Field(default_factory=list)
-    success_criteria: str
-    on_fail: str = Field(default="replan", description="One of: 'replan' | 'stop' | step-id")
-    outputs_to_state: List[str] = Field(default_factory=list)
-class AnswerGuidelines(BaseModel):
-    final_answer_template: str
-    citations_required: bool = False
-    min_citations: int = 0
-    units_policy: Optional[str] = None
-    rounding_policy: Optional[str] = None
-    include_artifacts: List[str] = Field(default_factory=list)
 class PlannerPlan(BaseModel):
     task_type: TaskType
     assumptions: List[str] = Field(default_factory=list)
-    plan_rationale: str
-    steps: List[PlanStep]
-    answer_guidelines: AnswerGuidelines
 class ToolExecution(BaseModel):

+from typing import List, Optional, Literal
+from pydantic import BaseModel, Field
 class ComplexityLevel(BaseModel):
 TaskType = Literal["info", "calc", "table", "doc_qa", "image_qa", "multi_hop"]
 # One step of a planner-produced plan. The field names match the per-step keys
 # of the JSON example in SYSTEM_PROMPT_PLANNER (id, goal, tool, inputs,
 # expected_result, on_fail).
 class PlanStep(BaseModel):
     # Required fields: id, goal, expected_result.
+    id: str = Field(description="Unique step identifier (e.g., s1)")
+    goal: str = Field(description="What the step accomplishes and why")
     # tool and inputs are optional and default to None.
+    tool: Optional[str] = Field(default=None, description="Exact tool name or null when no tool is required")
+    inputs: Optional[str] = Field(default=None, description="Important inputs or references needed for the step")
+    expected_result: str = Field(description="How to confirm the step succeeded")
     # NOTE(review): on_fail is a plain str defaulting to "replan"; the description
     # suggests "replan"/"stop" but nothing here restricts the value.
+    on_fail: str = Field(default="replan", description="Fallback action if the step fails (replan or stop)")
 # Top-level plan object: a task classification, a short summary and an ordered
 # list of PlanStep items. Only task_type and summary are required.
 class PlannerPlan(BaseModel):
     # One of the TaskType literals declared above.
     task_type: TaskType
+    summary: str = Field(description="Short explanation of the chosen strategy")
     # Both lists default to empty. The planner prompt asks for an empty steps
     # list when the query is trivial.
     assumptions: List[str] = Field(default_factory=list)
+    steps: List[PlanStep] = Field(default_factory=list)
     # Free-form text here (None when absent).
+    answer_guidelines: Optional[str] = Field(default=None, description="Reminders for formatting, citations, etc.")
 class ToolExecution(BaseModel):

src/utils/utils.py CHANGED Viewed

@@ -1,9 +1,66 @@
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
-from schemas import ComplexityLevel, ExecutionReport
 from prompts.prompts import COMPLEXITY_ASSESSOR_PROMPT
 from config import llm
 from state import AgentState
 def clean_message_history(messages):
     """
     Очищает историю сообщений от неполных циклов tool_calls/responses.

+from typing import Iterable, Optional
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
+from schemas import ComplexityLevel, ExecutionReport, PlannerPlan
 from prompts.prompts import COMPLEXITY_ASSESSOR_PROMPT
 from config import llm
 from state import AgentState
def log_stage(title: str, subtitle: Optional[str] = None, icon: str = "🚀") -> None:
    """Print a banner announcing the current execution stage.

    The banner is a blank line, a rule, the title line and a second rule.
    Every printed line starts with ``icon``. The rule is as wide as the padded
    title, but never narrower than 20 characters.

    Args:
        title: Stage name; surrounding whitespace is stripped before display.
        subtitle: Optional extra line printed under the banner. Nothing is
            printed for it when it is ``None`` or empty.
        icon: Prefix placed at the start of every printed line.
    """
    title_line = f" {title.strip()} "
    # Build the rule once and reuse it for both the top and the bottom border.
    border = icon + " " + "═" * max(len(title_line), 20)
    print(f"\n{border}\n{icon} {title_line}\n{border}")
    if subtitle:
        print(f"{icon} {subtitle}")
def log_key_values(pairs: Iterable[tuple[str, str]]) -> None:
    """Write one indented bullet line to stdout for each ``(key, value)`` pair.

    Args:
        pairs: Iterable of two-item tuples; each is printed as ``key: value``.
    """
    for entry in pairs:
        label, content = entry
        print(f"   • {label}: {content}")
def format_plan_overview(plan: "Optional[PlannerPlan]") -> str:
    """Create a human-readable, one-line-per-step summary of a plan.

    Args:
        plan: The plan to summarise. ``None`` and plans without steps are
            both accepted.

    Returns:
        Lines of the form ``"<id>: <goal> [<tool>]"`` joined by newlines, with
        ``no tool`` shown when a step has no tool, or a fixed placeholder
        string when there is nothing to list.
    """
    if not plan or not plan.steps:
        return "(no steps – direct response)"
    return "\n".join(
        f"{step.id}: {step.goal} [{step.tool or 'no tool'}]" for step in plan.steps
    )
def display_plan(plan: PlannerPlan) -> None:
    """Dump a plan to stdout: banner, header fields, then every step.

    Args:
        plan: Plan whose task type, summary, assumptions, steps and answer
            guidelines are printed. Optional parts are shown only when truthy.
    """
    log_stage("PLANNER OUTPUT", icon="🧭")

    # Header section.
    print(f"Task type: {plan.task_type}")
    print(f"Summary: {plan.summary}")

    # The "Assumptions:" heading is omitted when the list is empty.
    if plan.assumptions:
        print("Assumptions:")
        for assumption in plan.assumptions:
            print(f"   - {assumption}")

    # The "Steps:" heading is always printed, even when there are no steps.
    print("Steps:")
    for entry in plan.steps:
        print(f"   {entry.id} → {entry.goal}")
        if entry.tool:
            print(f"      tool: {entry.tool}")
        if entry.inputs:
            print(f"      inputs: {entry.inputs}")
        print(f"      expected: {entry.expected_result}")
        if entry.on_fail:
            print(f"      on_fail: {entry.on_fail}")

    if plan.answer_guidelines:
        print(f"Answer guidelines: {plan.answer_guidelines}")
 def clean_message_history(messages):
     """
     Очищает историю сообщений от неполных циклов tool_calls/responses.